From 9ede2d5e786a0069e2f16aa8abf277131428cb7c Mon Sep 17 00:00:00 2001 From: Eric Mc Sween Date: Wed, 22 Feb 2023 10:09:18 -0500 Subject: [PATCH] Merge pull request #11890 from overleaf/em-fix-deleted-chunks-indexes Add a partial index for pending chunks GitOrigin-RevId: ec0705d1de7ffacb2cb88a8e5e1ff9f05c5acf88 --- .../storage/lib/chunk_store/mongo.js | 52 +++++++++++++------ .../storage/chunk_store_mongo_backend.test.js | 52 +++++++++++++++++++ ...205311_fix_deleted_history_chunks_index.js | 24 +++++++++ 3 files changed, 112 insertions(+), 16 deletions(-) create mode 100644 services/history-v1/test/acceptance/js/storage/chunk_store_mongo_backend.test.js create mode 100644 services/web/migrations/20230217205311_fix_deleted_history_chunks_index.js diff --git a/services/history-v1/storage/lib/chunk_store/mongo.js b/services/history-v1/storage/lib/chunk_store/mongo.js index 0e58e7a2a4..4025e866c7 100644 --- a/services/history-v1/storage/lib/chunk_store/mongo.js +++ b/services/history-v1/storage/lib/chunk_store/mongo.js @@ -197,29 +197,49 @@ async function deleteProjectChunks(projectId) { */ async function getOldChunksBatch(count, minAgeSecs) { const maxUpdatedAt = new Date(Date.now() - minAgeSecs * 1000) - const cursor = mongodb.chunks.find( - { - state: { $in: ['deleted', 'pending'] }, - updatedAt: { $lt: maxUpdatedAt }, - }, - { - limit: count, - projection: { _id: 1, projectId: 1 }, + const batch = [] + + // We need to fetch one state at a time to take advantage of the partial + // indexes on the chunks collection. + // + // Mongo 6.0 allows partial indexes that use the $in operator. When we reach + // that Mongo version, we can create a partial index on both the deleted and + // pending states and simplify this logic a bit. + for (const state of ['deleted', 'pending']) { + if (count === 0) { + // There's no more space in the batch + break } - ) - return await cursor - .map(record => ({ - chunkId: record._id, - projectId: record.projectId, - })) - .toArray() + + const cursor = mongodb.chunks + .find( + { state, updatedAt: { $lt: maxUpdatedAt } }, + { + limit: count, + projection: { _id: 1, projectId: 1 }, + } + ) + .map(record => ({ + chunkId: record._id.toString(), + projectId: record.projectId.toString(), + })) + + for await (const record of cursor) { + batch.push(record) + count -= 1 + } + } + return batch } /** * Delete a batch of old chunks from the database */ async function deleteOldChunks(chunkIds) { - await mongodb.chunks.deleteMany({ _id: { $in: chunkIds }, state: 'deleted' }) + await mongodb.chunks.deleteMany({ + _id: { $in: chunkIds.map(ObjectId) }, + state: { $in: ['deleted', 'pending'] }, + }) } /** diff --git a/services/history-v1/test/acceptance/js/storage/chunk_store_mongo_backend.test.js b/services/history-v1/test/acceptance/js/storage/chunk_store_mongo_backend.test.js new file mode 100644 index 0000000000..24ffe69fd4 --- /dev/null +++ b/services/history-v1/test/acceptance/js/storage/chunk_store_mongo_backend.test.js @@ -0,0 +1,52 @@ +const { expect } = require('chai') +const { ObjectId } = require('mongodb') +const { Chunk, Snapshot, History } = require('overleaf-editor-core') +const cleanup = require('./support/cleanup') +const backend = require('../../../../storage/lib/chunk_store/mongo') + +describe('chunk store Mongo backend', function () { + beforeEach(cleanup.everything) + + describe('garbage collection', function () { + it('deletes pending and deleted chunks', async function () { + const projectId = ObjectId().toString() + + // Create a pending chunk + const pendingChunk = makeChunk([], 0) + const pendingChunkId = await backend.insertPendingChunk( + projectId, + pendingChunk + ) + + // Create a deleted chunk + const deletedChunk = makeChunk([], 0) + const deletedChunkId = await backend.insertPendingChunk( + projectId, + deletedChunk + ) + await backend.confirmCreate(projectId, deletedChunk, deletedChunkId) + await backend.deleteChunk(projectId, deletedChunkId) + + // Check that both chunks are ready to be deleted + let oldChunks = await backend.getOldChunksBatch(100, 0) + expect(oldChunks).to.have.deep.members([ + { projectId, chunkId: pendingChunkId }, + { projectId, chunkId: deletedChunkId }, + ]) + + // Delete old chunks + await backend.deleteOldChunks(oldChunks.map(chunk => chunk.chunkId)) + + // Check that there are no more chunks to be deleted + oldChunks = await backend.getOldChunksBatch(100, 0) + expect(oldChunks).to.deep.equal([]) + }) + }) +}) + +function makeChunk(changes, versionNumber) { + const snapshot = Snapshot.fromRaw({ files: {} }) + const history = new History(snapshot, []) + const chunk = new Chunk(history, versionNumber) + return chunk +} diff --git a/services/web/migrations/20230217205311_fix_deleted_history_chunks_index.js b/services/web/migrations/20230217205311_fix_deleted_history_chunks_index.js new file mode 100644 index 0000000000..f13eab5b53 --- /dev/null +++ b/services/web/migrations/20230217205311_fix_deleted_history_chunks_index.js @@ -0,0 +1,24 @@ +const Helpers = require('./lib/helpers') + +exports.tags = ['server-ce', 'server-pro', 'saas'] + +const indexes = [ + { + // The { state: -1 } sort order works around a restriction of Mongo 4.4 + // where it doesn't allow multiple indexes with the same keys and different + // options. The restriction has been lifted in Mongo 5.0 + // + // See https://www.mongodb.com/docs/manual/core/index-partial/#restrictions + key: { state: -1 }, + name: 'state_pending', + partialFilterExpression: { state: 'pending' }, + }, +] + +exports.migrate = async ({ db }) => { + await Helpers.addIndexesToCollection(db.projectHistoryChunks, indexes) +} + +exports.rollback = async ({ db }) => { + await Helpers.dropIndexesFromCollection(db.projectHistoryChunks, indexes) +}