From 4996f8bbcd6df543c509a51e77cb1d0c4ed3d34a Mon Sep 17 00:00:00 2001 From: Jakob Ackermann Date: Tue, 8 Jun 2021 12:59:13 +0100 Subject: [PATCH 1/2] [DocArchiveManager] (un-)archive docs in batches and let db filter docs Also drop the broken 404 logic after switching to db-side filtering. --- services/docstore/app/js/DocArchiveManager.js | 55 +++++++++++-------- services/docstore/app/js/MongoManager.js | 16 ++++-- services/docstore/config/settings.defaults.js | 2 + .../test/unit/js/DocArchiveManagerTests.js | 30 +++++----- 4 files changed, 59 insertions(+), 44 deletions(-) diff --git a/services/docstore/app/js/DocArchiveManager.js b/services/docstore/app/js/DocArchiveManager.js index 13bf8db6cd..7b4bf4611f 100644 --- a/services/docstore/app/js/DocArchiveManager.js +++ b/services/docstore/app/js/DocArchiveManager.js @@ -10,6 +10,8 @@ const PersistorManager = require('./PersistorManager') const pMap = require('p-map') const PARALLEL_JOBS = settings.parallelArchiveJobs +const ARCHIVE_BATCH_SIZE = settings.archiveBatchSize +const UN_ARCHIVE_BATCH_SIZE = settings.unArchiveBatchSize const DESTROY_BATCH_SIZE = settings.destroyBatchSize const DESTROY_RETRY_COUNT = settings.destroyRetryCount @@ -33,20 +35,19 @@ module.exports = { } async function archiveAllDocs(projectId) { - const docs = await MongoManager.getProjectsDocs( - projectId, - { include_deleted: true }, - { lines: true, ranges: true, rev: true, inS3: true } - ) + while (true) { + const docs = await MongoManager.getNNonArchivedProjectDocs( + projectId, + ARCHIVE_BATCH_SIZE + ) + if (!docs || docs.length === 0) { + break + } - if (!docs) { - throw new Errors.NotFoundError(`No docs for project ${projectId}`) + await pMap(docs, (doc) => archiveDoc(projectId, doc), { + concurrency: PARALLEL_JOBS + }) } - - const docsToArchive = docs.filter((doc) => !doc.inS3) - await pMap(docsToArchive, (doc) => archiveDoc(projectId, doc), { - concurrency: PARALLEL_JOBS - }) } async function archiveDocById(projectId, docId) { @@ -102,18 +103,26 @@ async function archiveDoc(projectId, doc) { } async function unArchiveAllDocs(projectId) { - let docs - if (settings.docstore.keepSoftDeletedDocsArchived) { - docs = await MongoManager.getNonDeletedArchivedProjectDocs(projectId) - } else { - docs = await MongoManager.getArchivedProjectDocs(projectId) + while (true) { + let docs + if (settings.docstore.keepSoftDeletedDocsArchived) { + docs = await MongoManager.getNNonDeletedArchivedProjectDocs( + projectId, + UN_ARCHIVE_BATCH_SIZE + ) + } else { + docs = await MongoManager.getNArchivedProjectDocs( + projectId, + UN_ARCHIVE_BATCH_SIZE + ) + } + if (!docs || docs.length === 0) { + break + } + await pMap(docs, (doc) => unarchiveDoc(projectId, doc._id), { + concurrency: PARALLEL_JOBS + }) } - if (!docs) { - throw new Errors.NotFoundError(`No docs for project ${projectId}`) - } - await pMap(docs, (doc) => unarchiveDoc(projectId, doc._id), { - concurrency: PARALLEL_JOBS - }) } async function unarchiveDoc(projectId, docId) { diff --git a/services/docstore/app/js/MongoManager.js b/services/docstore/app/js/MongoManager.js index 7bfdf82aef..e62f7e6eaf 100644 --- a/services/docstore/app/js/MongoManager.js +++ b/services/docstore/app/js/MongoManager.js @@ -64,21 +64,29 @@ module.exports = MongoManager = { db.docs.find(query, queryOptions).toArray(callback) }, - getArchivedProjectDocs(project_id, callback) { + getNArchivedProjectDocs(project_id, n, callback) { const query = { project_id: ObjectId(project_id.toString()), inS3: true } - db.docs.find(query).toArray(callback) + db.docs.find(query, { projection: { _id: 1 }, limit: n }).toArray(callback) }, - getNonDeletedArchivedProjectDocs(project_id, callback) { + getNNonArchivedProjectDocs(project_id, n, callback) { + const query = { + project_id: ObjectId(project_id.toString()), + inS3: { $ne: true } + } + db.docs.find(query, { limit: n }).toArray(callback) + }, + + getNNonDeletedArchivedProjectDocs(project_id, n, callback) { const query = { project_id: ObjectId(project_id.toString()), deleted: { $ne: true }, inS3: true } - db.docs.find(query).toArray(callback) + db.docs.find(query, { projection: { _id: 1 }, limit: n }).toArray(callback) }, upsertIntoDocCollection(project_id, doc_id, updates, callback) { diff --git a/services/docstore/config/settings.defaults.js b/services/docstore/config/settings.defaults.js index 8d3ecdc9f1..0f1c1ab42b 100644 --- a/services/docstore/config/settings.defaults.js +++ b/services/docstore/config/settings.defaults.js @@ -37,6 +37,8 @@ const Settings = { max_doc_length: parseInt(process.env.MAX_DOC_LENGTH) || 2 * 1024 * 1024, // 2mb + archiveBatchSize: parseInt(process.env.ARCHIVE_BATCH_SIZE, 10) || 50, + unArchiveBatchSize: parseInt(process.env.UN_ARCHIVE_BATCH_SIZE, 10) || 50, destroyBatchSize: parseInt(process.env.DESTROY_BATCH_SIZE, 10) || 2000, destroyRetryCount: parseInt(process.env.DESTROY_RETRY_COUNT || '3', 10), parallelArchiveJobs: parseInt(process.env.PARALLEL_ARCHIVE_JOBS, 10) || 5 diff --git a/services/docstore/test/unit/js/DocArchiveManagerTests.js b/services/docstore/test/unit/js/DocArchiveManagerTests.js index a324cfe916..e902622db1 100644 --- a/services/docstore/test/unit/js/DocArchiveManagerTests.js +++ b/services/docstore/test/unit/js/DocArchiveManagerTests.js @@ -116,12 +116,24 @@ describe('DocArchiveManager', function () { deleteObject: sinon.stub().resolves() } + const getNNonArchivedProjectDocs = sinon.stub() + getNNonArchivedProjectDocs + .onCall(0) + .resolves(mongoDocs.filter((doc) => !doc.inS3)) + getNNonArchivedProjectDocs.onCall(1).resolves([]) + + const getNArchivedProjectDocs = sinon.stub() + getNArchivedProjectDocs.onCall(0).resolves(archivedDocs) + getNArchivedProjectDocs.onCall(1).resolves([]) + MongoManager = { promises: { markDocAsArchived: sinon.stub().resolves(), upsertIntoDocCollection: sinon.stub().resolves(), getProjectsDocs: sinon.stub().resolves(mongoDocs), - getArchivedProjectDocs: sinon.stub().resolves(archivedDocs), + getNNonDeletedArchivedProjectDocs: getNArchivedProjectDocs, + getNNonArchivedProjectDocs, + getNArchivedProjectDocs, findDoc: sinon.stub().rejects(new Errors.NotFoundError()), destroyDoc: sinon.stub().resolves() } @@ -519,14 +531,6 @@ describe('DocArchiveManager', function () { MongoManager.promises.markDocAsArchived ).not.to.have.been.calledWith(mongoDocs[3]._id) }) - - it('should return error if the project has no docs', async function () { - MongoManager.promises.getProjectsDocs.resolves(null) - - await expect( - DocArchiveManager.promises.archiveAllDocs(projectId) - ).to.eventually.be.rejected.and.be.instanceof(Errors.NotFoundError) - }) }) describe('unArchiveAllDocs', function () { @@ -545,14 +549,6 @@ describe('DocArchiveManager', function () { ) } }) - - it('should return error if the project has no docs', async function () { - MongoManager.promises.getArchivedProjectDocs.resolves(null) - - await expect( - DocArchiveManager.promises.unArchiveAllDocs(projectId) - ).to.eventually.be.rejected.and.be.instanceof(Errors.NotFoundError) - }) }) describe('destroyAllDocs', function () { From d69c29e4dcef97774fe955358612b2ec8504e31a Mon Sep 17 00:00:00 2001 From: Jakob Ackermann Date: Tue, 8 Jun 2021 19:29:10 +0100 Subject: [PATCH 2/2] [misc] apply review feedback on naming functions and parameters Co-Authored-By: Simon Detheridge --- services/docstore/app/js/DocArchiveManager.js | 6 +++--- services/docstore/app/js/MongoManager.js | 16 ++++++++++------ .../test/unit/js/DocArchiveManagerTests.js | 18 +++++++++--------- 3 files changed, 22 insertions(+), 18 deletions(-) diff --git a/services/docstore/app/js/DocArchiveManager.js b/services/docstore/app/js/DocArchiveManager.js index 7b4bf4611f..7a062bc739 100644 --- a/services/docstore/app/js/DocArchiveManager.js +++ b/services/docstore/app/js/DocArchiveManager.js @@ -36,7 +36,7 @@ module.exports = { async function archiveAllDocs(projectId) { while (true) { - const docs = await MongoManager.getNNonArchivedProjectDocs( + const docs = await MongoManager.getNonArchivedProjectDocs( projectId, ARCHIVE_BATCH_SIZE ) @@ -106,12 +106,12 @@ async function unArchiveAllDocs(projectId) { while (true) { let docs if (settings.docstore.keepSoftDeletedDocsArchived) { - docs = await MongoManager.getNNonDeletedArchivedProjectDocs( + docs = await MongoManager.getNonDeletedArchivedProjectDocs( projectId, UN_ARCHIVE_BATCH_SIZE ) } else { - docs = await MongoManager.getNArchivedProjectDocs( + docs = await MongoManager.getArchivedProjectDocs( projectId, UN_ARCHIVE_BATCH_SIZE ) diff --git a/services/docstore/app/js/MongoManager.js b/services/docstore/app/js/MongoManager.js index e62f7e6eaf..385245c9e3 100644 --- a/services/docstore/app/js/MongoManager.js +++ b/services/docstore/app/js/MongoManager.js @@ -64,29 +64,33 @@ module.exports = MongoManager = { db.docs.find(query, queryOptions).toArray(callback) }, - getNArchivedProjectDocs(project_id, n, callback) { + getArchivedProjectDocs(project_id, maxResults, callback) { const query = { project_id: ObjectId(project_id.toString()), inS3: true } - db.docs.find(query, { projection: { _id: 1 }, limit: n }).toArray(callback) + db.docs + .find(query, { projection: { _id: 1 }, limit: maxResults }) + .toArray(callback) }, - getNNonArchivedProjectDocs(project_id, n, callback) { + getNonArchivedProjectDocs(project_id, maxResults, callback) { const query = { project_id: ObjectId(project_id.toString()), inS3: { $ne: true } } - db.docs.find(query, { limit: n }).toArray(callback) + db.docs.find(query, { limit: maxResults }).toArray(callback) }, - getNNonDeletedArchivedProjectDocs(project_id, n, callback) { + getNonDeletedArchivedProjectDocs(project_id, maxResults, callback) { const query = { project_id: ObjectId(project_id.toString()), deleted: { $ne: true }, inS3: true } - db.docs.find(query, { projection: { _id: 1 }, limit: n }).toArray(callback) + db.docs + .find(query, { projection: { _id: 1 }, limit: maxResults }) + .toArray(callback) }, upsertIntoDocCollection(project_id, doc_id, updates, callback) { diff --git a/services/docstore/test/unit/js/DocArchiveManagerTests.js b/services/docstore/test/unit/js/DocArchiveManagerTests.js index e902622db1..41fe75a8df 100644 --- a/services/docstore/test/unit/js/DocArchiveManagerTests.js +++ b/services/docstore/test/unit/js/DocArchiveManagerTests.js @@ -116,24 +116,24 @@ describe('DocArchiveManager', function () { deleteObject: sinon.stub().resolves() } - const getNNonArchivedProjectDocs = sinon.stub() - getNNonArchivedProjectDocs + const getNonArchivedProjectDocs = sinon.stub() + getNonArchivedProjectDocs .onCall(0) .resolves(mongoDocs.filter((doc) => !doc.inS3)) - getNNonArchivedProjectDocs.onCall(1).resolves([]) + getNonArchivedProjectDocs.onCall(1).resolves([]) - const getNArchivedProjectDocs = sinon.stub() - getNArchivedProjectDocs.onCall(0).resolves(archivedDocs) - getNArchivedProjectDocs.onCall(1).resolves([]) + const getArchivedProjectDocs = sinon.stub() + getArchivedProjectDocs.onCall(0).resolves(archivedDocs) + getArchivedProjectDocs.onCall(1).resolves([]) MongoManager = { promises: { markDocAsArchived: sinon.stub().resolves(), upsertIntoDocCollection: sinon.stub().resolves(), getProjectsDocs: sinon.stub().resolves(mongoDocs), - getNNonDeletedArchivedProjectDocs: getNArchivedProjectDocs, - getNNonArchivedProjectDocs, - getNArchivedProjectDocs, + getNonDeletedArchivedProjectDocs: getArchivedProjectDocs, + getNonArchivedProjectDocs, + getArchivedProjectDocs, findDoc: sinon.stub().rejects(new Errors.NotFoundError()), destroyDoc: sinon.stub().resolves() }