From d2738fda7380e59a8186e56aae3d9abd9a1d0bd1 Mon Sep 17 00:00:00 2001 From: Brian Gough Date: Thu, 13 Feb 2025 09:18:33 +0000 Subject: [PATCH] Merge pull request #23565 from overleaf/bg-fix-history-metadata-in-projects-collection fix history metadata in projects collection GitOrigin-RevId: 18c821ef5966a8470b24dfa60313b09bdda9707d --- package-lock.json | 24 ++ services/history-v1/package.json | 1 + .../storage/lib/chunk_store/mongo.js | 7 +- .../storage/lib/chunk_store/postgres.js | 1 + .../js/storage/back_fill_file_hash.test.mjs | 237 ++++++++++-------- .../back_fill_file_hash_fix_up.test.mjs | 182 +++++++------- .../acceptance/js/storage/chunk_store.test.js | 59 +++++ .../20250212144722_clear_history_metadata.mjs | 30 +++ 8 files changed, 342 insertions(+), 199 deletions(-) create mode 100644 services/web/migrations/20250212144722_clear_history_metadata.mjs diff --git a/package-lock.json b/package-lock.json index 80210fbee5..9d4052868d 100644 --- a/package-lock.json +++ b/package-lock.json @@ -42103,6 +42103,7 @@ "benny": "^3.7.1", "chai": "^4.3.6", "chai-as-promised": "^7.1.1", + "chai-exclude": "^2.1.1", "mocha": "^10.2.0", "node-fetch": "^2.7.0", "sinon": "^9.0.2", @@ -42122,6 +42123,19 @@ "node": ">=0.12.0" } }, + "services/history-v1/node_modules/chai-exclude": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/chai-exclude/-/chai-exclude-2.1.1.tgz", + "integrity": "sha512-IHgNmgAFOkyRPnmOtZio9UsOHQ6RnzVr2LOs+5V9urYYqjhV/ERLQapC0Eq2DmID5eDWyngAcBxNUm0ZK0QbrQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "fclone": "^1.0.11" + }, + "peerDependencies": { + "chai": ">= 4.0.0 < 5" + } + }, "services/history-v1/node_modules/check-types": { "version": "11.1.2", "resolved": "https://registry.npmjs.org/check-types/-/check-types-11.1.2.tgz", @@ -71053,6 +71067,7 @@ "bunyan": "^1.8.12", "chai": "^4.3.6", "chai-as-promised": "^7.1.1", + "chai-exclude": "^2.1.1", "check-types": "^11.1.2", "command-line-args": "^3.0.3", "config": "^1.19.0", @@ -71090,6 +71105,15 @@ "typical": "^2.6.0" } }, + "chai-exclude": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/chai-exclude/-/chai-exclude-2.1.1.tgz", + "integrity": "sha512-IHgNmgAFOkyRPnmOtZio9UsOHQ6RnzVr2LOs+5V9urYYqjhV/ERLQapC0Eq2DmID5eDWyngAcBxNUm0ZK0QbrQ==", + "dev": true, + "requires": { + "fclone": "^1.0.11" + } + }, "check-types": { "version": "11.1.2", "resolved": "https://registry.npmjs.org/check-types/-/check-types-11.1.2.tgz", diff --git a/services/history-v1/package.json b/services/history-v1/package.json index f960255497..28cd09da14 100644 --- a/services/history-v1/package.json +++ b/services/history-v1/package.json @@ -47,6 +47,7 @@ "benny": "^3.7.1", "chai": "^4.3.6", "chai-as-promised": "^7.1.1", + "chai-exclude": "^2.1.1", "mocha": "^10.2.0", "node-fetch": "^2.7.0", "sinon": "^9.0.2", diff --git a/services/history-v1/storage/lib/chunk_store/mongo.js b/services/history-v1/storage/lib/chunk_store/mongo.js index e70bff3179..e7953f8df3 100644 --- a/services/history-v1/storage/lib/chunk_store/mongo.js +++ b/services/history-v1/storage/lib/chunk_store/mongo.js @@ -151,6 +151,7 @@ async function confirmCreate(projectId, chunk, chunkId, mongoOpts = {}) { if (result.matchedCount === 0) { throw new OError('pending chunk not found', { projectId, chunkId }) } + await updateProjectRecord(projectId, chunk, mongoOpts) } /** @@ -171,7 +172,10 @@ async function updateProjectRecord(projectId, chunk, mongoOpts = {}) { }, // store the first pending change timestamp for the chunk, this will // be cleared every time a backup is completed. - $min: { 'overleaf.backup.pendingChangeAt': chunk.getEndTimestamp() }, + $min: { + 'overleaf.backup.pendingChangeAt': + chunk.getEndTimestamp() || new Date(), + }, }, mongoOpts ) @@ -191,7 +195,6 @@ async function confirmUpdate(projectId, oldChunkId, newChunk, newChunkId) { await session.withTransaction(async () => { await deleteChunk(projectId, oldChunkId, { session }) await confirmCreate(projectId, newChunk, newChunkId, { session }) - await updateProjectRecord(projectId, newChunk, { session }) }) } finally { await session.endSession() diff --git a/services/history-v1/storage/lib/chunk_store/postgres.js b/services/history-v1/storage/lib/chunk_store/postgres.js index d53c796c0b..c06f3d3459 100644 --- a/services/history-v1/storage/lib/chunk_store/postgres.js +++ b/services/history-v1/storage/lib/chunk_store/postgres.js @@ -135,6 +135,7 @@ async function confirmCreate(projectId, chunk, chunkId) { _deletePendingChunk(tx, projectId, chunkId), _insertChunk(tx, projectId, chunk, chunkId), ]) + await updateProjectRecord(projectId, chunk) }) } diff --git a/services/history-v1/test/acceptance/js/storage/back_fill_file_hash.test.mjs b/services/history-v1/test/acceptance/js/storage/back_fill_file_hash.test.mjs index 77dc65c77e..fad87b4703 100644 --- a/services/history-v1/test/acceptance/js/storage/back_fill_file_hash.test.mjs +++ b/services/history-v1/test/acceptance/js/storage/back_fill_file_hash.test.mjs @@ -12,7 +12,8 @@ import { import cleanup from './support/cleanup.js' import testProjects from '../api/support/test_projects.js' import { execFile } from 'node:child_process' -import { expect } from 'chai' +import chai, { expect } from 'chai' +import chaiExclude from 'chai-exclude' import config from 'config' import ObjectPersistor from '@overleaf/object-persistor' import { WritableBuffer } from '@overleaf/stream-utils' @@ -26,6 +27,7 @@ import { makeProjectKey, } from '../../../../storage/lib/blob_store/index.js' +chai.use(chaiExclude) const TIMEOUT = 20 * 1_000 const { deksBucket } = config.get('backupStore') @@ -660,116 +662,129 @@ describe('back_fill_file_hash script', function () { return !hasHash // only files without hash processed }) it('should update mongo', async function () { - expect(await projectsCollection.find({}).toArray()).to.deep.equal([ - { - _id: projectId0, - rootFolder: [ - { - fileRefs: [ - { _id: fileId8, hash: gitBlobHash(fileId8) }, - { _id: fileId0, hash: gitBlobHash(fileId0) }, - { _id: fileId6, hash: gitBlobHash(fileId6) }, - { _id: fileId7, hash: hashFile7 }, - ], - folders: [{ fileRefs: [], folders: [] }], - }, - ], - overleaf: { history: { id: historyId0 } }, - }, - { - _id: projectId1, - rootFolder: [ - { - fileRefs: [{ _id: fileId1, hash: gitBlobHash(fileId1) }], - folders: [ - { - fileRefs: [], - folders: [ - { - fileRefs: [{ _id: fileId1, hash: gitBlobHash(fileId1) }], - folders: [], - }, - ], - }, - ], - }, - ], - overleaf: { history: { id: historyId1 } }, - }, - { - _id: projectId2, - rootFolder: [ - { - fileRefs: [], - folders: [ - { - fileRefs: [], - folders: [ - { - fileRefs: [{ _id: fileId2, hash: gitBlobHash(fileId2) }], - folders: [], - }, - ], - }, - ], - }, - ], - overleaf: { history: { id: historyId2 } }, - }, - { - _id: projectId3, - rootFolder: [ - { - fileRefs: [], - folders: [ - { - fileRefs: [], - folders: [ - { - fileRefs: [{ _id: fileId3, hash: gitBlobHash(fileId3) }], - folders: [], - }, - ], - }, - ], - }, - ], - overleaf: { history: { id: historyId3 } }, - }, - { - _id: projectIdNoHistory, - rootFolder: [{ fileRefs: [], folders: [] }], - overleaf: { history: { conversionFailed: true } }, - }, - { - _id: projectIdNoOverleaf, - rootFolder: [{ fileRefs: [], folders: [] }], - }, - { - _id: projectIdBadFileTree0, - overleaf: { history: { id: historyIdBadFileTree0 } }, - }, - { - _id: projectIdBadFileTree1, - rootFolder: [], - overleaf: { history: { id: historyIdBadFileTree1 } }, - }, - { - _id: projectIdBadFileTree2, - rootFolder: [{ fileRefs: [{ _id: null }] }], - overleaf: { history: { id: historyIdBadFileTree2 } }, - }, - { - _id: projectIdBadFileTree3, - rootFolder: [ - { - folders: [null, { folders: {}, fileRefs: 13 }], - fileRefs: [{ _id: fileId9, hash: gitBlobHash(fileId9) }], - }, - ], - overleaf: { history: { id: historyIdBadFileTree3 } }, - }, - ]) + expect(await projectsCollection.find({}).toArray()) + .excludingEvery([ + 'currentEndTimestamp', + 'currentEndVersion', + 'updatedAt', + 'backup', + ]) + .to.deep.equal([ + { + _id: projectId0, + rootFolder: [ + { + fileRefs: [ + { _id: fileId8, hash: gitBlobHash(fileId8) }, + { _id: fileId0, hash: gitBlobHash(fileId0) }, + { _id: fileId6, hash: gitBlobHash(fileId6) }, + { _id: fileId7, hash: hashFile7 }, + ], + folders: [{ fileRefs: [], folders: [] }], + }, + ], + overleaf: { history: { id: historyId0 } }, + }, + { + _id: projectId1, + rootFolder: [ + { + fileRefs: [{ _id: fileId1, hash: gitBlobHash(fileId1) }], + folders: [ + { + fileRefs: [], + folders: [ + { + fileRefs: [ + { _id: fileId1, hash: gitBlobHash(fileId1) }, + ], + folders: [], + }, + ], + }, + ], + }, + ], + overleaf: { history: { id: historyId1 } }, + }, + { + _id: projectId2, + rootFolder: [ + { + fileRefs: [], + folders: [ + { + fileRefs: [], + folders: [ + { + fileRefs: [ + { _id: fileId2, hash: gitBlobHash(fileId2) }, + ], + folders: [], + }, + ], + }, + ], + }, + ], + overleaf: { history: { id: historyId2 } }, + }, + { + _id: projectId3, + rootFolder: [ + { + fileRefs: [], + folders: [ + { + fileRefs: [], + folders: [ + { + fileRefs: [ + { _id: fileId3, hash: gitBlobHash(fileId3) }, + ], + folders: [], + }, + ], + }, + ], + }, + ], + overleaf: { history: { id: historyId3 } }, + }, + { + _id: projectIdNoHistory, + rootFolder: [{ fileRefs: [], folders: [] }], + overleaf: { history: { conversionFailed: true } }, + }, + { + _id: projectIdNoOverleaf, + rootFolder: [{ fileRefs: [], folders: [] }], + }, + { + _id: projectIdBadFileTree0, + overleaf: { history: { id: historyIdBadFileTree0 } }, + }, + { + _id: projectIdBadFileTree1, + rootFolder: [], + overleaf: { history: { id: historyIdBadFileTree1 } }, + }, + { + _id: projectIdBadFileTree2, + rootFolder: [{ fileRefs: [{ _id: null }] }], + overleaf: { history: { id: historyIdBadFileTree2 } }, + }, + { + _id: projectIdBadFileTree3, + rootFolder: [ + { + folders: [null, { folders: {}, fileRefs: 13 }], + fileRefs: [{ _id: fileId9, hash: gitBlobHash(fileId9) }], + }, + ], + overleaf: { history: { id: historyIdBadFileTree3 } }, + }, + ]) expect(await deletedProjectsCollection.find({}).toArray()).to.deep.equal([ { _id: deleteProjectsRecordId0, diff --git a/services/history-v1/test/acceptance/js/storage/back_fill_file_hash_fix_up.test.mjs b/services/history-v1/test/acceptance/js/storage/back_fill_file_hash_fix_up.test.mjs index ac37570bce..ceafa24c3a 100644 --- a/services/history-v1/test/acceptance/js/storage/back_fill_file_hash_fix_up.test.mjs +++ b/services/history-v1/test/acceptance/js/storage/back_fill_file_hash_fix_up.test.mjs @@ -8,7 +8,8 @@ import { backedUpBlobs, blobs, db } from '../../../../storage/lib/mongodb.js' import cleanup from './support/cleanup.js' import testProjects from '../api/support/test_projects.js' import { execFile } from 'node:child_process' -import { expect } from 'chai' +import chai, { expect } from 'chai' +import chaiExclude from 'chai-exclude' import config from 'config' import { WritableBuffer } from '@overleaf/stream-utils' import { @@ -22,6 +23,8 @@ import { } from '../../../../storage/lib/blob_store/index.js' import ObjectPersistor from '@overleaf/object-persistor' +chai.use(chaiExclude) + const TIMEOUT = 20 * 1_000 const { deksBucket } = config.get('backupStore') @@ -604,91 +607,98 @@ describe('back_fill_file_hash_fix_up script', function () { expect(line).to.include(gitBlobHash(fileIdBlobExistsInGCSCorrupted)) }) it('should update mongo', async function () { - expect(await projectsCollection.find({}).toArray()).to.deep.equal([ - { - _id: projectId0, - rootFolder: [ - { - fileRefs: [ - // Removed - // { _id: fileIdMissing0 }, - // Removed - // { _id: fileIdMissing2 }, - // Added hash - { - _id: fileIdHashMissing0, - hash: gitBlobHash(fileIdHashMissing0), - }, - // Added hash - { - _id: fileIdHashMissing1, - hash: gitBlobHash(fileIdHashMissing1), - }, - // No change, should warn about the find. - { - _id: fileIdWithDifferentHashFound, - hash: gitBlobHash(fileIdInGoodState), - }, - // No change, should warn about the need to restore. - { - _id: fileIdWithDifferentHashRestore, - hash: gitBlobHash(fileIdMissing0), - }, - ], - folders: [ - { - docs: [], - }, - null, - { - fileRefs: [ - null, - // No change - { - _id: fileIdInGoodState, - hash: gitBlobHash(fileIdInGoodState), - }, - // Updated hash - { - _id: fileIdWithDifferentHashNotFound0, - hash: gitBlobHash(fileIdWithDifferentHashNotFound0), - }, - // Updated hash - { - _id: fileIdRestoreFromFilestore0, - hash: gitBlobHash(fileIdRestoreFromFilestore0), - }, - // Added hash - { - _id: fileIdRestoreFromFilestore1, - hash: gitBlobHash(fileIdRestoreFromFilestore1), - }, - // No change, blob created - { - _id: fileIdBlobExistsInGCS0, - hash: gitBlobHash(fileIdBlobExistsInGCS0), - }, - // No change, flagged - { - _id: fileIdBlobExistsInGCSCorrupted, - hash: gitBlobHash(fileIdBlobExistsInGCSCorrupted), - }, - // Added hash - { - _id: fileIdBlobExistsInGCS1, - hash: gitBlobHash(fileIdBlobExistsInGCS1), - }, - ], - folders: [], - }, - ], - }, - ], - overleaf: { history: { id: historyId0 } }, - // Incremented when removing file/updating hash - version: 8, - }, - ]) + expect(await projectsCollection.find({}).toArray()) + .excludingEvery([ + 'currentEndTimestamp', + 'currentEndVersion', + 'updatedAt', + 'backup', + ]) + .to.deep.equal([ + { + _id: projectId0, + rootFolder: [ + { + fileRefs: [ + // Removed + // { _id: fileIdMissing0 }, + // Removed + // { _id: fileIdMissing2 }, + // Added hash + { + _id: fileIdHashMissing0, + hash: gitBlobHash(fileIdHashMissing0), + }, + // Added hash + { + _id: fileIdHashMissing1, + hash: gitBlobHash(fileIdHashMissing1), + }, + // No change, should warn about the find. + { + _id: fileIdWithDifferentHashFound, + hash: gitBlobHash(fileIdInGoodState), + }, + // No change, should warn about the need to restore. + { + _id: fileIdWithDifferentHashRestore, + hash: gitBlobHash(fileIdMissing0), + }, + ], + folders: [ + { + docs: [], + }, + null, + { + fileRefs: [ + null, + // No change + { + _id: fileIdInGoodState, + hash: gitBlobHash(fileIdInGoodState), + }, + // Updated hash + { + _id: fileIdWithDifferentHashNotFound0, + hash: gitBlobHash(fileIdWithDifferentHashNotFound0), + }, + // Updated hash + { + _id: fileIdRestoreFromFilestore0, + hash: gitBlobHash(fileIdRestoreFromFilestore0), + }, + // Added hash + { + _id: fileIdRestoreFromFilestore1, + hash: gitBlobHash(fileIdRestoreFromFilestore1), + }, + // No change, blob created + { + _id: fileIdBlobExistsInGCS0, + hash: gitBlobHash(fileIdBlobExistsInGCS0), + }, + // No change, flagged + { + _id: fileIdBlobExistsInGCSCorrupted, + hash: gitBlobHash(fileIdBlobExistsInGCSCorrupted), + }, + // Added hash + { + _id: fileIdBlobExistsInGCS1, + hash: gitBlobHash(fileIdBlobExistsInGCS1), + }, + ], + folders: [], + }, + ], + }, + ], + overleaf: { history: { id: historyId0 } }, + // Incremented when removing file/updating hash + version: 8, + }, + ]) expect(await deletedProjectsCollection.find({}).toArray()).to.deep.equal([ { _id: deleteProjectsRecordId0, diff --git a/services/history-v1/test/acceptance/js/storage/chunk_store.test.js b/services/history-v1/test/acceptance/js/storage/chunk_store.test.js index 68b721e1f7..676e9a52ea 100644 --- a/services/history-v1/test/acceptance/js/storage/chunk_store.test.js +++ b/services/history-v1/test/acceptance/js/storage/chunk_store.test.js @@ -5,6 +5,7 @@ const fixtures = require('./support/fixtures') const { expect } = require('chai') const sinon = require('sinon') const { ObjectId } = require('mongodb') +const { projects } = require('../../../../storage/lib/mongodb') const { Chunk, @@ -27,20 +28,27 @@ describe('chunkStore', function () { { description: 'Postgres backend', createProject: chunkStore.initializeProject, + idMapping: id => parseInt(id, 10), }, { description: 'Mongo backend', createProject: () => chunkStore.initializeProject(new ObjectId().toString()), + idMapping: id => id, }, ] for (const scenario of scenarios) { describe(scenario.description, function () { let projectId + let projectRecord beforeEach(async function () { projectId = await scenario.createProject() + // create a record in the mongo projects collection + projectRecord = await projects.insertOne({ + overleaf: { history: { id: scenario.idMapping(projectId) } }, + }) }) it('loads empty latest chunk for a new project', async function () { @@ -114,6 +122,19 @@ describe('chunkStore', function () { expect(editFile).to.be.an.instanceof(EditFileOperation) expect(editFile.getPathname()).to.equal(testPathname) }) + + it('updates the project record with the current version and timestamps', async function () { + const project = await projects.findOne({ + _id: new ObjectId(projectRecord.insertedId), + }) + expect(project.overleaf.history.currentEndVersion).to.equal(2) + expect(project.overleaf.history.currentEndTimestamp).to.deep.equal( + lastChangeTimestamp + ) + expect(project.overleaf.backup.pendingChangeAt).to.deep.equal( + lastChangeTimestamp + ) + }) }) describe('multiple chunks', async function () { @@ -232,6 +253,25 @@ describe('chunkStore', function () { expect(chunk).to.deep.equal(thirdChunk) }) + it('updates the project record to match the last chunk', async function () { + const project = await projects.findOne({ + _id: new ObjectId(projectRecord.insertedId), + }) + expect(project.overleaf.history.currentEndVersion).to.equal(5) + expect(project.overleaf.history.currentEndTimestamp).to.deep.equal( + thirdChunkTimestamp + ) + }) + + it('updates the pending change timestamp to match the first chunk', async function () { + const project = await projects.findOne({ + _id: new ObjectId(projectRecord.insertedId), + }) + expect(project.overleaf.backup.pendingChangeAt).to.deep.equal( + firstChunkTimestamp + ) + }) + describe('after updating the last chunk', function () { let newChunk @@ -266,6 +306,25 @@ describe('chunkStore', function () { ) expect(chunk).to.deep.equal(newChunk) }) + + it('updates the project record to match the latest version and timestamp', async function () { + const project = await projects.findOne({ + _id: new ObjectId(projectRecord.insertedId), + }) + expect(project.overleaf.history.currentEndVersion).to.equal(6) + expect(project.overleaf.history.currentEndTimestamp).to.deep.equal( + thirdChunkTimestamp + ) + }) + + it('does not modify the existing pending change timestamp in the project record', async function () { + const project = await projects.findOne({ + _id: new ObjectId(projectRecord.insertedId), + }) + expect(project.overleaf.backup.pendingChangeAt).to.deep.equal( + firstChunkTimestamp + ) + }) }) }) diff --git a/services/web/migrations/20250212144722_clear_history_metadata.mjs b/services/web/migrations/20250212144722_clear_history_metadata.mjs new file mode 100644 index 0000000000..025c65b550 --- /dev/null +++ b/services/web/migrations/20250212144722_clear_history_metadata.mjs @@ -0,0 +1,30 @@ +/* eslint-disable no-unused-vars */ + +import { batchedUpdate } from '@overleaf/mongo-utils/batchedUpdate.js' + +const tags = ['saas'] + +const migrate = async client => { + const { db } = client + + await batchedUpdate( + db.projects, + { 'overleaf.history.currentEndVersion': { $exists: true } }, + { + $unset: { + 'overleaf.history.currentEndVersion': true, + 'overleaf.history.currentEndTimestamp': true, + 'overleaf.history.updatedAt': true, + 'overleaf.backup.pendingChangeAt': true, + }, + } + ) +} + +const rollback = async client => {} + +export default { + tags, + migrate, + rollback, +}