Merge pull request #23565 from overleaf/bg-fix-history-metadata-in-projects-collection

fix history metadata in projects collection

GitOrigin-RevId: 18c821ef5966a8470b24dfa60313b09bdda9707d
This commit is contained in:
Brian Gough
2025-02-13 09:18:33 +00:00
committed by Copybot
parent 6bea6a887d
commit d2738fda73
8 changed files with 342 additions and 199 deletions

24
package-lock.json generated
View File

@@ -42103,6 +42103,7 @@
"benny": "^3.7.1",
"chai": "^4.3.6",
"chai-as-promised": "^7.1.1",
"chai-exclude": "^2.1.1",
"mocha": "^10.2.0",
"node-fetch": "^2.7.0",
"sinon": "^9.0.2",
@@ -42122,6 +42123,19 @@
"node": ">=0.12.0"
}
},
"services/history-v1/node_modules/chai-exclude": {
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/chai-exclude/-/chai-exclude-2.1.1.tgz",
"integrity": "sha512-IHgNmgAFOkyRPnmOtZio9UsOHQ6RnzVr2LOs+5V9urYYqjhV/ERLQapC0Eq2DmID5eDWyngAcBxNUm0ZK0QbrQ==",
"dev": true,
"license": "MIT",
"dependencies": {
"fclone": "^1.0.11"
},
"peerDependencies": {
"chai": ">= 4.0.0 < 5"
}
},
"services/history-v1/node_modules/check-types": {
"version": "11.1.2",
"resolved": "https://registry.npmjs.org/check-types/-/check-types-11.1.2.tgz",
@@ -71053,6 +71067,7 @@
"bunyan": "^1.8.12",
"chai": "^4.3.6",
"chai-as-promised": "^7.1.1",
"chai-exclude": "^2.1.1",
"check-types": "^11.1.2",
"command-line-args": "^3.0.3",
"config": "^1.19.0",
@@ -71090,6 +71105,15 @@
"typical": "^2.6.0"
}
},
"chai-exclude": {
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/chai-exclude/-/chai-exclude-2.1.1.tgz",
"integrity": "sha512-IHgNmgAFOkyRPnmOtZio9UsOHQ6RnzVr2LOs+5V9urYYqjhV/ERLQapC0Eq2DmID5eDWyngAcBxNUm0ZK0QbrQ==",
"dev": true,
"requires": {
"fclone": "^1.0.11"
}
},
"check-types": {
"version": "11.1.2",
"resolved": "https://registry.npmjs.org/check-types/-/check-types-11.1.2.tgz",

View File

@@ -47,6 +47,7 @@
"benny": "^3.7.1",
"chai": "^4.3.6",
"chai-as-promised": "^7.1.1",
"chai-exclude": "^2.1.1",
"mocha": "^10.2.0",
"node-fetch": "^2.7.0",
"sinon": "^9.0.2",

View File

@@ -151,6 +151,7 @@ async function confirmCreate(projectId, chunk, chunkId, mongoOpts = {}) {
if (result.matchedCount === 0) {
throw new OError('pending chunk not found', { projectId, chunkId })
}
await updateProjectRecord(projectId, chunk, mongoOpts)
}
/**
@@ -171,7 +172,10 @@ async function updateProjectRecord(projectId, chunk, mongoOpts = {}) {
},
// store the first pending change timestamp for the chunk, this will
// be cleared every time a backup is completed.
$min: { 'overleaf.backup.pendingChangeAt': chunk.getEndTimestamp() },
$min: {
'overleaf.backup.pendingChangeAt':
chunk.getEndTimestamp() || new Date(),
},
},
mongoOpts
)
@@ -191,7 +195,6 @@ async function confirmUpdate(projectId, oldChunkId, newChunk, newChunkId) {
await session.withTransaction(async () => {
await deleteChunk(projectId, oldChunkId, { session })
await confirmCreate(projectId, newChunk, newChunkId, { session })
await updateProjectRecord(projectId, newChunk, { session })
})
} finally {
await session.endSession()

View File

@@ -135,6 +135,7 @@ async function confirmCreate(projectId, chunk, chunkId) {
_deletePendingChunk(tx, projectId, chunkId),
_insertChunk(tx, projectId, chunk, chunkId),
])
await updateProjectRecord(projectId, chunk)
})
}

View File

@@ -12,7 +12,8 @@ import {
import cleanup from './support/cleanup.js'
import testProjects from '../api/support/test_projects.js'
import { execFile } from 'node:child_process'
import { expect } from 'chai'
import chai, { expect } from 'chai'
import chaiExclude from 'chai-exclude'
import config from 'config'
import ObjectPersistor from '@overleaf/object-persistor'
import { WritableBuffer } from '@overleaf/stream-utils'
@@ -26,6 +27,7 @@ import {
makeProjectKey,
} from '../../../../storage/lib/blob_store/index.js'
chai.use(chaiExclude)
const TIMEOUT = 20 * 1_000
const { deksBucket } = config.get('backupStore')
@@ -660,116 +662,129 @@ describe('back_fill_file_hash script', function () {
return !hasHash // only files without hash processed
})
it('should update mongo', async function () {
expect(await projectsCollection.find({}).toArray()).to.deep.equal([
{
_id: projectId0,
rootFolder: [
{
fileRefs: [
{ _id: fileId8, hash: gitBlobHash(fileId8) },
{ _id: fileId0, hash: gitBlobHash(fileId0) },
{ _id: fileId6, hash: gitBlobHash(fileId6) },
{ _id: fileId7, hash: hashFile7 },
],
folders: [{ fileRefs: [], folders: [] }],
},
],
overleaf: { history: { id: historyId0 } },
},
{
_id: projectId1,
rootFolder: [
{
fileRefs: [{ _id: fileId1, hash: gitBlobHash(fileId1) }],
folders: [
{
fileRefs: [],
folders: [
{
fileRefs: [{ _id: fileId1, hash: gitBlobHash(fileId1) }],
folders: [],
},
],
},
],
},
],
overleaf: { history: { id: historyId1 } },
},
{
_id: projectId2,
rootFolder: [
{
fileRefs: [],
folders: [
{
fileRefs: [],
folders: [
{
fileRefs: [{ _id: fileId2, hash: gitBlobHash(fileId2) }],
folders: [],
},
],
},
],
},
],
overleaf: { history: { id: historyId2 } },
},
{
_id: projectId3,
rootFolder: [
{
fileRefs: [],
folders: [
{
fileRefs: [],
folders: [
{
fileRefs: [{ _id: fileId3, hash: gitBlobHash(fileId3) }],
folders: [],
},
],
},
],
},
],
overleaf: { history: { id: historyId3 } },
},
{
_id: projectIdNoHistory,
rootFolder: [{ fileRefs: [], folders: [] }],
overleaf: { history: { conversionFailed: true } },
},
{
_id: projectIdNoOverleaf,
rootFolder: [{ fileRefs: [], folders: [] }],
},
{
_id: projectIdBadFileTree0,
overleaf: { history: { id: historyIdBadFileTree0 } },
},
{
_id: projectIdBadFileTree1,
rootFolder: [],
overleaf: { history: { id: historyIdBadFileTree1 } },
},
{
_id: projectIdBadFileTree2,
rootFolder: [{ fileRefs: [{ _id: null }] }],
overleaf: { history: { id: historyIdBadFileTree2 } },
},
{
_id: projectIdBadFileTree3,
rootFolder: [
{
folders: [null, { folders: {}, fileRefs: 13 }],
fileRefs: [{ _id: fileId9, hash: gitBlobHash(fileId9) }],
},
],
overleaf: { history: { id: historyIdBadFileTree3 } },
},
])
expect(await projectsCollection.find({}).toArray())
.excludingEvery([
'currentEndTimestamp',
'currentEndVersion',
'updatedAt',
'backup',
])
.to.deep.equal([
{
_id: projectId0,
rootFolder: [
{
fileRefs: [
{ _id: fileId8, hash: gitBlobHash(fileId8) },
{ _id: fileId0, hash: gitBlobHash(fileId0) },
{ _id: fileId6, hash: gitBlobHash(fileId6) },
{ _id: fileId7, hash: hashFile7 },
],
folders: [{ fileRefs: [], folders: [] }],
},
],
overleaf: { history: { id: historyId0 } },
},
{
_id: projectId1,
rootFolder: [
{
fileRefs: [{ _id: fileId1, hash: gitBlobHash(fileId1) }],
folders: [
{
fileRefs: [],
folders: [
{
fileRefs: [
{ _id: fileId1, hash: gitBlobHash(fileId1) },
],
folders: [],
},
],
},
],
},
],
overleaf: { history: { id: historyId1 } },
},
{
_id: projectId2,
rootFolder: [
{
fileRefs: [],
folders: [
{
fileRefs: [],
folders: [
{
fileRefs: [
{ _id: fileId2, hash: gitBlobHash(fileId2) },
],
folders: [],
},
],
},
],
},
],
overleaf: { history: { id: historyId2 } },
},
{
_id: projectId3,
rootFolder: [
{
fileRefs: [],
folders: [
{
fileRefs: [],
folders: [
{
fileRefs: [
{ _id: fileId3, hash: gitBlobHash(fileId3) },
],
folders: [],
},
],
},
],
},
],
overleaf: { history: { id: historyId3 } },
},
{
_id: projectIdNoHistory,
rootFolder: [{ fileRefs: [], folders: [] }],
overleaf: { history: { conversionFailed: true } },
},
{
_id: projectIdNoOverleaf,
rootFolder: [{ fileRefs: [], folders: [] }],
},
{
_id: projectIdBadFileTree0,
overleaf: { history: { id: historyIdBadFileTree0 } },
},
{
_id: projectIdBadFileTree1,
rootFolder: [],
overleaf: { history: { id: historyIdBadFileTree1 } },
},
{
_id: projectIdBadFileTree2,
rootFolder: [{ fileRefs: [{ _id: null }] }],
overleaf: { history: { id: historyIdBadFileTree2 } },
},
{
_id: projectIdBadFileTree3,
rootFolder: [
{
folders: [null, { folders: {}, fileRefs: 13 }],
fileRefs: [{ _id: fileId9, hash: gitBlobHash(fileId9) }],
},
],
overleaf: { history: { id: historyIdBadFileTree3 } },
},
])
expect(await deletedProjectsCollection.find({}).toArray()).to.deep.equal([
{
_id: deleteProjectsRecordId0,

View File

@@ -8,7 +8,8 @@ import { backedUpBlobs, blobs, db } from '../../../../storage/lib/mongodb.js'
import cleanup from './support/cleanup.js'
import testProjects from '../api/support/test_projects.js'
import { execFile } from 'node:child_process'
import { expect } from 'chai'
import chai, { expect } from 'chai'
import chaiExclude from 'chai-exclude'
import config from 'config'
import { WritableBuffer } from '@overleaf/stream-utils'
import {
@@ -22,6 +23,8 @@ import {
} from '../../../../storage/lib/blob_store/index.js'
import ObjectPersistor from '@overleaf/object-persistor'
chai.use(chaiExclude)
const TIMEOUT = 20 * 1_000
const { deksBucket } = config.get('backupStore')
@@ -604,91 +607,98 @@ describe('back_fill_file_hash_fix_up script', function () {
expect(line).to.include(gitBlobHash(fileIdBlobExistsInGCSCorrupted))
})
it('should update mongo', async function () {
expect(await projectsCollection.find({}).toArray()).to.deep.equal([
{
_id: projectId0,
rootFolder: [
{
fileRefs: [
// Removed
// { _id: fileIdMissing0 },
// Removed
// { _id: fileIdMissing2 },
// Added hash
{
_id: fileIdHashMissing0,
hash: gitBlobHash(fileIdHashMissing0),
},
// Added hash
{
_id: fileIdHashMissing1,
hash: gitBlobHash(fileIdHashMissing1),
},
// No change, should warn about the find.
{
_id: fileIdWithDifferentHashFound,
hash: gitBlobHash(fileIdInGoodState),
},
// No change, should warn about the need to restore.
{
_id: fileIdWithDifferentHashRestore,
hash: gitBlobHash(fileIdMissing0),
},
],
folders: [
{
docs: [],
},
null,
{
fileRefs: [
null,
// No change
{
_id: fileIdInGoodState,
hash: gitBlobHash(fileIdInGoodState),
},
// Updated hash
{
_id: fileIdWithDifferentHashNotFound0,
hash: gitBlobHash(fileIdWithDifferentHashNotFound0),
},
// Updated hash
{
_id: fileIdRestoreFromFilestore0,
hash: gitBlobHash(fileIdRestoreFromFilestore0),
},
// Added hash
{
_id: fileIdRestoreFromFilestore1,
hash: gitBlobHash(fileIdRestoreFromFilestore1),
},
// No change, blob created
{
_id: fileIdBlobExistsInGCS0,
hash: gitBlobHash(fileIdBlobExistsInGCS0),
},
// No change, flagged
{
_id: fileIdBlobExistsInGCSCorrupted,
hash: gitBlobHash(fileIdBlobExistsInGCSCorrupted),
},
// Added hash
{
_id: fileIdBlobExistsInGCS1,
hash: gitBlobHash(fileIdBlobExistsInGCS1),
},
],
folders: [],
},
],
},
],
overleaf: { history: { id: historyId0 } },
// Incremented when removing file/updating hash
version: 8,
},
])
expect(await projectsCollection.find({}).toArray())
.excludingEvery([
'currentEndTimestamp',
'currentEndVersion',
'updatedAt',
'backup',
])
.to.deep.equal([
{
_id: projectId0,
rootFolder: [
{
fileRefs: [
// Removed
// { _id: fileIdMissing0 },
// Removed
// { _id: fileIdMissing2 },
// Added hash
{
_id: fileIdHashMissing0,
hash: gitBlobHash(fileIdHashMissing0),
},
// Added hash
{
_id: fileIdHashMissing1,
hash: gitBlobHash(fileIdHashMissing1),
},
// No change, should warn about the find.
{
_id: fileIdWithDifferentHashFound,
hash: gitBlobHash(fileIdInGoodState),
},
// No change, should warn about the need to restore.
{
_id: fileIdWithDifferentHashRestore,
hash: gitBlobHash(fileIdMissing0),
},
],
folders: [
{
docs: [],
},
null,
{
fileRefs: [
null,
// No change
{
_id: fileIdInGoodState,
hash: gitBlobHash(fileIdInGoodState),
},
// Updated hash
{
_id: fileIdWithDifferentHashNotFound0,
hash: gitBlobHash(fileIdWithDifferentHashNotFound0),
},
// Updated hash
{
_id: fileIdRestoreFromFilestore0,
hash: gitBlobHash(fileIdRestoreFromFilestore0),
},
// Added hash
{
_id: fileIdRestoreFromFilestore1,
hash: gitBlobHash(fileIdRestoreFromFilestore1),
},
// No change, blob created
{
_id: fileIdBlobExistsInGCS0,
hash: gitBlobHash(fileIdBlobExistsInGCS0),
},
// No change, flagged
{
_id: fileIdBlobExistsInGCSCorrupted,
hash: gitBlobHash(fileIdBlobExistsInGCSCorrupted),
},
// Added hash
{
_id: fileIdBlobExistsInGCS1,
hash: gitBlobHash(fileIdBlobExistsInGCS1),
},
],
folders: [],
},
],
},
],
overleaf: { history: { id: historyId0 } },
// Incremented when removing file/updating hash
version: 8,
},
])
expect(await deletedProjectsCollection.find({}).toArray()).to.deep.equal([
{
_id: deleteProjectsRecordId0,

View File

@@ -5,6 +5,7 @@ const fixtures = require('./support/fixtures')
const { expect } = require('chai')
const sinon = require('sinon')
const { ObjectId } = require('mongodb')
const { projects } = require('../../../../storage/lib/mongodb')
const {
Chunk,
@@ -27,20 +28,27 @@ describe('chunkStore', function () {
{
description: 'Postgres backend',
createProject: chunkStore.initializeProject,
idMapping: id => parseInt(id, 10),
},
{
description: 'Mongo backend',
createProject: () =>
chunkStore.initializeProject(new ObjectId().toString()),
idMapping: id => id,
},
]
for (const scenario of scenarios) {
describe(scenario.description, function () {
let projectId
let projectRecord
beforeEach(async function () {
projectId = await scenario.createProject()
// create a record in the mongo projects collection
projectRecord = await projects.insertOne({
overleaf: { history: { id: scenario.idMapping(projectId) } },
})
})
it('loads empty latest chunk for a new project', async function () {
@@ -114,6 +122,19 @@ describe('chunkStore', function () {
expect(editFile).to.be.an.instanceof(EditFileOperation)
expect(editFile.getPathname()).to.equal(testPathname)
})
it('updates the project record with the current version and timestamps', async function () {
const project = await projects.findOne({
_id: new ObjectId(projectRecord.insertedId),
})
expect(project.overleaf.history.currentEndVersion).to.equal(2)
expect(project.overleaf.history.currentEndTimestamp).to.deep.equal(
lastChangeTimestamp
)
expect(project.overleaf.backup.pendingChangeAt).to.deep.equal(
lastChangeTimestamp
)
})
})
describe('multiple chunks', async function () {
@@ -232,6 +253,25 @@ describe('chunkStore', function () {
expect(chunk).to.deep.equal(thirdChunk)
})
it('updates the project record to match the last chunk', async function () {
const project = await projects.findOne({
_id: new ObjectId(projectRecord.insertedId),
})
expect(project.overleaf.history.currentEndVersion).to.equal(5)
expect(project.overleaf.history.currentEndTimestamp).to.deep.equal(
thirdChunkTimestamp
)
})
it('updates the pending change timestamp to match the first chunk', async function () {
const project = await projects.findOne({
_id: new ObjectId(projectRecord.insertedId),
})
expect(project.overleaf.backup.pendingChangeAt).to.deep.equal(
firstChunkTimestamp
)
})
describe('after updating the last chunk', function () {
let newChunk
@@ -266,6 +306,25 @@ describe('chunkStore', function () {
)
expect(chunk).to.deep.equal(newChunk)
})
it('updates the project record to match the latest version and timestamp', async function () {
const project = await projects.findOne({
_id: new ObjectId(projectRecord.insertedId),
})
expect(project.overleaf.history.currentEndVersion).to.equal(6)
expect(project.overleaf.history.currentEndTimestamp).to.deep.equal(
thirdChunkTimestamp
)
})
it('does not modify the existing pending change timestamp in the project record', async function () {
const project = await projects.findOne({
_id: new ObjectId(projectRecord.insertedId),
})
expect(project.overleaf.backup.pendingChangeAt).to.deep.equal(
firstChunkTimestamp
)
})
})
})

View File

@@ -0,0 +1,30 @@
/* eslint-disable no-unused-vars */
import { batchedUpdate } from '@overleaf/mongo-utils/batchedUpdate.js'
const tags = ['saas']
const migrate = async client => {
const { db } = client
await batchedUpdate(
db.projects,
{ 'overleaf.history.currentEndVersion': { $exists: true } },
{
$unset: {
'overleaf.history.currentEndVersion': true,
'overleaf.history.currentEndTimestamp': true,
'overleaf.history.updatedAt': true,
'overleaf.backup.pendingChangeAt': true,
},
}
)
}
const rollback = async client => {}
export default {
tags,
migrate,
rollback,
}