From a98fefd24b159d4e6e786c7e0b231375f802dac4 Mon Sep 17 00:00:00 2001 From: Jakob Ackermann Date: Mon, 26 Aug 2024 15:09:25 +0200 Subject: [PATCH] Merge pull request #20129 from overleaf/jpa-handle-partial-deletion [misc] improve handling of document deletion GitOrigin-RevId: bd6b225b91ab38365e9ff272c50ece995e767bf2 --- .../scripts/check_redis_mongo_sync_state.js | 124 +++++++++++++++++- .../js/CheckRedisMongoSyncStateTests.js | 105 +++++++++++++++ .../acceptance/js/helpers/MockDocstoreApi.js | 111 ++++++++++++++++ .../Project/ProjectEntityUpdateHandler.js | 4 + .../ProjectEntityUpdateHandlerTests.js | 6 + 5 files changed, 346 insertions(+), 4 deletions(-) create mode 100644 services/document-updater/test/acceptance/js/helpers/MockDocstoreApi.js diff --git a/services/document-updater/scripts/check_redis_mongo_sync_state.js b/services/document-updater/scripts/check_redis_mongo_sync_state.js index c04794d3a7..0ca2bb4e8b 100644 --- a/services/document-updater/scripts/check_redis_mongo_sync_state.js +++ b/services/document-updater/scripts/check_redis_mongo_sync_state.js @@ -3,15 +3,22 @@ const Path = require('path') const _ = require('lodash') const logger = require('@overleaf/logger') const OError = require('@overleaf/o-error') +const Errors = require('../app/js/Errors') const LockManager = require('../app/js/LockManager') const PersistenceManager = require('../app/js/PersistenceManager') const ProjectFlusher = require('../app/js/ProjectFlusher') const ProjectManager = require('../app/js/ProjectManager') const RedisManager = require('../app/js/RedisManager') const Settings = require('@overleaf/settings') +const request = require('requestretry').defaults({ + maxAttempts: 2, + retryDelay: 10, +}) const AUTO_FIX_VERSION_MISMATCH = process.env.AUTO_FIX_VERSION_MISMATCH === 'true' +const AUTO_FIX_PARTIALLY_DELETED_DOC_METADATA = + process.env.AUTO_FIX_PARTIALLY_DELETED_DOC_METADATA === 'true' const SCRIPT_LOG_LEVEL = process.env.SCRIPT_LOG_LEVEL || 'warn' const FLUSH_IN_SYNC_PROJECTS = process.env.FLUSH_IN_SYNC_PROJECTS === 'true' const FOLDER = @@ -32,6 +39,7 @@ const COMPARE_AND_SET = * @property {Array} lines * @property {string} pathname * @property {Object} ranges + * @property {boolean} [partiallyDeleted] */ class TryAgainError extends Error {} @@ -66,6 +74,101 @@ async function updateDocVersionInRedis(docId, redisDoc, mongoDoc) { } } +async function fixPartiallyDeletedDocMetadata(projectId, docId, pathname) { + await new Promise((resolve, reject) => { + request( + { + method: 'PATCH', + url: `http://${process.env.DOCSTORE_HOST || '127.0.0.1'}:3016/project/${projectId}/doc/${docId}`, + timeout: 60 * 1000, + json: { + name: Path.basename(pathname), + deleted: true, + deletedAt: new Date(), + }, + }, + (err, res, body) => { + if (err) return reject(err) + const { statusCode } = res + if (statusCode !== 204) { + return reject( + new OError('patch request to docstore failed', { + statusCode, + body, + }) + ) + } + resolve() + } + ) + }) +} + +async function getDocFromMongo(projectId, docId) { + try { + return await PersistenceManager.promises.getDoc(projectId, docId) + } catch (err) { + if (!(err instanceof Errors.NotFoundError)) { + throw err + } + } + const docstoreDoc = await new Promise((resolve, reject) => { + request( + { + url: `http://${process.env.DOCSTORE_HOST || '127.0.0.1'}:3016/project/${projectId}/doc/${docId}/peek`, + timeout: 60 * 1000, + json: true, + }, + (err, res, body) => { + if (err) return reject(err) + const { statusCode } = res + if (statusCode !== 200) { + return reject( + new OError('fallback request to docstore failed', { + statusCode, + body, + }) + ) + } + resolve(body) + } + ) + }) + const deletedDocName = await new Promise((resolve, reject) => { + request( + { + url: `http://${process.env.DOCSTORE_HOST || '127.0.0.1'}:3016/project/${projectId}/doc-deleted`, + timeout: 60 * 1000, + json: true, + }, + (err, res, body) => { + if (err) return reject(err) + const { statusCode } = res + if (statusCode !== 200) { + return reject( + new OError('list deleted docs request to docstore failed', { + statusCode, + body, + }) + ) + } + resolve(body.find(doc => doc._id === docId)?.name) + } + ) + }) + if (docstoreDoc.deleted && deletedDocName) { + return { + ...docstoreDoc, + pathname: deletedDocName, + } + } + return { + ...docstoreDoc, + pathname: `/partially-deleted-doc-with-unknown-name-and-id-${docId}.txt`, + partiallyDeleted: true, + } +} + /** * @param {string} projectId * @param {string} docId @@ -76,10 +179,20 @@ async function processDoc(projectId, docId) { projectId, docId ) - const mongoDoc = /** @type Doc */ await PersistenceManager.promises.getDoc( - projectId, - docId - ) + const mongoDoc = /** @type Doc */ await getDocFromMongo(projectId, docId) + + if (mongoDoc.partiallyDeleted) { + if (AUTO_FIX_PARTIALLY_DELETED_DOC_METADATA) { + console.log( + `Found partially deleted doc ${docId} in project ${projectId}: fixing metadata` + ) + await fixPartiallyDeletedDocMetadata(projectId, docId, redisDoc.pathname) + } else { + console.log( + `Found partially deleted doc ${docId} in project ${projectId}: use AUTO_FIX_PARTIALLY_DELETED_DOC_METADATA=true to fix metadata` + ) + } + } if (mongoDoc.version < redisDoc.version) { // mongo is behind, we can flush to mongo when all docs are processed. @@ -121,6 +234,9 @@ async function processDoc(projectId, docId) { if (!WRITE_CONTENT) return true console.log(`pathname: ${mongoDoc.pathname}`) + if (mongoDoc.pathname !== redisDoc.pathname) { + console.log(`pathname redis: ${redisDoc.pathname}`) + } console.log(`mongo version: ${mongoDoc.version}`) console.log(`redis version: ${redisDoc.version}`) diff --git a/services/document-updater/test/acceptance/js/CheckRedisMongoSyncStateTests.js b/services/document-updater/test/acceptance/js/CheckRedisMongoSyncStateTests.js index 8738dcf500..cd4f07bde7 100644 --- a/services/document-updater/test/acceptance/js/CheckRedisMongoSyncStateTests.js +++ b/services/document-updater/test/acceptance/js/CheckRedisMongoSyncStateTests.js @@ -7,6 +7,8 @@ const { expect } = require('chai') const Settings = require('@overleaf/settings') const fs = require('fs') const Path = require('path') +const MockDocstoreApi = require('./helpers/MockDocstoreApi') +const sinon = require('sinon') const rclient = require('@overleaf/redis-wrapper').createClient( Settings.redis.documentupdater @@ -20,6 +22,14 @@ describe('CheckRedisMongoSyncState', function () { await rclient.flushall() }) + let peekDocumentInDocstore + beforeEach(function () { + peekDocumentInDocstore = sinon.spy(MockDocstoreApi, 'peekDocument') + }) + afterEach(function () { + peekDocumentInDocstore.restore() + }) + async function runScript(options) { let result try { @@ -67,6 +77,8 @@ describe('CheckRedisMongoSyncState', function () { expect(result.stdout).to.include( 'Found 0 projects with 0 out of sync docs' ) + + expect(peekDocumentInDocstore).to.not.have.been.called }) describe('with out of sync lines', function () { @@ -263,4 +275,97 @@ describe('CheckRedisMongoSyncState', function () { expect(result.stdout).to.include('Processed 20 projects') }) }) + + describe('with partially deleted doc', function () { + let projectId, docId + beforeEach(function (done) { + projectId = DocUpdaterClient.randomId() + docId = DocUpdaterClient.randomId() + MockWebApi.insertDoc(projectId, docId, { + lines: ['mongo', 'lines'], + version: 1, + }) + MockDocstoreApi.insertDoc(projectId, docId, { + lines: ['mongo', 'lines'], + version: 1, + }) + DocUpdaterClient.getDoc(projectId, docId, err => { + MockWebApi.clearDocs() + done(err) + }) + }) + describe('with only the file-tree entry deleted', function () { + it('should flag the partial deletion', async function () { + const result = await runScript({}) + expect(result.code).to.equal(0) + expect(result.stdout).to.include('Processed 1 projects') + expect(result.stdout).to.include( + `Found partially deleted doc ${docId} in project ${projectId}: use AUTO_FIX_PARTIALLY_DELETED_DOC_METADATA=true to fix metadata` + ) + expect(result.stdout).to.include( + 'Found 0 projects with 0 out of sync docs' + ) + expect(MockDocstoreApi.getDoc(projectId, docId)).to.not.include({ + deleted: true, + name: 'c.tex', + }) + expect(peekDocumentInDocstore).to.have.been.called + }) + it('should autofix the partial deletion', async function () { + const result = await runScript({ + AUTO_FIX_PARTIALLY_DELETED_DOC_METADATA: 'true', + }) + expect(result.code).to.equal(0) + expect(result.stdout).to.include('Processed 1 projects') + expect(result.stdout).to.include( + `Found partially deleted doc ${docId} in project ${projectId}: fixing metadata` + ) + expect(result.stdout).to.include( + 'Found 0 projects with 0 out of sync docs' + ) + + expect(MockDocstoreApi.getDoc(projectId, docId)).to.include({ + deleted: true, + name: 'c.tex', + }) + + const result2 = await runScript({}) + expect(result2.code).to.equal(0) + expect(result2.stdout).to.include('Processed 1 projects') + expect(result2.stdout).to.not.include( + `Found partially deleted doc ${docId} in project ${projectId}` + ) + expect(result2.stdout).to.include( + 'Found 0 projects with 0 out of sync docs' + ) + }) + }) + describe('with docstore metadata updated', function () { + beforeEach(function (done) { + MockDocstoreApi.patchDocument( + projectId, + docId, + { + deleted: true, + deletedAt: new Date(), + name: 'c.tex', + }, + done + ) + }) + + it('should work when in sync', async function () { + const result = await runScript({}) + expect(result.code).to.equal(0) + expect(result.stdout).to.include('Processed 1 projects') + expect(result.stdout).to.not.include( + `Found partially deleted doc ${docId} in project ${projectId}` + ) + expect(result.stdout).to.include( + 'Found 0 projects with 0 out of sync docs' + ) + expect(peekDocumentInDocstore).to.have.been.called + }) + }) + }) }) diff --git a/services/document-updater/test/acceptance/js/helpers/MockDocstoreApi.js b/services/document-updater/test/acceptance/js/helpers/MockDocstoreApi.js new file mode 100644 index 0000000000..a2cd915b9f --- /dev/null +++ b/services/document-updater/test/acceptance/js/helpers/MockDocstoreApi.js @@ -0,0 +1,111 @@ +const express = require('express') +const bodyParser = require('body-parser') +const app = express() +const MAX_REQUEST_SIZE = 2 * (2 * 1024 * 1024 + 64 * 1024) + +const MockDocstoreApi = { + docs: {}, + + clearDocs() { + this.docs = {} + }, + + getDoc(projectId, docId) { + return this.docs[`${projectId}:${docId}`] + }, + + insertDoc(projectId, docId, doc) { + if (doc.version == null) { + doc.version = 0 + } + if (doc.lines == null) { + doc.lines = [] + } + this.docs[`${projectId}:${docId}`] = doc + }, + + patchDocument(projectId, docId, meta, callback) { + Object.assign(this.docs[`${projectId}:${docId}`], meta) + callback(null) + }, + + peekDocument(projectId, docId, callback) { + callback(null, this.docs[`${projectId}:${docId}`]) + }, + + getAllDeletedDocs(projectId, callback) { + callback( + null, + Object.entries(this.docs) + .filter(([key, doc]) => key.startsWith(projectId) && doc.deleted) + .map(([key, doc]) => { + return { + _id: key.split(':')[1], + name: doc.name, + deletedAt: doc.deletedAt, + } + }) + ) + }, + + run() { + app.get('/project/:project_id/doc-deleted', (req, res, next) => { + this.getAllDeletedDocs(req.params.project_id, (error, docs) => { + if (error) { + res.sendStatus(500) + } else { + res.json(docs) + } + }) + }) + + app.get('/project/:project_id/doc/:doc_id/peek', (req, res, next) => { + this.peekDocument( + req.params.project_id, + req.params.doc_id, + (error, doc) => { + if (error) { + res.sendStatus(500) + } else if (doc) { + res.json(doc) + } else { + res.sendStatus(404) + } + } + ) + }) + + app.patch( + '/project/:project_id/doc/:doc_id', + bodyParser.json({ limit: MAX_REQUEST_SIZE }), + (req, res, next) => { + MockDocstoreApi.patchDocument( + req.params.project_id, + req.params.doc_id, + req.body, + error => { + if (error) { + res.sendStatus(500) + } else { + res.sendStatus(204) + } + } + ) + } + ) + + app + .listen(3016, error => { + if (error) { + throw error + } + }) + .on('error', error => { + console.error('error starting MockDocstoreApi:', error.message) + process.exit(1) + }) + }, +} + +MockDocstoreApi.run() +module.exports = MockDocstoreApi diff --git a/services/web/app/src/Features/Project/ProjectEntityUpdateHandler.js b/services/web/app/src/Features/Project/ProjectEntityUpdateHandler.js index 5ac1c435ee..14756ab0c5 100644 --- a/services/web/app/src/Features/Project/ProjectEntityUpdateHandler.js +++ b/services/web/app/src/Features/Project/ProjectEntityUpdateHandler.js @@ -774,6 +774,10 @@ const deleteEntity = wrapWithLock( throw new Error('No entityType set') } entityType = entityType.toLowerCase() + + // Flush the entire project to avoid leaving partially deleted docs in redis. + await DocumentUpdaterHandler.promises.flushProjectToMongo(projectId) + const { entity, path, projectBeforeDeletion, newProject } = await ProjectEntityMongoUpdateHandler.promises.deleteEntity( projectId, diff --git a/services/web/test/unit/src/Project/ProjectEntityUpdateHandlerTests.js b/services/web/test/unit/src/Project/ProjectEntityUpdateHandlerTests.js index 400ade10a4..35a4913c83 100644 --- a/services/web/test/unit/src/Project/ProjectEntityUpdateHandlerTests.js +++ b/services/web/test/unit/src/Project/ProjectEntityUpdateHandlerTests.js @@ -1731,6 +1731,12 @@ describe('ProjectEntityUpdateHandler', function () { ) }) + it('flushes the project to mongo', function () { + this.DocumentUpdaterHandler.promises.flushProjectToMongo.should.have.been.calledWith( + projectId + ) + }) + it('deletes the entity in mongo', function () { this.ProjectEntityMongoUpdateHandler.promises.deleteEntity .calledWith(projectId, docId, 'doc')