From 6787e9c50df03c3dbd1305d69f3bfe1e2db050c2 Mon Sep 17 00:00:00 2001 From: Miguel Serrano Date: Fri, 27 Jan 2023 12:48:40 +0100 Subject: [PATCH] Merge pull request #11490 from overleaf/msm-migrate-history-fix [web/scripts] `history/migrate_history.js` fixes GitOrigin-RevId: 249e9a3f1dbf89d46335ee208f5922905477845c --- services/web/.gitignore | 2 +- .../app/src/HistoryUpgradeHelper.js | 2 +- .../app/src/ProjectHistoryController.js | 1321 +++++++++++++++++ .../unit/src/ProjectHistoryControllerTests.js | 346 +++++ .../migrate-project-history.snapshot.json | 50 + .../unit/src/data/track-changes-project.zip | Bin 0 -> 3527 bytes .../web/scripts/history/migrate_history.js | 4 +- ...pgrade_v1_with_conversion_if_sl_history.js | 2 +- 8 files changed, 1723 insertions(+), 4 deletions(-) create mode 100644 services/web/modules/history-migration/app/src/ProjectHistoryController.js create mode 100644 services/web/modules/history-migration/test/unit/src/ProjectHistoryControllerTests.js create mode 100644 services/web/modules/history-migration/test/unit/src/data/migrate-project-history.snapshot.json create mode 100644 services/web/modules/history-migration/test/unit/src/data/track-changes-project.zip diff --git a/services/web/.gitignore b/services/web/.gitignore index 460e02f3c7..91e07e3fad 100644 --- a/services/web/.gitignore +++ b/services/web/.gitignore @@ -86,7 +86,7 @@ cypress/downloads/ cypress/results/ # Test fixture zip -!modules/admin-panel/test/unit/src/data/track-changes-project.zip +!modules/history-migration/test/unit/src/data/track-changes-project.zip # Ace themes for conversion modules/source-editor/frontend/js/themes/ace/ diff --git a/services/web/modules/history-migration/app/src/HistoryUpgradeHelper.js b/services/web/modules/history-migration/app/src/HistoryUpgradeHelper.js index 11055eb896..750166bf92 100644 --- a/services/web/modules/history-migration/app/src/HistoryUpgradeHelper.js +++ b/services/web/modules/history-migration/app/src/HistoryUpgradeHelper.js 
@@ -4,7 +4,7 @@ const Settings = require('@overleaf/settings') const ProjectHistoryHandler = require('../../../../app/src/Features/Project/ProjectHistoryHandler') const HistoryManager = require('../../../../app/src/Features/History/HistoryManager') -const ProjectHistoryController = require('../../../admin-panel/app/src/ProjectHistoryController') +const ProjectHistoryController = require('./ProjectHistoryController') const ProjectEntityHandler = require('../../../../app/src/Features/Project/ProjectEntityHandler') const ProjectEntityUpdateHandler = require('../../../../app/src/Features/Project/ProjectEntityUpdateHandler') diff --git a/services/web/modules/history-migration/app/src/ProjectHistoryController.js b/services/web/modules/history-migration/app/src/ProjectHistoryController.js new file mode 100644 index 0000000000..cf4a5700ee --- /dev/null +++ b/services/web/modules/history-migration/app/src/ProjectHistoryController.js @@ -0,0 +1,1321 @@ +const _ = require('lodash') +const settings = require('@overleaf/settings') +const OError = require('@overleaf/o-error') +const fs = require('fs') +const fse = require('fs-extra') +const { ObjectId } = require('mongodb') +const request = require('request') +const { pipeline } = require('stream') +const unzipper = require('unzipper') +const util = require('util') +const logger = require('@overleaf/logger') +const path = require('path') +const { + FileTooLargeError, + InvalidNameError, +} = require('../../../../app/src/Features/Errors/Errors') +const FilestoreHandler = require('../../../../app/src/Features/FileStore/FileStoreHandler') +const ProjectGetter = require('../../../../app/src/Features/Project/ProjectGetter') +const RedisWrapper = require('../../../../app/src/infrastructure/RedisWrapper') +const HistoryManager = require('../../../../app/src/Features/History/HistoryManager') +const ProjectHistoryHandler = require('../../../../app/src/Features/Project/ProjectHistoryHandler') +const ProjectUpdateHandler = 
require('../../../../app/src/Features/Project/ProjectUpdateHandler') +const DocumentUpdaterHandler = require('../../../../app/src/Features/DocumentUpdater/DocumentUpdaterHandler') +const ProjectEntityHandler = require('../../../../app/src/Features/Project/ProjectEntityHandler') +const ProjectEntityUpdateHandler = require('../../../../app/src/Features/Project/ProjectEntityUpdateHandler') +const SafePath = require('../../../../app/src/Features/Project/SafePath') +const { DeletedFile } = require('../../../../app/src/models/DeletedFile') +const { Doc } = require('../../../../app/src/models/Doc') +const { + iterablePaths, +} = require('../../../../app/src/Features/Project/IterablePath') + +const rclient = RedisWrapper.client('project_history_migration') + +module.exports = { deleteProjectHistory, migrateProjectHistory } + +/** + * @typedef {Object} UpdateMeta + * @property {string | null} user_id the id of the user that performed the update + * @property {number} ts the timestamp of the update + */ + +/** + * @typedef {UpdateMeta} EditDocUpdateMeta + * @property {string | null} user_id + * @property {number} ts + * @property {string} pathname the doc pathname + * @property {number} doc_length the length of the doc + */ + +/** + * @typedef {Object} Update + * @property {string} pathname the path in the file tree + * @property {UpdateMeta} meta + // * @property {string} version a two-part version. The first part is the project version after the updates, as recorded in Mongo. The second part is a counter that increments for each update in this batch. 
+ * @property {string} projectHistoryId the v1 history id for this project + * @property {number} v + */ + +/** + * @typedef {Update} FileUpdate + * @property {string} pathname + * @property {UpdateMeta} meta + * @property {string} projectHistoryId + * @property {number} v + * @property {string} file + */ + +/** + * @typedef {FileUpdate} AddFileUpdate + * @property {string} pathname + * @property {UpdateMeta} meta + * @property {string} projectHistoryId + * @property {number} v + * @property {string} file + * @property {string} url + */ + +/** + * @typedef {Update} DocUpdate + * @property {UpdateMeta} meta + * @property {string} projectHistoryId + * @property {number} v + * @property {string} doc + */ + +/** + * @typedef {DocUpdate} AddDocUpdate + * @property {string} pathname + * @property {UpdateMeta} meta + * @property {string} projectHistoryId + * @property {number} v + * @property {string} doc + * @property {string} docLines + * @property {string} docLinesId + * @property {boolean} contentStored + */ + +/** + * @typedef {DocUpdate} EditDocUpdate + * @property {EditDocUpdateMeta} meta + * @property {string} projectHistoryId + * @property {number} v + * @property {number} lastV + * @property {string} doc + * @property {Array} op + */ + +/** + * @typedef {AddDocUpdate | AddFileUpdate} AddUpdate + */ + +/** + * @typedef {DocUpdate | FileUpdate} DeleteUpdate + * @property {string} pathname + * @property {UpdateMeta} meta + * @property {string} projectHistoryId + * @property {number} v + * @property {string} doc + * @property {string} new_pathname + */ + +/** + * @typedef {Update} EditDocUpdateStub + * @property {true} stub + * @property {string} path + * @property {string} pathname + * @property {number} v + * @property {number} doc_length + */ + +/** + * @typedef {AddUpdate | DeleteUpdate | EditDocUpdate | EditDocUpdateStub } AnyUpdate + */ + +/** + * @typedef {Object} Project + * @property {string} _id the id of the project + * @property 
{Object} overleaf + */ + +/** + * @typedef ManifestUpdate + * @property {string} path + * @property {number} doc_length + * @property {number} ts + * @property {number} version + */ + +/** + * @typedef ManifestContent + * @property {number} start + */ + +/** + * @typedef ManifestDoc + * @property {string} id + * @property {ManifestContent} content + * @property {Array} updates + */ + +/** + * @typedef {Object} Manifest + * @property {string} projectId + * @property {Array} docs + */ + +/** + * @typedef Entity + * @property {string} type + * @property {string} path + * @property {string} docLines + * @property {string} deletedAt + * @property {boolean} deleted + */ + +/** + * Iterate recursively through the folders in project.rootFolder, + * building a map of all the docs (with content as a docLines string) + * and files (with content as a filestore URL). + * + * @param {Object} project + * @returns {Promise>} + */ +async function processRootFolder(project) { + const entities = new Map() + + async function processFolder(folder, root = '') { + for (const item of iterablePaths(folder, 'docs')) { + const doc = await Doc.findOne( + item._id, + // only read the fields we need to save memory + { _id: 1, inS3: 1, lines: 1, name: 1 } + ).lean() + + // skip malformed doc entries + if (!doc?._id) { + logger.warn({ doc }, 'skipping doc with missing id') + continue + } + const id = doc._id.toString() + const docIsInS3 = !!doc.inS3 + let docLines + + if (docIsInS3) { + const docPeek = await ProjectEntityHandler.promises.getDoc( + project._id, + item._id, + { peek: true } + ) + docLines = docPeek.lines + } else { + docLines = doc.lines + } + + if (!docLines) { + throw new Error(`no doc lines for doc ${id} (inS3: ${docIsInS3})`) + } + + entities.set(id, { + path: `${root}/${item.name}`, // NOTE: not doc.name, which is "new doc", + type: 'doc', + docLines: docLines.join('\n'), + }) + } + + for (const item of iterablePaths(folder, 'fileRefs')) { + const path = `${root}/${item.name}` 
+ + // skip malformed file entries + if (!item?._id) { + logger.warn({ item }, 'skipping fileRef with missing id') + continue + } + const id = item._id.toString() + + entities.set(id, { + path, + type: 'file', + url: FilestoreHandler._buildUrl(project._id.toString(), id), + }) + } + + for (const subfolder of iterablePaths(folder, 'folders')) { + const path = `${root}/${subfolder.name}` + await processFolder(subfolder, path) + } + } + + for (const folder of project.rootFolder) { + await processFolder(folder) + } + + return entities +} + +/** + * Read docs deleted from a project, from the Doc collection, + * and add them to the entities map with the content in a docLines string. + * + * These entities have a `deleted` property set to `true` and a `deletedAt` date. + * + * @param {Map} entities + * @param {string} projectId + * @returns {Promise} + */ +async function readDeletedDocs(entities, projectId) { + // NOTE: could call DocstoreManager.promises.getAllDeletedDocs(projectId) instead + + // Look for all docs, since some deleted docs are found in track-changes manifest, + // but do not have deleted flag set for reasons that are unclear + // (we will not add docs to entities if they were previously added by processRootFolder) + const deletedDocsCursor = Doc.find( + { + project_id: ObjectId(projectId), + }, + // only read the fields we need to save memory + { _id: 1, inS3: 1, lines: 1, name: 1, deletedAt: 1 } + ) + .lean() + .cursor() + for await (const doc of deletedDocsCursor) { + // skip malformed deleted doc entries + if (!doc?._id) { + logger.warn({ doc }, 'skipping deleted doc with missing id') + continue + } + const id = doc._id.toString() + // Skip doc if we already have an entry in entities + if (!entities.has(id)) { + const docIsInS3 = !!doc.inS3 + let docLines + + if (docIsInS3) { + const docPeek = await ProjectEntityHandler.promises.getDoc( + ObjectId(projectId), + doc._id, + { peek: true } + ) + docLines = docPeek.lines + } else { + docLines = doc.lines 
+ } + + if (!docLines) { + throw new Error(`no doc lines for doc ${id} (inS3: ${docIsInS3})`) + } + + // const ts = Number( + // doc.deletedAt ? new Date(doc.deletedAt) : Date.now() + // ) + + if (doc.name && !SafePath.isCleanFilename(doc.name)) { + const newName = SafePath.clean(doc.name) + logger.warn( + { projectId, docId: id, origName: doc.name, newName }, + 'renaming invalid deleted file' + ) + doc.name = newName + } + + entities.set(id, { + // NOTE: adding the doc id to the file path to avoid collisions + path: `/_deleted/${id}/${doc.name}`, + name: doc.name || 'unnamed', // fallback for improperly deleted docs + deleted: true, + type: 'doc', + deletedAt: doc.deletedAt, + docLines: docLines.join('\n'), + }) + } + } +} + +/** + * Read files deleted from a project, from the DeletedFile collection, + * and add them to the entities map. + * + * These entities have a `deleted` property set to `true` and a `deletedAt` date. + * The url is built later, from the project id and file id. + * + * @param {Map} entities + * @param {string} projectId + * @returns {Promise} + */ +async function readDeletedFiles(entities, projectId) { + const deletedFilesCursor = DeletedFile.find( + { + projectId: ObjectId(projectId), + }, + // only read the fields we need to save memory + { _id: 1, name: 1, deletedAt: 1 } + ) + .lean() + .cursor() + + for await (const file of deletedFilesCursor) { + // skip malformed deleted file entries + if (!file?._id) { + logger.warn({ file }, 'skipping deleted file with missing id') + continue + } + const id = file._id.toString() + // TODO: check if it already exists? + if (!entities.has(id)) { + // const ts = Number( + // file.deletedAt ? new Date(file.deletedAt) : Date.now() + // ) + + // TODO: would the hash be useful here? 
+ + if (file.name && !SafePath.isCleanFilename(file.name)) { + const newName = SafePath.clean(file.name) + logger.warn( + { projectId, fileId: id, origName: file.name, newName }, + 'renaming invalid deleted file' + ) + file.name = newName + } + + entities.set(id, { + // NOTE: adding the file id to the file path to avoid collisions + path: `/_deleted/${id}/${file.name}`, + name: file.name, + deleted: true, + type: 'file', + deletedAt: file.deletedAt, + }) + } + } +} + +/** + * Iterate through the sorted array of updates, pushing each one to Redis. + * + * In batches, tell project-history to pull the updates from Redis and process them, + * so the process fails early if something can't be processed. + * + * @param {Array} updates + * @param {string} projectId + * @param {string} projectHistoryId + * @param {Map.} fileMap + * @returns {Promise} + */ +async function sendUpdatesToProjectHistory( + updates, + projectId, + projectHistoryId, + fileMap +) { + let multi = rclient.multi() + let counter = 0 + let processed = 0 + let size = 0 + + const projectHistoryKey = + settings.redis.project_history_migration.key_schema.projectHistoryOps({ + projectId, + }) + + // clear out anything in the Redis queue for this project's history + multi.del(projectHistoryKey) + + for (let update of updates) { + // read the content for each update stub from the archive + if (update.stub) { + update = await buildEditDocUpdate(projectHistoryId, update, fileMap) + } + + // non-edit doc updates need string timestamps, not numbers + if (!('op' in update)) { + update.meta.ts = new Date(update.meta.ts).toISOString() + } + + const updateJSON = JSON.stringify(update) + multi.rpush(projectHistoryKey, updateJSON) + counter++ + processed++ + size += updateJSON.length + + // flush the history after every 1000 updates and start a new transaction + if (counter === 1000) { + logger.debug( + { processed, total: updates.length }, + 'sending updates to project history' + ) + // execute the transaction + await 
util.promisify(multi.exec)() + // tell project-history to pull the updates from the Redis queue + await HistoryManager.promises.flushProject(projectId) // TODO: roll back if this fails? + counter = 0 + size = 0 + multi = rclient.multi() + } else if (size > 1024 * 1024) { + // queue entries in redis more frequently to reduce memory usage + await util.promisify(multi.exec)() + size = 0 + multi = rclient.multi() + } + } + + if (counter > 0) { + // execute the transaction + await util.promisify(multi.exec)() + // tell project-history to pull the updates from the Redis queue + await HistoryManager.promises.flushProject(projectId) // TODO: roll back if this fails? + } + + // return the queue length so we can check that it is empty + const queueLength = await rclient.llen(projectHistoryKey) + return queueLength +} + +/** + * Compare two arrays of updates, with the earliest timestamp at the end first. + * + * @param {Array} a + * @param {Array} b + * @returns {number} + */ +function earliestTimestampFirst(a, b) { + // both arrays are empty, leave them + if (!a.length && !b.length) { + return 0 + } + + // a is empty, move b before a + if (!a.length) { + return 1 + } + + // b is empty, don't move b before a + if (!b.length) { + return -1 + } + + const tsB = b[b.length - 1].meta.ts + const tsA = a[a.length - 1].meta.ts + // if the last item in b has a lower timestamp than the last item in a, move b above a + if (tsB < tsA) { + return 1 + } + if (tsB > tsA) { + return -1 + } + // use pathnames as secondary sort key, to make order deterministic for + // updates with the same timestamp + const pathnameB = b[b.length - 1].pathname + const pathnameA = a[a.length - 1].pathname + if (pathnameB < pathnameA) { + return 1 + } + if (pathnameB > pathnameA) { + return -1 + } + return 0 // shouldn't happen, because pathnames must be distinct +} + +/** + * Compare two updates, with the highest version number first + * + * @param {AnyUpdate} a + * @param {AnyUpdate} b + * @returns {number} + 
*/ +function decreasingDocVersion(a, b) { + if (b.v === a.v) { + throw new Error(`Matching version: ${b.v} ${a.v}`) + // return 0 + } + // if b.v is greater than a.v, sort b above a + return b.v > a.v ? 1 : -1 +} + +/** + * Create an array of queued updates for each doc/file, sorted by version + * + * @param {Array} updates + * @returns {Promise>} + */ +async function sortUpdatesByQueue(updates) { + // build a queue of updates for each doc/file + const queues = {} + + for (const update of updates) { + const docId = update.doc || update.file + + if (!(docId in queues)) { + queues[docId] = [] + } + + queues[docId].push(update) + } + + // convert the map to an array of queues + const values = Object.values(queues) + + for (const queue of values) { + // sort each queue in place, with each update in decreasing version order + queue.sort(decreasingDocVersion) + } + + return values +} + +/** + * Fetch all the content and updates for this project from track-changes, as a zip archive. + * + * @param {string} projectId + * @param {string} tempFilePath + * @returns + */ +async function fetchTrackChangesArchive(projectId, tempFilePath) { + const writeStream = fs.createWriteStream(tempFilePath) + + const url = `${settings.apis.trackchanges.url}/project/${projectId}/zip` + + // exposed for debugging during full-project-history migration + const timeout = + parseInt(process.env.FETCH_TRACK_CHANGES_TIMEOUT, 10) || 2 * 60 * 1000 + + try { + await util.promisify(pipeline)(request(url, { timeout }), writeStream) + } catch (err) { + logger.error({ err }, 'Error fetching track changes archive') + throw err + } + + const { size } = await fs.promises.stat(tempFilePath) + logger.info({ projectId, size }, 'fetched zip file from track-changes') +} + +/** + * Open the zip archive and build a Map of each entry in the archive, with the path as the key + * + * @param {string} filePath + * @returns {Promise>} + */ + +async function openTrackChangesArchive(filePath) { + const directory = await 
unzipper.Open.file(filePath) + return new Map(directory.files.map(file => [file.path, file])) +} + +/** + * Read the manifest data from the zip archive + * + * @param {Map} fileMap + * @returns {Promise} + */ +async function readTrackChangesManifest(fileMap) { + const manifestBuffer = await fileMap.get('manifest.json').buffer() + + return JSON.parse(manifestBuffer.toString()) +} + +/** + * Check that entities conform to the pathnames allowed by project history + * + * @param {Map} entities + * @param {string} projectId + */ +function validatePaths(entities, projectId) { + const pathErrors = [] + for (const [id, entity] of entities) { + if (!SafePath.isCleanPath(entity.path)) { + pathErrors.push( + `${entity.type}:${id}${entity.deleted ? ' (deleted)' : ''} path:${ + entity.path + }` + ) + } + } + if (pathErrors.length) { + throw new OError('Invalid path in history migration', { + projectId, + pathErrors, + }) + } +} + +/** + * Build an "add" update for an entity, with docLines or url set for the content. + * This represents a doc or file being added to a project. + * + * @param {Object} entity + * @param {string} entityId + * @param {string} projectId + * @param {string} projectHistoryId + * + * @returns {AddDocUpdate | AddFileUpdate} + */ +function buildAddUpdate(entity, entityId, projectId, projectHistoryId) { + const ts = new ObjectId(entityId).getTimestamp() + + const update = { + pathname: entity.path, + v: 0, // NOTE: only for sorting + meta: { + // source? + user_id: null, // TODO: assign the update to a system user? + ts: Number(ts), + origin: { kind: 'history-migration' }, + }, + projectHistoryId, + } + + switch (entity.type) { + case 'doc': { + return { + doc: entityId, + ...update, + docLines: entity.docLines, + } + } + + case 'file': { + // TODO: set a hash here? 
+ return { + // type: 'external', + file: entityId, + ...update, + url: FilestoreHandler._buildUrl(projectId, entityId), + } + } + + default: + throw new Error('Unknown entity type') + } +} + +/** + * Build a "delete" update for an entity, with new_pathname set to an empty string. + * This represents a doc or file being deleted from a project. + * + * @param {Object} entity + * @param {string} entityId + * @param {string} projectId + * @param {string} projectHistoryId + * @returns DeleteUpdate + */ +function buildDeleteUpdate(entity, entityId, projectId, projectHistoryId) { + const ts = entity.deletedAt || new Date() + + const update = { + pathname: entity.path, + new_pathname: '', // empty path = deletion + v: Infinity, // NOTE: only for sorting + meta: { + user_id: null, // TODO: assign this to a system user? + ts: Number(ts), + origin: { kind: 'history-migration' }, + }, + projectHistoryId, + } + + switch (entity.type) { + case 'doc': + return { + doc: entityId, + ...update, + } + + case 'file': + return { + file: entityId, + ...update, + } + + default: + throw new Error(`Unknown entity type ${entity.type}`) + } +} + +/** + * @typedef TrackedDocUpdateMeta + * @property {string} user_id + * @property {number} start_ts + */ + +/** + * @typedef TrackedDocUpdate + * @property {string} doc_id + * @property {Array} op + * @property {number} v + * @property {TrackedDocUpdateMeta} meta + */ + +/** + * Build an "edit" update, with op set to an array of operations from track-changes. + * + * This represents the contents of a doc being edited in a project. 
+ * + * @param {string} projectHistoryId + * @param {EditDocUpdateStub} updateStub + * @param {Map.} fileMap + * + * @returns {Promise} + */ +async function buildEditDocUpdate(projectHistoryId, updateStub, fileMap) { + const buffer = await fileMap.get(updateStub.path).buffer() + + /** + * @type TrackedDocUpdate + */ + const data = JSON.parse(buffer.toString()) + let userId = data.meta.user_id + if (userId === 'anonymous-user' || userId === 'null') { + userId = null + } + if (userId != null && !/^[0-9a-f]{24}$/.test(userId)) { + throw new OError('Bad user id in ShareLaTeX history edit update', { + userId, + }) + } + + return { + doc: data.doc_id, + op: data.op, // NOTE: this is an array of operations + v: data.v, + lastV: data.v - 1, + meta: { + user_id: userId, + ts: data.meta.start_ts, // TODO: use data.meta.end_ts or update.ts? + pathname: updateStub.pathname, + doc_length: updateStub.doc_length, + origin: { kind: 'history-migration' }, + }, + projectHistoryId, + } +} + +/** + * Build a stub for an "edit" update, with all the metadata but not the actual operations. + * + * This represents a doc being edited in a project, with enough information for sorting, + * but avoids loading the actual operations from the zip archive until they're needed, + * so as not to run out of memory if the project's history is large. + * + * @param {ManifestUpdate} update + * @param {Entity} entity + * @param {string} docId + * @returns {EditDocUpdateStub} + */ +function buildEditUpdateStub(update, entity, docId) { + return { + stub: true, + doc: docId, + v: update.version, + path: update.path, + pathname: entity.path, + doc_length: update.doc_length, + meta: { + ts: update.ts, + origin: { kind: 'history-migration' }, + }, + } +} + +/** + * Build the sorted array of updates to be sent to project-history. + * + * 1. Process all the added and edited files from the track-changes archive. + * 2. 
Process the other files from the project that have been added, and maybe deleted, without any edits. + * + * @param {string} projectId + * @param {string} projectHistoryId + * @param {Manifest} manifest + * @param {Map.} entities + * @param {Map.} fileMap + * @returns {Promise>} + */ +async function buildUpdates( + projectId, + projectHistoryId, + manifest, + entities, + fileMap +) { + /** + * @type Array + */ + const updates = [] + + // keep a list of doc ids which have updates in track-changes + const updatedDocs = new Set() + + // process the existing docs with updates, from track-changes + for (const doc of manifest.docs) { + const entity = entities.get(doc.id) + + if (!entity) { + throw new Error(`Entity not found for ${doc.id}`) + } + + if (!entity.path) { + throw new Error(`Path not found for ${doc.id}`) + } + + // add the initial content + const contentStart = doc.content.start + + const buffer = await fileMap.get(contentStart.path).buffer() + + /** + * @type AddDocUpdate + */ + const update = { + doc: doc.id, + pathname: entity.path, + v: contentStart.version - 1, + meta: { + user_id: null, // TODO: assign this to a system user? 
+ ts: Number(ObjectId(doc.id).getTimestamp()), + origin: { kind: 'history-migration' }, + }, + projectHistoryId, + docLines: buffer.toString(), + } + + updates.push(update) + + // push the update onto the array of updates + for (const update of doc.updates) { + updates.push(buildEditUpdateStub(update, entity, doc.id)) + } + + updatedDocs.add(doc.id) + } + + // process the docs which have been added/deleted without any updates being recorded + for (const [id, entity] of entities.entries()) { + if (entity.deleted) { + // deleted entity + + // add the doc/file + if (!updatedDocs.has(id)) { + updates.push(buildAddUpdate(entity, id, projectId, projectHistoryId)) + } + + // delete the doc/file again (there may be updates added between adding and deleting) + updates.push(buildDeleteUpdate(entity, id, projectId, projectHistoryId)) + } else { + if (!updatedDocs.has(id)) { + // add "not deleted" doc that isn't in the manifest either + updates.push(buildAddUpdate(entity, id, projectId, projectHistoryId)) + } + } + } + + return updates +} + +/** + * Remove the `overleaf.history` object from the project and tell project-history to delete everything for this project. + * (note: project-history may not delete the actual history data yet, but it will at least delete the cached history id) + * + * @param {string} projectId + * @returns {Promise} + */ +async function deleteProjectHistory(projectId) { + await HistoryManager.promises.deleteProjectHistory(projectId) + // TODO: send a message to document-updater? 
+ await ProjectHistoryHandler.unsetHistory(projectId) +} + +/** + * Send the updates from the track changes zip file to project history + * + * @param {string} projectId + * @param {string} projectHistoryId + * @param {Array} updates + * @param {Map.} fileMap + */ +async function migrateTrackChangesUpdates( + projectId, + projectHistoryId, + updates, + fileMap +) { + // Build a queue for each doc, sorted by version (and by timestamp within each version) + const queues = await sortUpdatesByQueue(updates) + + const sortedUpdates = [] + + let item + do { + // Find the earliest item from the tail of all queues + queues.sort(earliestTimestampFirst) + item = queues[0].pop() + if (item) { + sortedUpdates.push(item) + } + } while (item) + + // NOTE: leaving the version string code commented out, in case it ends up being needed + // let majorVersion = 0 + // let minorVersion = 0 + for (const update of sortedUpdates) { + // increment majorVersion if this is a file change + if (!('op' in update)) { + // remove v (only used for sorting) + delete update.v + + // set version + // majorVersion++ + // // minorVersion = 0 + // update.version = `${majorVersion}.${minorVersion}` // NOTE: not set as project-history doesn't need it and could cause problems if it gets higher than project.version + } + // increment minorVersion after every update + // minorVersion++ + } + + // add each update to the Redis queue for project-history to process + logger.debug( + { projectId, projectHistoryId }, + 'Sending updates for project to Redis' + ) + + const remainingQueueLength = await sendUpdatesToProjectHistory( + sortedUpdates, + projectId, + projectHistoryId, + fileMap + ) + // Failure will cause queued updates to be deleted (in the catch below) + + logger.debug( + { + projectId, + projectHistoryId, + remainingQueueLength, + }, + 'Updates sent to project-history' + ) + + if (remainingQueueLength > 0) { + throw new Error('flush to project-history did not complete') + } + + // TODO: roll back if 
any of the following fail? + + // TODO: check that the Redis queue is empty? + + // Clear any old entries in the main project history queue (these will not + // have a history id) + await HistoryManager.promises.flushProject(projectId) +} + +/** + * Add the zip file from track changes to the project file tree. + * We may be able to recover a failed history from the zip file in future. + * + * @param {string} projectId + * @param {string} rootFolderId + * @param {string} tempFilePath + */ + +async function uploadTrackChangesArchiveToProject( + projectId, + rootFolderId, + tempFilePath +) { + const { size } = await fs.promises.stat(tempFilePath) + if (size > settings.maxUploadSize) { + throw new FileTooLargeError({ + message: 'track-changes archive exceeds maximum size for archiving', + info: { size }, + }) + } + const { fileRef } = await ProjectEntityUpdateHandler.promises.addFile( + projectId, + rootFolderId, // project.rootFolder[0]._id, + `OverleafHistory-${new Date().toISOString().substring(0, 10)}.zip`, + tempFilePath, + null, + null, // no owner + null // no source + ) + logger.debug( + { projectId, fileRef }, + 'Uploaded track-changes zip archive to project due to error in migration' + ) +} + +/** + * Check all updates for invalid characters (nonBMP or null) and substitute + * the unicode replacement character if options.fixInvalidCharacters is true, + * otherwise throw an exception. + * @param {Array} updates + * @param {string} projectId + * @param {Object} options + */ +function validateUpdates(updates, projectId, options) { + const replace = options.fixInvalidCharacters + // check for invalid characters + function containsBadChars(str) { + return /[\uD800-\uDBFF]/.test(str) || str.indexOf('\x00') !== -1 + } + // Replace invalid characters so that they will be accepted by history_v1. 
+ function sanitise(str) { + if (replace) { + return str.replace(/[\uD800-\uDFFF]/g, '\uFFFD').replace('\x00', '\uFFFD') + } else { + throw new Error('invalid character in content') + } + } + // Check size of doclines in update against max size allowed by history_v1. + // This catches docs which are too large when created, but not when they + // go over the limit due to edits. + function checkSize(update) { + if (update?.docLines?.length > settings.max_doc_length) { + throw new FileTooLargeError({ + message: 'docLines exceeds maximum size for history', + info: { docId: update.doc, size: update.docLines.length }, + }) + } + } + let latestTimestamp = 0 + // Iterate over all the updates and their doclines or ops + for (const update of updates) { + checkSize(update) + // Find the timestamp of the most recent edit (either adding a doc or editing a doc) + // we exclude deletions as these are created in the migration and we didn't record + // the deletion time for older files. + const isDeleteUpdate = update.new_pathname === '' + if ( + update.doc && + !isDeleteUpdate && + update.meta.ts && + update.meta.ts > latestTimestamp + ) { + latestTimestamp = update.meta.ts + } + if (update.docLines && containsBadChars(update.docLines)) { + logger.debug({ update, replace }, 'invalid character in docLines') + update.docLines = sanitise(update.docLines) + } + if (update.op) { + for (const op of update.op) { + if (op.i && containsBadChars(op.i)) { + logger.debug({ update, replace }, 'invalid character in insert op') + op.i = sanitise(op.i) + } + if (op.d && containsBadChars(op.d)) { + logger.debug({ update, replace }, 'invalid character in delete op') + op.d = sanitise(op.d) + } + } + } + } + logger.debug( + { projectId, latestTimestamp, date: new Date(latestTimestamp) }, + 'timestamp of most recent edit' + ) + if (options.cutoffDate && new Date(latestTimestamp) > options.cutoffDate) { + throw new Error('project was edited after cutoff date') + } +} + +/** + * Migrate a project's 
history from track-changes to project-history + * + * Obtains the track-changes zip archive (fetched, or copied from + * `options.importZipFilePath`) into a temporary directory, builds and validates the + * updates for the new history, sets the new history id on the project, asks + * document-updater and project-history to flush/resync, and finally switches on the + * full project history display. If a step after the new history has been initialised + * fails, deleteProjectHistory(projectId) is called before the original error is + * rethrown. The temporary directory is removed in all cases. + * + * @param {string} projectId + * @param {Object} [options] - also passed on to validateUpdates + * @param {string} [options.importZipFilePath] - existing track-changes zip archive + * on disk to use instead of fetching one + * @param {boolean} [options.archiveOnFailure] - if building or sending the updates + * fails, upload the zip archive into the project and continue with an empty + * history instead of failing the migration + * @param {boolean} [options.forceNewHistoryOnFailure] - carry on if the flush/resync + * step fails instead of failing the migration + * + * @returns {Promise<void>} rejects if the migration fails + */ +async function migrateProjectHistory(projectId, options = {}) { + await fse.ensureDir(settings.path.projectHistories) + const projectHistoriesDir = await fs.promises.realpath( + settings.path.projectHistories + ) + const tempDir = await fs.promises.mkdtemp(projectHistoriesDir + path.sep) + const tempFilePath = path.join(tempDir, 'project.zip') + + try { + // fetch the zip archive of rewound content and updates from track-changes + // store the zip archive to disk, open it and build a Map of the entries + if (options.importZipFilePath) { + // use an existing track-changes archive on disk + logger.debug( + { src: options.importZipFilePath, dst: tempFilePath }, + 'importing zip file' + ) + await fs.promises.copyFile(options.importZipFilePath, tempFilePath) + const { size } = await fs.promises.stat(tempFilePath) + logger.info({ projectId, size }, 'imported zip file from disk') + } else { + await fetchTrackChangesArchive(projectId, tempFilePath) + } + const fileMap = await openTrackChangesArchive(tempFilePath) + + // read the manifest from the zip archive + const manifest = await readTrackChangesManifest(fileMap) + + // check that the project id in the manifest matches + // to be sure we are using the correct zip file + if (manifest.projectId !== projectId) { + throw new Error(`Incorrect projectId: ${manifest.projectId}`) + } + + // load the Project from MongoDB + const project = await ProjectGetter.promises.getProject(projectId) + + // look up any history id already set on this project + const oldProjectHistoryId = _.get(project, 'overleaf.history.id') + + // throw an error if there is already a history associated with the project + if (oldProjectHistoryId) { + throw new Error( + `Project ${projectId} already has history ${oldProjectHistoryId}` + ) + } + + try { + // initialize a new project history and use the history id + // NOTE: not setting the history id on the
project yet + const projectHistoryId = await HistoryManager.promises.initializeProject( + projectId + ) + + try { + // build a Map of the entities (docs and fileRefs) currently in the project, + // with _id as the key + const entities = await processRootFolder(project) + + // find all the deleted docs for this project and add them to the entity map + await readDeletedDocs(entities, projectId) + + // find all the deleted files for this project and add them to the entity map + await readDeletedFiles(entities, projectId) + + // check that the paths will not be rejected + validatePaths(entities, projectId) + + // build the array of updates that make up the new history for this project + const updates = await buildUpdates( + projectId, + projectHistoryId, + manifest, + entities, + fileMap + ) + + // check that the updates don't contain any characters that will be rejected by history_v1. + validateUpdates(updates, projectId, options) + + if (updates.length) { + await migrateTrackChangesUpdates( + projectId, + projectHistoryId, + updates, + fileMap + ) + } + } catch (error) { + if (options?.archiveOnFailure) { + // on error, optionally store the zip file in the project for future reference + logger.debug( + { projectId, error }, + 'Error sending track-changes updates to project history, attempting to archive zip file in project' + ) + try { + await uploadTrackChangesArchiveToProject( + projectId, + project.rootFolder[0]._id, + tempFilePath + ) + } catch (error) { + if (error instanceof InvalidNameError) { + logger.info({ projectId }, 'zip file already archived in project') + } else { + throw error + } + } finally { + // roll back the last updated timestamp and user + logger.debug( + { projectId }, + 'rolling back last updated time after uploading zip file' + ) + await ProjectUpdateHandler.promises.resetUpdated( + projectId, + project.lastUpdated, + project.lastUpdatedBy + ) + } + // set the overleaf.history.zipFileArchivedInProject flag for future reference + await
ProjectHistoryHandler.promises.setMigrationArchiveFlag( + projectId + ) + // we consider archiving the zip file as "success" (at least we've given up on attempting + // to migrate the history) so we don't rethrow the error and continue to initialise the new + // empty history below. + } else { + // if we're not archiving the zip file then we rethrow the error to fail the migration + throw error + } + } + + // set the project's history id once the updates have been successfully processed + // (or we have given up and archived the zip file in the project). + logger.debug( + { projectId, projectHistoryId }, + 'Setting history id on project' + ) + await ProjectHistoryHandler.promises.setHistoryId( + projectId, + projectHistoryId + ) + + try { + // tell document updater to reload docs with the new history id + logger.debug({ projectId }, 'Asking document-updater to clear project') + await DocumentUpdaterHandler.promises.flushProjectToMongoAndDelete( + projectId + ) + + // run a project history resync in case any changes have arrived since the migration + logger.debug( + { projectId }, + 'Asking project-history to force resync project' + ) + + await HistoryManager.promises.resyncProject(projectId, { + force: true, + origin: { kind: 'history-migration' }, + }) + } catch (error) { + if (options.forceNewHistoryOnFailure) { + // include the error so the reason for the failed resync is not lost + logger.warn( + { projectId, error }, + 'failed to resync project, forcing new history' + ) + } else { + throw error + } + } + // set the display to v2 history + logger.debug( + { projectId }, + 'Switching on full project history display for project' + ) + await ProjectHistoryHandler.promises.upgradeHistory(projectId, true) + } catch (error) { + // delete the history id again if something failed?
+ logger.warn( + OError.tag( + error, + 'Something went wrong flushing and resyncing project; clearing full project history for project', + { projectId } + ) + ) + // best-effort rollback: a failure while clearing the history must not replace + // the original migration error, so log it and rethrow the original below + try { + await deleteProjectHistory(projectId) + } catch (rollbackError) { + logger.warn( + OError.tag( + rollbackError, + 'failed to clear project history after failed migration', + { projectId } + ) + ) + } + + throw error + } + } finally { + // clean up the temporary directory + await fse.remove(tempDir) + } +} diff --git a/services/web/modules/history-migration/test/unit/src/ProjectHistoryControllerTests.js b/services/web/modules/history-migration/test/unit/src/ProjectHistoryControllerTests.js new file mode 100644 index 0000000000..52d1bf9f42 --- /dev/null +++ b/services/web/modules/history-migration/test/unit/src/ProjectHistoryControllerTests.js @@ -0,0 +1,346 @@ +const sinon = require('sinon') +const nock = require('nock') +const { expect } = require('chai') +const fs = require('fs') +const path = require('path') +const SandboxedModule = require('sandboxed-module') +const { ObjectId } = require('mongodb') +const unzipper = require('unzipper') + +const modulePath = '../../../app/src/ProjectHistoryController' + +describe('ProjectHistoryController', function () { + const projectId = ObjectId('611bd20c5d76a3c1bd0c7c13') + const deletedFileId = ObjectId('60f6e92c6c14d84fb7a71ae1') + const historyId = 123 + + let clock + const now = new Date(Date.UTC(2021, 1, 1, 0, 0)).getTime() + + before(async function () { + clock = sinon.useFakeTimers({ + now, + shouldAdvanceTime: true, + }) + }) + + after(function () { + // clock.runAll() + clock.restore() + }) + + beforeEach(function () { + this.db = { + users: { + countDocuments: sinon.stub().yields(), + }, + } + + this.project = { + _id: ObjectId('611bd20c5d76a3c1bd0c7c13'), + name: 'My Test Project', + rootDoc_id: ObjectId('611bd20c5d76a3c1bd0c7c15'), + rootFolder: [ + { + _id: ObjectId('611bd20c5d76a3c1bd0c7c12'), + name: 'rootFolder', + folders: [ + { + _id: ObjectId('611bd242e64281c13303d6b5'), + name: 'a folder', + folders: [ + { + _id: ObjectId('611bd247e64281c13303d6b7'), + name: 'a subfolder', + folders: [], + fileRefs:
[], + docs: [ + { + _id: ObjectId('611bd24ee64281c13303d6b9'), + name: 'a renamed file in a subfolder.tex', + }, + ], + }, + ], + fileRefs: [], + docs: [], + }, + { + _id: ObjectId('611bd34ee64281c13303d6be'), + name: 'images', + folders: [], + fileRefs: [ + { + _id: ObjectId('611bd2bce64281c13303d6bb'), + name: 'overleaf-white.svg', + linkedFileData: { + provider: 'url', + url: 'https://cdn.overleaf.com/img/ol-brand/overleaf-white.svg', + }, + created: '2021-08-17T15:16:12.753Z', + }, + ], + docs: [], + }, + ], + fileRefs: [ + { + _id: ObjectId('611bd20c5d76a3c1bd0c7c19'), + name: 'universe.jpg', + linkedFileData: null, + created: '2021-08-17T15:13:16.400Z', + }, + ], + docs: [ + { + _id: ObjectId('611bd20c5d76a3c1bd0c7c15'), + name: 'main.tex', + }, + { + _id: ObjectId('611bd20c5d76a3c1bd0c7c17'), + name: 'references.bib', + }, + ], + }, + ], + compiler: 'pdflatex', + description: '', + deletedDocs: [], + members: [], + invites: [], + owner: { + _id: ObjectId('611572e24bff88527f61dccd'), + first_name: 'Test', + last_name: 'User', + email: 'test@example.com', + privileges: 'owner', + signUpDate: '2021-08-12T19:13:38.462Z', + }, + features: {}, + } + + this.multi = { + del: sinon.stub(), + rpush: sinon.stub(), + exec: sinon.stub().yields(null, 1), + } + + const { docs, folders } = this.project.rootFolder[0] + + const allDocs = [...docs] + + const processFolders = folders => { + for (const folder of folders) { + for (const doc of folder.docs) { + allDocs.push(doc) + } + + if (folder.folders) { + processFolders(folder.folders) + } + } + } + + processFolders(folders) + + allDocs.forEach(doc => { + doc.lines = [`this is the contents of ${doc.name}`] + }) + + // handle Doc.find().lean().cursor() + this.findDocs = sinon.stub().returns({ + lean: sinon.stub().returns({ + cursor: sinon.stub().returns(allDocs), + }), + }) + + // handle await Doc.findOne().lean() - single result, no cursor required + this.findOneDoc = sinon.stub().callsFake(id => { + const result = 
allDocs.find(doc => { + return doc._id.toString() === id.toString() + }) + return { lean: sinon.stub().resolves(result) } + }) + + this.deletedFiles = [ + { + _id: deletedFileId, + name: 'testing.tex', + deletedAt: new Date(), + }, + ] + + // handle DeletedFile.find().lean().cursor() + this.findDeletedFiles = sinon.stub().returns({ + lean: sinon + .stub() + .returns({ cursor: sinon.stub().returns(this.deletedFiles) }), + }) + + this.ProjectGetter = { + promises: { + getProject: sinon.stub().resolves(this.project), + }, + } + + this.FileStoreHandler = { + _buildUrl: (projectId, fileId) => + `http://filestore.test/${projectId}/${fileId}`, + } + + this.ProjectHistoryHandler = { + promises: { + setHistoryId: sinon.stub(), + upgradeHistory: sinon.stub(), + }, + } + + // stub for the ProjectUpdateHandler entry in `requires` below, so that entry + // is an explicit stub rather than `undefined` + this.ProjectUpdateHandler = { + promises: { + resetUpdated: sinon.stub(), + }, + } + + this.ProjectEntityUpdateHandler = { + promises: { + resyncProjectHistory: sinon.stub(), + }, + } + + this.DocumentUpdaterHandler = { + promises: { + flushProjectToMongoAndDelete: sinon.stub(), + }, + } + + this.HistoryManager = { + promises: { + resyncProject: sinon.stub(), + flushProject: sinon.stub(), + initializeProject: sinon.stub().resolves(historyId), + }, + } + + this.settings = { + redis: { + project_history_migration: { + key_schema: { + projectHistoryOps({ projectId }) { + return `ProjectHistory:Ops:{${projectId}}` // NOTE: the extra braces are intentional + }, + }, + }, + }, + apis: { + documentupdater: { + url: 'http://document-updater', + }, + trackchanges: { + url: 'http://track-changes', + }, + project_history: { + url: 'http://project-history', + }, + }, + path: { + projectHistories: 'data/projectHistories', + }, + } + + this.ProjectHistoryController = SandboxedModule.require(modulePath, { + requires: { + '../../../../app/src/Features/Project/ProjectGetter': + this.ProjectGetter, + '../../../../app/src/Features/FileStore/FileStoreHandler': + this.FileStoreHandler, + '../../../../app/src/Features/Project/ProjectHistoryHandler': + this.ProjectHistoryHandler, +
'../../../../app/src/Features/Project/ProjectUpdateHandler': + this.ProjectUpdateHandler, + '../../../../app/src/Features/Project/ProjectEntityUpdateHandler': + this.ProjectEntityUpdateHandler, + '../../../../app/src/Features/History/HistoryManager': + this.HistoryManager, + '../../../../app/src/Features/DocumentUpdater/DocumentUpdaterHandler': + this.DocumentUpdaterHandler, + '../../../../app/src/models/Doc': { + Doc: { + find: this.findDocs, + findOne: this.findOneDoc, + }, + }, + '../../../../app/src/models/DeletedFile': { + DeletedFile: { + find: this.findDeletedFiles, + }, + }, + '../../../../app/src/infrastructure/mongodb': { + db: this.db, + }, + '../../../../app/src/infrastructure/Mongoose': { + Schema: { + ObjectId: sinon.stub(), + Types: { + Mixed: sinon.stub(), + }, + }, + }, + '../../../../app/src/infrastructure/RedisWrapper': { + client: () => ({ + multi: () => this.multi, + llen: sinon.stub().resolves(0), + }), + }, + unzipper: { + Open: { + file: () => + unzipper.Open.file( + path.join(__dirname, 'data/track-changes-project.zip') + ), + }, + }, + '@overleaf/settings': this.settings, + }, + }) + }) + + afterEach(function () { + nock.cleanAll() + }) + + it('migrates a project history', async function () { + const readStream = fs.createReadStream( + path.join(__dirname, 'data/track-changes-project.zip') + ) + + nock(this.settings.apis.trackchanges.url) + .get(`/project/${projectId}/zip`) + .reply(200, readStream) + + nock(this.settings.apis.project_history.url) + .post(`/project`) + .reply(200, { project: { id: historyId } }) + + // the second parameter is an options object; rely on its `{}` default + await this.ProjectHistoryController.migrateProjectHistory( + projectId.toString() + ) + + expect(this.multi.exec).to.have.been.calledOnce + expect(this.ProjectHistoryHandler.promises.setHistoryId).to.have.been + .calledOnce + // expect(this.ProjectEntityUpdateHandler.promises.resyncProjectHistory).to + // .have.been.calledOnce + expect(this.HistoryManager.promises.flushProject).to.have.been.calledTwice +
expect(this.multi.rpush).to.have.callCount(12) + + const args = this.multi.rpush.args + + const snapshotPath = path.join( + __dirname, + 'data/migrate-project-history.snapshot.json' + ) + + // const snapshot = JSON.stringify(args, null, 2) + // await fs.promises.writeFile(snapshotPath, snapshot) + + const json = await fs.promises.readFile(snapshotPath, 'utf-8') + const expected = JSON.parse(json) + + expect(args).to.deep.equal(expected) + }) +}) diff --git a/services/web/modules/history-migration/test/unit/src/data/migrate-project-history.snapshot.json b/services/web/modules/history-migration/test/unit/src/data/migrate-project-history.snapshot.json new file mode 100644 index 0000000000..a2b37de5ae --- /dev/null +++ b/services/web/modules/history-migration/test/unit/src/data/migrate-project-history.snapshot.json @@ -0,0 +1,50 @@ +[ + [ + "ProjectHistory:Ops:{611bd20c5d76a3c1bd0c7c13}", + "{\"file\":\"60f6e92c6c14d84fb7a71ae1\",\"pathname\":\"/_deleted/60f6e92c6c14d84fb7a71ae1/testing.tex\",\"meta\":{\"user_id\":null,\"ts\":\"2021-07-20T15:18:04.000Z\",\"origin\":{\"kind\":\"history-migration\"}},\"projectHistoryId\":123,\"url\":\"http://filestore.test/611bd20c5d76a3c1bd0c7c13/60f6e92c6c14d84fb7a71ae1\"}" + ], + [ + "ProjectHistory:Ops:{611bd20c5d76a3c1bd0c7c13}", + "{\"file\":\"60f6e92c6c14d84fb7a71ae1\",\"pathname\":\"/_deleted/60f6e92c6c14d84fb7a71ae1/testing.tex\",\"new_pathname\":\"\",\"meta\":{\"user_id\":null,\"ts\":\"2021-02-01T00:00:00.000Z\",\"origin\":{\"kind\":\"history-migration\"}},\"projectHistoryId\":123}" + ], + [ + "ProjectHistory:Ops:{611bd20c5d76a3c1bd0c7c13}", + "{\"doc\":\"611bd20c5d76a3c1bd0c7c15\",\"pathname\":\"/main.tex\",\"meta\":{\"user_id\":null,\"ts\":\"2021-08-17T15:13:16.000Z\",\"origin\":{\"kind\":\"history-migration\"}},\"projectHistoryId\":123,\"docLines\":\"\\\\documentclass{article}\\n\\\\usepackage[utf8]{inputenc}\\n\\n\\\\title{My Test Project}\\n\\\\author{alf.eaton+dev }\\n\\\\date{7 
2021}\\n\\n\\\\usepackage{natbib}\\n\\\\usepackage{graphicx}\\n\\n\\\\begin{document}\\n\\n\\\\maketitle\\n\\n\\\\section{Introduction}\\nThere is a theory which states that if ever anyone discovers exactly what the Universe is for and why it is here, it will instantly disappear and be replaced by something even more bizarre and inexplicable.\\nThere is another theory which states that this has already happened.\\n\\n\\\\begin{figure}[h!]\\n\\\\centering\\n\\\\includegraphics[scale=1.7]{universe}\\n\\\\caption{The Universe}\\n\\\\label{fig:universe}\\n\\\\end{figure}\\n\\n\\\\section{Conclusion}\\n``I always thought something was fundamentally wrong with the universe'' \\\\citep{adams1995hitchhiker}\\n\\n\\\\bibliographystyle{plain}\\n\\\\bibliography{references}\\n\\\\end{document}\\n\"}" + ], + [ + "ProjectHistory:Ops:{611bd20c5d76a3c1bd0c7c13}", + "{\"doc\":\"611bd20c5d76a3c1bd0c7c17\",\"pathname\":\"/references.bib\",\"meta\":{\"user_id\":null,\"ts\":\"2021-08-17T15:13:16.000Z\",\"origin\":{\"kind\":\"history-migration\"}},\"projectHistoryId\":123,\"docLines\":\"this is the contents of references.bib\"}" + ], + [ + "ProjectHistory:Ops:{611bd20c5d76a3c1bd0c7c13}", + "{\"file\":\"611bd20c5d76a3c1bd0c7c19\",\"pathname\":\"/universe.jpg\",\"meta\":{\"user_id\":null,\"ts\":\"2021-08-17T15:13:16.000Z\",\"origin\":{\"kind\":\"history-migration\"}},\"projectHistoryId\":123,\"url\":\"http://filestore.test/611bd20c5d76a3c1bd0c7c13/611bd20c5d76a3c1bd0c7c19\"}" + ], + [ + "ProjectHistory:Ops:{611bd20c5d76a3c1bd0c7c13}", + "{\"doc\":\"611bd20c5d76a3c1bd0c7c15\",\"op\":[{\"p\":487,\"i\":\"\\n\\nAdding some text here.\"}],\"v\":1,\"lastV\":0,\"meta\":{\"user_id\":\"611572e24bff88527f61dccd\",\"ts\":1629213228148,\"pathname\":\"/main.tex\",\"doc_length\":805,\"origin\":{\"kind\":\"history-migration\"}},\"projectHistoryId\":123}" + ], + [ + "ProjectHistory:Ops:{611bd20c5d76a3c1bd0c7c13}", + "{\"doc\":\"611bd20c5d76a3c1bd0c7c15\",\"op\":[{\"p\":678,\"d\":\" 
something\"}],\"v\":2,\"lastV\":1,\"meta\":{\"user_id\":\"611572e24bff88527f61dccd\",\"ts\":1629213235181,\"pathname\":\"/main.tex\",\"doc_length\":829,\"origin\":{\"kind\":\"history-migration\"}},\"projectHistoryId\":123}" + ], + [ + "ProjectHistory:Ops:{611bd20c5d76a3c1bd0c7c13}", + "{\"doc\":\"611bd20c5d76a3c1bd0c7c15\",\"op\":[{\"d\":\" \",\"p\":722},{\"i\":\"\\n\",\"p\":722}],\"v\":3,\"lastV\":2,\"meta\":{\"user_id\":\"611572e24bff88527f61dccd\",\"ts\":1629213239472,\"pathname\":\"/main.tex\",\"doc_length\":819,\"origin\":{\"kind\":\"history-migration\"}},\"projectHistoryId\":123}" + ], + [ + "ProjectHistory:Ops:{611bd20c5d76a3c1bd0c7c13}", + "{\"doc\":\"611bd20c5d76a3c1bd0c7c15\",\"op\":[{\"p\":750,\"i\":\"\\n\\nAdding some text after deleting some text.\"}],\"v\":7,\"lastV\":6,\"meta\":{\"user_id\":\"611572e24bff88527f61dccd\",\"ts\":1629213241498,\"pathname\":\"/main.tex\",\"doc_length\":819,\"origin\":{\"kind\":\"history-migration\"}},\"projectHistoryId\":123}" + ], + [ + "ProjectHistory:Ops:{611bd20c5d76a3c1bd0c7c13}", + "{\"doc\":\"611bd24ee64281c13303d6b9\",\"pathname\":\"/a folder/a subfolder/a renamed file in a subfolder.tex\",\"meta\":{\"user_id\":null,\"ts\":\"2021-08-17T15:14:22.000Z\",\"origin\":{\"kind\":\"history-migration\"}},\"projectHistoryId\":123,\"docLines\":\"\"}" + ], + [ + "ProjectHistory:Ops:{611bd20c5d76a3c1bd0c7c13}", + "{\"doc\":\"611bd24ee64281c13303d6b9\",\"op\":[{\"p\":0,\"i\":\"Adding some content to the file in the subfolder.\"}],\"v\":2,\"lastV\":1,\"meta\":{\"user_id\":\"611572e24bff88527f61dccd\",\"ts\":1629213266076,\"pathname\":\"/a folder/a subfolder/a renamed file in a subfolder.tex\",\"doc_length\":0,\"origin\":{\"kind\":\"history-migration\"}},\"projectHistoryId\":123}" + ], + [ + "ProjectHistory:Ops:{611bd20c5d76a3c1bd0c7c13}", + 
"{\"file\":\"611bd2bce64281c13303d6bb\",\"pathname\":\"/images/overleaf-white.svg\",\"meta\":{\"user_id\":null,\"ts\":\"2021-08-17T15:16:12.000Z\",\"origin\":{\"kind\":\"history-migration\"}},\"projectHistoryId\":123,\"url\":\"http://filestore.test/611bd20c5d76a3c1bd0c7c13/611bd2bce64281c13303d6bb\"}" + ] +] diff --git a/services/web/modules/history-migration/test/unit/src/data/track-changes-project.zip b/services/web/modules/history-migration/test/unit/src/data/track-changes-project.zip new file mode 100644 index 0000000000000000000000000000000000000000..4767f19310b94a2ad87f61ea9eeef79f3d4872a1 GIT binary patch literal 3527 zcma);c|4SB8^=eCWoV8Z6$d#S*+vY**h@*4Awwjbnv9)h3WI3qAX1i)CA*2na5(m| zglMD**%CFjY&A(zgUIqedf(2d*Xi^;pZoqi&p+S${@wR={jLj*V&@P9fw;Lret|oV zmAv2Fzg_ZGzbJjDl221RN#(kPU=Vpgd^|+cT{&&R#S33izhha2};g*CnXJE zOYO6CIREfGY(P$*Hy4V|lj|ZqOeE=X`Im2>IGja|V&v*&VgJ0ee&Jf-kcOT%gHST- zHCXM|s#`t)(%fxy2sv8tELxbWRQr{euE}D6edJ|}xS0ZXG3G9G`o($k#Si%W&PRc zj-pFiH&1HegH`&P@0LdSHoJSNNU0qo?SJx7PeKUWGOG0#uY(Z(8uN~u0F+I^Lnl>q zQ8uf(M0DUyZV!&JTWa;GA65q%qEN}U+s0c%p2WOpQF}}{#4qB~sB}ycd#=4|hx$wT z)8WWa!_-uzHLXF51JXLht97`FG7mLr+*u?Q84c}}p-rEW810`N!`nZoP!81Ly-&Q{ zmxM`!c3~-hHvT>@S;zal4=j!aKl-qmm9@vZiH$4|i>Zy9qx=i2L9xV=6uN5eC_+;n+jJ|d5A z#LA@kXuVU^lEUnK%^D~A?Nj5b8B=$Hi+5&9__m)2(e=JH&7Lo6 z81CY}cJ^lnCUE^b!_^vm(y&rzAhW9(+hXP}-*)P8qt(T=4CQqj;z;n0@yy(nNtH7g zmc)8Mq9v*Uautv^^rI3HfJA8MhD5B(5P0?0KBl|}_Xc?wI zXFPp+EH6-~m0UJqm>+Ii3cJf69jdTm%tg9de7;{q5nbkryJk4;9hI@>$N5QwrKAo} zGJWHaToj<9_eYhe1jW=to8kOn7;wPjuDS&A_wwAYnl~=R(nT!Km@eh_QnM%#xo!g+NR+yY4)m`O7_1h)(b`TQ zwW)l$UDMN(!=>_7GG22>M_X0oT9d(;zdT;_;~%S%??zQ~hTcVsE~mafhjp%WO%zf<6nHi5$!mc8_;;v@o%D7<@{YC(a);Z&XOwF05;%n9?IG zlU{Txtg!-h(SPT{Pb_g8)@w4`XHEmGcQ0VQvVg#UwO${BgEv7*+1IixmJa6|THX1Y zpdXc^nNxi#dwo4*&3}lF^Y&k;WyV5}2`F{4+U|x!d)|Hz^4Xuk$YTfeL|2E$m z_x$MJ26XwVGxOH(!l>g?4;Kbii{m&HMqDeJ6s8QXHg;V_Hc9O#ZzEwOj3ySLnCK}n z?qn{X^||1abd2k6P6p*dm`wynuFQqPTVhb^q)E98w6qnB880dotkEgUdqtk~#v}Lq z{L3TG==0I;NJFugX1i|1t8rl|>K{XCpw9SijYr7Wx*|FuNoe~@6MBa^dAXGDjpNL@ 
zWet&pv;wra2QiPIuMB$**B=6p<-Rzrt;*91+NG9d7Rx1Qu44r~1GQlIF+~!xsxpMk z9e?FLd9SZSOEd|+yl$xlnD24Gd|f`9@9MGgdB!^{44B;U&D$*3H)bkQ| zoB!Q(;Cv~p*8rqOYp#A(U(;>S%cjNW%S=hPbMiN+^LlMxq!%#9SXwep-Q(~Vb6oX& zjl~qjllikYr9qz(n|I=aRPfv+#Mj@|4i)Oiw(s$DYpZz{HPHNmY1{6bis;{w=@sqE zUVS{4E9y#n9ZIw+T8I+s%ZxS9*Wp0qzZ0nhgn=?JSe01S*%_&-qM_`lj6lE63zMDoRu-r#ruxx#cCBN9Zigiqz$9*8ZE*iAkap<0*bt{fon3VdK*dC4&(gVqo1NHk;tngGi zPiq}DlOA1~dpp3h|M-qdgllfG*U^fKYs00&$%$ll)%}`g z$^y_UyF=r11TJ=mx8HliiC|EQt&<16BdCrrj3^y*v?cy-I_{0W!HAdWCQI4KIzTFD z+r|VutgPEY^OuI_zgefkzUk~XILxqfYl=Yjig1EJ+W{8N0q^eO>_brW@HvaGPi(@6 z!TBO)b-<4T1iS9to13&Z(42bF1Qo9Jsx1Ue+Pe=2JfV z+~0L2cW-)z(I3UmPZE#Q(nN@)Bf_H-4%}764CU86odLnz=2rd%?J7w1uJcRh7H8Yj_&m`omarhvPN3HVK@pVqGLNw(@eHtV9-8Py$X9f>{QdZD|g$82vl-dmsN;2M4O#Is1oQ8PG zCG0x26-!iTvI&jg5U=|UL%cgiGz~MFPf51PUCPMpR(yNs+)+qTbR);nnk2f1d*f2f zr=S@X`s_#5;-T3~FFJ!;xh8@?)q}j+h$hBK(Tmv+YO}ZJkE^3mY}*d7|8r0J`L2Me zGGycHUv2Vphq{@$RR_ajN(0gGkIXND|J5G1Y8O~M9>C83m5128zpaWki`V-jJeAG7 zttuyrm&Ne|`%~V`+p1Wxco)Fm;eDM%|404S^H|iYTwkc)NzIL${^}&RGBp-+^?S@O xH~rN~ZeN2?D2p0@$GqjYacACCeJr!f<}P>j{$)oz$*>}Ljq*u?>{`$QpW%Q literal 0 HcmV?d00001 diff --git a/services/web/scripts/history/migrate_history.js b/services/web/scripts/history/migrate_history.js index 281001c2e6..161f7d02ac 100644 --- a/services/web/scripts/history/migrate_history.js +++ b/services/web/scripts/history/migrate_history.js @@ -5,6 +5,7 @@ const { countProjects, countDocHistory, upgradeProject, + findProjects, } = require('../../modules/history-migration/app/src/HistoryUpgradeHelper') const { waitForDb } = require('../../app/src/infrastructure/mongodb') const minimist = require('minimist') @@ -50,7 +51,7 @@ async function findProjectsToMigrate() { } // Get a list of projects to migrate - const projectsToMigrate = findProjectsToMigrate( + const projectsToMigrate = await findProjects( { 'overleaf.history.display': { $ne: true } }, { _id: 1, overleaf: 1 } ) @@ -108,6 +109,7 @@ async function main() { process.exit(0) } await 
migrateProjects(projectsToMigrate) + console.log('Done.') } waitForDb() diff --git a/services/web/scripts/history/upgrade_v1_with_conversion_if_sl_history.js b/services/web/scripts/history/upgrade_v1_with_conversion_if_sl_history.js index 2c7f4d3b7f..d8fba2b704 100644 --- a/services/web/scripts/history/upgrade_v1_with_conversion_if_sl_history.js +++ b/services/web/scripts/history/upgrade_v1_with_conversion_if_sl_history.js @@ -20,7 +20,7 @@ const { ReadPreference, ObjectId } = require('mongodb') const { db, waitForDb } = require('../../app/src/infrastructure/mongodb') const { promiseMapWithLimit } = require('../../app/src/util/promises') const { batchedUpdate } = require('../helpers/batchedUpdate') -const ProjectHistoryController = require('../../modules/admin-panel/app/src/ProjectHistoryController') +const ProjectHistoryController = require('../../modules/history-migration/app/src/ProjectHistoryController') console.log({ DRY_RUN,