From 5a6c0668479e245f186ea9924d7f6a6a9e649dba Mon Sep 17 00:00:00 2001 From: Jakob Ackermann Date: Wed, 15 Apr 2026 08:22:13 +0200 Subject: [PATCH] [web] allow admins to clone projects with ranges and entire history (#32739) * [web] add consistent aria-label to editing/reviewing toggle * [docstore] add endpoint for getting all docs with ranges * [history-v1] fix schema of chunkId when deleting old history chunk * [web] skip duplicate project lookup for resolving rootDocPath * [web] ignore new limits for root doc path when making debug copy * [web] allow admins to clone projects with ranges and entire history * [web] fix tests * [history-v1] re-order params for cloning project * [web] fix duplicate import of logger after merge * [project-history] re-order params for cloning project history metadata GitOrigin-RevId: 7fa35b4f90885dd453150a348d491ba0ec8de412 --- libraries/stream-utils/index.js | 75 +++++++++++++ services/docstore/app.js | 4 + services/docstore/app/js/HttpController.js | 19 ++++ .../history-v1/api/controllers/projects.js | 59 ++++++++++ services/history-v1/api/routes/projects.js | 6 + services/history-v1/api/schema.js | 10 ++ .../storage/lib/blob_store/index.js | 35 ++++++ .../storage/lib/blob_store/mongo.js | 59 ++++++++++ .../storage/lib/blob_store/postgres.js | 27 +++++ .../storage/lib/chunk_store/index.js | 67 +++++++++++ .../storage/lib/chunk_store/mongo.js | 38 +++++++ .../storage/lib/chunk_store/postgres.js | 44 +++++++- .../history-v1/storage/lib/history_store.js | 40 +++++++ .../project-history/app/js/ErrorRecorder.js | 20 ++++ .../app/js/HistoryStoreManager.js | 17 ++- .../project-history/app/js/HttpController.js | 104 +++++++++++++++++- .../project-history/app/js/LabelsManager.js | 20 ++++ services/project-history/app/js/Router.js | 2 + .../project-history/app/js/SyncManager.js | 20 ++++ .../src/Features/Docstore/DocstoreManager.mjs | 24 ++++ .../src/Features/History/HistoryManager.mjs | 13 +++ .../Features/Project/ProjectController.mjs | 11 
+- .../Features/Project/ProjectDuplicator.mjs | 97 ++++++++++++---- .../components/review-mode-switcher.tsx | 5 +- .../frontend/js/infrastructure/fetch-json.ts | 2 +- 25 files changed, 786 insertions(+), 32 deletions(-) diff --git a/libraries/stream-utils/index.js b/libraries/stream-utils/index.js index 7719d409a4..d8af84efc7 100644 --- a/libraries/stream-utils/index.js +++ b/libraries/stream-utils/index.js @@ -163,6 +163,80 @@ class MeteredStream extends Transform { } } +class IncrementalResponse { + #res + #ac + #timeout + #logger + #label + #info + constructor({ res, timeout, label, info, logger }) { + this.#res = res + this.#logger = logger + this.#label = label + this.#info = info + this.#ac = new AbortController() + this.#timeout = setTimeout(() => { + this.#logger.warn({ ...this.#info, timeout }, `${this.#label}: aborting`) + this.sendUpdate( + `error: ${label}: aborting after ${this.#humanReadableTimeout(timeout)}` + ) + this.#ac.abort() + }, timeout) + } + + signal() { + return this.#ac.signal + } + + end() { + this.#ac.abort() + clearTimeout(this.#timeout) + try { + this.#res.end() + } catch { + try { + this.#res.destroy() + } catch {} + } + } + + sendUpdate(msg) { + try { + this.#res.write(msg + '\n') + } catch (err) { + this.#ac.abort() + this.#logger.warn( + { err, ...this.#info }, + `${this.#label}: failed to send progress update` + ) + } + } + + fail(err) { + const aborted = this.#ac.signal.aborted + this.#ac.abort() + if (!aborted) { + this.#logger.err({ err, ...this.#info }, `${this.#label}: error`) + this.sendUpdate(`error: ${this.#label}`) + } + this.end() + } + + #humanReadableTimeout(timeout) { + let ms = timeout + const minutes = Math.floor(ms / 60_000) + ms -= minutes * 60_000 + const seconds = Math.floor(ms / 1_000) + ms -= seconds * 1_000 + let t = '' + if (minutes) t += `${minutes}min` + if (seconds) t += `${seconds}s` + if (ms) t += `${ms}ms` + return t + } +} + // Export our classes module.exports = { @@ -174,4 +248,5 @@ module.exports = 
{ MeteredStream, SizeExceededError, AbortError, + IncrementalResponse, } diff --git a/services/docstore/app.js b/services/docstore/app.js index 01acc44dda..4722f89cb1 100644 --- a/services/docstore/app.js +++ b/services/docstore/app.js @@ -51,6 +51,10 @@ app.param('doc_id', function (req, res, next, docId) { app.get('/project/:project_id/doc-deleted', HttpController.getAllDeletedDocs) app.get('/project/:project_id/doc', HttpController.getAllDocs) +app.get( + '/project/:project_id/doc-with-ranges', + HttpController.getAllDocsWithRanges +) app.get('/project/:project_id/doc-versions', HttpController.getAllDocVersions) app.get('/project/:project_id/ranges', HttpController.getAllRanges) app.get( diff --git a/services/docstore/app/js/HttpController.js b/services/docstore/app/js/HttpController.js index edfd4c9789..d7de51bf51 100644 --- a/services/docstore/app/js/HttpController.js +++ b/services/docstore/app/js/HttpController.js @@ -58,6 +58,24 @@ async function getAllDocs(req, res) { res.json(docViews) } +async function getAllDocsWithRanges(req, res) { + const { project_id: projectId } = req.params + logger.debug({ projectId }, 'getting all docs with ranges') + const docs = await DocManager.getAllNonDeletedDocs(projectId, { + lines: true, + rev: true, + ranges: true, + }) + const docViews = _buildDocsArrayView(projectId, docs) + for (const docView of docViews) { + if (!docView.lines) { + logger.warn({ projectId, docId: docView._id }, 'missing doc lines') + docView.lines = [] + } + } + res.json(docViews) +} + async function getAllDocVersions(req, res) { const { project_id: projectId } = req.params const docs = await DocManager.getAllDocVersions(projectId) @@ -248,6 +266,7 @@ export default { isDocDeleted: expressify(isDocDeleted), getRawDoc: expressify(getRawDoc), getAllDocs: expressify(getAllDocs), + getAllDocsWithRanges: expressify(getAllDocsWithRanges), getAllDeletedDocs: expressify(getAllDeletedDocs), getAllRanges: expressify(getAllRanges), getAllDocVersions: 
expressify(getAllDocVersions), diff --git a/services/history-v1/api/controllers/projects.js b/services/history-v1/api/controllers/projects.js index ce75cb884b..9f2005d2a8 100644 --- a/services/history-v1/api/controllers/projects.js +++ b/services/history-v1/api/controllers/projects.js @@ -22,6 +22,7 @@ const { HashCheckBlobStore, ProjectArchive, zipStore, + persistBuffer, } = require('../../storage') const render = require('./render') @@ -31,6 +32,7 @@ const StreamSizeLimit = require('./stream_size_limit') const { getProjectBlobsBatch } = require('../../storage/lib/blob_store') const assert = require('../../storage/lib/assert') const { getChunkMetadataForVersion } = require('../../storage/lib/chunk_store') +const { IncrementalResponse } = require('@overleaf/stream-utils') const pipeline = promisify(Stream.pipeline) @@ -50,6 +52,62 @@ async function initializeProject(req, res, next) { } } +async function cloneProject(req, res) { + const { + body: { targetProjectId }, + params: { project_id: sourceProjectId }, + } = parseReq(req, schemas.cloneProject) + + const incrResp = new IncrementalResponse({ + res, + timeout: 10 * 60_000 - 5_000, + logger, + label: 'clone history in history-v1', + info: { targetProjectId, sourceProjectId }, + }) + const signal = incrResp.signal() + + try { + try { + // Use the same limits as importChanges, since these are passed to persistChanges + const farFuture = new Date() + farFuture.setTime(farFuture.getTime() + 7 * 24 * 3600 * 1000) + const limits = { + maxChanges: 0, + minChangeTimestamp: farFuture, + maxChangeTimestamp: farFuture, + autoResync: true, + } + incrResp.sendUpdate('flushing redis buffer: pending') + await persistBuffer(sourceProjectId, limits) + incrResp.sendUpdate('flushing redis buffer: done') + } catch (err) { + incrResp.sendUpdate('failed to flush redis buffer') + logger.error( + { err, targetProjectId, sourceProjectId }, + 'failed to persist buffer during clone' + ) + } + + await chunkStore.cloneProject( 
sourceProjectId, + targetProjectId, + progress => { + if (signal.aborted) return + incrResp.sendUpdate(progress) + }, + signal + ) + if (!signal.aborted) { + incrResp.sendUpdate('cloning full project history data: done') + } + } catch (err) { + incrResp.fail(err) + } finally { + incrResp.end() + } +} + async function getLatestContent(req, res, next) { const { params } = parseReq(req, schemas.getLatestContent) const projectId = params.project_id @@ -510,6 +568,7 @@ async function getProjectBlobsStats(req, res) { module.exports = { initializeProject: expressify(initializeProject), + cloneProject: expressify(cloneProject), getLatestContent: expressify(getLatestContent), getContentAtVersion: expressify(getContentAtVersion), getLatestHashedContent: expressify(getLatestHashedContent), diff --git a/services/history-v1/api/routes/projects.js b/services/history-v1/api/routes/projects.js index 86f61fe179..9f81b4c54e 100644 --- a/services/history-v1/api/routes/projects.js +++ b/services/history-v1/api/routes/projects.js @@ -14,6 +14,12 @@ const { router.post('/projects', handleBasicAuth, projectsController.initializeProject) +router.post( + '/projects/:project_id/clone', + handleBasicAuth, + projectsController.cloneProject +) + router.post( '/projects/blob-stats', handleBasicAuth, diff --git a/services/history-v1/api/schema.js b/services/history-v1/api/schema.js index 066e1861a9..2651585d9f 100644 --- a/services/history-v1/api/schema.js +++ b/services/history-v1/api/schema.js @@ -72,6 +72,16 @@ const schemas = { .optional(), }), + cloneProject: z.object({ + body: z.object({ + targetProjectId: z.string(), + }), + + params: z.object({ + project_id: z.string(), + }), + }), + getProjectBlobsStats: z.object({ body: z.object({ projectIds: z.array(z.string()), diff --git a/services/history-v1/storage/lib/blob_store/index.js b/services/history-v1/storage/lib/blob_store/index.js index ddc030cdc8..dbbf329a01 100644 --- a/services/history-v1/storage/lib/blob_store/index.js +++ 
b/services/history-v1/storage/lib/blob_store/index.js @@ -21,6 +21,7 @@ const streams = require('../streams') const postgresBackend = require('./postgres') const mongoBackend = require('./mongo') const logger = require('@overleaf/logger') +const { promiseMapWithLimit } = require('@overleaf/promise-utils') /** @import { Readable } from 'stream' */ @@ -35,6 +36,25 @@ function makeProjectKey(projectId, hash) { return `${projectKey.format(projectId)}/${hash.slice(0, 2)}/${hash.slice(2)}` } +/** + * Copy one blob object from the source project to the target project. + * @param {string} sourceProjectId + * @param {string} targetProjectId + * @param {string} hash + */ +async function cloneBlob(sourceProjectId, targetProjectId, hash) { + const bucket = config.get('blobStore.projectBucket') + const dst = makeProjectKey(targetProjectId, hash) + const src = makeProjectKey(sourceProjectId, hash) + const info = { targetProjectId, sourceProjectId, hash } + logger.debug(info, 'cloneBlob started') + try { + await persistor.copyObject(bucket, src, dst) + } finally { + logger.debug(info, 'cloneBlob finished') + } +} + async function uploadBlob(projectId, blob, stream, opts = {}) { const bucket = config.get('blobStore.projectBucket') const key = makeProjectKey(projectId, blob.getHash()) @@ -178,6 +198,21 @@ class BlobStore { await this.backend.initialize(this.projectId) } + /** + * Copy all blobs (metadata and objects) of the source project into this project + */ + async clone(sourceProjectId, onProgress, signal) { + const hashes = await this.backend.clone(sourceProjectId, this.projectId) + onProgress(`blobs-metadata-imported: ${hashes.length}`) + let done = 0 + await promiseMapWithLimit(50, hashes, async hash => { + if (signal.aborted) return + await cloneBlob(sourceProjectId, this.projectId, hash) + done++ + onProgress(`blobs-copied: ${done}`) + }) + } + /** * Write a blob, if one does not already exist, with the given UTF-8 encoded * string content. 
diff --git a/services/history-v1/storage/lib/blob_store/mongo.js b/services/history-v1/storage/lib/blob_store/mongo.js index 9117382148..8fb188eca4 100644 --- a/services/history-v1/storage/lib/blob_store/mongo.js +++ b/services/history-v1/storage/lib/blob_store/mongo.js @@ -46,6 +46,64 @@ async function initialize(projectId) { } } +/** + * Copy the data structures for a given project. + * @param {string} sourceProjectId + * @param {string} targetProjectId + */ +async function clone(sourceProjectId, targetProjectId) { + assert.mongoId(targetProjectId, 'bad target projectId') + assert.mongoId(sourceProjectId, 'bad source projectId') + const result = await mongodb.blobs.findOne({ + _id: new ObjectId(sourceProjectId), + }) + if (!result || !('blobs' in result)) { + throw new Error('missing blobs for source project') + } + const blobHashes = [] + for (const bucket of Object.values(result.blobs)) { + for (const record of bucket) { + blobHashes.push(record.h.toString('hex')) + } + } + + await mongodb.blobs.updateOne( + { _id: new ObjectId(targetProjectId) }, + { $set: { blobs: result.blobs } } + ) + + const minShardedId = makeShardedId(sourceProjectId, '0') + const maxShardedId = makeShardedId(sourceProjectId, 'f') + // @ts-ignore We are using a custom _id here. + const sharded = mongodb.shardedBlobs.find({ + _id: { $gte: minShardedId, $lte: maxShardedId }, + }) + const newShards = [] // gather up-to 16 shards + for await (const shardedRecord of sharded) { + if (shardedRecord.blobs == null) { + continue + } + // Schema of shard id: 0 + const shard = shardedRecord._id.toString('hex').slice(25) + const newId = makeShardedId(targetProjectId, shard) + newShards.push({ + ...shardedRecord, + _id: newId, + }) + + for (const bucket of Object.values(shardedRecord.blobs)) { + for (const record of bucket) { + blobHashes.push(record.h.toString('hex')) + } + } + } + if (newShards.length > 0) { + // @ts-ignore We are using a custom _id here. 
+ await mongodb.shardedBlobs.insertMany(newShards) + } + return blobHashes +} + /** * Return blob metadata for the given project and hash. * @param {string} projectId @@ -428,6 +486,7 @@ function recordToBlob(record) { module.exports = { initialize, + clone, findBlob, findBlobs, getProjectBlobs, diff --git a/services/history-v1/storage/lib/blob_store/postgres.js b/services/history-v1/storage/lib/blob_store/postgres.js index 1cedeec5d7..ee84e7cad2 100644 --- a/services/history-v1/storage/lib/blob_store/postgres.js +++ b/services/history-v1/storage/lib/blob_store/postgres.js @@ -9,6 +9,32 @@ async function initialize(projectId) { // Nothing to do for Postgres } +/** + * Copy the data structures for a given project. + * @param {string} sourceProjectId + * @param {string} targetProjectId + */ +async function clone(sourceProjectId, targetProjectId) { + assert.postgresId(targetProjectId, 'bad target projectId') + assert.postgresId(sourceProjectId, 'bad source projectId') + + const result = await knex.raw( + `INSERT INTO project_blobs ( + project_id, hash_bytes, byte_length, string_length + ) + SELECT ?, hash_bytes, byte_length, string_length + FROM project_blobs + WHERE project_id = ? 
+ RETURNING hash_bytes`, + [parseInt(targetProjectId, 10), parseInt(sourceProjectId, 10)] + ) + const blobHashes = [] + for (const row of result.rows) { + blobHashes.push(row.hash_bytes.toString('hex')) + } + return blobHashes +} + /** * Return blob metadata for the given project and hash */ @@ -152,6 +178,7 @@ function hashFromBuffer(buffer) { module.exports = { initialize, + clone, findBlob, findBlobs, getProjectBlobs, diff --git a/services/history-v1/storage/lib/chunk_store/index.js b/services/history-v1/storage/lib/chunk_store/index.js index d860a9b88a..9655a64902 100644 --- a/services/history-v1/storage/lib/chunk_store/index.js +++ b/services/history-v1/storage/lib/chunk_store/index.js @@ -38,6 +38,7 @@ const { ChunkVersionConflictError, VersionOutOfBoundsError, } = require('./errors') +const { promiseMapWithLimit } = require('@overleaf/promise-utils') /** * @import { Change } from 'overleaf-editor-core' @@ -78,6 +79,71 @@ async function initializeProject(projectId, snapshot) { return projectId } +/** + * Clone the project data. 
+ * @param {string} sourceProjectId + * @param {string} targetProjectId + * @param {(string) => void} onProgress + * @param {AbortSignal} signal + */ +async function cloneProject( + sourceProjectId, + targetProjectId, + onProgress, + signal +) { + assert.projectId(targetProjectId, 'bad target projectId') + assert.projectId(sourceProjectId, 'bad source projectId') + + onProgress('existing history: checking') + const backend = getBackend(targetProjectId) + const chunkRecord = await backend.getLatestChunk(targetProjectId) + if (!chunkRecord) { + onProgress('existing history: not found, aborting') + throw new OError('target project is not initialized yet') + } + if (chunkRecord?.endVersion > 0) { + onProgress('existing history: found changes, aborting') + throw new AlreadyInitialized(targetProjectId) + } + + onProgress('existing history: deleting empty chunk') + await backend.deleteChunk(targetProjectId, chunkRecord.id) + onProgress('existing history: deleted empty chunk') + + async function cloneBlobs() { + onProgress('cloning blobs metadata: pending') + const blobStore = new BlobStore(targetProjectId) + await blobStore.clone(sourceProjectId, onProgress, signal) + onProgress('cloning blobs metadata: done') + } + + async function cloneChunks() { + onProgress('cloning chunks metadata: pending') + const chunkIds = await backend.clone(sourceProjectId, targetProjectId) + onProgress(`chunks-metadata-imported: ${chunkIds.size}`) + let done = 0 + await promiseMapWithLimit( + 50, + Array.from(chunkIds.entries()), + async ([sourceChunkId, targetChunkId]) => { + if (signal.aborted) return + await historyStore.cloneChunk( + sourceProjectId, + sourceChunkId, + targetProjectId, + targetChunkId + ) + done++ + onProgress(`chunks-copied: ${done}`) + } + ) + onProgress('cloning chunks metadata: done') + } + + await Promise.all([cloneBlobs(), cloneChunks()]) +} + /** * Load the blobs referenced in the given history */ @@ -619,6 +685,7 @@ class AlreadyInitialized extends OError { 
module.exports = { getBackend, initializeProject, + cloneProject, loadLatest, getLatestChunkMetadata, loadAtVersion, diff --git a/services/history-v1/storage/lib/chunk_store/mongo.js b/services/history-v1/storage/lib/chunk_store/mongo.js index 4453ccc82c..35ce40dd83 100644 --- a/services/history-v1/storage/lib/chunk_store/mongo.js +++ b/services/history-v1/storage/lib/chunk_store/mongo.js @@ -136,6 +136,43 @@ async function getProjectChunks(projectId) { return await cursor.map(chunkFromRecord).toArray() } +/** + * Copy the data structures for a given project. + * @param {string} sourceProjectId + * @param {string} targetProjectId + */ +async function clone(sourceProjectId, targetProjectId) { + assert.mongoId(targetProjectId, 'bad target projectId') + assert.mongoId(sourceProjectId, 'bad source projectId') + + const cursor = mongodb.chunks.find( + { + projectId: new ObjectId(sourceProjectId), + state: { $in: ['active', 'closed'] }, + }, + { projection: { projectId: 0 } } + ) + + const chunkIds = new Map() + const batch = [] + async function flushBatch() { + await mongodb.chunks.insertMany(batch) + batch.length = 0 + } + for await (const chunk of cursor) { + const newChunkId = new ObjectId() + chunkIds.set(chunk._id.toString(), newChunkId.toString()) + batch.push({ + ...chunk, + _id: newChunkId, + projectId: new ObjectId(targetProjectId), + }) + if (batch.length > 100) await flushBatch() + } + if (batch.length > 0) await flushBatch() + return chunkIds +} + /** * Insert a pending chunk before sending it to object storage. 
*/ @@ -477,6 +514,7 @@ function chunkFromRecord(record) { } module.exports = { + clone, getLatestChunk, getChunkForVersion, getChunkForTimestamp, diff --git a/services/history-v1/storage/lib/chunk_store/postgres.js b/services/history-v1/storage/lib/chunk_store/postgres.js index d63395d2ff..48cb1d9bff 100644 --- a/services/history-v1/storage/lib/chunk_store/postgres.js +++ b/services/history-v1/storage/lib/chunk_store/postgres.js @@ -135,6 +135,47 @@ async function getProjectChunks(projectId) { .orderBy('end_version') return records.map(chunkFromRecord) } +/** + * Copy the data structures for a given project. + * @param {string} sourceProjectId + * @param {string} targetProjectId + */ +async function clone(sourceProjectId, targetProjectId) { + assert.postgresId(targetProjectId, 'bad target projectId') + assert.postgresId(sourceProjectId, 'bad source projectId') + + const cursor = knex('chunks') + .select() + .where('doc_id', parseInt(sourceProjectId, 10)) + .stream() + + const chunkIds = new Map() + const batch = [] + async function flushBatch() { + const newIds = await knex.raw( + "SELECT nextval('chunks_id_seq'::regclass)::integer AS chunk_id FROM generate_series(1, ?)", + batch.length + ) + const newRecords = [] + for (const [i, chunk] of batch.entries()) { + const newId = newIds.rows[i].chunk_id + chunkIds.set(chunk.id.toString(), newId.toString()) + newRecords.push({ + ...chunk, + id: newId, + doc_id: parseInt(targetProjectId, 10), + }) + } + await knex('chunks').insert(newRecords) + batch.length = 0 + } + for await (const chunk of cursor) { + batch.push(chunk) + if (batch.length > 100) await flushBatch() + } + if (batch.length > 0) await flushBatch() + return chunkIds +} /** * Insert a pending chunk before sending it to object storage. 
@@ -330,7 +371,7 @@ async function _closeChunk(tx, projectId, chunkId) { */ async function deleteChunk(projectId, chunkId) { assert.postgresId(projectId, 'bad projectId') - assert.integer(chunkId, 'bad chunkId') + assert.chunkId(chunkId, 'bad chunkId') await _deleteChunks(knex, { doc_id: parseInt(projectId, 10), @@ -422,6 +463,7 @@ async function resolveHistoryIdToMongoProjectId(projectId) { } module.exports = { + clone, getLatestChunk, getChunkForVersion, getChunkForTimestamp, diff --git a/services/history-v1/storage/lib/history_store.js b/services/history-v1/storage/lib/history_store.js index 9a219ab667..87051d1bd4 100644 --- a/services/history-v1/storage/lib/history_store.js +++ b/services/history-v1/storage/lib/history_store.js @@ -176,6 +176,46 @@ class HistoryStore { } } + /** + * Copy a stored chunk object from the source project to the target project. + * + * @param {string} sourceProjectId + * @param {string} sourceChunkId + * @param {string} targetProjectId + * @param {string} targetChunkId + */ + async cloneChunk( + sourceProjectId, + sourceChunkId, + targetProjectId, + targetChunkId + ) { + assert.projectId(targetProjectId, 'bad target projectId') + assert.projectId(sourceProjectId, 'bad source projectId') + assert.chunkId(targetChunkId, 'bad chunkId') + assert.chunkId(sourceChunkId, 'bad chunkId') + const dstKey = getKey(targetProjectId, targetChunkId) + const srcKey = getKey(sourceProjectId, sourceChunkId) + + const info = { + targetProjectId, + sourceProjectId, + sourceChunkId, + targetChunkId, + srcKey, + dstKey, + } + logger.debug(info, 'cloneChunk started') + + try { + await this.#persistor.copyObject(this.#bucket, srcKey, dstKey) + } catch (err) { + throw new StoreError(sourceProjectId, sourceChunkId, err) + } finally { + logger.debug(info, 'cloneChunk finished') + } + } + /** * Delete multiple chunks from bucket. 
Expects an Array of objects with * projectId and chunkId properties diff --git a/services/project-history/app/js/ErrorRecorder.js b/services/project-history/app/js/ErrorRecorder.js index 648b53f569..304b72921d 100644 --- a/services/project-history/app/js/ErrorRecorder.js +++ b/services/project-history/app/js/ErrorRecorder.js @@ -86,6 +86,23 @@ async function recordSyncStart(projectId) { ) } +/** + * @param {string} sourceProjectId + * @param {string} targetProjectId + * @return {Promise} + */ +async function cloneFailure(sourceProjectId, targetProjectId) { + const failure = await db.projectHistoryFailures.findOne( + { project_id: sourceProjectId.toString() }, + { projection: { _id: 0, project_id: 0 } } + ) + if (!failure) return + await db.projectHistoryFailures.insertOne({ + ...failure, + project_id: targetProjectId.toString(), + }) +} + /** * @param projectId */ @@ -238,6 +255,7 @@ async function getFailures() { const getFailedProjectsCb = callbackify(getFailedProjects) const getFailureRecordCb = callbackify(getFailureRecord) const getFailuresCb = callbackify(getFailures) +const cloneFailureCb = callbackify(cloneFailure) const getLastFailureCb = callbackify(getLastFailure) const recordCb = callbackify(record) const clearErrorCb = callbackify(clearError) @@ -245,6 +263,7 @@ const recordSyncStartCb = callbackify(recordSyncStart) const setForceDebugCb = callbackify(setForceDebug) export { + cloneFailureCb as cloneFailure, getFailedProjectsCb as getFailedProjects, getFailureRecordCb as getFailureRecord, getLastFailureCb as getLastFailure, @@ -257,6 +276,7 @@ export { export const promises = { getFailedProjects, + cloneFailure, getFailureRecord, getLastFailure, getFailures, diff --git a/services/project-history/app/js/HistoryStoreManager.js b/services/project-history/app/js/HistoryStoreManager.js index 3df69a399c..96afac59a9 100644 --- a/services/project-history/app/js/HistoryStoreManager.js +++ b/services/project-history/app/js/HistoryStoreManager.js @@ -17,7 +17,7 
@@ import * as Errors from './Errors.js' import * as LocalFileWriter from './LocalFileWriter.js' import * as HashManager from './HashManager.js' import * as HistoryBlobTranslator from './HistoryBlobTranslator.js' -import { promisifyMultiResult } from '@overleaf/promise-utils' +import { callbackify, promisifyMultiResult } from '@overleaf/promise-utils' const HTTP_REQUEST_TIMEOUT = Settings.overleaf.history.requestTimeout @@ -531,6 +531,18 @@ export function initializeProject(historyId, callback) { ) } +async function _cloneProject(sourceProjectId, targetProjectId, signal) { + return await fetchStream( + `${Settings.overleaf.history.host}/projects/${sourceProjectId}/clone`, + { + method: 'POST', + json: { targetProjectId }, + ...getHistoryFetchOptions(), + signal, + } + ) +} + export function deleteProject(projectId, callback) { _requestHistoryService( { method: 'DELETE', path: `projects/${projectId}` }, @@ -623,6 +635,8 @@ function _requestHistoryService(options, callback) { }) } +export const cloneProject = callbackify(_cloneProject) + export const promises = { /** @type {(projectId: string, historyId: string) => Promise<{chunk: import('overleaf-editor-core/lib/types.js').RawChunk}>} */ getMostRecentChunk: promisify(getMostRecentChunk), @@ -640,4 +654,5 @@ export const promises = { createBlobForUpdate: promisify(createBlobForUpdate), initializeProject: promisify(initializeProject), deleteProject: promisify(deleteProject), + cloneProject: _cloneProject, } diff --git a/services/project-history/app/js/HttpController.js b/services/project-history/app/js/HttpController.js index 1ff9194118..bf4f1e7c9a 100644 --- a/services/project-history/app/js/HttpController.js +++ b/services/project-history/app/js/HttpController.js @@ -14,12 +14,114 @@ import * as LabelsManager from './LabelsManager.js' import * as HistoryApiManager from './HistoryApiManager.js' import * as RetryManager from './RetryManager.js' import * as FlushManager from './FlushManager.js' -import { pipeline } 
from 'node:stream' +import Stream, { pipeline } from 'node:stream' import { fetchNothing, RequestFailedError } from '@overleaf/fetch-utils' import { z, zz, parseReq } from '@overleaf/validation-tools' +import { IncrementalResponse } from '@overleaf/stream-utils' const ONE_DAY_IN_SECONDS = 24 * 60 * 60 +const cloneProjectSchema = z.object({ + body: z.object({ + targetProjectId: z.string(), + }), + + params: z.object({ + project_id: z.string(), + }), +}) + +export function cloneProject(req, res) { + const { + params: { project_id: sourceProjectId }, + body: { targetProjectId }, + } = parseReq(req, cloneProjectSchema) + const incrResp = new IncrementalResponse({ + res, + timeout: 10 * 60_000 - 5_000, + logger, + label: 'clone history in project-history', + info: { targetProjectId, sourceProjectId }, + }) + + WebApiManager.getHistoryId(targetProjectId, (err, targetHistoryId) => { + if (err) return incrResp.fail(OError.tag(err, 'get target historyId')) + WebApiManager.getHistoryId(sourceProjectId, (err, sourceHistoryId) => { + if (err) return incrResp.fail(OError.tag(err, 'get source historyId')) + + incrResp.sendUpdate('cloning full project history data: pending') + HistoryStoreManager.cloneProject( + sourceHistoryId.toString(), + targetHistoryId.toString(), + incrResp.signal(), + (err, stream) => { + if (err) { + incrResp.fail(OError.tag(err, 'clone history-v1 data')) + return + } + + // aborted. pipeline() would throw. + if (res.destroyed) { + stream.destroy() + incrResp.fail(new Error('request aborted')) + return + } + + // The stream.pipeline callback API does not support options. 
+ Stream.promises.pipeline(stream, res, { end: false }).then( + () => { + incrResp.sendUpdate('clone labels: pending') + LabelsManager.cloneLabels( + sourceProjectId, + targetProjectId, + err => { + if (err) { + incrResp.fail(OError.tag(err, 'clone labels')) + return + } + incrResp.sendUpdate('clone labels: done') + + incrResp.sendUpdate('clone resync state: pending') + SyncManager.cloneResyncState( + sourceProjectId, + targetProjectId, + err => { + if (err) { + incrResp.fail(OError.tag(err, 'clone resync state')) + return + } + incrResp.sendUpdate('clone resync state: done') + + incrResp.sendUpdate('clone failure record: pending') + ErrorRecorder.cloneFailure( + sourceProjectId, + targetProjectId, + err => { + if (err) { + incrResp.fail(OError.tag(err, 'clone failure')) + return + } + incrResp.sendUpdate('clone failure record: done') + + incrResp.sendUpdate('done') + incrResp.end() + } + ) + } + ) + } + ) + }, + err => { + incrResp.fail(OError.tag(err, 'stream history-v1 response')) + } + ) + } + ) + }) + }) +} + const getProjectBlobSchema = z.object({ params: z.object({ history_id: zz.objectId().or(z.coerce.number()), diff --git a/services/project-history/app/js/LabelsManager.js b/services/project-history/app/js/LabelsManager.js index fe3dd4eed2..9b0446c0a5 100644 --- a/services/project-history/app/js/LabelsManager.js +++ b/services/project-history/app/js/LabelsManager.js @@ -4,6 +4,26 @@ import * as HistoryStoreManager from './HistoryStoreManager.js' import * as UpdatesProcessor from './UpdatesProcessor.js' import * as WebApiManager from './WebApiManager.js' +export function cloneLabels(sourceProjectId, targetProjectId, callback) { + db.projectHistoryLabels + .find({ project_id: new ObjectId(sourceProjectId) }) + .project({ _id: 0, project_id: 0 }) + .toArray((err, labels) => { + if (err) return callback(OError.tag(err)) + if (labels.length === 0) return callback() + db.projectHistoryLabels.insertMany( + labels.map(label => ({ + ...label, + project_id: new 
ObjectId(targetProjectId), + })), + err => { + if (err) return callback(OError.tag(err)) + callback() + } + ) + }) +} + export function getLabels(projectId, callback) { _toObjectId(projectId, function (error, projectId) { if (error) { diff --git a/services/project-history/app/js/Router.js b/services/project-history/app/js/Router.js index 8fd6ec036e..0907cb8e1f 100644 --- a/services/project-history/app/js/Router.js +++ b/services/project-history/app/js/Router.js @@ -83,6 +83,8 @@ export function initialize(app) { app.get('/project/:history_id/blob/:hash', HttpController.getProjectBlob) + app.post('/project/:project_id/clone', HttpController.cloneProject) + app.get('/status/failures', HttpController.getFailures) app.get('/status/queue', HttpController.getQueueCounts) diff --git a/services/project-history/app/js/SyncManager.js b/services/project-history/app/js/SyncManager.js index baadb466cf..24c9c0a5d4 100644 --- a/services/project-history/app/js/SyncManager.js +++ b/services/project-history/app/js/SyncManager.js @@ -109,6 +109,23 @@ async function startResyncWithoutLock(projectId, options) { await setResyncState(projectId, syncState) } +/** + * @param {string} sourceProjectId + * @param {string} targetProjectId + * @return {Promise} + */ +async function cloneResyncState(sourceProjectId, targetProjectId) { + const rawSyncState = await db.projectHistorySyncState.findOne( + { project_id: new ObjectId(sourceProjectId) }, + { projection: { _id: 0, project_id: 0 } } + ) + if (!rawSyncState) return + await db.projectHistorySyncState.insertOne({ + ...rawSyncState, + project_id: new ObjectId(targetProjectId), + }) +} + /** * @param {string} projectId * @return {Promise} @@ -1329,6 +1346,7 @@ function trackingDirectivesEqual(a, b) { // EXPORTS +const cloneResyncStateCb = callbackify(cloneResyncState) const getResyncStateCb = callbackify(getResyncState) const startResyncCb = callbackify(startResync) const startResyncWithoutLockCb = callbackify(startResyncWithoutLock) @@ 
-1373,6 +1391,7 @@ const expandSyncUpdatesCb = ( } export { + cloneResyncStateCb as cloneResyncState, getResyncStateCb as getResyncState, startResyncCb as startResync, startResyncWithoutLockCb as startResyncWithoutLock, @@ -1384,6 +1403,7 @@ export { } export const promises = { + cloneResyncState, getResyncState, startResync, startResyncWithoutLock, diff --git a/services/web/app/src/Features/Docstore/DocstoreManager.mjs b/services/web/app/src/Features/Docstore/DocstoreManager.mjs index 9d6901bafd..c151a10d51 100644 --- a/services/web/app/src/Features/Docstore/DocstoreManager.mjs +++ b/services/web/app/src/Features/Docstore/DocstoreManager.mjs @@ -82,6 +82,29 @@ async function getAllDocs(projectId) { } } +/** + * @param {string} projectId + */ +async function getAllDocsWithRanges(projectId) { + const url = new URL(settings.apis.docstore.url) + url.pathname = path.posix.join( + 'project', + projectId.toString(), + 'doc-with-ranges' + ) + try { + return await fetchJson(url, { signal: AbortSignal.timeout(TIMEOUT) }) + } catch (error) { + if (error instanceof RequestFailedError) { + throw new OError('docstore api responded with non-success code', { + projectId, + status: error.response.status, + }) + } + throw error + } +} + /** * * @param {string|ObjectId} projectId @@ -395,6 +418,7 @@ export default { deleteDoc, getAllDocVersions, getAllDocs, + getAllDocsWithRanges, getAllDeletedDocs, getAllRanges, getDoc, diff --git a/services/web/app/src/Features/History/HistoryManager.mjs b/services/web/app/src/Features/History/HistoryManager.mjs index 0d99b8fe47..c8db62c176 100644 --- a/services/web/app/src/Features/History/HistoryManager.mjs +++ b/services/web/app/src/Features/History/HistoryManager.mjs @@ -2,6 +2,7 @@ import { callbackify } from 'node:util' import { fetchJson, fetchNothing, + fetchStream, fetchStreamWithResponse, RequestFailedError, } from '@overleaf/fetch-utils' @@ -59,6 +60,17 @@ async function initializeProject(projectId) { return historyId } +async function 
cloneProject(sourceProjectId, targetProjectId) { + return await fetchStream( + `${settings.apis.project_history.url}/project/${sourceProjectId}/clone`, + { + method: 'POST', + json: { targetProjectId }, + signal: AbortSignal.timeout(10 * 60_000), + } + ) +} + async function flushProject(projectId) { try { await fetchNothing( @@ -460,6 +472,7 @@ export default { getChanges: callbackify(getChanges), promises: { initializeProject, + cloneProject, flushProject, resyncProject, deleteProject, diff --git a/services/web/app/src/Features/Project/ProjectController.mjs b/services/web/app/src/Features/Project/ProjectController.mjs index 7ab45293ac..9eaf9ec495 100644 --- a/services/web/app/src/Features/Project/ProjectController.mjs +++ b/services/web/app/src/Features/Project/ProjectController.mjs @@ -263,12 +263,17 @@ const _ProjectController = { res.setTimeout(5 * 60 * 1000) // allow extra time for the copy to complete metrics.inc('cloned-project') const projectId = req.params.Project_id - const { projectName, isDebugCopy, tags } = req.body + let { projectName, isDebugCopy, cloneHistory, cloneRanges, tags } = req.body + const currentUser = SessionManager.getSessionUser(req.session) + if (!hasAdminAccess(currentUser)) { + isDebugCopy = false + cloneHistory = false + cloneRanges = false + } logger.debug({ projectId, projectName, isDebugCopy }, 'cloning project') if (!SessionManager.isUserLoggedIn(req.session)) { return res.json({ redir: '/register' }) } - const currentUser = SessionManager.getSessionUser(req.session) const { first_name: firstName, last_name: lastName, email } = currentUser try { const project = await ProjectDuplicator.promises.duplicate( @@ -276,7 +281,7 @@ const _ProjectController = { projectId, projectName, tags, - isDebugCopy + { isDebugCopy, cloneHistory, cloneRanges } ) ProjectAuditLogHandler.addEntryIfManagedInBackground( projectId, diff --git a/services/web/app/src/Features/Project/ProjectDuplicator.mjs 
b/services/web/app/src/Features/Project/ProjectDuplicator.mjs index b496bda293..4de3f512fd 100644 --- a/services/web/app/src/Features/Project/ProjectDuplicator.mjs +++ b/services/web/app/src/Features/Project/ProjectDuplicator.mjs @@ -38,7 +38,11 @@ async function duplicate( originalProjectId, newProjectName, tags = [], - isDebugCopy + opts = { + isDebugCopy: false, + cloneHistory: false, + cloneRanges: false, + } ) { await DocumentUpdaterHandler.promises.flushProjectToMongo(originalProjectId) const originalProject = await ProjectGetter.promises.getProject( @@ -54,7 +58,7 @@ async function duplicate( } ) const { path: rootDocPath } = await ProjectLocator.promises.findRootDoc({ - project_id: originalProjectId, + project: originalProject, }) const originalEntries = _getFolderEntries(originalProject.rootFolder[0]) @@ -69,7 +73,7 @@ async function duplicate( }) const attributes = {} - if (isDebugCopy) { + if (opts.isDebugCopy) { attributes.isDebugCopyOf = originalProjectId // - Create new tag on owner._id if it doesn't already exist const debugTag = await TagsHandler.promises.createTag( @@ -101,6 +105,18 @@ async function duplicate( // remove any leading or trailing spaces newProjectName = newProjectName.trim() + if ( + opts.cloneHistory && + typeof originalProject.overleaf?.history?.id === 'number' + ) { + // Obtain an old history id. We want to store the data in the same DB. 
+ const newHistoryId = parseInt( + await HistoryManager.promises.initializeProject(), + 10 + ) + attributes.overleaf = { history: { id: newHistoryId } } + } + // Now create the new project, cleaning it up on failure if necessary const newProject = await ProjectCreationHandler.promises.createBlankProject( owner._id, @@ -129,8 +145,18 @@ async function duplicate( originalProject.compiler ) const [docEntries, fileEntries] = await Promise.all([ - _copyDocs(originalEntries.docEntries, originalProject, newProject), - _copyFiles(originalEntries.fileEntries, originalProject, newProject), + _copyDocs( + originalEntries.docEntries, + originalProject, + newProject, + opts.cloneRanges + ), + _copyFiles( + originalEntries.fileEntries, + originalProject, + newProject, + opts.cloneHistory + ), ]) const projectVersion = await ProjectEntityMongoUpdateHandler.promises.createNewFolderStructure( @@ -139,17 +165,23 @@ async function duplicate( fileEntries ) // Silently ignore the rootDoc in case it's not valid per the new limits. + // Ignore the new limits in case we are creating a debug copy. 
if ( rootDocPath && - ProjectEntityUpdateHandler.isPathValidForRootDoc(rootDocPath.fileSystem) + (ProjectEntityUpdateHandler.isPathValidForRootDoc( + rootDocPath.fileSystem + ) || + opts.isDebugCopy) ) { await _setRootDoc(newProject._id, rootDocPath.fileSystem) } - await _notifyDocumentUpdater(newProject, owner._id, { - newFiles: fileEntries, - newDocs: docEntries, - newProject: { version: projectVersion }, - }) + if (!opts.cloneHistory) { + await _notifyDocumentUpdater(newProject, owner._id, { + newFiles: fileEntries, + newDocs: docEntries, + newProject: { version: projectVersion }, + }) + } await TpdsProjectFlusher.promises.flushProjectToTpds(newProject._id) if (tags?.length > 0) { @@ -218,33 +250,50 @@ function _getFolderEntries(folder, folderPath = '/') { return { docEntries, fileEntries } } -async function _copyDocs(sourceEntries, sourceProject, targetProject) { - const docLinesById = await _getDocLinesForProject(sourceProject._id) +async function _copyDocs( + sourceEntries, + sourceProject, + targetProject, + cloneRanges +) { + const docsById = await _getDocContentForProject( + sourceProject._id, + cloneRanges + ) const targetEntries = [] for (const sourceEntry of sourceEntries) { const sourceDoc = sourceEntry.doc const path = sourceEntry.path const doc = new Doc({ name: sourceDoc.name }) - const docLines = docLinesById.get(sourceDoc._id.toString()) + const { lines, ranges } = docsById.get(sourceDoc._id.toString()) await DocstoreManager.promises.updateDoc( targetProject._id.toString(), doc._id.toString(), - docLines, + lines, 0, - {} + ranges || {} ) - targetEntries.push({ doc, path, docLines: docLines.join('\n') }) + targetEntries.push({ doc, path, docLines: lines.join('\n') }) } return targetEntries } -async function _getDocLinesForProject(projectId) { - const docs = await DocstoreManager.promises.getAllDocs(projectId) - const docLinesById = new Map(docs.map(doc => [doc._id, doc.lines])) - return docLinesById +async function 
_getDocContentForProject(projectId, cloneRanges) { + let docs + if (cloneRanges) { + docs = await DocstoreManager.promises.getAllDocsWithRanges(projectId) + } else { + docs = await DocstoreManager.promises.getAllDocs(projectId) + } + return new Map(docs.map(doc => [doc._id, doc])) } -async function _copyFiles(sourceEntries, sourceProject, targetProject) { +async function _copyFiles( + sourceEntries, + sourceProject, + targetProject, + cloneHistory +) { const sourceHistoryId = sourceProject.overleaf?.history?.id const targetHistoryId = targetProject.overleaf?.history?.id if (!sourceHistoryId) { @@ -268,6 +317,10 @@ async function _copyFiles(sourceEntries, sourceProject, targetProject) { file.linkedFileData = sourceFile.linkedFileData file.created = sourceFile.created } + if (cloneHistory) { + // All blobs will be cloned in bulk. Do not clone each individually. + return { createdBlob: true, file, path } + } try { await HistoryManager.promises.copyBlob( sourceHistoryId, diff --git a/services/web/frontend/js/features/review-panel/components/review-mode-switcher.tsx b/services/web/frontend/js/features/review-panel/components/review-mode-switcher.tsx index f60ac23175..31ae4c74cb 100644 --- a/services/web/frontend/js/features/review-panel/components/review-mode-switcher.tsx +++ b/services/web/frontend/js/features/review-panel/components/review-mode-switcher.tsx @@ -185,11 +185,10 @@ const ModeSwitcherToggleButtonContent = forwardRef< onClick(event) }} aria-expanded={ariaExpanded} + aria-label={label} > -
- {label} -
+
{label}
) diff --git a/services/web/frontend/js/infrastructure/fetch-json.ts b/services/web/frontend/js/infrastructure/fetch-json.ts index 24ab2deca7..1d3af35451 100644 --- a/services/web/frontend/js/infrastructure/fetch-json.ts +++ b/services/web/frontend/js/infrastructure/fetch-json.ts @@ -190,7 +190,7 @@ function fetchJSON( }) } -async function parseResponseBody(response: Response) { +export async function parseResponseBody(response: Response) { const contentType = response.headers.get('Content-Type') if (!contentType) {