[web] allow admins to clone projects with ranges and entire history (#32739)

* [web] add consistent aria-label to editing/reviewing toggle

* [docstore] add endpoint for getting all docs with ranges

* [history-v1] fix schema of chunkId when deleting old history chunk

* [web] skip duplicate project lookup for resolving rootDocPath

* [web] ignore new limits for root doc path when making debug copy

* [web] allow admins to clone projects with ranges and entire history

* [web] fix tests

* [history-v1] re-order params for cloning project

* [web] fix duplicate import of logger after merge

* [project-history] re-order params for cloning project history metadata

GitOrigin-RevId: 7fa35b4f90885dd453150a348d491ba0ec8de412
This commit is contained in:
Jakob Ackermann
2026-04-15 08:22:13 +02:00
committed by Copybot
parent 917d2700c8
commit 5a6c066847
25 changed files with 786 additions and 32 deletions

View File

@@ -163,6 +163,80 @@ class MeteredStream extends Transform {
}
}
/**
 * Streams newline-delimited progress messages over a long-running HTTP
 * response and ties the operation to an AbortSignal.
 *
 * A timer started in the constructor aborts the signal after `timeout` ms and
 * reports that to the client. The timer does not end the response itself: the
 * owner is expected to observe the aborted signal and call `fail()`/`end()`.
 */
class IncrementalResponse {
  #res
  #ac
  #timeout
  #logger
  #label
  #info
  // Set once end() ran; later writes/ends are ignored.
  #ended = false

  /**
   * @param {Object} opts
   * @param {Object} opts.res - response to write to; `write`, `end` and `destroy` are used
   * @param {number} opts.timeout - milliseconds until the signal is aborted
   * @param {string} opts.label - prefix for log and error messages
   * @param {Object} opts.info - context merged into every log entry
   * @param {Object} opts.logger - logger; `warn` and `err` are used
   */
  constructor({ res, timeout, label, info, logger }) {
    this.#res = res
    this.#logger = logger
    this.#label = label
    this.#info = info
    this.#ac = new AbortController()
    this.#timeout = setTimeout(() => {
      this.#logger.warn({ ...this.#info, timeout }, `${this.#label}: aborting`)
      this.sendUpdate(
        `error: ${label}: aborting after ${this.#humanReadableTimeout(timeout)}`
      )
      this.#ac.abort()
    }, timeout)
  }

  /**
   * @return {AbortSignal} aborted on timeout, failure, write error or end()
   */
  signal() {
    return this.#ac.signal
  }

  /**
   * Abort the signal, stop the timer and end the response.
   * Safe to call more than once; only the first call touches the response.
   */
  end() {
    this.#ac.abort()
    clearTimeout(this.#timeout)
    if (this.#ended) return
    this.#ended = true
    try {
      this.#res.end()
    } catch {
      try {
        this.#res.destroy()
      } catch {
        // Best effort: there is nothing left to clean up.
      }
    }
  }

  /**
   * Write one progress line. A failing write aborts the signal and is logged.
   * Calls after end() are ignored, so late progress callbacks cannot write to
   * an already ended response.
   * @param {string} msg
   */
  sendUpdate(msg) {
    if (this.#ended) return
    try {
      this.#res.write(msg + '\n')
    } catch (err) {
      this.#ac.abort()
      this.#logger.warn(
        { err, ...this.#info },
        `${this.#label}: failed to send progress update`
      )
    }
  }

  /**
   * Abort and end the response after an error. The error is only logged and
   * reported to the client when the signal was not aborted before, i.e. when
   * this is the first failure.
   * @param {Error} err
   */
  fail(err) {
    const aborted = this.#ac.signal.aborted
    this.#ac.abort()
    if (!aborted) {
      this.#logger.err({ err, ...this.#info }, `${this.#label}: error`)
      this.sendUpdate(`error: ${this.#label}`)
    }
    this.end()
  }

  /**
   * Format a duration in ms as e.g. `9min55s`; zero-valued units are omitted.
   * @param {number} timeout
   * @return {string}
   */
  #humanReadableTimeout(timeout) {
    let ms = timeout
    const minutes = Math.floor(ms / 60_000)
    ms -= minutes * 60_000
    const seconds = Math.floor(ms / 1_000)
    ms -= seconds * 1_000
    let t = ''
    if (minutes) t += `${minutes}min`
    if (seconds) t += `${seconds}s`
    if (ms) t += `${ms}ms`
    // A zero timeout would otherwise render as an empty string.
    return t || '0ms'
  }
}
// Export our classes
module.exports = {
@@ -174,4 +248,5 @@ module.exports = {
MeteredStream,
SizeExceededError,
AbortError,
IncrementalResponse,
}

View File

@@ -51,6 +51,10 @@ app.param('doc_id', function (req, res, next, docId) {
app.get('/project/:project_id/doc-deleted', HttpController.getAllDeletedDocs)
app.get('/project/:project_id/doc', HttpController.getAllDocs)
// Lists the non-deleted docs of a project with lines, rev and ranges.
app.get(
'/project/:project_id/doc-with-ranges',
HttpController.getAllDocsWithRanges
)
app.get('/project/:project_id/doc-versions', HttpController.getAllDocVersions)
app.get('/project/:project_id/ranges', HttpController.getAllRanges)
app.get(

View File

@@ -58,6 +58,24 @@ async function getAllDocs(req, res) {
res.json(docViews)
}
/**
 * Respond with a JSON array of the project's docs as returned by
 * DocManager.getAllNonDeletedDocs, requesting lines, rev and ranges.
 * Docs without lines are logged and reported with an empty `lines` array.
 */
async function getAllDocsWithRanges(req, res) {
  const projectId = req.params.project_id
  logger.debug({ projectId }, 'getting all docs with ranges')

  const projection = { lines: true, rev: true, ranges: true }
  const docs = await DocManager.getAllNonDeletedDocs(projectId, projection)

  const docViews = _buildDocsArrayView(projectId, docs)
  for (const docView of docViews) {
    if (docView.lines) continue
    logger.warn({ projectId, docId: docView._id }, 'missing doc lines')
    docView.lines = []
  }
  res.json(docViews)
}
async function getAllDocVersions(req, res) {
const { project_id: projectId } = req.params
const docs = await DocManager.getAllDocVersions(projectId)
@@ -248,6 +266,7 @@ export default {
isDocDeleted: expressify(isDocDeleted),
getRawDoc: expressify(getRawDoc),
getAllDocs: expressify(getAllDocs),
getAllDocsWithRanges: expressify(getAllDocsWithRanges),
getAllDeletedDocs: expressify(getAllDeletedDocs),
getAllRanges: expressify(getAllRanges),
getAllDocVersions: expressify(getAllDocVersions),

View File

@@ -22,6 +22,7 @@ const {
HashCheckBlobStore,
ProjectArchive,
zipStore,
persistBuffer,
} = require('../../storage')
const render = require('./render')
@@ -31,6 +32,7 @@ const StreamSizeLimit = require('./stream_size_limit')
const { getProjectBlobsBatch } = require('../../storage/lib/blob_store')
const assert = require('../../storage/lib/assert')
const { getChunkMetadataForVersion } = require('../../storage/lib/chunk_store')
const { IncrementalResponse } = require('@overleaf/stream-utils')
const pipeline = promisify(Stream.pipeline)
@@ -50,6 +52,62 @@ async function initializeProject(req, res, next) {
}
}
/**
 * Clone the history of the project in the URL into `targetProjectId` from the
 * request body. Progress is streamed line by line via IncrementalResponse.
 *
 * Flushing the redis buffer of the source project is best effort: a failure
 * is reported and logged, and the clone continues.
 */
async function cloneProject(req, res) {
  const parsed = parseReq(req, schemas.cloneProject)
  const targetProjectId = parsed.body.targetProjectId
  const sourceProjectId = parsed.params.project_id
  const info = { targetProjectId, sourceProjectId }

  const incrResp = new IncrementalResponse({
    res,
    timeout: 10 * 60_000 - 5_000,
    logger,
    label: 'clone history in history-v1',
    info,
  })
  const signal = incrResp.signal()

  // Drop progress messages once the operation has been aborted.
  const reportProgress = progress => {
    if (!signal.aborted) incrResp.sendUpdate(progress)
  }

  try {
    try {
      // Use the same limits as importChanges, since these are passed to persistChanges
      const oneWeekMs = 7 * 24 * 3600 * 1000
      const farFuture = new Date(Date.now() + oneWeekMs)
      const limits = {
        maxChanges: 0,
        minChangeTimestamp: farFuture,
        maxChangeTimestamp: farFuture,
        autoResync: true,
      }
      incrResp.sendUpdate('flushing redis buffer: pending')
      await persistBuffer(sourceProjectId, limits)
      incrResp.sendUpdate('flushing redis buffer: done')
    } catch (err) {
      incrResp.sendUpdate('failed to flush redis buffer')
      logger.error({ err, ...info }, 'failed to persist buffer during clone')
    }

    await chunkStore.cloneProject(
      sourceProjectId,
      targetProjectId,
      reportProgress,
      signal
    )
    reportProgress('cloning full project history data: done')
  } catch (err) {
    incrResp.fail(err)
  } finally {
    incrResp.end()
  }
}
async function getLatestContent(req, res, next) {
const { params } = parseReq(req, schemas.getLatestContent)
const projectId = params.project_id
@@ -510,6 +568,7 @@ async function getProjectBlobsStats(req, res) {
module.exports = {
initializeProject: expressify(initializeProject),
cloneProject: expressify(cloneProject),
getLatestContent: expressify(getLatestContent),
getContentAtVersion: expressify(getContentAtVersion),
getLatestHashedContent: expressify(getLatestHashedContent),

View File

@@ -14,6 +14,12 @@ const {
router.post('/projects', handleBasicAuth, projectsController.initializeProject)
// Clone the history of :project_id into the target project named in the body.
router.post(
'/projects/:project_id/clone',
handleBasicAuth,
projectsController.cloneProject
)
router.post(
'/projects/blob-stats',
handleBasicAuth,

View File

@@ -72,6 +72,16 @@ const schemas = {
.optional(),
}),
cloneProject: z.object({
body: z.object({
targetProjectId: z.string(),
}),
params: z.object({
project_id: z.string(),
}),
}),
getProjectBlobsStats: z.object({
body: z.object({
projectIds: z.array(z.string()),

View File

@@ -21,6 +21,7 @@ const streams = require('../streams')
const postgresBackend = require('./postgres')
const mongoBackend = require('./mongo')
const logger = require('@overleaf/logger')
const { promiseMapWithLimit } = require('@overleaf/promise-utils')
/** @import { Readable } from 'stream' */
@@ -35,6 +36,25 @@ function makeProjectKey(projectId, hash) {
return `${projectKey.format(projectId)}/${hash.slice(0, 2)}/${hash.slice(2)}`
}
/**
 * Copy a single blob object from the source project's key to the target
 * project's key inside the project blob bucket.
 *
 * @param {string} sourceProjectId
 * @param {string} targetProjectId
 * @param {string} hash
 */
async function cloneBlob(sourceProjectId, targetProjectId, hash) {
  const projectBucket = config.get('blobStore.projectBucket')
  const targetKey = makeProjectKey(targetProjectId, hash)
  const sourceKey = makeProjectKey(sourceProjectId, hash)
  const logContext = { targetProjectId, sourceProjectId, hash }

  logger.debug(logContext, 'cloneBlob started')
  try {
    await persistor.copyObject(projectBucket, sourceKey, targetKey)
  } finally {
    // Logged on success and on failure.
    logger.debug(logContext, 'cloneBlob finished')
  }
}
async function uploadBlob(projectId, blob, stream, opts = {}) {
const bucket = config.get('blobStore.projectBucket')
const key = makeProjectKey(projectId, blob.getHash())
@@ -178,6 +198,21 @@ class BlobStore {
await this.backend.initialize(this.projectId)
}
/**
* Set up the initial data structure for a given project
*/
async clone(sourceProjectId, onProgress, signal) {
const hashes = await this.backend.clone(sourceProjectId, this.projectId)
onProgress(`blobs-metadata-imported: ${hashes.length}`)
let done = 0
await promiseMapWithLimit(50, hashes, async hash => {
if (signal.aborted) return
await cloneBlob(sourceProjectId, this.projectId, hash)
done++
onProgress(`blobs-copied: ${done}`)
})
}
/**
* Write a blob, if one does not already exist, with the given UTF-8 encoded
* string content.

View File

@@ -46,6 +46,64 @@ async function initialize(projectId) {
}
}
/**
 * Copy the blob metadata of the source project onto the target project, for
 * both the main blobs record and any sharded blob records.
 *
 * @param {string} sourceProjectId
 * @param {string} targetProjectId
 * @return {Promise<string[]>} hex hashes of all blobs found for the source
 */
async function clone(sourceProjectId, targetProjectId) {
  assert.mongoId(targetProjectId, 'bad target projectId')
  assert.mongoId(sourceProjectId, 'bad source projectId')

  const blobHashes = []
  const collectHashes = blobs => {
    for (const bucket of Object.values(blobs)) {
      for (const record of bucket) {
        blobHashes.push(record.h.toString('hex'))
      }
    }
  }

  const sourceRecord = await mongodb.blobs.findOne({
    _id: new ObjectId(sourceProjectId),
  })
  if (!sourceRecord || !('blobs' in sourceRecord)) {
    throw new Error('missing blobs for source project')
  }
  collectHashes(sourceRecord.blobs)
  await mongodb.blobs.updateOne(
    { _id: new ObjectId(targetProjectId) },
    { $set: { blobs: sourceRecord.blobs } }
  )

  const idRange = {
    $gte: makeShardedId(sourceProjectId, '0'),
    $lte: makeShardedId(sourceProjectId, 'f'),
  }
  // @ts-ignore We are using a custom _id here.
  const sourceShards = mongodb.shardedBlobs.find({ _id: idRange })

  const targetShards = [] // gather up to 16 shards
  for await (const sourceShard of sourceShards) {
    if (sourceShard.blobs == null) continue
    // Schema of shard id: <projectId>0<shard id: hex>
    const shard = sourceShard._id.toString('hex').slice(25)
    targetShards.push({
      ...sourceShard,
      _id: makeShardedId(targetProjectId, shard),
    })
    collectHashes(sourceShard.blobs)
  }
  if (targetShards.length > 0) {
    // @ts-ignore We are using a custom _id here.
    await mongodb.shardedBlobs.insertMany(targetShards)
  }
  return blobHashes
}
/**
* Return blob metadata for the given project and hash.
* @param {string} projectId
@@ -428,6 +486,7 @@ function recordToBlob(record) {
module.exports = {
initialize,
clone,
findBlob,
findBlobs,
getProjectBlobs,

View File

@@ -9,6 +9,32 @@ async function initialize(projectId) {
// Nothing to do for Postgres
}
/**
 * Duplicate all project_blobs rows of the source project for the target
 * project in a single INSERT ... SELECT statement.
 *
 * @param {string} sourceProjectId
 * @param {string} targetProjectId
 * @return {Promise<string[]>} hex hashes of the inserted rows
 */
async function clone(sourceProjectId, targetProjectId) {
  assert.postgresId(targetProjectId, 'bad target projectId')
  assert.postgresId(sourceProjectId, 'bad source projectId')

  const bindings = [
    parseInt(targetProjectId, 10),
    parseInt(sourceProjectId, 10),
  ]
  const inserted = await knex.raw(
    `INSERT INTO project_blobs (
project_id, hash_bytes, byte_length, string_length
)
SELECT ?, hash_bytes, byte_length, string_length
FROM project_blobs
WHERE project_id = ?
RETURNING hash_bytes`,
    bindings
  )
  return inserted.rows.map(row => row.hash_bytes.toString('hex'))
}
/**
* Return blob metadata for the given project and hash
*/
@@ -152,6 +178,7 @@ function hashFromBuffer(buffer) {
module.exports = {
initialize,
clone,
findBlob,
findBlobs,
getProjectBlobs,

View File

@@ -38,6 +38,7 @@ const {
ChunkVersionConflictError,
VersionOutOfBoundsError,
} = require('./errors')
const { promiseMapWithLimit } = require('@overleaf/promise-utils')
/**
* @import { Change } from 'overleaf-editor-core'
@@ -78,6 +79,71 @@ async function initializeProject(projectId, snapshot) {
return projectId
}
/**
 * Clone blobs and chunks of the source project into the target project.
 *
 * The target project must already have a latest chunk and that chunk must not
 * have an endVersion above 0; it is deleted before the clone starts. Blobs
 * and chunks are then cloned concurrently.
 *
 * @param {string} sourceProjectId
 * @param {string} targetProjectId
 * @param {(string) => void} onProgress
 * @param {AbortSignal} signal
 */
async function cloneProject(
  sourceProjectId,
  targetProjectId,
  onProgress,
  signal
) {
  assert.projectId(targetProjectId, 'bad target projectId')
  assert.projectId(sourceProjectId, 'bad source projectId')

  onProgress('existing history: checking')
  const backend = getBackend(targetProjectId)
  const latestChunk = await backend.getLatestChunk(targetProjectId)
  if (!latestChunk) {
    onProgress('existing history: not found, aborting')
    throw new OError('target project is not initialized yet')
  }
  if (latestChunk.endVersion > 0) {
    onProgress('existing history: found changes, aborting')
    throw new AlreadyInitialized(targetProjectId)
  }
  onProgress('existing history: deleting empty chunk')
  await backend.deleteChunk(targetProjectId, latestChunk.id)
  onProgress('existing history: deleted empty chunk')

  const cloneBlobs = async () => {
    onProgress('cloning blobs metadata: pending')
    const blobStore = new BlobStore(targetProjectId)
    await blobStore.clone(sourceProjectId, onProgress, signal)
    onProgress('cloning blobs metadata: done')
  }

  const cloneChunks = async () => {
    onProgress('cloning chunks metadata: pending')
    // Map of source chunk id -> newly allocated target chunk id
    const chunkIds = await backend.clone(sourceProjectId, targetProjectId)
    onProgress(`chunks-metadata-imported: ${chunkIds.size}`)

    let copied = 0
    const copyOne = async ([sourceChunkId, targetChunkId]) => {
      if (signal.aborted) return
      await historyStore.cloneChunk(
        sourceProjectId,
        sourceChunkId,
        targetProjectId,
        targetChunkId
      )
      copied += 1
      onProgress(`chunks-copied: ${copied}`)
    }
    await promiseMapWithLimit(50, [...chunkIds.entries()], pair =>
      copyOne(pair)
    )
    onProgress('cloning chunks metadata: done')
  }

  await Promise.all([cloneBlobs(), cloneChunks()])
}
/**
* Load the blobs referenced in the given history
*/
@@ -619,6 +685,7 @@ class AlreadyInitialized extends OError {
module.exports = {
getBackend,
initializeProject,
cloneProject,
loadLatest,
getLatestChunkMetadata,
loadAtVersion,

View File

@@ -136,6 +136,43 @@ async function getProjectChunks(projectId) {
return await cursor.map(chunkFromRecord).toArray()
}
/**
 * Duplicate the active and closed chunk records of the source project for the
 * target project, assigning a fresh ObjectId to every copy.
 *
 * @param {string} sourceProjectId
 * @param {string} targetProjectId
 * @return {Promise<Map<string, string>>} source chunk id -> target chunk id
 */
async function clone(sourceProjectId, targetProjectId) {
  assert.mongoId(targetProjectId, 'bad target projectId')
  assert.mongoId(sourceProjectId, 'bad source projectId')

  const query = {
    projectId: new ObjectId(sourceProjectId),
    state: { $in: ['active', 'closed'] },
  }
  const sourceChunks = mongodb.chunks.find(query, {
    projection: { projectId: 0 },
  })

  const chunkIds = new Map()
  const pending = []
  const flushPending = async () => {
    await mongodb.chunks.insertMany(pending)
    pending.length = 0
  }

  for await (const sourceChunk of sourceChunks) {
    const targetChunkId = new ObjectId()
    chunkIds.set(sourceChunk._id.toString(), targetChunkId.toString())
    pending.push({
      ...sourceChunk,
      _id: targetChunkId,
      projectId: new ObjectId(targetProjectId),
    })
    // Insert in batches while streaming through the cursor.
    if (pending.length > 100) await flushPending()
  }
  if (pending.length > 0) await flushPending()
  return chunkIds
}
/**
* Insert a pending chunk before sending it to object storage.
*/
@@ -477,6 +514,7 @@ function chunkFromRecord(record) {
}
module.exports = {
clone,
getLatestChunk,
getChunkForVersion,
getChunkForTimestamp,

View File

@@ -135,6 +135,47 @@ async function getProjectChunks(projectId) {
.orderBy('end_version')
return records.map(chunkFromRecord)
}
/**
 * Duplicate the chunk rows of the source project for the target project.
 * New chunk ids are drawn from the `chunks_id_seq` sequence, one per row.
 *
 * @param {string} sourceProjectId
 * @param {string} targetProjectId
 * @return {Promise<Map<string, string>>} source chunk id -> target chunk id
 */
async function clone(sourceProjectId, targetProjectId) {
  assert.postgresId(targetProjectId, 'bad target projectId')
  assert.postgresId(sourceProjectId, 'bad source projectId')

  const sourceRows = knex('chunks')
    .select()
    .where('doc_id', parseInt(sourceProjectId, 10))
    .stream()

  const chunkIds = new Map()
  const pending = []
  const flushPending = async () => {
    // Allocate one new id per pending row.
    const allocated = await knex.raw(
      "SELECT nextval('chunks_id_seq'::regclass)::integer AS chunk_id FROM generate_series(1, ?)",
      pending.length
    )
    const copies = pending.map((row, index) => {
      const newId = allocated.rows[index].chunk_id
      chunkIds.set(row.id.toString(), newId.toString())
      return {
        ...row,
        id: newId,
        doc_id: parseInt(targetProjectId, 10),
      }
    })
    await knex('chunks').insert(copies)
    pending.length = 0
  }

  for await (const row of sourceRows) {
    pending.push(row)
    if (pending.length > 100) await flushPending()
  }
  if (pending.length > 0) await flushPending()
  return chunkIds
}
/**
* Insert a pending chunk before sending it to object storage.
@@ -330,7 +371,7 @@ async function _closeChunk(tx, projectId, chunkId) {
*/
async function deleteChunk(projectId, chunkId) {
assert.postgresId(projectId, 'bad projectId')
assert.integer(chunkId, 'bad chunkId')
assert.chunkId(chunkId, 'bad chunkId')
await _deleteChunks(knex, {
doc_id: parseInt(projectId, 10),
@@ -422,6 +463,7 @@ async function resolveHistoryIdToMongoProjectId(projectId) {
}
module.exports = {
clone,
getLatestChunk,
getChunkForVersion,
getChunkForTimestamp,

View File

@@ -176,6 +176,46 @@ class HistoryStore {
}
}
/**
* Compress and store a {@link History}.
*
* @param {string} sourceProjectId
* @param {string} sourceChunkId
* @param {string} targetProjectId
* @param {string} targetChunkId
*/
async cloneChunk(
sourceProjectId,
sourceChunkId,
targetProjectId,
targetChunkId
) {
assert.projectId(targetProjectId, 'bad target projectId')
assert.projectId(sourceProjectId, 'bad source projectId')
assert.chunkId(targetChunkId, 'bad chunkId')
assert.chunkId(sourceChunkId, 'bad chunkId')
const dstKey = getKey(targetProjectId, targetChunkId)
const srcKey = getKey(sourceProjectId, sourceChunkId)
const info = {
targetProjectId,
sourceProjectId,
sourceChunkId,
targetChunkId,
srcKey,
dstKey,
}
logger.debug(info, 'cloneChunk started')
try {
await this.#persistor.copyObject(this.#bucket, srcKey, dstKey)
} catch (err) {
throw new StoreError(sourceProjectId, sourceChunkId, err)
} finally {
logger.debug(info, 'cloneChunk finished')
}
}
/**
* Delete multiple chunks from bucket. Expects an Array of objects with
* projectId and chunkId properties

View File

@@ -86,6 +86,23 @@ async function recordSyncStart(projectId) {
)
}
/**
 * Copy the failure record of the source project, if there is one, to the
 * target project. Does nothing when the source has no failure record.
 *
 * @param {string} sourceProjectId
 * @param {string} targetProjectId
 * @return {Promise<void>}
 */
async function cloneFailure(sourceProjectId, targetProjectId) {
  const query = { project_id: sourceProjectId.toString() }
  // Strip the identifying fields; they are replaced on the copy.
  const options = { projection: { _id: 0, project_id: 0 } }
  const sourceFailure = await db.projectHistoryFailures.findOne(query, options)
  if (sourceFailure) {
    await db.projectHistoryFailures.insertOne({
      ...sourceFailure,
      project_id: targetProjectId.toString(),
    })
  }
}
/**
* @param projectId
*/
@@ -238,6 +255,7 @@ async function getFailures() {
const getFailedProjectsCb = callbackify(getFailedProjects)
const getFailureRecordCb = callbackify(getFailureRecord)
const getFailuresCb = callbackify(getFailures)
const cloneFailureCb = callbackify(cloneFailure)
const getLastFailureCb = callbackify(getLastFailure)
const recordCb = callbackify(record)
const clearErrorCb = callbackify(clearError)
@@ -245,6 +263,7 @@ const recordSyncStartCb = callbackify(recordSyncStart)
const setForceDebugCb = callbackify(setForceDebug)
export {
cloneFailureCb as cloneFailure,
getFailedProjectsCb as getFailedProjects,
getFailureRecordCb as getFailureRecord,
getLastFailureCb as getLastFailure,
@@ -257,6 +276,7 @@ export {
export const promises = {
getFailedProjects,
cloneFailure,
getFailureRecord,
getLastFailure,
getFailures,

View File

@@ -17,7 +17,7 @@ import * as Errors from './Errors.js'
import * as LocalFileWriter from './LocalFileWriter.js'
import * as HashManager from './HashManager.js'
import * as HistoryBlobTranslator from './HistoryBlobTranslator.js'
import { promisifyMultiResult } from '@overleaf/promise-utils'
import { callbackify, promisifyMultiResult } from '@overleaf/promise-utils'
const HTTP_REQUEST_TIMEOUT = Settings.overleaf.history.requestTimeout
@@ -531,6 +531,18 @@ export function initializeProject(historyId, callback) {
)
}
/**
 * POST to the history store's clone endpoint for the source project and
 * return the response stream.
 *
 * @param {string} sourceProjectId
 * @param {string} targetProjectId
 * @param {AbortSignal} signal - passed to the fetch call
 */
async function _cloneProject(sourceProjectId, targetProjectId, signal) {
  const url = `${Settings.overleaf.history.host}/projects/${sourceProjectId}/clone`
  const fetchOptions = {
    method: 'POST',
    json: { targetProjectId },
    ...getHistoryFetchOptions(),
    signal,
  }
  return await fetchStream(url, fetchOptions)
}
export function deleteProject(projectId, callback) {
_requestHistoryService(
{ method: 'DELETE', path: `projects/${projectId}` },
@@ -623,6 +635,8 @@ function _requestHistoryService(options, callback) {
})
}
export const cloneProject = callbackify(_cloneProject)
export const promises = {
/** @type {(projectId: string, historyId: string) => Promise<{chunk: import('overleaf-editor-core/lib/types.js').RawChunk}>} */
getMostRecentChunk: promisify(getMostRecentChunk),
@@ -640,4 +654,5 @@ export const promises = {
createBlobForUpdate: promisify(createBlobForUpdate),
initializeProject: promisify(initializeProject),
deleteProject: promisify(deleteProject),
cloneProject: _cloneProject,
}

View File

@@ -14,12 +14,114 @@ import * as LabelsManager from './LabelsManager.js'
import * as HistoryApiManager from './HistoryApiManager.js'
import * as RetryManager from './RetryManager.js'
import * as FlushManager from './FlushManager.js'
import { pipeline } from 'node:stream'
import Stream, { pipeline } from 'node:stream'
import { fetchNothing, RequestFailedError } from '@overleaf/fetch-utils'
import { z, zz, parseReq } from '@overleaf/validation-tools'
import { IncrementalResponse } from '@overleaf/stream-utils'
const ONE_DAY_IN_SECONDS = 24 * 60 * 60
// Request shape for cloneProject: the source project id comes from the URL,
// the target project id from the JSON body. Both are only checked to be strings.
const cloneProjectSchema = z.object({
body: z.object({
targetProjectId: z.string(),
}),
params: z.object({
project_id: z.string(),
}),
})
/**
 * Clone the history of the project in the URL into `targetProjectId` from the
 * request body, streaming progress lines to the client.
 *
 * Steps, in order: resolve both history ids, ask the history store to clone
 * the data and pipe its response into `res` without ending it, then clone
 * labels, resync state and the failure record. Any error goes through
 * `incrResp.fail()`, which also ends the response.
 */
export function cloneProject(req, res) {
  const {
    params: { project_id: sourceProjectId },
    body: { targetProjectId },
  } = parseReq(req, cloneProjectSchema)
  const incrResp = new IncrementalResponse({
    res,
    timeout: 10 * 60_000 - 5_000,
    logger,
    label: 'clone history in project-history',
    info: { targetProjectId, sourceProjectId },
  })
  WebApiManager.getHistoryId(targetProjectId, (err, targetHistoryId) => {
    if (err) return incrResp.fail(OError.tag(err, 'get target historyId'))
    WebApiManager.getHistoryId(sourceProjectId, (err, sourceHistoryId) => {
      if (err) return incrResp.fail(OError.tag(err, 'get source historyId'))
      incrResp.sendUpdate('cloning full project history data: pending')
      HistoryStoreManager.cloneProject(
        sourceHistoryId.toString(),
        targetHistoryId.toString(),
        incrResp.signal(),
        (err, stream) => {
          if (err) {
            incrResp.fail(OError.tag(err, 'clone history-v1 data'))
            return
          }
          // aborted. pipeline() would throw.
          if (res.destroyed) {
            stream.destroy()
            incrResp.fail(new Error('request aborted'))
            return
          }
          // The stream.pipeline callback API does not support options.
          // `end: false` keeps `res` open for the progress lines below.
          Stream.promises
            .pipeline(stream, res, { end: false })
            .then(
              () => {
                incrResp.sendUpdate('clone labels: pending')
                LabelsManager.cloneLabels(
                  sourceProjectId,
                  targetProjectId,
                  err => {
                    if (err) {
                      incrResp.fail(OError.tag(err, 'clone labels'))
                      return
                    }
                    incrResp.sendUpdate('clone labels: done')
                    incrResp.sendUpdate('clone resync state: pending')
                    SyncManager.cloneResyncState(
                      sourceProjectId,
                      targetProjectId,
                      err => {
                        if (err) {
                          incrResp.fail(OError.tag(err, 'clone resync state'))
                          return
                        }
                        incrResp.sendUpdate('clone resync state: done')
                        incrResp.sendUpdate('clone failure record: pending')
                        ErrorRecorder.cloneFailure(
                          sourceProjectId,
                          targetProjectId,
                          err => {
                            if (err) {
                              incrResp.fail(OError.tag(err, 'clone failure'))
                              return
                            }
                            incrResp.sendUpdate('clone failure record: done')
                            incrResp.sendUpdate('done')
                            incrResp.end()
                          }
                        )
                      }
                    )
                  }
                )
              },
              err => {
                incrResp.fail(OError.tag(err, 'stream history-v1 response'))
              }
            )
            .catch(err => {
              // A synchronous throw in the success handler above would
              // otherwise be an unhandled rejection and leave `res` open.
              incrResp.fail(OError.tag(err, 'clone project-history data'))
            })
        }
      )
    })
  })
}
const getProjectBlobSchema = z.object({
params: z.object({
history_id: zz.objectId().or(z.coerce.number()),

View File

@@ -4,6 +4,26 @@ import * as HistoryStoreManager from './HistoryStoreManager.js'
import * as UpdatesProcessor from './UpdatesProcessor.js'
import * as WebApiManager from './WebApiManager.js'
/**
 * Copy all labels of the source project to the target project.
 * Calls back without arguments on success, including when the source project
 * has no labels; errors are tagged with OError.
 */
export function cloneLabels(sourceProjectId, targetProjectId, callback) {
  const sourceLabels = db.projectHistoryLabels
    .find({ project_id: new ObjectId(sourceProjectId) })
    // Strip the identifying fields; they are replaced on the copies.
    .project({ _id: 0, project_id: 0 })

  function insertCopies(labels) {
    const copies = labels.map(label => ({
      ...label,
      project_id: new ObjectId(targetProjectId),
    }))
    db.projectHistoryLabels.insertMany(copies, err => {
      if (err) return callback(OError.tag(err))
      callback()
    })
  }

  sourceLabels.toArray((err, labels) => {
    if (err) return callback(OError.tag(err))
    if (labels.length === 0) return callback()
    insertCopies(labels)
  })
}
export function getLabels(projectId, callback) {
_toObjectId(projectId, function (error, projectId) {
if (error) {

View File

@@ -83,6 +83,8 @@ export function initialize(app) {
app.get('/project/:history_id/blob/:hash', HttpController.getProjectBlob)
// Clone the history of :project_id into the target project named in the body.
app.post('/project/:project_id/clone', HttpController.cloneProject)
app.get('/status/failures', HttpController.getFailures)
app.get('/status/queue', HttpController.getQueueCounts)

View File

@@ -109,6 +109,23 @@ async function startResyncWithoutLock(projectId, options) {
await setResyncState(projectId, syncState)
}
/**
 * Copy the resync state record of the source project, if there is one, to the
 * target project. Does nothing when the source has no such record.
 *
 * @param {string} sourceProjectId
 * @param {string} targetProjectId
 * @return {Promise<void>}
 */
async function cloneResyncState(sourceProjectId, targetProjectId) {
  const query = { project_id: new ObjectId(sourceProjectId) }
  // Strip the identifying fields; they are replaced on the copy.
  const options = { projection: { _id: 0, project_id: 0 } }
  const sourceState = await db.projectHistorySyncState.findOne(query, options)
  if (sourceState) {
    await db.projectHistorySyncState.insertOne({
      ...sourceState,
      project_id: new ObjectId(targetProjectId),
    })
  }
}
/**
* @param {string} projectId
* @return {Promise<SyncState>}
@@ -1329,6 +1346,7 @@ function trackingDirectivesEqual(a, b) {
// EXPORTS
const cloneResyncStateCb = callbackify(cloneResyncState)
const getResyncStateCb = callbackify(getResyncState)
const startResyncCb = callbackify(startResync)
const startResyncWithoutLockCb = callbackify(startResyncWithoutLock)
@@ -1373,6 +1391,7 @@ const expandSyncUpdatesCb = (
}
export {
cloneResyncStateCb as cloneResyncState,
getResyncStateCb as getResyncState,
startResyncCb as startResync,
startResyncWithoutLockCb as startResyncWithoutLock,
@@ -1384,6 +1403,7 @@ export {
}
export const promises = {
cloneResyncState,
getResyncState,
startResync,
startResyncWithoutLock,

View File

@@ -82,6 +82,29 @@ async function getAllDocs(projectId) {
}
}
/**
 * Fetch the JSON response of docstore's `doc-with-ranges` endpoint for a
 * project.
 *
 * @param {string} projectId
 * @throws {OError} when docstore responds with a non-success status
 */
async function getAllDocsWithRanges(projectId) {
  const url = new URL(settings.apis.docstore.url)
  url.pathname = path.posix.join(
    'project',
    projectId.toString(),
    'doc-with-ranges'
  )

  let docs
  try {
    docs = await fetchJson(url, { signal: AbortSignal.timeout(TIMEOUT) })
  } catch (error) {
    // Anything but a failed request (e.g. a timeout) is passed on untouched.
    if (!(error instanceof RequestFailedError)) throw error
    throw new OError('docstore api responded with non-success code', {
      projectId,
      status: error.response.status,
    })
  }
  return docs
}
/**
*
* @param {string|ObjectId} projectId
@@ -395,6 +418,7 @@ export default {
deleteDoc,
getAllDocVersions,
getAllDocs,
getAllDocsWithRanges,
getAllDeletedDocs,
getAllRanges,
getDoc,

View File

@@ -2,6 +2,7 @@ import { callbackify } from 'node:util'
import {
fetchJson,
fetchNothing,
fetchStream,
fetchStreamWithResponse,
RequestFailedError,
} from '@overleaf/fetch-utils'
@@ -59,6 +60,17 @@ async function initializeProject(projectId) {
return historyId
}
/**
 * POST to project-history's clone endpoint for the source project and return
 * the response stream. The request is aborted after 10 minutes.
 */
async function cloneProject(sourceProjectId, targetProjectId) {
  const url = `${settings.apis.project_history.url}/project/${sourceProjectId}/clone`
  const fetchOptions = {
    method: 'POST',
    json: { targetProjectId },
    signal: AbortSignal.timeout(10 * 60_000),
  }
  return await fetchStream(url, fetchOptions)
}
}
async function flushProject(projectId) {
try {
await fetchNothing(
@@ -460,6 +472,7 @@ export default {
getChanges: callbackify(getChanges),
promises: {
initializeProject,
cloneProject,
flushProject,
resyncProject,
deleteProject,

View File

@@ -263,12 +263,17 @@ const _ProjectController = {
res.setTimeout(5 * 60 * 1000) // allow extra time for the copy to complete
metrics.inc('cloned-project')
const projectId = req.params.Project_id
const { projectName, isDebugCopy, tags } = req.body
let { projectName, isDebugCopy, cloneHistory, cloneRanges, tags } = req.body
const currentUser = SessionManager.getSessionUser(req.session)
if (!hasAdminAccess(currentUser)) {
isDebugCopy = false
cloneHistory = false
cloneRanges = false
}
logger.debug({ projectId, projectName, isDebugCopy }, 'cloning project')
if (!SessionManager.isUserLoggedIn(req.session)) {
return res.json({ redir: '/register' })
}
const currentUser = SessionManager.getSessionUser(req.session)
const { first_name: firstName, last_name: lastName, email } = currentUser
try {
const project = await ProjectDuplicator.promises.duplicate(
@@ -276,7 +281,7 @@ const _ProjectController = {
projectId,
projectName,
tags,
isDebugCopy
{ isDebugCopy, cloneHistory, cloneRanges }
)
ProjectAuditLogHandler.addEntryIfManagedInBackground(
projectId,

View File

@@ -38,7 +38,11 @@ async function duplicate(
originalProjectId,
newProjectName,
tags = [],
isDebugCopy
opts = {
isDebugCopy: false,
cloneHistory: false,
cloneRanges: false,
}
) {
await DocumentUpdaterHandler.promises.flushProjectToMongo(originalProjectId)
const originalProject = await ProjectGetter.promises.getProject(
@@ -54,7 +58,7 @@ async function duplicate(
}
)
const { path: rootDocPath } = await ProjectLocator.promises.findRootDoc({
project_id: originalProjectId,
project: originalProject,
})
const originalEntries = _getFolderEntries(originalProject.rootFolder[0])
@@ -69,7 +73,7 @@ async function duplicate(
})
const attributes = {}
if (isDebugCopy) {
if (opts.isDebugCopy) {
attributes.isDebugCopyOf = originalProjectId
// - Create new tag on owner._id if it doesn't already exist
const debugTag = await TagsHandler.promises.createTag(
@@ -101,6 +105,18 @@ async function duplicate(
// remove any leading or trailing spaces
newProjectName = newProjectName.trim()
if (
opts.cloneHistory &&
typeof originalProject.overleaf?.history?.id === 'number'
) {
// Obtain an old history id. We want to store the data in the same DB.
const newHistoryId = parseInt(
await HistoryManager.promises.initializeProject(),
10
)
attributes.overleaf = { history: { id: newHistoryId } }
}
// Now create the new project, cleaning it up on failure if necessary
const newProject = await ProjectCreationHandler.promises.createBlankProject(
owner._id,
@@ -129,8 +145,18 @@ async function duplicate(
originalProject.compiler
)
const [docEntries, fileEntries] = await Promise.all([
_copyDocs(originalEntries.docEntries, originalProject, newProject),
_copyFiles(originalEntries.fileEntries, originalProject, newProject),
_copyDocs(
originalEntries.docEntries,
originalProject,
newProject,
opts.cloneRanges
),
_copyFiles(
originalEntries.fileEntries,
originalProject,
newProject,
opts.cloneHistory
),
])
const projectVersion =
await ProjectEntityMongoUpdateHandler.promises.createNewFolderStructure(
@@ -139,17 +165,23 @@ async function duplicate(
fileEntries
)
// Silently ignore the rootDoc in case it's not valid per the new limits.
// Ignore the new limits in case we are creating a debug copy.
if (
rootDocPath &&
ProjectEntityUpdateHandler.isPathValidForRootDoc(rootDocPath.fileSystem)
(ProjectEntityUpdateHandler.isPathValidForRootDoc(
rootDocPath.fileSystem
) ||
opts.isDebugCopy)
) {
await _setRootDoc(newProject._id, rootDocPath.fileSystem)
}
await _notifyDocumentUpdater(newProject, owner._id, {
newFiles: fileEntries,
newDocs: docEntries,
newProject: { version: projectVersion },
})
if (!opts.cloneHistory) {
await _notifyDocumentUpdater(newProject, owner._id, {
newFiles: fileEntries,
newDocs: docEntries,
newProject: { version: projectVersion },
})
}
await TpdsProjectFlusher.promises.flushProjectToTpds(newProject._id)
if (tags?.length > 0) {
@@ -218,33 +250,50 @@ function _getFolderEntries(folder, folderPath = '/') {
return { docEntries, fileEntries }
}
async function _copyDocs(sourceEntries, sourceProject, targetProject) {
const docLinesById = await _getDocLinesForProject(sourceProject._id)
async function _copyDocs(
sourceEntries,
sourceProject,
targetProject,
cloneRanges
) {
const docsById = await _getDocContentForProject(
sourceProject._id,
cloneRanges
)
const targetEntries = []
for (const sourceEntry of sourceEntries) {
const sourceDoc = sourceEntry.doc
const path = sourceEntry.path
const doc = new Doc({ name: sourceDoc.name })
const docLines = docLinesById.get(sourceDoc._id.toString())
const { lines, ranges } = docsById.get(sourceDoc._id.toString())
await DocstoreManager.promises.updateDoc(
targetProject._id.toString(),
doc._id.toString(),
docLines,
lines,
0,
{}
ranges || {}
)
targetEntries.push({ doc, path, docLines: docLines.join('\n') })
targetEntries.push({ doc, path, docLines: lines.join('\n') })
}
return targetEntries
}
async function _getDocLinesForProject(projectId) {
const docs = await DocstoreManager.promises.getAllDocs(projectId)
const docLinesById = new Map(docs.map(doc => [doc._id, doc.lines]))
return docLinesById
async function _getDocContentForProject(projectId, cloneRanges) {
let docs
if (cloneRanges) {
docs = await DocstoreManager.promises.getAllDocsWithRanges(projectId)
} else {
docs = await DocstoreManager.promises.getAllDocs(projectId)
}
return new Map(docs.map(doc => [doc._id, doc]))
}
async function _copyFiles(sourceEntries, sourceProject, targetProject) {
async function _copyFiles(
sourceEntries,
sourceProject,
targetProject,
cloneHistory
) {
const sourceHistoryId = sourceProject.overleaf?.history?.id
const targetHistoryId = targetProject.overleaf?.history?.id
if (!sourceHistoryId) {
@@ -268,6 +317,10 @@ async function _copyFiles(sourceEntries, sourceProject, targetProject) {
file.linkedFileData = sourceFile.linkedFileData
file.created = sourceFile.created
}
if (cloneHistory) {
// All blobs will be cloned in bulk. Do not clone each individually.
return { createdBlob: true, file, path }
}
try {
await HistoryManager.promises.copyBlob(
sourceHistoryId,

View File

@@ -185,11 +185,10 @@ const ModeSwitcherToggleButtonContent = forwardRef<
onClick(event)
}}
aria-expanded={ariaExpanded}
aria-label={label}
>
<MaterialIcon type={iconType} />
<div className="review-mode-switcher-toggle-label" aria-label={label}>
{label}
</div>
<div className="review-mode-switcher-toggle-label">{label}</div>
<MaterialIcon type="keyboard_arrow_down" />
</button>
)

View File

@@ -190,7 +190,7 @@ function fetchJSON<T>(
})
}
async function parseResponseBody(response: Response) {
export async function parseResponseBody(response: Response) {
const contentType = response.headers.get('Content-Type')
if (!contentType) {