diff --git a/libraries/overleaf-editor-core/lib/change.js b/libraries/overleaf-editor-core/lib/change.js index 35183b9159..15c61d6807 100644 --- a/libraries/overleaf-editor-core/lib/change.js +++ b/libraries/overleaf-editor-core/lib/change.js @@ -74,6 +74,14 @@ class Change { static fromRaw(raw) { if (!raw) return null + return Change.mustFromRaw(raw) + } + + /** + * @param {RawChange} raw + * @return {Change} + */ + static mustFromRaw(raw) { assert.array.of.object(raw.operations, 'bad raw.operations') assert.nonEmptyString(raw.timestamp, 'bad raw.timestamp') diff --git a/libraries/overleaf-editor-core/lib/types.ts b/libraries/overleaf-editor-core/lib/types.ts index a393e7fcaa..84db04c5cd 100644 --- a/libraries/overleaf-editor-core/lib/types.ts +++ b/libraries/overleaf-editor-core/lib/types.ts @@ -59,10 +59,10 @@ export type RawChange = { operations: RawOperation[] timestamp: string authors?: (number | null)[] - v2Authors: string[] - origin: RawOrigin - projectVersion: string - v2DocVersions: RawV2DocVersions + v2Authors?: string[] + origin?: RawOrigin + projectVersion?: string + v2DocVersions?: RawV2DocVersions } export type RawOperation = diff --git a/package-lock.json b/package-lock.json index 5718d4b2e2..dfdbf36c1b 100644 --- a/package-lock.json +++ b/package-lock.json @@ -50366,6 +50366,7 @@ "dockerode": "^4.0.9", "express": "4.22.1", "lodash": "^4.17.21", + "overleaf-editor-core": "*", "p-limit": "^3.1.0", "request": "2.88.2", "send": "^0.19.0", diff --git a/services/clsi/Dockerfile b/services/clsi/Dockerfile index 0dbb4b382a..0cbe57620b 100644 --- a/services/clsi/Dockerfile +++ b/services/clsi/Dockerfile @@ -21,6 +21,7 @@ COPY libraries/fetch-utils/package.json /overleaf/libraries/fetch-utils/package. COPY libraries/logger/package.json /overleaf/libraries/logger/package.json COPY libraries/metrics/package.json /overleaf/libraries/metrics/package.json COPY libraries/o-error/package.json /overleaf/libraries/o-error/package.json +COPY libraries/overleaf-editor-core/package.json /overleaf/libraries/overleaf-editor-core/package.json COPY libraries/promise-utils/package.json /overleaf/libraries/promise-utils/package.json COPY libraries/settings/package.json /overleaf/libraries/settings/package.json COPY libraries/stream-utils/package.json /overleaf/libraries/stream-utils/package.json @@ -32,6 +33,7 @@ COPY libraries/fetch-utils/ /overleaf/libraries/fetch-utils/ COPY libraries/logger/ /overleaf/libraries/logger/ COPY libraries/metrics/ /overleaf/libraries/metrics/ COPY libraries/o-error/ /overleaf/libraries/o-error/ +COPY libraries/overleaf-editor-core/ /overleaf/libraries/overleaf-editor-core/ COPY libraries/promise-utils/ /overleaf/libraries/promise-utils/ COPY libraries/settings/ /overleaf/libraries/settings/ COPY libraries/stream-utils/ /overleaf/libraries/stream-utils/ diff --git a/services/clsi/Makefile b/services/clsi/Makefile index 9b6dcdb5f0..f873e2fa27 100644 --- a/services/clsi/Makefile +++ b/services/clsi/Makefile @@ -20,6 +20,7 @@ IMAGE_CACHE ?= $(IMAGE_REPO):cache-$(shell cat \ $(MONOREPO)/libraries/logger/package.json \ $(MONOREPO)/libraries/metrics/package.json \ $(MONOREPO)/libraries/o-error/package.json \ + $(MONOREPO)/libraries/overleaf-editor-core/package.json \ $(MONOREPO)/libraries/promise-utils/package.json \ $(MONOREPO)/libraries/settings/package.json \ $(MONOREPO)/libraries/stream-utils/package.json \ diff --git a/services/clsi/app/js/CLSICacheHandler.js b/services/clsi/app/js/CLSICacheHandler.js index 29a5eaf035..0f97a889b1 100644 --- a/services/clsi/app/js/CLSICacheHandler.js +++ b/services/clsi/app/js/CLSICacheHandler.js @@ -220,15 +220,45 @@ function notifyCLSICacheAboutBuild({ enqueue([{ path: 'output.tar.gz' }]) }) .catch(err => { + if (isENOENT(err)) return logger.warn( { err, projectId, userId, buildId, shard }, 'build output.tar.gz for clsi-cache failed' ) }) + copyHistorySnapshot({ projectId, userId, buildId }) + .then(() => { + enqueue([{ path: 'history-resync.json.gz' }]) + }) + .catch(err => { + if (isENOENT(err)) return + logger.warn( + { err, projectId, userId, buildId, shard }, + 'copy history-resync.json.gz for clsi-cache failed' + ) + }) + return shard } +/** + * @param {Object} opts + * @param {string} opts.projectId + * @param {string} opts.userId + * @param {string} opts.buildId + * @return {Promise} + */ +async function copyHistorySnapshot({ projectId, userId, buildId }) { + const src = Path.join( + Settings.path.clsiCacheDir, + userId ? `${projectId}-${userId}` : projectId, + 'history.json.gz' + ) + const outputDir = getOutputDir({ projectId, userId, buildId }) + const dst = Path.join(outputDir, 'history-resync.json.gz') + await fs.promises.cp(src, dst) +} /** * @param {Object} opts * @param {string} opts.projectId @@ -239,12 +269,7 @@ function notifyCLSICacheAboutBuild({ */ async function buildTarball({ projectId, userId, buildId, outputFiles }) { const timer = new Metrics.Timer('clsi_cache_build', 1, {}, TIMING_BUCKETS) - const outputDir = Path.join( - Settings.path.outputDir, - userId ? `${projectId}-${userId}` : projectId, - CACHE_SUBDIR, - buildId - ) + const outputDir = getOutputDir({ projectId, userId, buildId }) const files = outputFiles.filter(f => !isExtraneousFile(f.path)) if (files.length > MAX_ENTRIES_IN_OUTPUT_TAR) { @@ -287,6 +312,33 @@ async function downloadOutputDotSynctexFromCompileCache( buildId, outputDir ) { + const requestPath = `/project/${projectId}/${ + userId ? `user/${userId}/` : '' + }build/${editorId}-${buildId}/search/output/output.synctex.gz` + return await downloadSingleFile(projectId, requestPath, outputDir, 'synctex') +} + +/** + * @param {string} projectId + * @param {string} userId + * @param {string} cacheDir + * @return {Promise} + */ +async function downloadHistorySnapshot(projectId, userId, cacheDir) { + const requestPath = `/project/${projectId}/${ + userId ? `user/${userId}/` : '' + }latest/output/history-resync.json.gz` + return await downloadSingleFile(projectId, requestPath, cacheDir, 'snapshot') +} + +/** + * @param {string} projectId + * @param {string} requestPath + * @param {string} outputDir + * @param {string} label + * @return {Promise} + */ +async function downloadSingleFile(projectId, requestPath, outputDir, label) { if (!Settings.apis.clsiCache.enabled) return false if (!OBJECT_ID_REGEX.test(projectId)) return false const shardCfg = getAvailableShard(projectId) @@ -296,20 +348,17 @@ async function downloadOutputDotSynctexFromCompileCache( const timer = new Metrics.Timer( 'clsi_cache_download', 1, - { method: 'synctex' }, + { method: label }, TIMING_BUCKETS ) + const u = new URL(url) + u.pathname = requestPath let stream try { - stream = await fetchStream( - `${url}/project/${projectId}/${ - userId ? `user/${userId}/` : '' - }build/${editorId}-${buildId}/search/output/output.synctex.gz`, - { - method: 'GET', - signal: AbortSignal.timeout(TIMEOUT), - } - ) + stream = await fetchStream(u, { + method: 'GET', + signal: AbortSignal.timeout(TIMEOUT), + }) } catch (err) { if (err instanceof RequestFailedError && err.response.status === 404) { closeCircuitBreaker(url) @@ -321,13 +370,14 @@ async function downloadOutputDotSynctexFromCompileCache( throw OError.tag(err, 'download failed', { shard }) } await fs.promises.mkdir(outputDir, { recursive: true }) - const dst = Path.join(outputDir, 'output.synctex.gz') + const name = Path.basename(requestPath) + const dst = Path.join(outputDir, name) const tmp = dst + crypto.randomUUID() try { await pipeline( stream, new MeteredStream(Metrics, 'clsi_cache_egress', { - path: 'output.synctex.gz', + path: name, }), fs.createWriteStream(tmp) ) @@ -437,8 +487,25 @@ async function downloadLatestCompileCache(projectId, userId, compileDir) { return !abort } +/** + * @param {Object} opts + * @param {string} opts.projectId + * @param {string} opts.userId + * @param {string} opts.buildId + * @return {string} + */ +function getOutputDir({ projectId, userId, buildId }) { + return Path.join( + Settings.path.outputDir, + userId ? `${projectId}-${userId}` : projectId, + CACHE_SUBDIR, + buildId + ) +} + /** * @param {unknown} err + * @return {boolean} */ function isENOENT(err) { return err instanceof Error && 'code' in err && err.code === 'ENOENT' @@ -447,5 +514,6 @@ function isENOENT(err) { export default { notifyCLSICacheAboutBuild, downloadLatestCompileCache, + downloadHistorySnapshot, downloadOutputDotSynctexFromCompileCache, } diff --git a/services/clsi/app/js/CompileController.js b/services/clsi/app/js/CompileController.js index c0f9d61248..836efe2b58 100644 --- a/services/clsi/app/js/CompileController.js +++ b/services/clsi/app/js/CompileController.js @@ -40,7 +40,7 @@ function compile(req, res, next) { stats, timings, (error, result) => { - let { buildId, outputFiles } = result || {} + let { buildId, outputFiles, baseHistoryVersion } = result || {} let code, status if (outputFiles == null) { outputFiles = [] @@ -50,7 +50,7 @@ function compile(req, res, next) { status = 'compile-in-progress' } else if (error instanceof Errors.FilesOutOfSyncError) { code = 409 // Http 409 Conflict - status = 'retry' + status = 'conflict' logger.warn( { projectId: request.project_id, @@ -58,6 +58,10 @@ function compile(req, res, next) { }, 'files out of sync, please retry' ) + } else if (error instanceof Errors.MissingUpdatesError) { + code = 409 + status = 'missing-updates' + baseHistoryVersion = error.info.baseHistoryVersion } else if ( error?.code === 'EPIPE' || error instanceof Errors.TooManyCompileRequestsError @@ -146,6 +150,7 @@ function compile(req, res, next) { compile: { status, error: error?.message || error, + baseHistoryVersion, stats, timings, buildId, diff --git a/services/clsi/app/js/CompileManager.js b/services/clsi/app/js/CompileManager.js index f17ee5d3f4..4cb9132a28 100644 --- a/services/clsi/app/js/CompileManager.js +++ b/services/clsi/app/js/CompileManager.js @@ -22,6 +22,7 @@ import StatsManager from './StatsManager.js' import SafeReader from './SafeReader.js' import LatexMetrics from './LatexMetrics.js' import { callbackifyMultiResult } from '@overleaf/promise-utils' +import * as HistoryResourceWriter from './HistoryResourceWriter.js' const { downloadLatestCompileCache, downloadOutputDotSynctexFromCompileCache } = CLSICacheHandler @@ -104,13 +105,24 @@ async function doCompile(request, stats, timings) { 'syncing resources to disk' ) - let resourceList + let resourceList, baseHistoryVersion try { - // NOTE: resourceList is insecure, it should only be used to exclude files from the output list - resourceList = await ResourceWriter.promises.syncResourcesToDisk( - request, - compileDir - ) + if (request.historyId) { + ;({ resourceList, baseHistoryVersion } = + await HistoryResourceWriter.syncResourcesToDisk( + projectId, + userId, + request, + compileDir, + timings + )) + } else { + // NOTE: resourceList is insecure, it should only be used to exclude files from the output list + resourceList = await ResourceWriter.promises.syncResourcesToDisk( + request, + compileDir + ) + } } catch (error) { if (error instanceof Errors.FilesOutOfSyncError) { OError.tag(error, 'files out of sync, please retry', { @@ -326,7 +338,7 @@ async function doCompile(request, stats, timings) { ) } - return { outputFiles, buildId } + return { outputFiles, buildId, baseHistoryVersion } } async function _saveOutputFiles({ @@ -837,6 +849,7 @@ function _emitMetrics(request, status, stats, timings) { if (timings.compileE2E != null) { ClsiMetrics.e2eCompileDurationSeconds.observe( { + compileFromHistory: !!request.historyId, compile: request.metricsOpts.compile, group: request.compileGroup, }, diff --git a/services/clsi/app/js/Errors.js b/services/clsi/app/js/Errors.js index 83f3f04299..bf20bb3c90 100644 --- a/services/clsi/app/js/Errors.js +++ b/services/clsi/app/js/Errors.js @@ -33,6 +33,7 @@ export class TimedOutError extends OError {} export class NoXrefTableError extends OError {} export class TooManyCompileRequestsError extends OError {} export class InvalidParameter extends OError {} +export class MissingUpdatesError extends OError {} export default { QueueLimitReachedError, @@ -43,4 +44,5 @@ export default { NoXrefTableError, TooManyCompileRequestsError, InvalidParameter, + MissingUpdatesError, } diff --git a/services/clsi/app/js/HistoryResourceWriter.js b/services/clsi/app/js/HistoryResourceWriter.js new file mode 100644 index 0000000000..8dcf92fbf0 --- /dev/null +++ b/services/clsi/app/js/HistoryResourceWriter.js @@ -0,0 +1,564 @@ +// @ts-check +import logger from '@overleaf/logger' +import zlib from 'node:zlib' +import Settings from '@overleaf/settings' +import Path from 'node:path' +import fs from 'node:fs' +import CLSICacheHandler from './CLSICacheHandler.js' +import Errors from './Errors.js' +import { callbackify, promisify } from 'node:util' +import { + AddFileOperation, + Change, + EditFileOperation, + File, + MoveFileOperation, + Snapshot, +} from 'overleaf-editor-core' +import { fetchString, RequestFailedError } from '@overleaf/fetch-utils' +import { setTimeout } from 'node:timers/promises' +import ResourceWriter from './ResourceWriter.js' +import UrlCache from './UrlCache.js' +import OError from '@overleaf/o-error' +import ClsiMetrics from './Metrics.js' +import { promiseMapSettledWithLimit } from '@overleaf/promise-utils' + +const gzip = promisify(zlib.gzip) +const gunzip = promisify(zlib.gunzip) + +export const clearCacheCb = callbackify(clearCache) + +/** + * @param {string} projectId + * @param {string} userId + * @return {Promise} + */ +export async function clearCache(projectId, userId) { + const { dir } = snapshotPath(projectId, userId) + try { + await fs.promises.rm(dir, { recursive: true, force: true }) + } catch (err) { + if (isENOENT(err)) return + logger.warn( + { err, projectId, userId }, + 'compile from cache: failed to clear history cache' + ) + } +} + +/** + * @param {string} projectId + * @param {string} userId + * @return {{ dir: string, path: string, resyncPath: string }} + */ +function snapshotPath(projectId, userId) { + const dir = Path.join( + Settings.path.clsiCacheDir, + userId ? `${projectId}-${userId}` : projectId + ) + + const path = Path.join(dir, 'history.json.gz') + const resyncPath = Path.join(dir, 'history-resync.json.gz') + return { dir, path, resyncPath } +} + +/** + * @param {unknown} err + * @return {boolean} + */ +function isENOENT(err) { + return err instanceof Error && 'code' in err && err.code === 'ENOENT' +} + +/** + * @param {string} projectId + * @param {string} userId + * @param {number} remoteBaseVersion + * @return {Promise<{rawSnapshot: import('overleaf-editor-core/lib/types.js').RawSnapshot, globalBlobs: string[], fullSync: boolean,localBaseVersion: number}>} + */ +async function loadSnapshot(projectId, userId, remoteBaseVersion) { + const { path, resyncPath } = snapshotPath(projectId, userId) + let maxLocalBaseVersion = -1 + for (const candidate of [path, resyncPath]) { + try { + const fullSync = candidate === resyncPath + return await loadSnapshotFromFile(candidate, remoteBaseVersion, fullSync) + } catch (err) { + if (err instanceof Errors.MissingUpdatesError) { + maxLocalBaseVersion = Math.max( + maxLocalBaseVersion, + err.info.baseHistoryVersion + ) + } else if (!isENOENT(err)) { + logger.warn( + { err, projectId, userId }, + 'compile from cache: cannot read history from disk' + ) + } + } + } + try { + return await loadSnapshotFromClsiCache(projectId, userId, remoteBaseVersion) + } catch (err) { + if (err instanceof Errors.MissingUpdatesError) { + maxLocalBaseVersion = Math.max( + maxLocalBaseVersion, + err.info.baseHistoryVersion + ) + } else if (!isENOENT(err)) { + logger.warn( + { err, projectId, userId }, + 'compile from cache: cannot download from clsi-cache' + ) + } + } + throw new Errors.MissingUpdatesError('needs more updates', { + baseHistoryVersion: maxLocalBaseVersion, + }) +} + +/** + * @param {string} projectId + * @param {string} userId + * @param {number} remoteBaseVersion + * @return {Promise<{rawSnapshot: import('overleaf-editor-core/lib/types.js').RawSnapshot, globalBlobs: string[], fullSync: boolean,localBaseVersion: number}>} + */ +async function loadSnapshotFromClsiCache(projectId, userId, remoteBaseVersion) { + const { dir, resyncPath } = snapshotPath(projectId, userId) + await fs.promises.mkdir(dir, { recursive: true }) + const ok = await CLSICacheHandler.downloadHistorySnapshot( + projectId, + userId, + dir + ) + if (!ok) { + throw new Errors.MissingUpdatesError('needs full sync', { + baseHistoryVersion: -1, + }) + } + logger.debug( + { projectId, userId }, + 'compile from cache: restored history from clsi-cache' + ) + return await loadSnapshotFromFile(resyncPath, remoteBaseVersion, true) +} + +/** + * @param {string} path + * @param {number} remoteBaseVersion + * @param {boolean} fullSync + * @return {Promise<{rawSnapshot: import('overleaf-editor-core/lib/types.js').RawSnapshot, globalBlobs: string[], localBaseVersion: number, fullSync: boolean}>} + */ +async function loadSnapshotFromFile(path, remoteBaseVersion, fullSync) { + let blob = await fs.promises.readFile(path) + blob = await gunzip(blob) + const { rawSnapshot, globalBlobs, localBaseVersion } = JSON.parse( + blob.toString('utf-8') + ) + if (localBaseVersion < remoteBaseVersion) { + throw new Errors.MissingUpdatesError('missing updates', { + baseHistoryVersion: localBaseVersion, + }) + } + return { rawSnapshot, globalBlobs, localBaseVersion, fullSync } +} + +/** + * @param {string} projectId + * @param {string} userId + * @param {Snapshot} snapshot + * @param {number} localBaseVersion + * @param {string[]} globalBlobs + * @return {Promise} + */ +async function saveSnapshot( + projectId, + userId, + snapshot, + localBaseVersion, + globalBlobs +) { + const { dir, path } = snapshotPath(projectId, userId) + await fs.promises.mkdir(dir, { recursive: true }) + const tmp = path + '~' + await fs.promises.writeFile( + tmp, + await gzip( + JSON.stringify({ + globalBlobs, + localBaseVersion, + rawSnapshot: snapshot.toRaw(), + }) + ), + { flag: 'wx' } + ) + await fs.promises.rename(tmp, path) +} + +/** + * @param {string} projectId + * @param {string} userId + * @return {Promise} + */ +async function deleteResyncSnapshot(projectId, userId) { + const { resyncPath } = snapshotPath(projectId, userId) + try { + await fs.promises.unlink(resyncPath) + } catch (err) { + if (!isENOENT(err)) { + logger.warn( + { err, projectId, userId }, + 'compile from cache: failed to clear history-resync.json.gz' + ) + } + } +} + +/** + * @param {string} compileDir + * @param {string} subDir + * @param {Map} entries + * @return {Promise>} + */ +async function discoverExistingEntries( + compileDir, + subDir = '.', + entries = new Map() +) { + const dirents = await fs.promises.readdir(Path.join(compileDir, subDir), { + withFileTypes: true, + }) + for (const dirent of dirents) { + const path = Path.join(subDir, dirent.name) + if (dirent.isDirectory()) { + await discoverExistingEntries(compileDir, path, entries) + } else if (dirent.isFile()) { + entries.set(path, false) + } else if ( + dirent.isSymbolicLink() || + dirent.isFIFO() || + dirent.isSocket() + ) { + // should not happen, delete right away + logger.warn( + { compileDir, subDir, dirent }, + 'compile from cache: found blocked dirent' + ) + await fs.promises.unlink(Path.join(compileDir, path)) + } else { + throw new OError('unexpected dir entry', { compileDir, subDir, dirent }) + } + } + entries.set(subDir, true) + return entries +} + +/** + * @param {string} compileDir + * @param {Snapshot} snapshot + * @param {Map} entriesDepthFirst + */ +async function removeExtraneousEntries( + compileDir, + snapshot, + entriesDepthFirst +) { + const keepFolders = new Set(['.']) + for (const [path, isDir] of entriesDepthFirst) { + const shouldBeFile = !!snapshot.getFile(path) + if (isDir) { + if (!shouldBeFile) { + // directory can stay directory + if (keepFolders.has(path)) { + // folder is still in use + keepFolders.add(Path.dirname(path)) + } else { + // empty folder + await fs.promises.rmdir(Path.join(compileDir, path)) + entriesDepthFirst.delete(path) + } + continue + } + // a folder turned into a file + // before: foo/bar.txt/baz.txt + // ^^^^^^^ folder + // now: foo/bar.txt + // ^^^^^^^ file + const needle = path + '/' + for (const [child, childIsDir] of entriesDepthFirst) { + if (!child.startsWith(needle)) continue + if (childIsDir) { + await fs.promises.rmdir(Path.join(compileDir, child)) + } else { + await fs.promises.unlink(Path.join(compileDir, child)) + } + entriesDepthFirst.delete(child) + } + await fs.promises.rmdir(Path.join(compileDir, path)) + entriesDepthFirst.delete(path) + continue + } + if (shouldBeFile || !ResourceWriter.isExtraneousFile(path)) { + // resource or cached file + keepFolders.add(Path.dirname(path)) + continue + } + await fs.promises.unlink(Path.join(compileDir, path)) + entriesDepthFirst.delete(path) + } +} + +/** + * @param {string} compileDir + * @param {string} path + * @param {Map} entriesDepthFirst + */ +async function ensureHasParentFolder(compileDir, path, entriesDepthFirst) { + const parentFolderPath = Path.dirname(path) + if (entriesDepthFirst.has(parentFolderPath)) return + await ensureHasParentFolder(compileDir, parentFolderPath, entriesDepthFirst) + await fs.promises.mkdir(Path.join(compileDir, parentFolderPath)) + entriesDepthFirst.set(parentFolderPath, true) +} + +/** + * @param {import('overleaf-editor-core/lib/types.js').RawOperation[][]} raw + * @return {Change[]} + */ +function changesFromRawChangeOperations(raw) { + return raw.map(o => Change.mustFromRaw({ operations: o, timestamp: '0' })) +} + +/** + * @param {string} projectId + * @param {string} userId + * @param {Object} request + * @param {string} compileDir + * @param {Record} timings + * @return {Promise<{baseHistoryVersion: number, resourceList: {path: string}[]}>} + */ +export async function syncResourcesToDisk( + projectId, + userId, + request, + compileDir, + timings +) { + const remoteBaseVersion = request.baseHistoryVersion + let rawSnapshot, globalBlobs, localBaseVersion, source + let fullSync = true + try { + ;({ rawSnapshot, globalBlobs, fullSync, localBaseVersion } = + await loadSnapshot(projectId, userId, remoteBaseVersion)) + source = fullSync ? 'clsi-cache' : 'local' + logger.debug( + { projectId, userId, localBaseVersion, remoteBaseVersion }, + 'compile from cache: using existing snapshot' + ) + } catch (err) { + if (!request.rawSnapshot) throw err + if (!(err instanceof Errors.MissingUpdatesError)) { + logger.warn( + { err, projectId, userId }, + 'compile from cache: bad local history state during full resync' + ) + } + logger.debug( + { projectId, userId }, + 'compile from cache: using incoming snapshot' + ) + source = 'remote' + localBaseVersion = remoteBaseVersion + rawSnapshot = request.rawSnapshot + globalBlobs = [] + } + globalBlobs = Array.from(new Set(globalBlobs.concat(request.globalBlobs))) + + const snapshot = Snapshot.fromRaw(rawSnapshot) + + const changes = changesFromRawChangeOperations( + request.rawChangeOperations.slice(localBaseVersion - remoteBaseVersion) + ) + const applyAllStart = performance.now() + snapshot.applyAll(changes) + timings.snapshotApplyAll = Math.ceil(performance.now() - applyAllStart) + if (!ClsiMetrics.shouldSkipMetrics(request)) { + ClsiMetrics.snapshotApplyAllDurationSeconds.observe( + { group: request.compileGroup, source }, + timings.snapshotApplyAll / 1_000 + ) + } + + const entriesDepthFirst = await discoverExistingEntries(compileDir) + await removeExtraneousEntries(compileDir, snapshot, entriesDepthFirst) + + const changedPaths = [] + if (fullSync) { + changedPaths.push(...snapshot.getFilePathnames()) + logger.debug({ projectId, userId }, 'compile from cache: full sync') + } else { + const dedupe = new Set() + for (const change of changes) { + for (const operation of change.getOperations()) { + if (operation instanceof AddFileOperation) { + dedupe.add(operation.pathname) + } else if (operation instanceof MoveFileOperation) { + dedupe.add(operation.pathname) + if (!operation.isRemoveFile()) dedupe.add(operation.newPathname) + } else if (operation instanceof EditFileOperation) { + dedupe.add(operation.pathname) + } + } + } + // Restore deleted files + for (const path of snapshot.getFilePathnames()) { + if (!entriesDepthFirst.has(path)) dedupe.add(path) + } + changedPaths.push(...dedupe) + logger.debug( + { projectId, userId, changedPaths }, + 'compile from cache: incremental sync' + ) + } + + const blobStore = new BlobStore(request.historyId, globalBlobs) + const loadEagerStart = performance.now() + await snapshot.loadFiles('eager', blobStore) + timings.snapshotLoadEager = Math.ceil(performance.now() - loadEagerStart) + if (!ClsiMetrics.shouldSkipMetrics(request)) { + ClsiMetrics.snapshotLoadEagerDurationSeconds.observe( + { group: request.compileGroup, source }, + timings.snapshotLoadEager / 1_000 + ) + } + for (const path of changedPaths) { + const file = snapshot.getFile(path) + if (!file) continue // deleted, handled by removeExtraneousEntries + await ensureHasParentFolder(compileDir, path, entriesDepthFirst) + } + + let createCacheFolder + // Use Promise.allSettled to ensure that all writes have stopped when we exit. + const allDone = await promiseMapSettledWithLimit( + Settings.parallelFileDownloads, + changedPaths, + async path => { + const file = snapshot.getFile(path) + if (!file) return // deleted, handled by removeExtraneousEntries + + const content = file.getContent({ filterTrackedDeletes: true }) + if (typeof content === 'string') { + await fs.promises.writeFile( + Path.join(compileDir, path), + content, + 'utf-8' + ) + } else { + const hash = file.getHash() + if (!hash) { + throw new OError('unexpected file without content and hash', { path }) + } + const fallbackURL = null // no fallback + const lastModified = new Date(0) // content is static + if (!createCacheFolder) { + createCacheFolder = UrlCache.promises.createProjectDir(projectId) + } + await createCacheFolder + await UrlCache.promises.downloadUrlToFile( + projectId, + blobStore.getBlobURL(hash).href, + fallbackURL, + Path.join(compileDir, path), + lastModified + ) + } + } + ) + for (const [idx, result] of allDone.entries()) { + if (result.status === 'fulfilled') continue + const path = changedPaths[idx] + throw OError.tag(result.reason, 'write failed', { path }) + } + const baseHistoryVersion = localBaseVersion + changes.length + if (fullSync || changes.length) { + await saveSnapshot( + projectId, + userId, + snapshot, + baseHistoryVersion, + globalBlobs + ) + } + if (fullSync) { + await deleteResyncSnapshot(projectId, userId) + } + return { + baseHistoryVersion, + resourceList: snapshot.getFilePathnames().map(path => ({ path })), + } +} + +class BlobStore { + /** @type {string} */ + #historyId + /** @type {string[]} */ + #globalBlobs + + /** + * @param {string} historyId + * @param {string[]} globalBlobs + */ + constructor(historyId, globalBlobs) { + this.#historyId = historyId + this.#globalBlobs = globalBlobs + } + + /** + * @param {string} hash + * @return {URL} + */ + getBlobURL(hash) { + const u = new URL(Settings.apis.filestore.url) + if (this.#globalBlobs.includes(hash)) { + u.pathname = `/history/global/hash/${hash}` + } else { + u.pathname = `/history/project/${this.#historyId}/hash/${hash}` + } + return u + } + + /** + * @param {string} hash + * @return {Promise} + */ + async getString(hash) { + if (hash === File.EMPTY_FILE_HASH) return '' + const u = this.getBlobURL(hash) + let remainingAttempts = 3 + while (true) { + try { + return await fetchString(u, { signal: AbortSignal.timeout(3_000) }) + } catch (err) { + if (err instanceof RequestFailedError && err.response.status === 404) { + throw new Errors.NotFoundError() + } + remainingAttempts-- + if (remainingAttempts <= 0) throw err + logger.warn( + { err, url: u.href, remainingAttempts }, + 'compile from cache: history blob download failed' + ) + await setTimeout(100) + } + } + } + + /** + * @param {string} hash + * @return {Promise} + */ + async getObject(hash) { + const string = await this.getString(hash) + return JSON.parse(string) + } +} diff --git a/services/clsi/app/js/Metrics.js b/services/clsi/app/js/Metrics.js index 05a9518c69..62a1cdd9d7 100644 --- a/services/clsi/app/js/Metrics.js +++ b/services/clsi/app/js/Metrics.js @@ -31,7 +31,7 @@ const e2eCompileDurationSeconds = new prom.Histogram({ name: 'clsi_e2e_compile_duration_seconds', help: 'Duration of the entire compile request in clsi (sync, latexmk, output)', buckets: COMPILE_TIME_BUCKETS, - labelNames: ['compile', 'group'], + labelNames: ['compile', 'group', 'compileFromHistory'], }) const e2eCompileDurationClsiPerfSeconds = new prom.Gauge({ @@ -68,6 +68,20 @@ const imageProcessingDurationSeconds = new prom.Histogram({ labelNames: ['group', 'type'], }) +const snapshotApplyAllDurationSeconds = new prom.Histogram({ + name: 'clsi_snapshot_applyAll_duration_seconds', + help: 'Time spent applying snapshot changes', + buckets: [0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1, 2, 5, 10], + labelNames: ['group', 'source'], +}) + +const snapshotLoadEagerDurationSeconds = new prom.Histogram({ + name: 'clsi_snapshot_load_eager_duration_seconds', + help: 'Time spent loading string blobs for snapshot', + buckets: [0.01, 0.1, 0.2, 0.5, 1, 2, 5, 10, 20, 50], + labelNames: ['group', 'source'], +}) + function shouldSkipMetrics(request) { return ['clsi-perf', 'health-check', 'clsi-cache-template'].includes( request.metricsOpts.path @@ -83,5 +97,7 @@ export default { processOutputFilesDurationSeconds, latexmkRuleDurationSeconds, imageProcessingDurationSeconds, + snapshotApplyAllDurationSeconds, + snapshotLoadEagerDurationSeconds, shouldSkipMetrics, } diff --git a/services/clsi/app/js/OutputFileArchiveManager.js b/services/clsi/app/js/OutputFileArchiveManager.js index dd8e52e2cc..2f69a56232 100644 --- a/services/clsi/app/js/OutputFileArchiveManager.js +++ b/services/clsi/app/js/OutputFileArchiveManager.js @@ -93,10 +93,11 @@ export default { ) return outputFiles.filter( - // Ignore the pdf, clsi-cache tar-ball and also ignore the files ignored by the frontend. + // Ignore the pdf, clsi-cache tar-ball, history snapshot blob and also ignore the files ignored by the frontend. ({ path }) => path !== 'output.pdf' && path !== 'output.tar.gz' && + path !== 'history-resync.json.gz' && !ignoreFiles.includes(path) ) } catch (error) { diff --git a/services/clsi/app/js/ProjectPersistenceManager.js b/services/clsi/app/js/ProjectPersistenceManager.js index 05fb1c3616..ef47b30b1b 100644 --- a/services/clsi/app/js/ProjectPersistenceManager.js +++ b/services/clsi/app/js/ProjectPersistenceManager.js @@ -16,6 +16,7 @@ import Settings from '@overleaf/settings' import { callbackify } from 'node:util' import Path from 'node:path' import fs from 'node:fs' +import * as HistoryResourceWriter from './HistoryResourceWriter.js' let ProjectPersistenceManager const oneDay = 24 * 60 * 60 * 1000 @@ -204,19 +205,15 @@ export default ProjectPersistenceManager = { } logger.debug({ projectId, userId }, 'clearing project for user') return CompileManager.clearProject(projectId, userId, function (error) { - if (error != null) { - return callback(error) - } - return ProjectPersistenceManager.clearProjectFromCache( - projectId, - { reason: 'cleared' }, - function (error) { - if (error != null) { - return callback(error) - } - return callback() - } - ) + if (error) return callback(error) + HistoryResourceWriter.clearCacheCb(projectId, userId, error => { + if (error) return callback(error) + ProjectPersistenceManager.clearProjectFromCache( + projectId, + { reason: 'cleared' }, + callback + ) + }) }) }, diff --git a/services/clsi/app/js/RequestParser.js b/services/clsi/app/js/RequestParser.js index cd37b8b40e..332df85033 100644 --- a/services/clsi/app/js/RequestParser.js +++ b/services/clsi/app/js/RequestParser.js @@ -4,6 +4,7 @@ import OutputCacheManager from './OutputCacheManager.js' const VALID_COMPILERS = ['pdflatex', 'latex', 'xelatex', 'lualatex'] const MAX_TIMEOUT = 600 const EDITOR_ID_REGEX = /^[a-f0-9-]{36}$/ // UUID +const HISTORY_ID_REGEX = /^([0-9a-f]{24}|[1-9][0-9]{0,9})$/ // mongo id or postgres id function parse(body, callback) { const response = {} @@ -112,7 +113,12 @@ function parse(body, callback) { // resources (full) or only those resources to be updated // in-place (incremental). response.syncType = _parseAttribute('syncType', compile.options.syncType, { - validValues: ['full', 'incremental'], + validValues: [ + 'full', + 'incremental', + 'history-full', + 'history-incremental', + ], type: 'string', }) @@ -139,6 +145,22 @@ function parse(body, callback) { response.resources = (compile.resources || []).map(resource => _parseResource(resource) ) + response.historyId = _parseAttribute( + 'historyId', + compile.options.historyId, + { type: 'string', regex: HISTORY_ID_REGEX } + ) + response.baseHistoryVersion = _parseAttribute( + 'baseHistoryVersion', + compile.baseHistoryVersion, + { type: 'number' } + ) + response.globalBlobs = _parseAttribute('globalBlobs', compile.globalBlobs, { + type: 'array', + }) + // The snapshot and changes are validated when loading them in editor-core. + response.rawSnapshot = compile.rawSnapshot + response.rawChangeOperations = compile.rawChangeOperations const rootResourcePath = _parseAttribute( 'rootResourcePath', @@ -216,7 +238,11 @@ function _parseAttribute(name, attribute, options) { ) } } - if (options.type != null) { + if (options.type === 'array') { + if (!Array.isArray(attribute)) { + throw new Error(`${name} attribute should be an array`) + } + } else if (options.type != null) { // eslint-disable-next-line valid-typeof if (typeof attribute !== options.type) { throw new Error(`${name} attribute should be a ${options.type}`) diff --git a/services/clsi/config/settings.defaults.cjs b/services/clsi/config/settings.defaults.cjs index 37fbe6cf48..575059867b 100644 --- a/services/clsi/config/settings.defaults.cjs +++ b/services/clsi/config/settings.defaults.cjs @@ -62,6 +62,11 @@ module.exports = { ({ zone, readOnly }) => zone === process.env.ZONE && !readOnly ), }, + filestore: { + url: + process.env.FILESTORE_DOMAIN_OVERRIDE || + `http://${process.env.FILESTORE_HOST || '127.0.0.1'}:3009`, + }, }, smokeTest: process.env.SMOKE_TEST || false, diff --git a/services/clsi/package.json b/services/clsi/package.json index fd59bf6c7a..d749cf925e 100644 --- a/services/clsi/package.json +++ b/services/clsi/package.json @@ -30,6 +30,7 @@ "dockerode": "^4.0.9", "express": "4.22.1", "lodash": "^4.17.21", + "overleaf-editor-core": "*", "p-limit": "^3.1.0", "request": "2.88.2", "send": "^0.19.0", diff --git a/services/clsi/test/unit/js/CompileController.test.js b/services/clsi/test/unit/js/CompileController.test.js index d609476eb5..c293b5d220 100644 --- a/services/clsi/test/unit/js/CompileController.test.js +++ b/services/clsi/test/unit/js/CompileController.test.js @@ -148,6 +148,7 @@ describe('CompileController', () => { ...file, })), clsiCacheShard: undefined, + baseHistoryVersion: undefined, }, }) .should.equal(true) @@ -176,6 +177,7 @@ describe('CompileController', () => { ...file, })), clsiCacheShard: undefined, + baseHistoryVersion: undefined, }, }) .should.equal(true) @@ -224,6 +226,7 @@ describe('CompileController', () => { ...file, })), clsiCacheShard: undefined, + baseHistoryVersion: undefined, }, }) }) @@ -272,6 +275,7 @@ describe('CompileController', () => { ...file, })), clsiCacheShard: undefined, + baseHistoryVersion: undefined, }, }) }) @@ -304,6 +308,7 @@ describe('CompileController', () => { stats: ctx.stats, timings: ctx.timings, clsiCacheShard: undefined, + baseHistoryVersion: undefined, }, }) .should.equal(true) @@ -339,6 +344,7 @@ describe('CompileController', () => { // JSON.stringify will omit these undefined values buildId: undefined, clsiCacheShard: undefined, + baseHistoryVersion: undefined, }, }) .should.equal(true) @@ -373,6 +379,7 @@ describe('CompileController', () => { // JSON.stringify will omit these undefined values buildId: undefined, clsiCacheShard: undefined, + baseHistoryVersion: undefined, }, }) .should.equal(true) @@ -405,6 +412,7 @@ describe('CompileController', () => { // JSON.stringify will omit these undefined values buildId: undefined, clsiCacheShard: undefined, + baseHistoryVersion: undefined, }, }) .should.equal(true) diff --git a/services/clsi/test/unit/js/ProjectPersistenceManager.test.js b/services/clsi/test/unit/js/ProjectPersistenceManager.test.js index ea77af44de..abcb424c09 100644 --- a/services/clsi/test/unit/js/ProjectPersistenceManager.test.js +++ b/services/clsi/test/unit/js/ProjectPersistenceManager.test.js @@ -21,6 +21,14 @@ describe('ProjectPersistenceManager', () => { default: (ctx.UrlCache = {}), })) + vi.doMock( + '../../../app/js/HistoryResourceWriter', + () => + (ctx.HistoryResourceWriter = { + clearCacheCb: sinon.stub().yields(null), + }) + ) + vi.doMock('../../../app/js/CompileManager', () => ({ default: (ctx.CompileManager = {}), })) @@ -163,6 +171,13 @@ describe('ProjectPersistenceManager', () => { .should.equal(true) }) + it('should clear the history cache', ctx => { + ctx.HistoryResourceWriter.clearCacheCb.should.have.been.calledWith( + ctx.project_id, + ctx.user_id + ) + }) + it('should clear all the cached Urls for the project', ctx => { return ctx.UrlCache.clearProject .calledWith(ctx.project_id) diff --git a/services/clsi/test/unit/js/RequestParser.test.js b/services/clsi/test/unit/js/RequestParser.test.js index 411bcc0a9d..b08a6acb09 100644 --- a/services/clsi/test/unit/js/RequestParser.test.js +++ b/services/clsi/test/unit/js/RequestParser.test.js @@ -494,7 +494,8 @@ describe('RequestParser', () => { it('should return an error', ctx => { ctx.callback .calledWithMatch({ - message: 'syncType attribute should be one of: full, incremental', + message: + 'syncType attribute should be one of: full, incremental, history-full, history-incremental', }) .should.equal(true) }) diff --git a/services/web/app/src/Features/Compile/ClsiManager.mjs b/services/web/app/src/Features/Compile/ClsiManager.mjs index e96b84f3f5..5813167152 100644 --- a/services/web/app/src/Features/Compile/ClsiManager.mjs +++ b/services/web/app/src/Features/Compile/ClsiManager.mjs @@ -23,6 +23,10 @@ import ClsiCacheHandler from './ClsiCacheHandler.mjs' import HistoryManager from '../History/HistoryManager.mjs' import SplitTestHandler from '../SplitTests/SplitTestHandler.mjs' import AnalyticsManager from '../Analytics/AnalyticsManager.mjs' +import RedisWrapper from '../../infrastructure/RedisWrapper.mjs' + +// use the redis db with eviction policy enabled +const rclient = RedisWrapper.client('clsi_cookie') const ClsiCookieManager = ClsiCookieManagerFactory( Settings.apis.clsi?.backendGroupName @@ -38,6 +42,41 @@ const CLSI_COOKIES_ENABLED = (Settings.clsiCookie?.key ?? '') !== '' // The timeout in services/clsi/app.js is 10 minutes, so we'll be on the safe side with 12 minutes const COMPILE_REQUEST_TIMEOUT_MS = 12 * 60 * 1000 +function _baseHistoryVersionKey(projectId, userId) { + return `baseHistoryVersion:${projectId}:${userId}` +} + +async function getBaseHistoryVersion(projectId, userId) { + let v + try { + v = await rclient.get(_baseHistoryVersionKey(projectId, userId)) + } catch (err) { + logger.warn({ err, projectId, userId }, 'failed to get baseHistoryVersion') + return -1 + } + if (!v) return -1 + const n = parseInt(v, 10) + if (Number.isNaN(n)) return -1 + return n +} + +async function setBaseHistoryVersion(projectId, userId, baseHistoryVersion) { + const clsiCacheExpiryInSeconds = 8 * 24 * 60 * 60 // 8 days + try { + await rclient.setex( + _baseHistoryVersionKey(projectId, userId), + clsiCacheExpiryInSeconds, + baseHistoryVersion + ) + } catch (err) { + logger.warn({ err, projectId, userId }, 'failed to set baseHistoryVersion') + } +} + +async function clearBaseHistoryVersion(projectId, userId) { + await rclient.del(_baseHistoryVersionKey(projectId, userId)) +} + function getNewCompileBackendClass(projectId, compileBackendClass) { // Sample x% of projects to move up one bracket. if ( @@ -111,7 +150,13 @@ async function sendRequest(projectId, userId, options) { options = {} } let result = await sendRequestOnce(projectId, userId, options) - if (result.status === 'conflict') { + if (result.status === 'missing-updates') { + // try again with updated baseline + result = await sendRequestOnce(projectId, userId, { + ...options, + baseHistoryVersion: result.baseHistoryVersion, + }) + } else if (result.status === 'conflict') { // Try again, with a full compile result = await sendRequestOnce(projectId, userId, { ...options, @@ -130,7 +175,7 @@ async function sendRequest(projectId, userId, options) { async function sendRequestOnce(projectId, userId, options) { let req try { - req = await _buildRequest(projectId, options) + req = await _buildRequest(projectId, userId, options) } catch (err) { if (err.message === 'no main file specified') { return { @@ -178,6 +223,16 @@ async function stopCompile(projectId, userId, options) { ) } +/** + * @param {PromiseSettledResult} result + * @private + */ +function _throwIfRejected(result) { + if (result.status === 'rejected') { + throw result.reason + } +} + async function deleteAuxFiles(projectId, userId, options, clsiserverid) { if (options == null) { options = {} @@ -189,37 +244,42 @@ async function deleteAuxFiles(projectId, userId, options, clsiserverid) { projectId, userId ) - const opts = { - method: 'DELETE', - } - - try { - await _makeRequestWithClsiServerId( + const [ + clsiResult, + clsiCacheResult, + documentUpdaterResult, + clsiServerIdResult, + baseHistoryVersionResult, + ] = await Promise.allSettled([ + _makeRequestWithClsiServerId( projectId, userId, compileGroup, compileBackendClass, url, - opts, + { method: 'DELETE' }, clsiserverid + ), + ClsiCacheHandler.clearCache(projectId, userId), + DocumentUpdaterHandler.promises.clearProjectState(projectId), + clearClsiServerId(projectId, userId, compileBackendClass), + clearBaseHistoryVersion(projectId, userId), + ]) + if (clsiCacheResult.status === 'rejected') { + logger.warn( + { err: clsiCacheResult.reason, projectId, userId }, + 'purge clsi-cache failed' ) - } finally { - // always clear the clsi-cache - try { - await ClsiCacheHandler.clearCache(projectId, userId) - } catch (err) { - logger.warn({ err, projectId, userId }, 'purge clsi-cache failed') - } - - // always clear the project state from the docupdater, even if there - // was a problem with the request to the clsi - try { - await DocumentUpdaterHandler.promises.clearProjectState(projectId) - } finally { - // always clear the clsi server id, even if prior actions failed - await clearClsiServerId(projectId, userId, compileBackendClass) - } } + if (baseHistoryVersionResult.status === 'rejected') { + logger.warn( + { err: baseHistoryVersionResult.reason, projectId, userId }, + 'failed to clear baseHistoryVersion' + ) + } + _throwIfRejected(clsiResult) + _throwIfRejected(documentUpdaterResult) + _throwIfRejected(clsiServerIdResult) } async function _sendBuiltRequest(projectId, userId, req, options) { @@ -254,6 +314,9 @@ async function _sendBuiltRequest(projectId, userId, req, options) { ) collectMetricsOnBlgFiles(outputFiles) const compile = response?.compile || {} + if (compile.baseHistoryVersion) { + await setBaseHistoryVersion(projectId, userId, compile.baseHistoryVersion) + } return { status: compile.status, outputFiles, @@ -263,6 +326,7 @@ async function _sendBuiltRequest(projectId, userId, req, options) { timings: compile.timings, outputUrlPrefix: compile.outputUrlPrefix, clsiCacheShard: compile.clsiCacheShard, + baseHistoryVersion: compile.baseHistoryVersion, } } @@ -604,6 +668,12 @@ async function _postToClsi( if (err.response.status === 413) { return { response: { compile: { status: 'project-too-large' } } } } else if (err.response.status === 409) { + try { + const body = JSON.parse(err.body || '{}') + if (body.compile?.status === 'missing-updates') { + return { response: body } + } + } catch {} return { response: { compile: { status: 'conflict' } } } } else if (err.response.status === 423) { return { response: { compile: { status: 'compile-in-progress' } } } @@ -657,13 +727,12 @@ function _parseOutputFiles(projectId, rawOutputFiles = []) { return outputFiles } -async function _buildRequest(projectId, options) { +async function _buildRequest(projectId, userId, options) { const project = await ProjectGetter.promises.getProject(projectId, { compiler: 1, - rootDoc_id: 1, imageName: 1, - rootFolder: 1, 'overleaf.history.id': 1, + ...(options.compileFromHistory ? {} : { rootDoc_id: 1, rootFolder: 1 }), }) if (project == null) { throw new Errors.NotFoundError(`project does not exist: ${projectId}`) @@ -671,6 +740,31 @@ async function _buildRequest(projectId, options) { if (!VALID_COMPILERS.includes(project.compiler)) { project.compiler = 'pdflatex' } + const historyId = project.overleaf.history.id + let { baseHistoryVersion } = options + + if (options.compileFromHistory && !baseHistoryVersion) { + baseHistoryVersion = await getBaseHistoryVersion(projectId, userId) + } + + if (options.compileFromHistory && baseHistoryVersion === -1) { + // full sync + return await _buildRequestFromHistoryFull( + projectId, + historyId, + options, + project + ) + } else if (options.compileFromHistory) { + // incremental sync + return await _buildRequestFromHistoryIncremental( + projectId, + historyId, + options, + project, + baseHistoryVersion + ) + } if (options.incrementalCompilesEnabled || options.syncType != null) { // new way, either incremental or full @@ -755,6 +849,119 @@ async function getOutputFileStream( } } +/** + * @param {import('overleaf-editor-core/lib/types.js').RawChange[]} changes + * @return {import('overleaf-editor-core/lib/types.js').RawOperation[][]} + * @private + */ +function _rawChangeOperationsFromChanges(changes) { + // omit timestamp (required, back-filled in clsi) + // omit authors (optional) + // omit v2Authors (optional) + // omit origin (optional) + // omit projectVersion (optional) + // omit v2DocVersions (optional) + return changes.map(change => change.operations) +} + +/** + * @param {import('overleaf-editor-core/lib/types.js').RawOperation[][]} rawChangeOperations + * @return {Set} + * @private + */ +function _collectGlobalBlobs(rawChangeOperations) { + const globalBlobs = new Set() + for (const operations of rawChangeOperations) { + for (const operation of operations) { + const hash = operation?.file?.hash + if (hash && HistoryManager.isGlobalBlob(hash)) { + globalBlobs.add(hash) + } + } + } + return globalBlobs +} + +async function _buildRequestFromHistoryFull( + projectId, + historyId, + options, + project +) { + await HistoryManager.promises.flushProject(projectId) + const { + chunk: { + history: { snapshot: rawSnapshot, changes: rawChanges }, + startVersion, + }, + } = await HistoryManager.promises.getLatestHistory(historyId) + const rawChangeOperations = _rawChangeOperationsFromChanges(rawChanges) + const globalBlobs = _collectGlobalBlobs(rawChangeOperations) + for (const { hash, rangesHash } of Object.values(rawSnapshot.files)) { + if (hash && HistoryManager.isGlobalBlob(hash)) { + globalBlobs.add(hash) + } + if (rangesHash && HistoryManager.isGlobalBlob(rangesHash)) { + globalBlobs.add(rangesHash) + } + } + options = { + ...options, + syncType: 'history-full', + historyId, + baseHistoryVersion: startVersion, + rawSnapshot, + rawChangeOperations, + globalBlobs: Array.from(globalBlobs), + } + return _finaliseRequest(projectId, options, project, [], []) +} + +async function _buildRequestFromHistoryIncremental( + projectId, + historyId, + options, + project, + baseHistoryVersion +) { + await HistoryManager.promises.flushProject(projectId) + const rawChangeOperations = [] + let hasMore = true + let since = baseHistoryVersion + let size = 0 + while (hasMore) { + let changes + ;({ changes, hasMore } = await HistoryManager.promises.getChanges( + historyId, + { since } + )) + since += changes.length + const newRawChangeOperations = _rawChangeOperationsFromChanges(changes) + size += Buffer.from(JSON.stringify(newRawChangeOperations)).byteLength + if (size > 6.5 * 1024 * 1024) { + // clsi has a payload limit of 7MiB. Do not send too many operations. + // Fall back to sending the latest snapshot instead. + return await _buildRequestFromHistoryFull( + projectId, + historyId, + options, + project + ) + } + rawChangeOperations.push(...newRawChangeOperations) + } + const globalBlobs = _collectGlobalBlobs(rawChangeOperations) + options = { + ...options, + syncType: 'history-incremental', + historyId, + baseHistoryVersion, + rawChangeOperations, + globalBlobs: Array.from(globalBlobs), + } + return _finaliseRequest(projectId, options, project, [], []) +} + function _buildRequestFromDocupdater( projectId, options, @@ -812,7 +1019,7 @@ async function _getContentFromMongo(projectId) { function _finaliseRequest(projectId, options, project, docs, files) { const resources = [] let flags - let rootResourcePath = null + let rootResourcePath = options.rootResourcePath let rootResourcePathOverride = null let hasMainFile = false let numberOfDocsInProject = 0 @@ -883,6 +1090,7 @@ function _finaliseRequest(projectId, options, project, docs, files) { return { compile: { options: { + historyId: options.historyId?.toString(), // send as string, if set buildId: options.buildId, editorId: options.editorId, compiler: project.compiler, @@ -909,6 +1117,10 @@ function _finaliseRequest(projectId, options, project, docs, files) { metricsMethod: options.compileGroup, metricsPath: options.metricsPath, }, + baseHistoryVersion: options.baseHistoryVersion, + rawSnapshot: options.rawSnapshot, + rawChangeOperations: options.rawChangeOperations, + globalBlobs: options.globalBlobs, rootResourcePath, resources, }, @@ -917,7 +1129,7 @@ function _finaliseRequest(projectId, options, project, docs, files) { async function wordCount(projectId, userId, file, limits, clsiserverid) { const { compileBackendClass, compileGroup } = limits - const req = await _buildRequest(projectId, limits) + const req = await _buildRequest(projectId, userId, limits) const filename = file || req.compile.rootResourcePath const url = _getCompilerUrl( compileBackendClass, diff --git a/services/web/app/src/Features/Compile/CompileController.mjs b/services/web/app/src/Features/Compile/CompileController.mjs index 14ad0be5e6..8f5be3f659 100644 --- a/services/web/app/src/Features/Compile/CompileController.mjs +++ b/services/web/app/src/Features/Compile/CompileController.mjs @@ -52,7 +52,7 @@ function getPdfCachingMinChunkSize(req, res) { return Settings.pdfCachingMinChunkSize } -function _getSplitTestOptions(req, res) { +async function _getSplitTestOptions(req, res) { // Use the query flags from the editor request for overriding the split test. let query = {} try { @@ -61,12 +61,20 @@ function _getSplitTestOptions(req, res) { } catch (e) {} const editorReq = { ...req, query } + const { variant } = await SplitTestHandler.promises.getAssignment( + editorReq, + res, + 'compile-from-history' + ) + const compileFromHistory = variant === 'enabled' + const pdfDownloadDomain = Settings.pdfDownloadDomain const enablePdfCaching = Settings.enablePdfCaching if (!enablePdfCaching || !req.query.enable_pdf_caching) { // The frontend does not want to do pdf caching. return { + compileFromHistory, pdfDownloadDomain, enablePdfCaching: false, } @@ -74,6 +82,7 @@ function _getSplitTestOptions(req, res) { const pdfCachingMinChunkSize = getPdfCachingMinChunkSize(editorReq, res) return { + compileFromHistory, pdfDownloadDomain, enablePdfCaching, pdfCachingMinChunkSize, @@ -137,6 +146,7 @@ const _CompileController = { fileLineErrors, stopOnFirstError, editorId: req.body.editorId, + rootResourcePath: req.body.rootResourcePath, } if (req.body.rootDoc_id) { @@ -161,11 +171,16 @@ const _CompileController = { options.incrementalCompilesEnabled = true } - let { enablePdfCaching, pdfCachingMinChunkSize, pdfDownloadDomain } = - _getSplitTestOptions(req, res) + let { + enablePdfCaching, + pdfCachingMinChunkSize, + pdfDownloadDomain, + compileFromHistory, + } = await _getSplitTestOptions(req, res) if (Features.hasFeature('saas')) { options.compileFromClsiCache = true options.populateClsiCache = true + options.compileFromHistory = compileFromHistory } options.enablePdfCaching = enablePdfCaching if (enablePdfCaching) { diff --git a/services/web/app/src/Features/Compile/CompileManager.mjs b/services/web/app/src/Features/Compile/CompileManager.mjs index 3e10b0dd9e..4d5017b04a 100644 --- a/services/web/app/src/Features/Compile/CompileManager.mjs +++ b/services/web/app/src/Features/Compile/CompileManager.mjs @@ -84,6 +84,7 @@ async function compile(projectId, userId, options = {}) { outputUrlPrefix, buildId, clsiCacheShard, + baseHistoryVersion, } = await ClsiManager.promises.sendRequest(projectId, compileAsUser, options) return { @@ -97,6 +98,7 @@ async function compile(projectId, userId, options = {}) { outputUrlPrefix, buildId, clsiCacheShard, + baseHistoryVersion, } } diff --git a/services/web/app/src/Features/History/HistoryManager.mjs b/services/web/app/src/Features/History/HistoryManager.mjs index 9e77d95d9a..a1f6bde69e 100644 --- a/services/web/app/src/Features/History/HistoryManager.mjs +++ b/services/web/app/src/Features/History/HistoryManager.mjs @@ -35,6 +35,10 @@ async function loadGlobalBlobs() { // END copy from services/history-v1/storage/lib/blob_store/index.js +function isGlobalBlob(hash) { + return GLOBAL_BLOBS.has(hash) +} + function getFilestoreBlobURL(historyId, hash) { if (GLOBAL_BLOBS.has(hash)) { return `${settings.apis.filestore.url}/history/global/hash/${hash}` @@ -405,6 +409,7 @@ function _userView(user) { const loadGlobalBlobsPromise = loadGlobalBlobs() export default { + isGlobalBlob, getFilestoreBlobURL, loadGlobalBlobsPromise, initializeProject: callbackify(initializeProject), diff --git a/services/web/frontend/js/features/pdf-preview/util/compiler.ts b/services/web/frontend/js/features/pdf-preview/util/compiler.ts index 573b5268b2..404ee2336b 100644 --- a/services/web/frontend/js/features/pdf-preview/util/compiler.ts +++ b/services/web/frontend/js/features/pdf-preview/util/compiler.ts @@ -36,13 +36,14 @@ export default class DocumentCompiler { cleanupCompileResult: () => void signal: AbortSignal openDocs: OpenDocuments - projectRootDocId?: string | null + projectRootDocId: string | null clsiServerId: string | null currentDoc: DocumentContainer | null error: Error | undefined timer: number defaultOptions: CompileOptions debouncedAutoCompile: DebouncedFunc<() => void> + pathInFolder: (docId: string) => string | null constructor({ compilingRef, @@ -80,6 +81,7 @@ export default class DocumentCompiler { this.cleanupCompileResult = cleanupCompileResult this.signal = signal this.openDocs = openDocs + this.pathInFolder = () => null this.projectRootDocId = null this.clsiServerId = null @@ -134,10 +136,19 @@ export default class DocumentCompiler { const t0 = performance.now() - const rootDocId = this.getRootDocOverrideId() + let rootDocIdOverride = this.getRootDocOverrideId() + let rootResourcePath + try { + // Only required for compile-from-history + rootDocIdOverride = rootDocIdOverride || this.projectRootDocId + rootResourcePath = rootDocIdOverride + ? this.pathInFolder(rootDocIdOverride) + : 'main.tex' + } catch {} const body = { - rootDoc_id: rootDocId, + rootDoc_id: rootDocIdOverride, + rootResourcePath, draft: options.draft, check: 'silent', // NOTE: 'error' and 'validate' are possible, but unused // use incremental compile for all users but revert to a full compile @@ -165,7 +176,7 @@ export default class DocumentCompiler { this.setError(undefined) data.options = options - data.rootDocId = rootDocId + data.rootDocId = rootDocIdOverride if (data.clsiServerId) { this.clsiServerId = data.clsiServerId } diff --git a/services/web/frontend/js/shared/context/local-compile-context.tsx b/services/web/frontend/js/shared/context/local-compile-context.tsx index 72531581c5..22fb931863 100644 --- a/services/web/frontend/js/shared/context/local-compile-context.tsx +++ b/services/web/frontend/js/shared/context/local-compile-context.tsx @@ -151,7 +151,7 @@ export const LocalCompileProvider: FC = ({ const { features, alphaProgram } = useUserContext() const { fileTreeData } = useFileTreeData() - const { findEntityByPath } = useFileTreePathContext() + const { pathInFolder, findEntityByPath } = useFileTreePathContext() // whether a compile is in progress const [compiling, setCompiling] = useState(false) @@ -342,8 +342,12 @@ export const LocalCompileProvider: FC = ({ // keep the project rootDocId in sync with the compiler useEffect(() => { - compiler.projectRootDocId = rootDocId + compiler.projectRootDocId = rootDocId || null }, [compiler, rootDocId]) + // keep pathInFolder in sync with the compiler + useEffect(() => { + compiler.pathInFolder = pathInFolder + }, [compiler, pathInFolder]) // keep draft setting in sync with the compiler useEffect(() => { diff --git a/services/web/scripts/e2e_test_setup.mjs b/services/web/scripts/e2e_test_setup.mjs index 9c4f5254f2..685fa732ec 100644 --- a/services/web/scripts/e2e_test_setup.mjs +++ b/services/web/scripts/e2e_test_setup.mjs @@ -133,6 +133,26 @@ async function provisionSplitTests() { Path.join(MONOREPO, 'tools/saas-e2e/split-tests.json') ) ) + // Add WIP split test, we can update the JSON blob once this is in production + SPLIT_TESTS.push({ + name: 'compile-from-history', + versions: [ + { + versionNumber: 1, + createdAt: '2026-02-25T14:55:31.260Z', + active: true, + analyticsEnabled: false, + phase: 'release', + variants: [ + { + name: 'enabled', + rolloutPercent: 0, + rolloutStripes: [], + }, + ], + }, + ], + }) console.log(`> Importing ${SPLIT_TESTS.length} split-tests from production.`) await SplitTestManager.replaceSplitTests(SPLIT_TESTS) } diff --git a/services/web/test/unit/src/Compile/ClsiManager.test.mjs b/services/web/test/unit/src/Compile/ClsiManager.test.mjs index dc679c27ef..a317abd153 100644 --- a/services/web/test/unit/src/Compile/ClsiManager.test.mjs +++ b/services/web/test/unit/src/Compile/ClsiManager.test.mjs @@ -176,6 +176,18 @@ describe('ClsiManager', function () { recordEventForUserInBackground: sinon.stub(), } + ctx.redis = { + auth() {}, + del: sinon.stub(), + get: sinon.stub(), + setex: sinon.stub().resolves(), + } + vi.doMock('../../../../app/src/infrastructure/RedisWrapper', () => ({ + default: (ctx.RedisWrapper = { + client: () => ctx.redis, + }), + })) + vi.doMock('@overleaf/settings', () => ({ default: ctx.Settings, })) diff --git a/services/web/test/unit/src/Compile/CompileController.test.mjs b/services/web/test/unit/src/Compile/CompileController.test.mjs index be98f0c0c1..b53dd481a9 100644 --- a/services/web/test/unit/src/Compile/CompileController.test.mjs +++ b/services/web/test/unit/src/Compile/CompileController.test.mjs @@ -308,10 +308,12 @@ describe('CompileController', function () { isAutoCompile: false, compileFromClsiCache: true, populateClsiCache: true, + compileFromHistory: false, enablePdfCaching: false, fileLineErrors: false, stopOnFirstError: false, editorId: undefined, + rootResourcePath: undefined, } ) }) @@ -350,10 +352,12 @@ describe('CompileController', function () { isAutoCompile: true, compileFromClsiCache: true, populateClsiCache: true, + compileFromHistory: false, enablePdfCaching: false, fileLineErrors: false, stopOnFirstError: false, editorId: undefined, + rootResourcePath: undefined, } ) }) @@ -373,11 +377,13 @@ describe('CompileController', function () { isAutoCompile: false, compileFromClsiCache: true, populateClsiCache: true, + compileFromHistory: false, enablePdfCaching: false, draft: true, fileLineErrors: false, stopOnFirstError: false, editorId: undefined, + rootResourcePath: undefined, } ) }) @@ -397,10 +403,36 @@ describe('CompileController', function () { isAutoCompile: false, compileFromClsiCache: true, populateClsiCache: true, + compileFromHistory: false, enablePdfCaching: false, fileLineErrors: false, stopOnFirstError: false, editorId: 'the-editor-id', + rootResourcePath: undefined, + } + ) + }) + }) + describe('with a rootResourcePath', function () { + beforeEach(async function (ctx) { + ctx.req.body = { rootResourcePath: 'foo.tex' } + await ctx.CompileController.compile(ctx.req, ctx.res, ctx.next) + }) + + it('should pass the rootResourcePath to the compiler', function (ctx) { + ctx.CompileManager.promises.compile.should.have.been.calledWith( + ctx.projectId, + ctx.user_id, + { + isAutoCompile: false, + compileFromClsiCache: true, + populateClsiCache: true, + compileFromHistory: false, + enablePdfCaching: false, + fileLineErrors: false, + stopOnFirstError: false, + editorId: undefined, + rootResourcePath: 'foo.tex', } ) })