[clsi] initial implementation of compile from history (#31883)

* [clsi] initial implementation of compile from history

* [clsi] copy changes

* [saas-e2e] extend test case with nested folder

* [saas-e2e] add test case for tracked changes

* [web] fix accumulating changes from multiple chunks

* [web] optimize size check for compile request payload

* [clsi] deduplicate globalBlobs

* [clsi] add validation for request body details

* [clsi] add metrics for compile from history

* [clsi] download binary files concurrently

* [clsi] skip download of empty file blob

* [clsi] break down e2e compile time metric by compileFromHistory

GitOrigin-RevId: 0dadef93e89d8a172c35cb130a1042d9d1bec42a
This commit is contained in:
Jakob Ackermann
2026-03-03 11:20:08 +01:00
committed by Copybot
parent 5723a9589a
commit 81b7121408
28 changed files with 1135 additions and 88 deletions
@@ -74,6 +74,14 @@ class Change {
static fromRaw(raw) {
if (!raw) return null
return Change.mustFromRaw(raw)
}
/**
* @param {RawChange} raw
* @return {Change}
*/
static mustFromRaw(raw) {
assert.array.of.object(raw.operations, 'bad raw.operations')
assert.nonEmptyString(raw.timestamp, 'bad raw.timestamp')
+4 -4
View File
@@ -59,10 +59,10 @@ export type RawChange = {
operations: RawOperation[]
timestamp: string
authors?: (number | null)[]
v2Authors: string[]
origin: RawOrigin
projectVersion: string
v2DocVersions: RawV2DocVersions
v2Authors?: string[]
origin?: RawOrigin
projectVersion?: string
v2DocVersions?: RawV2DocVersions
}
export type RawOperation =
+1
View File
@@ -50366,6 +50366,7 @@
"dockerode": "^4.0.9",
"express": "4.22.1",
"lodash": "^4.17.21",
"overleaf-editor-core": "*",
"p-limit": "^3.1.0",
"request": "2.88.2",
"send": "^0.19.0",
+2
View File
@@ -21,6 +21,7 @@ COPY libraries/fetch-utils/package.json /overleaf/libraries/fetch-utils/package.
COPY libraries/logger/package.json /overleaf/libraries/logger/package.json
COPY libraries/metrics/package.json /overleaf/libraries/metrics/package.json
COPY libraries/o-error/package.json /overleaf/libraries/o-error/package.json
COPY libraries/overleaf-editor-core/package.json /overleaf/libraries/overleaf-editor-core/package.json
COPY libraries/promise-utils/package.json /overleaf/libraries/promise-utils/package.json
COPY libraries/settings/package.json /overleaf/libraries/settings/package.json
COPY libraries/stream-utils/package.json /overleaf/libraries/stream-utils/package.json
@@ -32,6 +33,7 @@ COPY libraries/fetch-utils/ /overleaf/libraries/fetch-utils/
COPY libraries/logger/ /overleaf/libraries/logger/
COPY libraries/metrics/ /overleaf/libraries/metrics/
COPY libraries/o-error/ /overleaf/libraries/o-error/
COPY libraries/overleaf-editor-core/ /overleaf/libraries/overleaf-editor-core/
COPY libraries/promise-utils/ /overleaf/libraries/promise-utils/
COPY libraries/settings/ /overleaf/libraries/settings/
COPY libraries/stream-utils/ /overleaf/libraries/stream-utils/
+1
View File
@@ -20,6 +20,7 @@ IMAGE_CACHE ?= $(IMAGE_REPO):cache-$(shell cat \
$(MONOREPO)/libraries/logger/package.json \
$(MONOREPO)/libraries/metrics/package.json \
$(MONOREPO)/libraries/o-error/package.json \
$(MONOREPO)/libraries/overleaf-editor-core/package.json \
$(MONOREPO)/libraries/promise-utils/package.json \
$(MONOREPO)/libraries/settings/package.json \
$(MONOREPO)/libraries/stream-utils/package.json \
+86 -18
View File
@@ -220,15 +220,45 @@ function notifyCLSICacheAboutBuild({
enqueue([{ path: 'output.tar.gz' }])
})
.catch(err => {
if (isENOENT(err)) return
logger.warn(
{ err, projectId, userId, buildId, shard },
'build output.tar.gz for clsi-cache failed'
)
})
copyHistorySnapshot({ projectId, userId, buildId })
.then(() => {
enqueue([{ path: 'history-resync.json.gz' }])
})
.catch(err => {
if (isENOENT(err)) return
logger.warn(
{ err, projectId, userId, buildId, shard },
'copy history-resync.json.gz for clsi-cache failed'
)
})
return shard
}
/**
* @param {Object} opts
* @param {string} opts.projectId
* @param {string} opts.userId
* @param {string} opts.buildId
* @return {Promise<void>}
*/
async function copyHistorySnapshot({ projectId, userId, buildId }) {
const src = Path.join(
Settings.path.clsiCacheDir,
userId ? `${projectId}-${userId}` : projectId,
'history.json.gz'
)
const outputDir = getOutputDir({ projectId, userId, buildId })
const dst = Path.join(outputDir, 'history-resync.json.gz')
await fs.promises.cp(src, dst)
}
/**
* @param {Object} opts
* @param {string} opts.projectId
@@ -239,12 +269,7 @@ function notifyCLSICacheAboutBuild({
*/
async function buildTarball({ projectId, userId, buildId, outputFiles }) {
const timer = new Metrics.Timer('clsi_cache_build', 1, {}, TIMING_BUCKETS)
const outputDir = Path.join(
Settings.path.outputDir,
userId ? `${projectId}-${userId}` : projectId,
CACHE_SUBDIR,
buildId
)
const outputDir = getOutputDir({ projectId, userId, buildId })
const files = outputFiles.filter(f => !isExtraneousFile(f.path))
if (files.length > MAX_ENTRIES_IN_OUTPUT_TAR) {
@@ -287,6 +312,33 @@ async function downloadOutputDotSynctexFromCompileCache(
buildId,
outputDir
) {
const requestPath = `/project/${projectId}/${
userId ? `user/${userId}/` : ''
}build/${editorId}-${buildId}/search/output/output.synctex.gz`
return await downloadSingleFile(projectId, requestPath, outputDir, 'synctex')
}
/**
* @param {string} projectId
* @param {string} userId
* @param {string} cacheDir
* @return {Promise<boolean>}
*/
async function downloadHistorySnapshot(projectId, userId, cacheDir) {
const requestPath = `/project/${projectId}/${
userId ? `user/${userId}/` : ''
}latest/output/history-resync.json.gz`
return await downloadSingleFile(projectId, requestPath, cacheDir, 'snapshot')
}
/**
* @param {string} projectId
* @param {string} requestPath
* @param {string} outputDir
* @param {string} label
* @return {Promise<boolean>}
*/
async function downloadSingleFile(projectId, requestPath, outputDir, label) {
if (!Settings.apis.clsiCache.enabled) return false
if (!OBJECT_ID_REGEX.test(projectId)) return false
const shardCfg = getAvailableShard(projectId)
@@ -296,20 +348,17 @@ async function downloadOutputDotSynctexFromCompileCache(
const timer = new Metrics.Timer(
'clsi_cache_download',
1,
{ method: 'synctex' },
{ method: label },
TIMING_BUCKETS
)
const u = new URL(url)
u.pathname = requestPath
let stream
try {
stream = await fetchStream(
`${url}/project/${projectId}/${
userId ? `user/${userId}/` : ''
}build/${editorId}-${buildId}/search/output/output.synctex.gz`,
{
method: 'GET',
signal: AbortSignal.timeout(TIMEOUT),
}
)
stream = await fetchStream(u, {
method: 'GET',
signal: AbortSignal.timeout(TIMEOUT),
})
} catch (err) {
if (err instanceof RequestFailedError && err.response.status === 404) {
closeCircuitBreaker(url)
@@ -321,13 +370,14 @@ async function downloadOutputDotSynctexFromCompileCache(
throw OError.tag(err, 'download failed', { shard })
}
await fs.promises.mkdir(outputDir, { recursive: true })
const dst = Path.join(outputDir, 'output.synctex.gz')
const name = Path.basename(requestPath)
const dst = Path.join(outputDir, name)
const tmp = dst + crypto.randomUUID()
try {
await pipeline(
stream,
new MeteredStream(Metrics, 'clsi_cache_egress', {
path: 'output.synctex.gz',
path: name,
}),
fs.createWriteStream(tmp)
)
@@ -437,8 +487,25 @@ async function downloadLatestCompileCache(projectId, userId, compileDir) {
return !abort
}
/**
* @param {Object} opts
* @param {string} opts.projectId
* @param {string} opts.userId
* @param {string} opts.buildId
* @return {string}
*/
function getOutputDir({ projectId, userId, buildId }) {
return Path.join(
Settings.path.outputDir,
userId ? `${projectId}-${userId}` : projectId,
CACHE_SUBDIR,
buildId
)
}
/**
* @param {unknown} err
* @return {boolean}
*/
function isENOENT(err) {
return err instanceof Error && 'code' in err && err.code === 'ENOENT'
@@ -447,5 +514,6 @@ function isENOENT(err) {
export default {
notifyCLSICacheAboutBuild,
downloadLatestCompileCache,
downloadHistorySnapshot,
downloadOutputDotSynctexFromCompileCache,
}
+7 -2
View File
@@ -40,7 +40,7 @@ function compile(req, res, next) {
stats,
timings,
(error, result) => {
let { buildId, outputFiles } = result || {}
let { buildId, outputFiles, baseHistoryVersion } = result || {}
let code, status
if (outputFiles == null) {
outputFiles = []
@@ -50,7 +50,7 @@ function compile(req, res, next) {
status = 'compile-in-progress'
} else if (error instanceof Errors.FilesOutOfSyncError) {
code = 409 // Http 409 Conflict
status = 'retry'
status = 'conflict'
logger.warn(
{
projectId: request.project_id,
@@ -58,6 +58,10 @@ function compile(req, res, next) {
},
'files out of sync, please retry'
)
} else if (error instanceof Errors.MissingUpdatesError) {
code = 409
status = 'missing-updates'
baseHistoryVersion = error.info.baseHistoryVersion
} else if (
error?.code === 'EPIPE' ||
error instanceof Errors.TooManyCompileRequestsError
@@ -146,6 +150,7 @@ function compile(req, res, next) {
compile: {
status,
error: error?.message || error,
baseHistoryVersion,
stats,
timings,
buildId,
+20 -7
View File
@@ -22,6 +22,7 @@ import StatsManager from './StatsManager.js'
import SafeReader from './SafeReader.js'
import LatexMetrics from './LatexMetrics.js'
import { callbackifyMultiResult } from '@overleaf/promise-utils'
import * as HistoryResourceWriter from './HistoryResourceWriter.js'
const { downloadLatestCompileCache, downloadOutputDotSynctexFromCompileCache } =
CLSICacheHandler
@@ -104,13 +105,24 @@ async function doCompile(request, stats, timings) {
'syncing resources to disk'
)
let resourceList
let resourceList, baseHistoryVersion
try {
// NOTE: resourceList is insecure, it should only be used to exclude files from the output list
resourceList = await ResourceWriter.promises.syncResourcesToDisk(
request,
compileDir
)
if (request.historyId) {
;({ resourceList, baseHistoryVersion } =
await HistoryResourceWriter.syncResourcesToDisk(
projectId,
userId,
request,
compileDir,
timings
))
} else {
// NOTE: resourceList is insecure, it should only be used to exclude files from the output list
resourceList = await ResourceWriter.promises.syncResourcesToDisk(
request,
compileDir
)
}
} catch (error) {
if (error instanceof Errors.FilesOutOfSyncError) {
OError.tag(error, 'files out of sync, please retry', {
@@ -326,7 +338,7 @@ async function doCompile(request, stats, timings) {
)
}
return { outputFiles, buildId }
return { outputFiles, buildId, baseHistoryVersion }
}
async function _saveOutputFiles({
@@ -837,6 +849,7 @@ function _emitMetrics(request, status, stats, timings) {
if (timings.compileE2E != null) {
ClsiMetrics.e2eCompileDurationSeconds.observe(
{
compileFromHistory: !!request.historyId,
compile: request.metricsOpts.compile,
group: request.compileGroup,
},
+2
View File
@@ -33,6 +33,7 @@ export class TimedOutError extends OError {}
export class NoXrefTableError extends OError {}
export class TooManyCompileRequestsError extends OError {}
export class InvalidParameter extends OError {}
export class MissingUpdatesError extends OError {}
export default {
QueueLimitReachedError,
@@ -43,4 +44,5 @@ export default {
NoXrefTableError,
TooManyCompileRequestsError,
InvalidParameter,
MissingUpdatesError,
}
@@ -0,0 +1,564 @@
// @ts-check
import logger from '@overleaf/logger'
import zlib from 'node:zlib'
import Settings from '@overleaf/settings'
import Path from 'node:path'
import fs from 'node:fs'
import CLSICacheHandler from './CLSICacheHandler.js'
import Errors from './Errors.js'
import { callbackify, promisify } from 'node:util'
import {
AddFileOperation,
Change,
EditFileOperation,
File,
MoveFileOperation,
Snapshot,
} from 'overleaf-editor-core'
import { fetchString, RequestFailedError } from '@overleaf/fetch-utils'
import { setTimeout } from 'node:timers/promises'
import ResourceWriter from './ResourceWriter.js'
import UrlCache from './UrlCache.js'
import OError from '@overleaf/o-error'
import ClsiMetrics from './Metrics.js'
import { promiseMapSettledWithLimit } from '@overleaf/promise-utils'
const gzip = promisify(zlib.gzip)
const gunzip = promisify(zlib.gunzip)
export const clearCacheCb = callbackify(clearCache)
/**
* @param {string} projectId
* @param {string} userId
* @return {Promise<void>}
*/
export async function clearCache(projectId, userId) {
const { dir } = snapshotPath(projectId, userId)
try {
await fs.promises.rm(dir, { recursive: true, force: true })
} catch (err) {
if (isENOENT(err)) return
logger.warn(
{ err, projectId, userId },
'compile from cache: failed to clear history cache'
)
}
}
/**
* @param {string} projectId
* @param {string} userId
* @return {{ dir: string, path: string, resyncPath: string }}
*/
function snapshotPath(projectId, userId) {
const dir = Path.join(
Settings.path.clsiCacheDir,
userId ? `${projectId}-${userId}` : projectId
)
const path = Path.join(dir, 'history.json.gz')
const resyncPath = Path.join(dir, 'history-resync.json.gz')
return { dir, path, resyncPath }
}
/**
* @param {unknown} err
* @return {boolean}
*/
function isENOENT(err) {
return err instanceof Error && 'code' in err && err.code === 'ENOENT'
}
/**
* @param {string} projectId
* @param {string} userId
* @param {number} remoteBaseVersion
* @return {Promise<{rawSnapshot: import('overleaf-editor-core/lib/types.js').RawSnapshot, globalBlobs: string[], fullSync: boolean,localBaseVersion: number}>}
*/
async function loadSnapshot(projectId, userId, remoteBaseVersion) {
const { path, resyncPath } = snapshotPath(projectId, userId)
let maxLocalBaseVersion = -1
for (const candidate of [path, resyncPath]) {
try {
const fullSync = candidate === resyncPath
return await loadSnapshotFromFile(candidate, remoteBaseVersion, fullSync)
} catch (err) {
if (err instanceof Errors.MissingUpdatesError) {
maxLocalBaseVersion = Math.max(
maxLocalBaseVersion,
err.info.baseHistoryVersion
)
} else if (!isENOENT(err)) {
logger.warn(
{ err, projectId, userId },
'compile from cache: cannot read history from disk'
)
}
}
}
try {
return await loadSnapshotFromClsiCache(projectId, userId, remoteBaseVersion)
} catch (err) {
if (err instanceof Errors.MissingUpdatesError) {
maxLocalBaseVersion = Math.max(
maxLocalBaseVersion,
err.info.baseHistoryVersion
)
} else if (!isENOENT(err)) {
logger.warn(
{ err, projectId, userId },
'compile from cache: cannot download from clsi-cache'
)
}
}
throw new Errors.MissingUpdatesError('needs more updates', {
baseHistoryVersion: maxLocalBaseVersion,
})
}
/**
* @param {string} projectId
* @param {string} userId
* @param {number} remoteBaseVersion
* @return {Promise<{rawSnapshot: import('overleaf-editor-core/lib/types.js').RawSnapshot, globalBlobs: string[], fullSync: boolean,localBaseVersion: number}>}
*/
async function loadSnapshotFromClsiCache(projectId, userId, remoteBaseVersion) {
const { dir, resyncPath } = snapshotPath(projectId, userId)
await fs.promises.mkdir(dir, { recursive: true })
const ok = await CLSICacheHandler.downloadHistorySnapshot(
projectId,
userId,
dir
)
if (!ok) {
throw new Errors.MissingUpdatesError('needs full sync', {
baseHistoryVersion: -1,
})
}
logger.debug(
{ projectId, userId },
'compile from cache: restored history from clsi-cache'
)
return await loadSnapshotFromFile(resyncPath, remoteBaseVersion, true)
}
/**
* @param {string} path
* @param {number} remoteBaseVersion
* @param {boolean} fullSync
* @return {Promise<{rawSnapshot: import('overleaf-editor-core/lib/types.js').RawSnapshot, globalBlobs: string[], localBaseVersion: number, fullSync: boolean}>}
*/
async function loadSnapshotFromFile(path, remoteBaseVersion, fullSync) {
let blob = await fs.promises.readFile(path)
blob = await gunzip(blob)
const { rawSnapshot, globalBlobs, localBaseVersion } = JSON.parse(
blob.toString('utf-8')
)
if (localBaseVersion < remoteBaseVersion) {
throw new Errors.MissingUpdatesError('missing updates', {
baseHistoryVersion: localBaseVersion,
})
}
return { rawSnapshot, globalBlobs, localBaseVersion, fullSync }
}
/**
* @param {string} projectId
* @param {string} userId
* @param {Snapshot} snapshot
* @param {number} localBaseVersion
* @param {string[]} globalBlobs
* @return {Promise<void>}
*/
async function saveSnapshot(
projectId,
userId,
snapshot,
localBaseVersion,
globalBlobs
) {
const { dir, path } = snapshotPath(projectId, userId)
await fs.promises.mkdir(dir, { recursive: true })
const tmp = path + '~'
await fs.promises.writeFile(
tmp,
await gzip(
JSON.stringify({
globalBlobs,
localBaseVersion,
rawSnapshot: snapshot.toRaw(),
})
),
{ flag: 'wx' }
)
await fs.promises.rename(tmp, path)
}
/**
* @param {string} projectId
* @param {string} userId
* @return {Promise<void>}
*/
async function deleteResyncSnapshot(projectId, userId) {
const { resyncPath } = snapshotPath(projectId, userId)
try {
await fs.promises.unlink(resyncPath)
} catch (err) {
if (!isENOENT(err)) {
logger.warn(
{ err, projectId, userId },
'compile from cache: failed to clear history-resync.json.gz'
)
}
}
}
/**
* @param {string} compileDir
* @param {string} subDir
* @param {Map<string, boolean>} entries
* @return {Promise<Map<string, boolean>>}
*/
async function discoverExistingEntries(
compileDir,
subDir = '.',
entries = new Map()
) {
const dirents = await fs.promises.readdir(Path.join(compileDir, subDir), {
withFileTypes: true,
})
for (const dirent of dirents) {
const path = Path.join(subDir, dirent.name)
if (dirent.isDirectory()) {
await discoverExistingEntries(compileDir, path, entries)
} else if (dirent.isFile()) {
entries.set(path, false)
} else if (
dirent.isSymbolicLink() ||
dirent.isFIFO() ||
dirent.isSocket()
) {
// should not happen, delete right away
logger.warn(
{ compileDir, subDir, dirent },
'compile from cache: found blocked dirent'
)
await fs.promises.unlink(Path.join(compileDir, path))
} else {
throw new OError('unexpected dir entry', { compileDir, subDir, dirent })
}
}
entries.set(subDir, true)
return entries
}
/**
* @param {string} compileDir
* @param {Snapshot} snapshot
* @param {Map<string, boolean>} entriesDepthFirst
*/
async function removeExtraneousEntries(
compileDir,
snapshot,
entriesDepthFirst
) {
const keepFolders = new Set(['.'])
for (const [path, isDir] of entriesDepthFirst) {
const shouldBeFile = !!snapshot.getFile(path)
if (isDir) {
if (!shouldBeFile) {
// directory can stay directory
if (keepFolders.has(path)) {
// folder is still in use
keepFolders.add(Path.dirname(path))
} else {
// empty folder
await fs.promises.rmdir(Path.join(compileDir, path))
entriesDepthFirst.delete(path)
}
continue
}
// a folder turned into a file
// before: foo/bar.txt/baz.txt
// ^^^^^^^ folder
// now: foo/bar.txt
// ^^^^^^^ file
const needle = path + '/'
for (const [child, childIsDir] of entriesDepthFirst) {
if (!child.startsWith(needle)) continue
if (childIsDir) {
await fs.promises.rmdir(Path.join(compileDir, child))
} else {
await fs.promises.unlink(Path.join(compileDir, child))
}
entriesDepthFirst.delete(child)
}
await fs.promises.rmdir(Path.join(compileDir, path))
entriesDepthFirst.delete(path)
continue
}
if (shouldBeFile || !ResourceWriter.isExtraneousFile(path)) {
// resource or cached file
keepFolders.add(Path.dirname(path))
continue
}
await fs.promises.unlink(Path.join(compileDir, path))
entriesDepthFirst.delete(path)
}
}
/**
* @param {string} compileDir
* @param {string} path
* @param {Map<string, boolean>} entriesDepthFirst
*/
async function ensureHasParentFolder(compileDir, path, entriesDepthFirst) {
const parentFolderPath = Path.dirname(path)
if (entriesDepthFirst.has(parentFolderPath)) return
await ensureHasParentFolder(compileDir, parentFolderPath, entriesDepthFirst)
await fs.promises.mkdir(Path.join(compileDir, parentFolderPath))
entriesDepthFirst.set(parentFolderPath, true)
}
/**
* @param {import('overleaf-editor-core/lib/types.js').RawOperation[][]} raw
* @return {Change[]}
*/
function changesFromRawChangeOperations(raw) {
return raw.map(o => Change.mustFromRaw({ operations: o, timestamp: '0' }))
}
/**
* @param {string} projectId
* @param {string} userId
* @param {Object} request
* @param {string} compileDir
* @param {Record<string, number>} timings
* @return {Promise<{baseHistoryVersion: number, resourceList: {path: string}[]}>}
*/
export async function syncResourcesToDisk(
projectId,
userId,
request,
compileDir,
timings
) {
const remoteBaseVersion = request.baseHistoryVersion
let rawSnapshot, globalBlobs, localBaseVersion, source
let fullSync = true
try {
;({ rawSnapshot, globalBlobs, fullSync, localBaseVersion } =
await loadSnapshot(projectId, userId, remoteBaseVersion))
source = fullSync ? 'clsi-cache' : 'local'
logger.debug(
{ projectId, userId, localBaseVersion, remoteBaseVersion },
'compile from cache: using existing snapshot'
)
} catch (err) {
if (!request.rawSnapshot) throw err
if (!(err instanceof Errors.MissingUpdatesError)) {
logger.warn(
{ err, projectId, userId },
'compile from cache: bad local history state during full resync'
)
}
logger.debug(
{ projectId, userId },
'compile from cache: using incoming snapshot'
)
source = 'remote'
localBaseVersion = remoteBaseVersion
rawSnapshot = request.rawSnapshot
globalBlobs = []
}
globalBlobs = Array.from(new Set(globalBlobs.concat(request.globalBlobs)))
const snapshot = Snapshot.fromRaw(rawSnapshot)
const changes = changesFromRawChangeOperations(
request.rawChangeOperations.slice(localBaseVersion - remoteBaseVersion)
)
const applyAllStart = performance.now()
snapshot.applyAll(changes)
timings.snapshotApplyAll = Math.ceil(performance.now() - applyAllStart)
if (!ClsiMetrics.shouldSkipMetrics(request)) {
ClsiMetrics.snapshotApplyAllDurationSeconds.observe(
{ group: request.compileGroup, source },
timings.snapshotApplyAll / 1_000
)
}
const entriesDepthFirst = await discoverExistingEntries(compileDir)
await removeExtraneousEntries(compileDir, snapshot, entriesDepthFirst)
const changedPaths = []
if (fullSync) {
changedPaths.push(...snapshot.getFilePathnames())
logger.debug({ projectId, userId }, 'compile from cache: full sync')
} else {
const dedupe = new Set()
for (const change of changes) {
for (const operation of change.getOperations()) {
if (operation instanceof AddFileOperation) {
dedupe.add(operation.pathname)
} else if (operation instanceof MoveFileOperation) {
dedupe.add(operation.pathname)
if (!operation.isRemoveFile()) dedupe.add(operation.newPathname)
} else if (operation instanceof EditFileOperation) {
dedupe.add(operation.pathname)
}
}
}
// Restore deleted files
for (const path of snapshot.getFilePathnames()) {
if (!entriesDepthFirst.has(path)) dedupe.add(path)
}
changedPaths.push(...dedupe)
logger.debug(
{ projectId, userId, changedPaths },
'compile from cache: incremental sync'
)
}
const blobStore = new BlobStore(request.historyId, globalBlobs)
const loadEagerStart = performance.now()
await snapshot.loadFiles('eager', blobStore)
timings.snapshotLoadEager = Math.ceil(performance.now() - loadEagerStart)
if (!ClsiMetrics.shouldSkipMetrics(request)) {
ClsiMetrics.snapshotLoadEagerDurationSeconds.observe(
{ group: request.compileGroup, source },
timings.snapshotLoadEager / 1_000
)
}
for (const path of changedPaths) {
const file = snapshot.getFile(path)
if (!file) continue // deleted, handled by removeExtraneousEntries
await ensureHasParentFolder(compileDir, path, entriesDepthFirst)
}
let createCacheFolder
// Use Promise.allSettled to ensure that all writes have stopped when we exit.
const allDone = await promiseMapSettledWithLimit(
Settings.parallelFileDownloads,
changedPaths,
async path => {
const file = snapshot.getFile(path)
if (!file) return // deleted, handled by removeExtraneousEntries
const content = file.getContent({ filterTrackedDeletes: true })
if (typeof content === 'string') {
await fs.promises.writeFile(
Path.join(compileDir, path),
content,
'utf-8'
)
} else {
const hash = file.getHash()
if (!hash) {
throw new OError('unexpected file without content and hash', { path })
}
const fallbackURL = null // no fallback
const lastModified = new Date(0) // content is static
if (!createCacheFolder) {
createCacheFolder = UrlCache.promises.createProjectDir(projectId)
}
await createCacheFolder
await UrlCache.promises.downloadUrlToFile(
projectId,
blobStore.getBlobURL(hash).href,
fallbackURL,
Path.join(compileDir, path),
lastModified
)
}
}
)
for (const [idx, result] of allDone.entries()) {
if (result.status === 'fulfilled') continue
const path = changedPaths[idx]
throw OError.tag(result.reason, 'write failed', { path })
}
const baseHistoryVersion = localBaseVersion + changes.length
if (fullSync || changes.length) {
await saveSnapshot(
projectId,
userId,
snapshot,
baseHistoryVersion,
globalBlobs
)
}
if (fullSync) {
await deleteResyncSnapshot(projectId, userId)
}
return {
baseHistoryVersion,
resourceList: snapshot.getFilePathnames().map(path => ({ path })),
}
}
class BlobStore {
/** @type {string} */
#historyId
/** @type {string[]} */
#globalBlobs
/**
* @param {string} historyId
* @param {string[]} globalBlobs
*/
constructor(historyId, globalBlobs) {
this.#historyId = historyId
this.#globalBlobs = globalBlobs
}
/**
* @param {string} hash
* @return {URL}
*/
getBlobURL(hash) {
const u = new URL(Settings.apis.filestore.url)
if (this.#globalBlobs.includes(hash)) {
u.pathname = `/history/global/hash/${hash}`
} else {
u.pathname = `/history/project/${this.#historyId}/hash/${hash}`
}
return u
}
/**
* @param {string} hash
* @return {Promise<string>}
*/
async getString(hash) {
if (hash === File.EMPTY_FILE_HASH) return ''
const u = this.getBlobURL(hash)
let remainingAttempts = 3
while (true) {
try {
return await fetchString(u, { signal: AbortSignal.timeout(3_000) })
} catch (err) {
if (err instanceof RequestFailedError && err.response.status === 404) {
throw new Errors.NotFoundError()
}
remainingAttempts--
if (remainingAttempts <= 0) throw err
logger.warn(
{ err, url: u.href, remainingAttempts },
'compile from cache: history blob download failed'
)
await setTimeout(100)
}
}
}
/**
* @param {string} hash
* @return {Promise<any>}
*/
async getObject(hash) {
const string = await this.getString(hash)
return JSON.parse(string)
}
}
+17 -1
View File
@@ -31,7 +31,7 @@ const e2eCompileDurationSeconds = new prom.Histogram({
name: 'clsi_e2e_compile_duration_seconds',
help: 'Duration of the entire compile request in clsi (sync, latexmk, output)',
buckets: COMPILE_TIME_BUCKETS,
labelNames: ['compile', 'group'],
labelNames: ['compile', 'group', 'compileFromHistory'],
})
const e2eCompileDurationClsiPerfSeconds = new prom.Gauge({
@@ -68,6 +68,20 @@ const imageProcessingDurationSeconds = new prom.Histogram({
labelNames: ['group', 'type'],
})
const snapshotApplyAllDurationSeconds = new prom.Histogram({
name: 'clsi_snapshot_applyAll_duration_seconds',
help: 'Time spent applying snapshot changes',
buckets: [0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1, 2, 5, 10],
labelNames: ['group', 'source'],
})
const snapshotLoadEagerDurationSeconds = new prom.Histogram({
name: 'clsi_snapshot_load_eager_duration_seconds',
help: 'Time spent loading string blobs for snapshot',
buckets: [0.01, 0.1, 0.2, 0.5, 1, 2, 5, 10, 20, 50],
labelNames: ['group', 'source'],
})
function shouldSkipMetrics(request) {
return ['clsi-perf', 'health-check', 'clsi-cache-template'].includes(
request.metricsOpts.path
@@ -83,5 +97,7 @@ export default {
processOutputFilesDurationSeconds,
latexmkRuleDurationSeconds,
imageProcessingDurationSeconds,
snapshotApplyAllDurationSeconds,
snapshotLoadEagerDurationSeconds,
shouldSkipMetrics,
}
@@ -93,10 +93,11 @@ export default {
)
return outputFiles.filter(
// Ignore the pdf, clsi-cache tar-ball and also ignore the files ignored by the frontend.
// Ignore the pdf, clsi-cache tar-ball, history snapshot blob and also ignore the files ignored by the frontend.
({ path }) =>
path !== 'output.pdf' &&
path !== 'output.tar.gz' &&
path !== 'history-resync.json.gz' &&
!ignoreFiles.includes(path)
)
} catch (error) {
@@ -16,6 +16,7 @@ import Settings from '@overleaf/settings'
import { callbackify } from 'node:util'
import Path from 'node:path'
import fs from 'node:fs'
import * as HistoryResourceWriter from './HistoryResourceWriter.js'
let ProjectPersistenceManager
const oneDay = 24 * 60 * 60 * 1000
@@ -204,19 +205,15 @@ export default ProjectPersistenceManager = {
}
logger.debug({ projectId, userId }, 'clearing project for user')
return CompileManager.clearProject(projectId, userId, function (error) {
if (error != null) {
return callback(error)
}
return ProjectPersistenceManager.clearProjectFromCache(
projectId,
{ reason: 'cleared' },
function (error) {
if (error != null) {
return callback(error)
}
return callback()
}
)
if (error) return callback(error)
HistoryResourceWriter.clearCacheCb(projectId, userId, error => {
if (error) return callback(error)
ProjectPersistenceManager.clearProjectFromCache(
projectId,
{ reason: 'cleared' },
callback
)
})
})
},
+28 -2
View File
@@ -4,6 +4,7 @@ import OutputCacheManager from './OutputCacheManager.js'
const VALID_COMPILERS = ['pdflatex', 'latex', 'xelatex', 'lualatex']
const MAX_TIMEOUT = 600
const EDITOR_ID_REGEX = /^[a-f0-9-]{36}$/ // UUID
const HISTORY_ID_REGEX = /^([0-9a-f]{24}|[1-9][0-9]{0,9})$/ // mongo id or postgres id
function parse(body, callback) {
const response = {}
@@ -112,7 +113,12 @@ function parse(body, callback) {
// resources (full) or only those resources to be updated
// in-place (incremental).
response.syncType = _parseAttribute('syncType', compile.options.syncType, {
validValues: ['full', 'incremental'],
validValues: [
'full',
'incremental',
'history-full',
'history-incremental',
],
type: 'string',
})
@@ -139,6 +145,22 @@ function parse(body, callback) {
response.resources = (compile.resources || []).map(resource =>
_parseResource(resource)
)
response.historyId = _parseAttribute(
'historyId',
compile.options.historyId,
{ type: 'string', regex: HISTORY_ID_REGEX }
)
response.baseHistoryVersion = _parseAttribute(
'baseHistoryVersion',
compile.baseHistoryVersion,
{ type: 'number' }
)
response.globalBlobs = _parseAttribute('globalBlobs', compile.globalBlobs, {
type: 'array',
})
// The snapshot and changes are validated when loading them in editor-core.
response.rawSnapshot = compile.rawSnapshot
response.rawChangeOperations = compile.rawChangeOperations
const rootResourcePath = _parseAttribute(
'rootResourcePath',
@@ -216,7 +238,11 @@ function _parseAttribute(name, attribute, options) {
)
}
}
if (options.type != null) {
if (options.type === 'array') {
if (!Array.isArray(attribute)) {
throw new Error(`${name} attribute should be an array`)
}
} else if (options.type != null) {
// eslint-disable-next-line valid-typeof
if (typeof attribute !== options.type) {
throw new Error(`${name} attribute should be a ${options.type}`)
@@ -62,6 +62,11 @@ module.exports = {
({ zone, readOnly }) => zone === process.env.ZONE && !readOnly
),
},
filestore: {
url:
process.env.FILESTORE_DOMAIN_OVERRIDE ||
`http://${process.env.FILESTORE_HOST || '127.0.0.1'}:3009`,
},
},
smokeTest: process.env.SMOKE_TEST || false,
+1
View File
@@ -30,6 +30,7 @@
"dockerode": "^4.0.9",
"express": "4.22.1",
"lodash": "^4.17.21",
"overleaf-editor-core": "*",
"p-limit": "^3.1.0",
"request": "2.88.2",
"send": "^0.19.0",
@@ -148,6 +148,7 @@ describe('CompileController', () => {
...file,
})),
clsiCacheShard: undefined,
baseHistoryVersion: undefined,
},
})
.should.equal(true)
@@ -176,6 +177,7 @@ describe('CompileController', () => {
...file,
})),
clsiCacheShard: undefined,
baseHistoryVersion: undefined,
},
})
.should.equal(true)
@@ -224,6 +226,7 @@ describe('CompileController', () => {
...file,
})),
clsiCacheShard: undefined,
baseHistoryVersion: undefined,
},
})
})
@@ -272,6 +275,7 @@ describe('CompileController', () => {
...file,
})),
clsiCacheShard: undefined,
baseHistoryVersion: undefined,
},
})
})
@@ -304,6 +308,7 @@ describe('CompileController', () => {
stats: ctx.stats,
timings: ctx.timings,
clsiCacheShard: undefined,
baseHistoryVersion: undefined,
},
})
.should.equal(true)
@@ -339,6 +344,7 @@ describe('CompileController', () => {
// JSON.stringify will omit these undefined values
buildId: undefined,
clsiCacheShard: undefined,
baseHistoryVersion: undefined,
},
})
.should.equal(true)
@@ -373,6 +379,7 @@ describe('CompileController', () => {
// JSON.stringify will omit these undefined values
buildId: undefined,
clsiCacheShard: undefined,
baseHistoryVersion: undefined,
},
})
.should.equal(true)
@@ -405,6 +412,7 @@ describe('CompileController', () => {
// JSON.stringify will omit these undefined values
buildId: undefined,
clsiCacheShard: undefined,
baseHistoryVersion: undefined,
},
})
.should.equal(true)
@@ -21,6 +21,14 @@ describe('ProjectPersistenceManager', () => {
default: (ctx.UrlCache = {}),
}))
vi.doMock(
'../../../app/js/HistoryResourceWriter',
() =>
(ctx.HistoryResourceWriter = {
clearCacheCb: sinon.stub().yields(null),
})
)
vi.doMock('../../../app/js/CompileManager', () => ({
default: (ctx.CompileManager = {}),
}))
@@ -163,6 +171,13 @@ describe('ProjectPersistenceManager', () => {
.should.equal(true)
})
it('should clear the history cache', ctx => {
ctx.HistoryResourceWriter.clearCacheCb.should.have.been.calledWith(
ctx.project_id,
ctx.user_id
)
})
it('should clear all the cached Urls for the project', ctx => {
return ctx.UrlCache.clearProject
.calledWith(ctx.project_id)
@@ -494,7 +494,8 @@ describe('RequestParser', () => {
it('should return an error', ctx => {
ctx.callback
.calledWithMatch({
message: 'syncType attribute should be one of: full, incremental',
message:
'syncType attribute should be one of: full, incremental, history-full, history-incremental',
})
.should.equal(true)
})
@@ -23,6 +23,10 @@ import ClsiCacheHandler from './ClsiCacheHandler.mjs'
import HistoryManager from '../History/HistoryManager.mjs'
import SplitTestHandler from '../SplitTests/SplitTestHandler.mjs'
import AnalyticsManager from '../Analytics/AnalyticsManager.mjs'
import RedisWrapper from '../../infrastructure/RedisWrapper.mjs'
// use the redis db with eviction policy enabled
const rclient = RedisWrapper.client('clsi_cookie')
const ClsiCookieManager = ClsiCookieManagerFactory(
Settings.apis.clsi?.backendGroupName
@@ -38,6 +42,41 @@ const CLSI_COOKIES_ENABLED = (Settings.clsiCookie?.key ?? '') !== ''
// The timeout in services/clsi/app.js is 10 minutes, so we'll be on the safe side with 12 minutes
const COMPILE_REQUEST_TIMEOUT_MS = 12 * 60 * 1000
function _baseHistoryVersionKey(projectId, userId) {
return `baseHistoryVersion:${projectId}:${userId}`
}
async function getBaseHistoryVersion(projectId, userId) {
let v
try {
v = await rclient.get(_baseHistoryVersionKey(projectId, userId))
} catch (err) {
logger.warn({ err, projectId, userId }, 'failed to get baseHistoryVersion')
return -1
}
if (!v) return -1
const n = parseInt(v, 10)
if (Number.isNaN(n)) return -1
return n
}
async function setBaseHistoryVersion(projectId, userId, baseHistoryVersion) {
const clsiCacheExpiryInSeconds = 8 * 24 * 60 * 60 // 8 days
try {
await rclient.setex(
_baseHistoryVersionKey(projectId, userId),
clsiCacheExpiryInSeconds,
baseHistoryVersion
)
} catch (err) {
logger.warn({ err, projectId, userId }, 'failed to set baseHistoryVersion')
}
}
async function clearBaseHistoryVersion(projectId, userId) {
await rclient.del(_baseHistoryVersionKey(projectId, userId))
}
function getNewCompileBackendClass(projectId, compileBackendClass) {
// Sample x% of projects to move up one bracket.
if (
@@ -111,7 +150,13 @@ async function sendRequest(projectId, userId, options) {
options = {}
}
let result = await sendRequestOnce(projectId, userId, options)
if (result.status === 'conflict') {
if (result.status === 'missing-updates') {
// try again with updated baseline
result = await sendRequestOnce(projectId, userId, {
...options,
baseHistoryVersion: result.baseHistoryVersion,
})
} else if (result.status === 'conflict') {
// Try again, with a full compile
result = await sendRequestOnce(projectId, userId, {
...options,
@@ -130,7 +175,7 @@ async function sendRequest(projectId, userId, options) {
async function sendRequestOnce(projectId, userId, options) {
let req
try {
req = await _buildRequest(projectId, options)
req = await _buildRequest(projectId, userId, options)
} catch (err) {
if (err.message === 'no main file specified') {
return {
@@ -178,6 +223,16 @@ async function stopCompile(projectId, userId, options) {
)
}
/**
* @param {PromiseSettledResult} result
* @private
*/
function _throwIfRejected(result) {
if (result.status === 'rejected') {
throw result.reason
}
}
async function deleteAuxFiles(projectId, userId, options, clsiserverid) {
if (options == null) {
options = {}
@@ -189,37 +244,42 @@ async function deleteAuxFiles(projectId, userId, options, clsiserverid) {
projectId,
userId
)
const opts = {
method: 'DELETE',
}
try {
await _makeRequestWithClsiServerId(
const [
clsiResult,
clsiCacheResult,
documentUpdaterResult,
clsiServerIdResult,
baseHistoryVersionResult,
] = await Promise.allSettled([
_makeRequestWithClsiServerId(
projectId,
userId,
compileGroup,
compileBackendClass,
url,
opts,
{ method: 'DELETE' },
clsiserverid
),
ClsiCacheHandler.clearCache(projectId, userId),
DocumentUpdaterHandler.promises.clearProjectState(projectId),
clearClsiServerId(projectId, userId, compileBackendClass),
clearBaseHistoryVersion(projectId, userId),
])
if (clsiCacheResult.status === 'rejected') {
logger.warn(
{ err: clsiCacheResult.reason, projectId, userId },
'purge clsi-cache failed'
)
} finally {
// always clear the clsi-cache
try {
await ClsiCacheHandler.clearCache(projectId, userId)
} catch (err) {
logger.warn({ err, projectId, userId }, 'purge clsi-cache failed')
}
// always clear the project state from the docupdater, even if there
// was a problem with the request to the clsi
try {
await DocumentUpdaterHandler.promises.clearProjectState(projectId)
} finally {
// always clear the clsi server id, even if prior actions failed
await clearClsiServerId(projectId, userId, compileBackendClass)
}
}
if (baseHistoryVersionResult.status === 'rejected') {
logger.warn(
{ err: baseHistoryVersionResult.reason, projectId, userId },
'failed to clear baseHistoryVersion'
)
}
_throwIfRejected(clsiResult)
_throwIfRejected(documentUpdaterResult)
_throwIfRejected(clsiServerIdResult)
}
async function _sendBuiltRequest(projectId, userId, req, options) {
@@ -254,6 +314,9 @@ async function _sendBuiltRequest(projectId, userId, req, options) {
)
collectMetricsOnBlgFiles(outputFiles)
const compile = response?.compile || {}
if (compile.baseHistoryVersion) {
await setBaseHistoryVersion(projectId, userId, compile.baseHistoryVersion)
}
return {
status: compile.status,
outputFiles,
@@ -263,6 +326,7 @@ async function _sendBuiltRequest(projectId, userId, req, options) {
timings: compile.timings,
outputUrlPrefix: compile.outputUrlPrefix,
clsiCacheShard: compile.clsiCacheShard,
baseHistoryVersion: compile.baseHistoryVersion,
}
}
@@ -604,6 +668,12 @@ async function _postToClsi(
if (err.response.status === 413) {
return { response: { compile: { status: 'project-too-large' } } }
} else if (err.response.status === 409) {
try {
const body = JSON.parse(err.body || '{}')
if (body.compile?.status === 'missing-updates') {
return { response: body }
}
} catch {}
return { response: { compile: { status: 'conflict' } } }
} else if (err.response.status === 423) {
return { response: { compile: { status: 'compile-in-progress' } } }
@@ -657,13 +727,12 @@ function _parseOutputFiles(projectId, rawOutputFiles = []) {
return outputFiles
}
async function _buildRequest(projectId, options) {
async function _buildRequest(projectId, userId, options) {
const project = await ProjectGetter.promises.getProject(projectId, {
compiler: 1,
rootDoc_id: 1,
imageName: 1,
rootFolder: 1,
'overleaf.history.id': 1,
...(options.compileFromHistory ? {} : { rootDoc_id: 1, rootFolder: 1 }),
})
if (project == null) {
throw new Errors.NotFoundError(`project does not exist: ${projectId}`)
@@ -671,6 +740,31 @@ async function _buildRequest(projectId, options) {
if (!VALID_COMPILERS.includes(project.compiler)) {
project.compiler = 'pdflatex'
}
const historyId = project.overleaf.history.id
let { baseHistoryVersion } = options
if (options.compileFromHistory && !baseHistoryVersion) {
baseHistoryVersion = await getBaseHistoryVersion(projectId, userId)
}
if (options.compileFromHistory && baseHistoryVersion === -1) {
// full sync
return await _buildRequestFromHistoryFull(
projectId,
historyId,
options,
project
)
} else if (options.compileFromHistory) {
// incremental sync
return await _buildRequestFromHistoryIncremental(
projectId,
historyId,
options,
project,
baseHistoryVersion
)
}
if (options.incrementalCompilesEnabled || options.syncType != null) {
// new way, either incremental or full
@@ -755,6 +849,119 @@ async function getOutputFileStream(
}
}
/**
* @param {import('overleaf-editor-core/lib/types.js').RawChange[]} changes
* @return {import('overleaf-editor-core/lib/types.js').RawOperation[][]}
* @private
*/
function _rawChangeOperationsFromChanges(changes) {
// omit timestamp (required, back-filled in clsi)
// omit authors (optional)
// omit v2Authors (optional)
// omit origin (optional)
// omit projectVersion (optional)
// omit v2DocVersions (optional)
return changes.map(change => change.operations)
}
/**
* @param {import('overleaf-editor-core/lib/types.js').RawOperation[][]} rawChangeOperations
* @return {Set<string>}
* @private
*/
function _collectGlobalBlobs(rawChangeOperations) {
const globalBlobs = new Set()
for (const operations of rawChangeOperations) {
for (const operation of operations) {
const hash = operation?.file?.hash
if (hash && HistoryManager.isGlobalBlob(hash)) {
globalBlobs.add(hash)
}
}
}
return globalBlobs
}
async function _buildRequestFromHistoryFull(
projectId,
historyId,
options,
project
) {
await HistoryManager.promises.flushProject(projectId)
const {
chunk: {
history: { snapshot: rawSnapshot, changes: rawChanges },
startVersion,
},
} = await HistoryManager.promises.getLatestHistory(historyId)
const rawChangeOperations = _rawChangeOperationsFromChanges(rawChanges)
const globalBlobs = _collectGlobalBlobs(rawChangeOperations)
for (const { hash, rangesHash } of Object.values(rawSnapshot.files)) {
if (hash && HistoryManager.isGlobalBlob(hash)) {
globalBlobs.add(hash)
}
if (rangesHash && HistoryManager.isGlobalBlob(rangesHash)) {
globalBlobs.add(rangesHash)
}
}
options = {
...options,
syncType: 'history-full',
historyId,
baseHistoryVersion: startVersion,
rawSnapshot,
rawChangeOperations,
globalBlobs: Array.from(globalBlobs),
}
return _finaliseRequest(projectId, options, project, [], [])
}
async function _buildRequestFromHistoryIncremental(
projectId,
historyId,
options,
project,
baseHistoryVersion
) {
await HistoryManager.promises.flushProject(projectId)
const rawChangeOperations = []
let hasMore = true
let since = baseHistoryVersion
let size = 0
while (hasMore) {
let changes
;({ changes, hasMore } = await HistoryManager.promises.getChanges(
historyId,
{ since }
))
since += changes.length
const newRawChangeOperations = _rawChangeOperationsFromChanges(changes)
size += Buffer.from(JSON.stringify(newRawChangeOperations)).byteLength
if (size > 6.5 * 1024 * 1024) {
// clsi has a payload limit of 7MiB. Do not send too many operations.
// Fall back to sending the latest snapshot instead.
return await _buildRequestFromHistoryFull(
projectId,
historyId,
options,
project
)
}
rawChangeOperations.push(...newRawChangeOperations)
}
const globalBlobs = _collectGlobalBlobs(rawChangeOperations)
options = {
...options,
syncType: 'history-incremental',
historyId,
baseHistoryVersion,
rawChangeOperations,
globalBlobs: Array.from(globalBlobs),
}
return _finaliseRequest(projectId, options, project, [], [])
}
function _buildRequestFromDocupdater(
projectId,
options,
@@ -812,7 +1019,7 @@ async function _getContentFromMongo(projectId) {
function _finaliseRequest(projectId, options, project, docs, files) {
const resources = []
let flags
let rootResourcePath = null
let rootResourcePath = options.rootResourcePath
let rootResourcePathOverride = null
let hasMainFile = false
let numberOfDocsInProject = 0
@@ -883,6 +1090,7 @@ function _finaliseRequest(projectId, options, project, docs, files) {
return {
compile: {
options: {
historyId: options.historyId?.toString(), // send as string, if set
buildId: options.buildId,
editorId: options.editorId,
compiler: project.compiler,
@@ -909,6 +1117,10 @@ function _finaliseRequest(projectId, options, project, docs, files) {
metricsMethod: options.compileGroup,
metricsPath: options.metricsPath,
},
baseHistoryVersion: options.baseHistoryVersion,
rawSnapshot: options.rawSnapshot,
rawChangeOperations: options.rawChangeOperations,
globalBlobs: options.globalBlobs,
rootResourcePath,
resources,
},
@@ -917,7 +1129,7 @@ function _finaliseRequest(projectId, options, project, docs, files) {
async function wordCount(projectId, userId, file, limits, clsiserverid) {
const { compileBackendClass, compileGroup } = limits
const req = await _buildRequest(projectId, limits)
const req = await _buildRequest(projectId, userId, limits)
const filename = file || req.compile.rootResourcePath
const url = _getCompilerUrl(
compileBackendClass,
@@ -52,7 +52,7 @@ function getPdfCachingMinChunkSize(req, res) {
return Settings.pdfCachingMinChunkSize
}
function _getSplitTestOptions(req, res) {
async function _getSplitTestOptions(req, res) {
// Use the query flags from the editor request for overriding the split test.
let query = {}
try {
@@ -61,12 +61,20 @@ function _getSplitTestOptions(req, res) {
} catch (e) {}
const editorReq = { ...req, query }
const { variant } = await SplitTestHandler.promises.getAssignment(
editorReq,
res,
'compile-from-history'
)
const compileFromHistory = variant === 'enabled'
const pdfDownloadDomain = Settings.pdfDownloadDomain
const enablePdfCaching = Settings.enablePdfCaching
if (!enablePdfCaching || !req.query.enable_pdf_caching) {
// The frontend does not want to do pdf caching.
return {
compileFromHistory,
pdfDownloadDomain,
enablePdfCaching: false,
}
@@ -74,6 +82,7 @@ function _getSplitTestOptions(req, res) {
const pdfCachingMinChunkSize = getPdfCachingMinChunkSize(editorReq, res)
return {
compileFromHistory,
pdfDownloadDomain,
enablePdfCaching,
pdfCachingMinChunkSize,
@@ -137,6 +146,7 @@ const _CompileController = {
fileLineErrors,
stopOnFirstError,
editorId: req.body.editorId,
rootResourcePath: req.body.rootResourcePath,
}
if (req.body.rootDoc_id) {
@@ -161,11 +171,16 @@ const _CompileController = {
options.incrementalCompilesEnabled = true
}
let { enablePdfCaching, pdfCachingMinChunkSize, pdfDownloadDomain } =
_getSplitTestOptions(req, res)
let {
enablePdfCaching,
pdfCachingMinChunkSize,
pdfDownloadDomain,
compileFromHistory,
} = await _getSplitTestOptions(req, res)
if (Features.hasFeature('saas')) {
options.compileFromClsiCache = true
options.populateClsiCache = true
options.compileFromHistory = compileFromHistory
}
options.enablePdfCaching = enablePdfCaching
if (enablePdfCaching) {
@@ -84,6 +84,7 @@ async function compile(projectId, userId, options = {}) {
outputUrlPrefix,
buildId,
clsiCacheShard,
baseHistoryVersion,
} = await ClsiManager.promises.sendRequest(projectId, compileAsUser, options)
return {
@@ -97,6 +98,7 @@ async function compile(projectId, userId, options = {}) {
outputUrlPrefix,
buildId,
clsiCacheShard,
baseHistoryVersion,
}
}
@@ -35,6 +35,10 @@ async function loadGlobalBlobs() {
// END copy from services/history-v1/storage/lib/blob_store/index.js
function isGlobalBlob(hash) {
return GLOBAL_BLOBS.has(hash)
}
function getFilestoreBlobURL(historyId, hash) {
if (GLOBAL_BLOBS.has(hash)) {
return `${settings.apis.filestore.url}/history/global/hash/${hash}`
@@ -405,6 +409,7 @@ function _userView(user) {
const loadGlobalBlobsPromise = loadGlobalBlobs()
export default {
isGlobalBlob,
getFilestoreBlobURL,
loadGlobalBlobsPromise,
initializeProject: callbackify(initializeProject),
@@ -36,13 +36,14 @@ export default class DocumentCompiler {
cleanupCompileResult: () => void
signal: AbortSignal
openDocs: OpenDocuments
projectRootDocId?: string | null
projectRootDocId: string | null
clsiServerId: string | null
currentDoc: DocumentContainer | null
error: Error | undefined
timer: number
defaultOptions: CompileOptions
debouncedAutoCompile: DebouncedFunc<() => void>
pathInFolder: (docId: string) => string | null
constructor({
compilingRef,
@@ -80,6 +81,7 @@ export default class DocumentCompiler {
this.cleanupCompileResult = cleanupCompileResult
this.signal = signal
this.openDocs = openDocs
this.pathInFolder = () => null
this.projectRootDocId = null
this.clsiServerId = null
@@ -134,10 +136,19 @@ export default class DocumentCompiler {
const t0 = performance.now()
const rootDocId = this.getRootDocOverrideId()
let rootDocIdOverride = this.getRootDocOverrideId()
let rootResourcePath
try {
// Only required for compile-from-history
rootDocIdOverride = rootDocIdOverride || this.projectRootDocId
rootResourcePath = rootDocIdOverride
? this.pathInFolder(rootDocIdOverride)
: 'main.tex'
} catch {}
const body = {
rootDoc_id: rootDocId,
rootDoc_id: rootDocIdOverride,
rootResourcePath,
draft: options.draft,
check: 'silent', // NOTE: 'error' and 'validate' are possible, but unused
// use incremental compile for all users but revert to a full compile
@@ -165,7 +176,7 @@ export default class DocumentCompiler {
this.setError(undefined)
data.options = options
data.rootDocId = rootDocId
data.rootDocId = rootDocIdOverride
if (data.clsiServerId) {
this.clsiServerId = data.clsiServerId
}
@@ -151,7 +151,7 @@ export const LocalCompileProvider: FC<React.PropsWithChildren> = ({
const { features, alphaProgram } = useUserContext()
const { fileTreeData } = useFileTreeData()
const { findEntityByPath } = useFileTreePathContext()
const { pathInFolder, findEntityByPath } = useFileTreePathContext()
// whether a compile is in progress
const [compiling, setCompiling] = useState(false)
@@ -342,8 +342,12 @@ export const LocalCompileProvider: FC<React.PropsWithChildren> = ({
// keep the project rootDocId in sync with the compiler
useEffect(() => {
compiler.projectRootDocId = rootDocId
compiler.projectRootDocId = rootDocId || null
}, [compiler, rootDocId])
// keep pathInFolder in sync with the compiler
useEffect(() => {
compiler.pathInFolder = pathInFolder
}, [compiler, pathInFolder])
// keep draft setting in sync with the compiler
useEffect(() => {
+20
View File
@@ -133,6 +133,26 @@ async function provisionSplitTests() {
Path.join(MONOREPO, 'tools/saas-e2e/split-tests.json')
)
)
// Add WIP split test, we can update the JSON blob once this is in production
SPLIT_TESTS.push({
name: 'compile-from-history',
versions: [
{
versionNumber: 1,
createdAt: '2026-02-25T14:55:31.260Z',
active: true,
analyticsEnabled: false,
phase: 'release',
variants: [
{
name: 'enabled',
rolloutPercent: 0,
rolloutStripes: [],
},
],
},
],
})
console.log(`> Importing ${SPLIT_TESTS.length} split-tests from production.`)
await SplitTestManager.replaceSplitTests(SPLIT_TESTS)
}
@@ -176,6 +176,18 @@ describe('ClsiManager', function () {
recordEventForUserInBackground: sinon.stub(),
}
ctx.redis = {
auth() {},
del: sinon.stub(),
get: sinon.stub(),
setex: sinon.stub().resolves(),
}
vi.doMock('../../../../app/src/infrastructure/RedisWrapper', () => ({
default: (ctx.RedisWrapper = {
client: () => ctx.redis,
}),
}))
vi.doMock('@overleaf/settings', () => ({
default: ctx.Settings,
}))
@@ -308,10 +308,12 @@ describe('CompileController', function () {
isAutoCompile: false,
compileFromClsiCache: true,
populateClsiCache: true,
compileFromHistory: false,
enablePdfCaching: false,
fileLineErrors: false,
stopOnFirstError: false,
editorId: undefined,
rootResourcePath: undefined,
}
)
})
@@ -350,10 +352,12 @@ describe('CompileController', function () {
isAutoCompile: true,
compileFromClsiCache: true,
populateClsiCache: true,
compileFromHistory: false,
enablePdfCaching: false,
fileLineErrors: false,
stopOnFirstError: false,
editorId: undefined,
rootResourcePath: undefined,
}
)
})
@@ -373,11 +377,13 @@ describe('CompileController', function () {
isAutoCompile: false,
compileFromClsiCache: true,
populateClsiCache: true,
compileFromHistory: false,
enablePdfCaching: false,
draft: true,
fileLineErrors: false,
stopOnFirstError: false,
editorId: undefined,
rootResourcePath: undefined,
}
)
})
@@ -397,10 +403,36 @@ describe('CompileController', function () {
isAutoCompile: false,
compileFromClsiCache: true,
populateClsiCache: true,
compileFromHistory: false,
enablePdfCaching: false,
fileLineErrors: false,
stopOnFirstError: false,
editorId: 'the-editor-id',
rootResourcePath: undefined,
}
)
})
})
describe('with a rootResourcePath', function () {
beforeEach(async function (ctx) {
ctx.req.body = { rootResourcePath: 'foo.tex' }
await ctx.CompileController.compile(ctx.req, ctx.res, ctx.next)
})
it('should pass the rootResourcePath to the compiler', function (ctx) {
ctx.CompileManager.promises.compile.should.have.been.calledWith(
ctx.projectId,
ctx.user_id,
{
isAutoCompile: false,
compileFromClsiCache: true,
populateClsiCache: true,
compileFromHistory: false,
enablePdfCaching: false,
fileLineErrors: false,
stopOnFirstError: false,
editorId: undefined,
rootResourcePath: 'foo.tex',
}
)
})