[clsi] initial implementation of compile from history (#31883)

* [clsi] initial implementation of compile from history

* [clsi] copy changes

* [saas-e2e] extend test case with nested folder

* [saas-e2e] add test case for tracked changes

* [web] fix accumulating changes from multiple chunks

* [web] optimize size check for compile request payload

* [clsi] deduplicate globalBlobs

* [clsi] add validation for request body details

* [clsi] add metrics for compile from history

* [clsi] download binary files concurrently

* [clsi] skip download of empty file blob

* [clsi] break down e2e compile time metric by compileFromHistory

GitOrigin-RevId: 0dadef93e89d8a172c35cb130a1042d9d1bec42a
This commit is contained in:
Jakob Ackermann
2026-03-03 11:20:08 +01:00
committed by Copybot
parent 5723a9589a
commit 81b7121408
28 changed files with 1135 additions and 88 deletions

View File

@@ -21,6 +21,7 @@ COPY libraries/fetch-utils/package.json /overleaf/libraries/fetch-utils/package.
COPY libraries/logger/package.json /overleaf/libraries/logger/package.json
COPY libraries/metrics/package.json /overleaf/libraries/metrics/package.json
COPY libraries/o-error/package.json /overleaf/libraries/o-error/package.json
COPY libraries/overleaf-editor-core/package.json /overleaf/libraries/overleaf-editor-core/package.json
COPY libraries/promise-utils/package.json /overleaf/libraries/promise-utils/package.json
COPY libraries/settings/package.json /overleaf/libraries/settings/package.json
COPY libraries/stream-utils/package.json /overleaf/libraries/stream-utils/package.json
@@ -32,6 +33,7 @@ COPY libraries/fetch-utils/ /overleaf/libraries/fetch-utils/
COPY libraries/logger/ /overleaf/libraries/logger/
COPY libraries/metrics/ /overleaf/libraries/metrics/
COPY libraries/o-error/ /overleaf/libraries/o-error/
COPY libraries/overleaf-editor-core/ /overleaf/libraries/overleaf-editor-core/
COPY libraries/promise-utils/ /overleaf/libraries/promise-utils/
COPY libraries/settings/ /overleaf/libraries/settings/
COPY libraries/stream-utils/ /overleaf/libraries/stream-utils/

View File

@@ -20,6 +20,7 @@ IMAGE_CACHE ?= $(IMAGE_REPO):cache-$(shell cat \
$(MONOREPO)/libraries/logger/package.json \
$(MONOREPO)/libraries/metrics/package.json \
$(MONOREPO)/libraries/o-error/package.json \
$(MONOREPO)/libraries/overleaf-editor-core/package.json \
$(MONOREPO)/libraries/promise-utils/package.json \
$(MONOREPO)/libraries/settings/package.json \
$(MONOREPO)/libraries/stream-utils/package.json \

View File

@@ -220,15 +220,45 @@ function notifyCLSICacheAboutBuild({
enqueue([{ path: 'output.tar.gz' }])
})
.catch(err => {
if (isENOENT(err)) return
logger.warn(
{ err, projectId, userId, buildId, shard },
'build output.tar.gz for clsi-cache failed'
)
})
copyHistorySnapshot({ projectId, userId, buildId })
.then(() => {
enqueue([{ path: 'history-resync.json.gz' }])
})
.catch(err => {
if (isENOENT(err)) return
logger.warn(
{ err, projectId, userId, buildId, shard },
'copy history-resync.json.gz for clsi-cache failed'
)
})
return shard
}
/**
 * Copy the cached history snapshot of a project (`history.json.gz` in the
 * clsi-cache dir) into the output dir of the given build, where it is stored
 * under the name `history-resync.json.gz`.
 *
 * @param {Object} opts
 * @param {string} opts.projectId
 * @param {string} opts.userId
 * @param {string} opts.buildId
 * @return {Promise<void>}
 */
async function copyHistorySnapshot({ projectId, userId, buildId }) {
  const cacheFolder = userId ? `${projectId}-${userId}` : projectId
  const src = Path.join(Settings.path.clsiCacheDir, cacheFolder, 'history.json.gz')
  const dst = Path.join(
    getOutputDir({ projectId, userId, buildId }),
    'history-resync.json.gz'
  )
  await fs.promises.cp(src, dst)
}
/**
* @param {Object} opts
* @param {string} opts.projectId
@@ -239,12 +269,7 @@ function notifyCLSICacheAboutBuild({
*/
async function buildTarball({ projectId, userId, buildId, outputFiles }) {
const timer = new Metrics.Timer('clsi_cache_build', 1, {}, TIMING_BUCKETS)
const outputDir = Path.join(
Settings.path.outputDir,
userId ? `${projectId}-${userId}` : projectId,
CACHE_SUBDIR,
buildId
)
const outputDir = getOutputDir({ projectId, userId, buildId })
const files = outputFiles.filter(f => !isExtraneousFile(f.path))
if (files.length > MAX_ENTRIES_IN_OUTPUT_TAR) {
@@ -287,6 +312,33 @@ async function downloadOutputDotSynctexFromCompileCache(
buildId,
outputDir
) {
const requestPath = `/project/${projectId}/${
userId ? `user/${userId}/` : ''
}build/${editorId}-${buildId}/search/output/output.synctex.gz`
return await downloadSingleFile(projectId, requestPath, outputDir, 'synctex')
}
/**
 * Download the `history-resync.json.gz` of the latest build from clsi-cache
 * into the given cache dir.
 *
 * @param {string} projectId
 * @param {string} userId
 * @param {string} cacheDir
 * @return {Promise<boolean>}
 */
async function downloadHistorySnapshot(projectId, userId, cacheDir) {
  const userSegment = userId ? `user/${userId}/` : ''
  const requestPath = `/project/${projectId}/${userSegment}latest/output/history-resync.json.gz`
  return await downloadSingleFile(projectId, requestPath, cacheDir, 'snapshot')
}
/**
* @param {string} projectId
* @param {string} requestPath
* @param {string} outputDir
* @param {string} label
* @return {Promise<boolean>}
*/
async function downloadSingleFile(projectId, requestPath, outputDir, label) {
if (!Settings.apis.clsiCache.enabled) return false
if (!OBJECT_ID_REGEX.test(projectId)) return false
const shardCfg = getAvailableShard(projectId)
@@ -296,20 +348,17 @@ async function downloadOutputDotSynctexFromCompileCache(
const timer = new Metrics.Timer(
'clsi_cache_download',
1,
{ method: 'synctex' },
{ method: label },
TIMING_BUCKETS
)
const u = new URL(url)
u.pathname = requestPath
let stream
try {
stream = await fetchStream(
`${url}/project/${projectId}/${
userId ? `user/${userId}/` : ''
}build/${editorId}-${buildId}/search/output/output.synctex.gz`,
{
method: 'GET',
signal: AbortSignal.timeout(TIMEOUT),
}
)
stream = await fetchStream(u, {
method: 'GET',
signal: AbortSignal.timeout(TIMEOUT),
})
} catch (err) {
if (err instanceof RequestFailedError && err.response.status === 404) {
closeCircuitBreaker(url)
@@ -321,13 +370,14 @@ async function downloadOutputDotSynctexFromCompileCache(
throw OError.tag(err, 'download failed', { shard })
}
await fs.promises.mkdir(outputDir, { recursive: true })
const dst = Path.join(outputDir, 'output.synctex.gz')
const name = Path.basename(requestPath)
const dst = Path.join(outputDir, name)
const tmp = dst + crypto.randomUUID()
try {
await pipeline(
stream,
new MeteredStream(Metrics, 'clsi_cache_egress', {
path: 'output.synctex.gz',
path: name,
}),
fs.createWriteStream(tmp)
)
@@ -437,8 +487,25 @@ async function downloadLatestCompileCache(projectId, userId, compileDir) {
return !abort
}
/**
 * Build the path of the cache sub-directory that holds the output of one
 * build of a project (optionally scoped to a user).
 *
 * @param {Object} opts
 * @param {string} opts.projectId
 * @param {string} opts.userId
 * @param {string} opts.buildId
 * @return {string}
 */
function getOutputDir({ projectId, userId, buildId }) {
  const projectFolder = userId ? `${projectId}-${userId}` : projectId
  return Path.join(Settings.path.outputDir, projectFolder, CACHE_SUBDIR, buildId)
}
/**
* @param {unknown} err
* @return {boolean}
*/
function isENOENT(err) {
return err instanceof Error && 'code' in err && err.code === 'ENOENT'
@@ -447,5 +514,6 @@ function isENOENT(err) {
export default {
notifyCLSICacheAboutBuild,
downloadLatestCompileCache,
downloadHistorySnapshot,
downloadOutputDotSynctexFromCompileCache,
}

View File

@@ -40,7 +40,7 @@ function compile(req, res, next) {
stats,
timings,
(error, result) => {
let { buildId, outputFiles } = result || {}
let { buildId, outputFiles, baseHistoryVersion } = result || {}
let code, status
if (outputFiles == null) {
outputFiles = []
@@ -50,7 +50,7 @@ function compile(req, res, next) {
status = 'compile-in-progress'
} else if (error instanceof Errors.FilesOutOfSyncError) {
code = 409 // Http 409 Conflict
status = 'retry'
status = 'conflict'
logger.warn(
{
projectId: request.project_id,
@@ -58,6 +58,10 @@ function compile(req, res, next) {
},
'files out of sync, please retry'
)
} else if (error instanceof Errors.MissingUpdatesError) {
code = 409
status = 'missing-updates'
baseHistoryVersion = error.info.baseHistoryVersion
} else if (
error?.code === 'EPIPE' ||
error instanceof Errors.TooManyCompileRequestsError
@@ -146,6 +150,7 @@ function compile(req, res, next) {
compile: {
status,
error: error?.message || error,
baseHistoryVersion,
stats,
timings,
buildId,

View File

@@ -22,6 +22,7 @@ import StatsManager from './StatsManager.js'
import SafeReader from './SafeReader.js'
import LatexMetrics from './LatexMetrics.js'
import { callbackifyMultiResult } from '@overleaf/promise-utils'
import * as HistoryResourceWriter from './HistoryResourceWriter.js'
const { downloadLatestCompileCache, downloadOutputDotSynctexFromCompileCache } =
CLSICacheHandler
@@ -104,13 +105,24 @@ async function doCompile(request, stats, timings) {
'syncing resources to disk'
)
let resourceList
let resourceList, baseHistoryVersion
try {
// NOTE: resourceList is insecure, it should only be used to exclude files from the output list
resourceList = await ResourceWriter.promises.syncResourcesToDisk(
request,
compileDir
)
if (request.historyId) {
;({ resourceList, baseHistoryVersion } =
await HistoryResourceWriter.syncResourcesToDisk(
projectId,
userId,
request,
compileDir,
timings
))
} else {
// NOTE: resourceList is insecure, it should only be used to exclude files from the output list
resourceList = await ResourceWriter.promises.syncResourcesToDisk(
request,
compileDir
)
}
} catch (error) {
if (error instanceof Errors.FilesOutOfSyncError) {
OError.tag(error, 'files out of sync, please retry', {
@@ -326,7 +338,7 @@ async function doCompile(request, stats, timings) {
)
}
return { outputFiles, buildId }
return { outputFiles, buildId, baseHistoryVersion }
}
async function _saveOutputFiles({
@@ -837,6 +849,7 @@ function _emitMetrics(request, status, stats, timings) {
if (timings.compileE2E != null) {
ClsiMetrics.e2eCompileDurationSeconds.observe(
{
compileFromHistory: !!request.historyId,
compile: request.metricsOpts.compile,
group: request.compileGroup,
},

View File

@@ -33,6 +33,7 @@ export class TimedOutError extends OError {}
export class NoXrefTableError extends OError {}
export class TooManyCompileRequestsError extends OError {}
export class InvalidParameter extends OError {}
// Raised by the compile-from-history flow when no usable local history
// snapshot covers the base version of the request. The `info` carries
// `baseHistoryVersion` (-1 when nothing is available locally).
export class MissingUpdatesError extends OError {}
export default {
QueueLimitReachedError,
@@ -43,4 +44,5 @@ export default {
NoXrefTableError,
TooManyCompileRequestsError,
InvalidParameter,
MissingUpdatesError,
}

View File

@@ -0,0 +1,564 @@
// @ts-check
import logger from '@overleaf/logger'
import zlib from 'node:zlib'
import crypto from 'node:crypto'
import Settings from '@overleaf/settings'
import Path from 'node:path'
import fs from 'node:fs'
import CLSICacheHandler from './CLSICacheHandler.js'
import Errors from './Errors.js'
import { callbackify, promisify } from 'node:util'
import {
  AddFileOperation,
  Change,
  EditFileOperation,
  File,
  MoveFileOperation,
  Snapshot,
} from 'overleaf-editor-core'
import { fetchString, RequestFailedError } from '@overleaf/fetch-utils'
import { setTimeout } from 'node:timers/promises'
import ResourceWriter from './ResourceWriter.js'
import UrlCache from './UrlCache.js'
import OError from '@overleaf/o-error'
import ClsiMetrics from './Metrics.js'
import { promiseMapSettledWithLimit } from '@overleaf/promise-utils'
// Promise-returning wrappers around the callback-based zlib helpers.
const gzip = promisify(zlib.gzip)
const gunzip = promisify(zlib.gunzip)
// Node-style callback variant of clearCache for callback-based callers.
export const clearCacheCb = callbackify(clearCache)
/**
 * Remove the on-disk history cache folder of a project/user.
 * Failures are logged as warnings and never rejected to the caller.
 *
 * @param {string} projectId
 * @param {string} userId
 * @return {Promise<void>}
 */
export async function clearCache(projectId, userId) {
  const cacheDir = snapshotPath(projectId, userId).dir
  try {
    await fs.promises.rm(cacheDir, { recursive: true, force: true })
  } catch (err) {
    if (!isENOENT(err)) {
      logger.warn(
        { err, projectId, userId },
        'compile from cache: failed to clear history cache'
      )
    }
  }
}
/**
 * Compute the cache folder of a project/user and the paths of the two
 * snapshot files stored inside it.
 *
 * @param {string} projectId
 * @param {string} userId
 * @return {{ dir: string, path: string, resyncPath: string }}
 */
function snapshotPath(projectId, userId) {
  const folderName = userId ? `${projectId}-${userId}` : projectId
  const dir = Path.join(Settings.path.clsiCacheDir, folderName)
  return {
    dir,
    path: Path.join(dir, 'history.json.gz'),
    resyncPath: Path.join(dir, 'history-resync.json.gz'),
  }
}
/**
 * Check whether the given value is an Error with the code 'ENOENT'.
 *
 * @param {unknown} err
 * @return {boolean}
 */
function isENOENT(err) {
  if (!(err instanceof Error)) return false
  if (!('code' in err)) return false
  return err.code === 'ENOENT'
}
/**
 * Find a history snapshot that is recent enough for the request.
 *
 * The sources are tried in order: the local snapshot file, the local resync
 * file and finally a download from clsi-cache. When none of them yields a
 * usable snapshot, a MissingUpdatesError is thrown that carries the highest
 * base version reported by any of the sources (-1 when there is none).
 *
 * @param {string} projectId
 * @param {string} userId
 * @param {number} remoteBaseVersion
 * @return {Promise<{rawSnapshot: import('overleaf-editor-core/lib/types.js').RawSnapshot, globalBlobs: string[], fullSync: boolean,localBaseVersion: number}>}
 */
async function loadSnapshot(projectId, userId, remoteBaseVersion) {
  const { path, resyncPath } = snapshotPath(projectId, userId)
  let maxLocalBaseVersion = -1

  // Track how far a stale source got, ignore missing files, log the rest.
  const recordFailure = (err, message) => {
    if (err instanceof Errors.MissingUpdatesError) {
      maxLocalBaseVersion = Math.max(
        maxLocalBaseVersion,
        err.info.baseHistoryVersion
      )
      return
    }
    if (isENOENT(err)) return
    logger.warn({ err, projectId, userId }, message)
  }

  for (const candidate of [path, resyncPath]) {
    try {
      // Only the resync file triggers a full sync.
      return await loadSnapshotFromFile(
        candidate,
        remoteBaseVersion,
        candidate === resyncPath
      )
    } catch (err) {
      recordFailure(err, 'compile from cache: cannot read history from disk')
    }
  }

  try {
    return await loadSnapshotFromClsiCache(projectId, userId, remoteBaseVersion)
  } catch (err) {
    recordFailure(err, 'compile from cache: cannot download from clsi-cache')
  }

  throw new Errors.MissingUpdatesError('needs more updates', {
    baseHistoryVersion: maxLocalBaseVersion,
  })
}
/**
 * Download the resync snapshot from clsi-cache into the local cache folder
 * and load it as a full-sync snapshot.
 *
 * Throws a MissingUpdatesError with base version -1 when the download did not
 * produce a file.
 *
 * @param {string} projectId
 * @param {string} userId
 * @param {number} remoteBaseVersion
 * @return {Promise<{rawSnapshot: import('overleaf-editor-core/lib/types.js').RawSnapshot, globalBlobs: string[], fullSync: boolean,localBaseVersion: number}>}
 */
async function loadSnapshotFromClsiCache(projectId, userId, remoteBaseVersion) {
  const { dir, resyncPath } = snapshotPath(projectId, userId)
  await fs.promises.mkdir(dir, { recursive: true })

  const downloaded = await CLSICacheHandler.downloadHistorySnapshot(
    projectId,
    userId,
    dir
  )
  if (downloaded) {
    logger.debug(
      { projectId, userId },
      'compile from cache: restored history from clsi-cache'
    )
    return await loadSnapshotFromFile(resyncPath, remoteBaseVersion, true)
  }

  throw new Errors.MissingUpdatesError('needs full sync', {
    baseHistoryVersion: -1,
  })
}
/**
 * Read a gzipped JSON snapshot file from disk.
 *
 * Throws a MissingUpdatesError (carrying the version found in the file) when
 * the stored version is older than the base version of the request.
 *
 * @param {string} path
 * @param {number} remoteBaseVersion
 * @param {boolean} fullSync
 * @return {Promise<{rawSnapshot: import('overleaf-editor-core/lib/types.js').RawSnapshot, globalBlobs: string[], localBaseVersion: number, fullSync: boolean}>}
 */
async function loadSnapshotFromFile(path, remoteBaseVersion, fullSync) {
  const compressed = await fs.promises.readFile(path)
  const json = (await gunzip(compressed)).toString('utf-8')
  const { rawSnapshot, globalBlobs, localBaseVersion } = JSON.parse(json)
  const isStale = localBaseVersion < remoteBaseVersion
  if (isStale) {
    throw new Errors.MissingUpdatesError('missing updates', {
      baseHistoryVersion: localBaseVersion,
    })
  }
  return { rawSnapshot, globalBlobs, localBaseVersion, fullSync }
}
/**
 * Persist the snapshot as gzipped JSON in the local cache folder.
 *
 * The payload is written to a temporary file first and then renamed over the
 * final path, so readers never observe a partially written snapshot.
 *
 * The temporary file gets a unique name: with a fixed name, a leftover from
 * a crashed or failed attempt would make every following exclusive write
 * fail with EEXIST. On failure the temporary file is removed (best effort)
 * and the original error is rethrown.
 *
 * @param {string} projectId
 * @param {string} userId
 * @param {Snapshot} snapshot
 * @param {number} localBaseVersion
 * @param {string[]} globalBlobs
 * @return {Promise<void>}
 */
async function saveSnapshot(
  projectId,
  userId,
  snapshot,
  localBaseVersion,
  globalBlobs
) {
  const { dir, path } = snapshotPath(projectId, userId)
  await fs.promises.mkdir(dir, { recursive: true })
  const payload = await gzip(
    JSON.stringify({
      globalBlobs,
      localBaseVersion,
      rawSnapshot: snapshot.toRaw(),
    })
  )
  const tmp = `${path}.${crypto.randomUUID()}~`
  try {
    // 'wx' still guards against clobbering an existing file.
    await fs.promises.writeFile(tmp, payload, { flag: 'wx' })
    await fs.promises.rename(tmp, path)
  } catch (err) {
    try {
      await fs.promises.rm(tmp, { force: true })
    } catch (cleanupErr) {
      logger.warn(
        { err: cleanupErr, projectId, userId },
        'compile from cache: failed to remove temporary history snapshot'
      )
    }
    throw err
  }
}
/**
 * Delete the local resync snapshot file, if there is one.
 * A missing file is ignored, other failures are logged as warnings.
 *
 * @param {string} projectId
 * @param {string} userId
 * @return {Promise<void>}
 */
async function deleteResyncSnapshot(projectId, userId) {
  const target = snapshotPath(projectId, userId).resyncPath
  try {
    await fs.promises.unlink(target)
  } catch (err) {
    if (isENOENT(err)) return
    logger.warn(
      { err, projectId, userId },
      'compile from cache: failed to clear history-resync.json.gz'
    )
  }
}
/**
 * Recursively list the content of the compile dir.
 *
 * The resulting map is keyed by the path relative to the compile dir, the
 * value is `true` for folders and `false` for files. A folder is added after
 * all of its children, the root ('.') comes last.
 * Symlinks, FIFOs and sockets are deleted right away and not listed; any
 * other kind of entry is rejected with an error.
 *
 * @param {string} compileDir
 * @param {string} subDir
 * @param {Map<string, boolean>} entries
 * @return {Promise<Map<string, boolean>>}
 */
async function discoverExistingEntries(
  compileDir,
  subDir = '.',
  entries = new Map()
) {
  const folder = Path.join(compileDir, subDir)
  const children = await fs.promises.readdir(folder, { withFileTypes: true })
  for (const dirent of children) {
    const path = Path.join(subDir, dirent.name)
    if (dirent.isDirectory()) {
      await discoverExistingEntries(compileDir, path, entries)
      continue
    }
    if (dirent.isFile()) {
      entries.set(path, false)
      continue
    }
    const isBlocked =
      dirent.isSymbolicLink() || dirent.isFIFO() || dirent.isSocket()
    if (!isBlocked) {
      throw new OError('unexpected dir entry', { compileDir, subDir, dirent })
    }
    // should not happen, delete right away
    logger.warn(
      { compileDir, subDir, dirent },
      'compile from cache: found blocked dirent'
    )
    await fs.promises.unlink(Path.join(compileDir, path))
  }
  entries.set(subDir, true)
  return entries
}
/**
 * Delete entries of the compile dir that are neither part of the snapshot nor
 * kept by ResourceWriter.isExtraneousFile, and drop them from the given map.
 *
 * The map is expected to list children before their parent folder (as
 * produced by discoverExistingEntries): a folder is only kept when one of its
 * children registered it in `keepFolders` earlier in the iteration.
 * The map is mutated while iterating: deleted paths are removed from it.
 *
 * @param {string} compileDir
 * @param {Snapshot} snapshot
 * @param {Map<string, boolean>} entriesDepthFirst path -> isDirectory
 */
async function removeExtraneousEntries(
compileDir,
snapshot,
entriesDepthFirst
) {
// The root of the compile dir is always kept.
const keepFolders = new Set(['.'])
for (const [path, isDir] of entriesDepthFirst) {
// Does the snapshot hold a file at this very path?
const shouldBeFile = !!snapshot.getFile(path)
if (isDir) {
if (!shouldBeFile) {
// directory can stay directory
if (keepFolders.has(path)) {
// folder is still in use
// Propagate upwards so that the parent folder is kept as well.
keepFolders.add(Path.dirname(path))
} else {
// empty folder
await fs.promises.rmdir(Path.join(compileDir, path))
entriesDepthFirst.delete(path)
}
continue
}
// a folder turned into a file
// before: foo/bar.txt/baz.txt
// ^^^^^^^ folder
// now: foo/bar.txt
// ^^^^^^^ file
// Remove everything that is still listed below the folder, then the
// folder itself. Files are listed before their folder, so each rmdir
// runs after the children of that folder were handled.
const needle = path + '/'
for (const [child, childIsDir] of entriesDepthFirst) {
if (!child.startsWith(needle)) continue
if (childIsDir) {
await fs.promises.rmdir(Path.join(compileDir, child))
} else {
await fs.promises.unlink(Path.join(compileDir, child))
}
entriesDepthFirst.delete(child)
}
await fs.promises.rmdir(Path.join(compileDir, path))
entriesDepthFirst.delete(path)
continue
}
if (shouldBeFile || !ResourceWriter.isExtraneousFile(path)) {
// resource or cached file
keepFolders.add(Path.dirname(path))
continue
}
// Plain file that is neither in the snapshot nor kept by ResourceWriter.
await fs.promises.unlink(Path.join(compileDir, path))
entriesDepthFirst.delete(path)
}
}
/**
 * Create the missing parent folders of the given path inside the compile dir.
 *
 * Folders are created outermost first. Every created folder is added to the
 * map, so following calls can skip it.
 *
 * @param {string} compileDir
 * @param {string} path
 * @param {Map<string, boolean>} entriesDepthFirst
 */
async function ensureHasParentFolder(compileDir, path, entriesDepthFirst) {
  const parent = Path.dirname(path)
  if (!entriesDepthFirst.has(parent)) {
    // Ancestors first: mkdir is not recursive here.
    await ensureHasParentFolder(compileDir, parent, entriesDepthFirst)
    await fs.promises.mkdir(Path.join(compileDir, parent))
    entriesDepthFirst.set(parent, true)
  }
}
/**
 * Turn each list of raw operations into a Change with a fixed placeholder
 * timestamp.
 *
 * @param {import('overleaf-editor-core/lib/types.js').RawOperation[][]} raw
 * @return {Change[]}
 */
function changesFromRawChangeOperations(raw) {
  const changes = []
  for (const operations of raw) {
    changes.push(Change.mustFromRaw({ operations, timestamp: '0' }))
  }
  return changes
}
/**
 * Bring the compile dir in line with the project content described by the
 * history details of the compile request.
 *
 * Steps:
 * 1. Pick a base snapshot: an existing one (see loadSnapshot) or, when none
 *    is usable, the snapshot that came with the request. Without either, the
 *    error from loadSnapshot is rethrown.
 * 2. Apply the changes of the request that the base snapshot is missing.
 * 3. Remove extraneous entries from the compile dir.
 * 4. Write the files: all of them for a full sync, otherwise only the paths
 *    touched by the applied changes plus snapshot files missing on disk.
 * 5. Persist the updated snapshot for the next compile.
 *
 * Side effect: sets `timings.snapshotApplyAll` and
 * `timings.snapshotLoadEager` (milliseconds, rounded up).
 *
 * @param {string} projectId
 * @param {string} userId
 * @param {Object} request
 * @param {string} compileDir
 * @param {Record<string, number>} timings
 * @return {Promise<{baseHistoryVersion: number, resourceList: {path: string}[]}>}
 */
export async function syncResourcesToDisk(
projectId,
userId,
request,
compileDir,
timings
) {
const remoteBaseVersion = request.baseHistoryVersion
let rawSnapshot, globalBlobs, localBaseVersion, source
// Stays true when loadSnapshot throws, so the fallback to the snapshot of
// the request always writes all the files.
let fullSync = true
try {
;({ rawSnapshot, globalBlobs, fullSync, localBaseVersion } =
await loadSnapshot(projectId, userId, remoteBaseVersion))
// NOTE(review): a resync file that was already on disk also reports
// fullSync=true and is labelled 'clsi-cache' here.
source = fullSync ? 'clsi-cache' : 'local'
logger.debug(
{ projectId, userId, localBaseVersion, remoteBaseVersion },
'compile from cache: using existing snapshot'
)
} catch (err) {
// No usable snapshot and none in the request: let the caller handle it.
if (!request.rawSnapshot) throw err
if (!(err instanceof Errors.MissingUpdatesError)) {
logger.warn(
{ err, projectId, userId },
'compile from cache: bad local history state during full resync'
)
}
logger.debug(
{ projectId, userId },
'compile from cache: using incoming snapshot'
)
source = 'remote'
localBaseVersion = remoteBaseVersion
rawSnapshot = request.rawSnapshot
globalBlobs = []
}
// Merge the known global blobs with the ones of the request, without duplicates.
globalBlobs = Array.from(new Set(globalBlobs.concat(request.globalBlobs)))
const snapshot = Snapshot.fromRaw(rawSnapshot)
// Skip the leading changes that the base snapshot already includes.
const changes = changesFromRawChangeOperations(
request.rawChangeOperations.slice(localBaseVersion - remoteBaseVersion)
)
const applyAllStart = performance.now()
snapshot.applyAll(changes)
timings.snapshotApplyAll = Math.ceil(performance.now() - applyAllStart)
if (!ClsiMetrics.shouldSkipMetrics(request)) {
// The timing is tracked in milliseconds, the metric expects seconds.
ClsiMetrics.snapshotApplyAllDurationSeconds.observe(
{ group: request.compileGroup, source },
timings.snapshotApplyAll / 1_000
)
}
const entriesDepthFirst = await discoverExistingEntries(compileDir)
await removeExtraneousEntries(compileDir, snapshot, entriesDepthFirst)
// Collect the paths that need to be (re-)written.
const changedPaths = []
if (fullSync) {
changedPaths.push(...snapshot.getFilePathnames())
logger.debug({ projectId, userId }, 'compile from cache: full sync')
} else {
const dedupe = new Set()
for (const change of changes) {
for (const operation of change.getOperations()) {
if (operation instanceof AddFileOperation) {
dedupe.add(operation.pathname)
} else if (operation instanceof MoveFileOperation) {
dedupe.add(operation.pathname)
if (!operation.isRemoveFile()) dedupe.add(operation.newPathname)
} else if (operation instanceof EditFileOperation) {
dedupe.add(operation.pathname)
}
}
}
// Restore deleted files
for (const path of snapshot.getFilePathnames()) {
if (!entriesDepthFirst.has(path)) dedupe.add(path)
}
changedPaths.push(...dedupe)
logger.debug(
{ projectId, userId, changedPaths },
'compile from cache: incremental sync'
)
}
const blobStore = new BlobStore(request.historyId, globalBlobs)
const loadEagerStart = performance.now()
await snapshot.loadFiles('eager', blobStore)
timings.snapshotLoadEager = Math.ceil(performance.now() - loadEagerStart)
if (!ClsiMetrics.shouldSkipMetrics(request)) {
ClsiMetrics.snapshotLoadEagerDurationSeconds.observe(
{ group: request.compileGroup, source },
timings.snapshotLoadEager / 1_000
)
}
// Create the missing folders one by one, before the writes below run
// concurrently.
for (const path of changedPaths) {
const file = snapshot.getFile(path)
if (!file) continue // deleted, handled by removeExtraneousEntries
await ensureHasParentFolder(compileDir, path, entriesDepthFirst)
}
// Shared promise, so that the url cache folder is created only once.
let createCacheFolder
// Use Promise.allSettled to ensure that all writes have stopped when we exit.
const allDone = await promiseMapSettledWithLimit(
Settings.parallelFileDownloads,
changedPaths,
async path => {
const file = snapshot.getFile(path)
if (!file) return // deleted, handled by removeExtraneousEntries
const content = file.getContent({ filterTrackedDeletes: true })
if (typeof content === 'string') {
// String content is available in memory: write it out directly.
await fs.promises.writeFile(
Path.join(compileDir, path),
content,
'utf-8'
)
} else {
// No string content: download the blob by its hash via the url cache.
const hash = file.getHash()
if (!hash) {
throw new OError('unexpected file without content and hash', { path })
}
const fallbackURL = null // no fallback
const lastModified = new Date(0) // content is static
if (!createCacheFolder) {
createCacheFolder = UrlCache.promises.createProjectDir(projectId)
}
await createCacheFolder
await UrlCache.promises.downloadUrlToFile(
projectId,
blobStore.getBlobURL(hash).href,
fallbackURL,
Path.join(compileDir, path),
lastModified
)
}
}
)
// Surface the first failure, tagged with the path that failed.
for (const [idx, result] of allDone.entries()) {
if (result.status === 'fulfilled') continue
const path = changedPaths[idx]
throw OError.tag(result.reason, 'write failed', { path })
}
const baseHistoryVersion = localBaseVersion + changes.length
// Skip the write when the snapshot on disk is already up-to-date.
if (fullSync || changes.length) {
await saveSnapshot(
projectId,
userId,
snapshot,
baseHistoryVersion,
globalBlobs
)
}
if (fullSync) {
// The regular snapshot file was just written, drop the resync file.
await deleteResyncSnapshot(projectId, userId)
}
return {
baseHistoryVersion,
resourceList: snapshot.getFilePathnames().map(path => ({ path })),
}
}
/**
 * Resolves history blob hashes to filestore URLs and fetches blob content.
 */
class BlobStore {
  /** @type {string} */
  #historyId

  /**
   * Kept as a Set: getBlobURL is called once per blob, a lookup in the
   * original array would be linear in the number of global blobs each time.
   * @type {Set<string>}
   */
  #globalBlobs

  /**
   * @param {string} historyId
   * @param {string[]} globalBlobs hashes that are served from the global
   *   blob endpoint instead of the project specific one
   */
  constructor(historyId, globalBlobs) {
    this.#historyId = historyId
    this.#globalBlobs = new Set(globalBlobs)
  }

  /**
   * Build the filestore URL of a blob.
   *
   * @param {string} hash
   * @return {URL}
   */
  getBlobURL(hash) {
    const u = new URL(Settings.apis.filestore.url)
    if (this.#globalBlobs.has(hash)) {
      u.pathname = `/history/global/hash/${hash}`
    } else {
      u.pathname = `/history/project/${this.#historyId}/hash/${hash}`
    }
    return u
  }

  /**
   * Fetch the content of a blob as string.
   *
   * The empty file hash resolves to '' without a request. A 404 response is
   * turned into a NotFoundError right away; any other failure is retried, up
   * to 3 attempts in total with a 3s timeout each and a 100ms pause between.
   *
   * @param {string} hash
   * @return {Promise<string>}
   */
  async getString(hash) {
    if (hash === File.EMPTY_FILE_HASH) return ''
    const u = this.getBlobURL(hash)
    let remainingAttempts = 3
    while (true) {
      try {
        return await fetchString(u, { signal: AbortSignal.timeout(3_000) })
      } catch (err) {
        if (err instanceof RequestFailedError && err.response.status === 404) {
          throw new Errors.NotFoundError()
        }
        remainingAttempts--
        if (remainingAttempts <= 0) throw err
        logger.warn(
          { err, url: u.href, remainingAttempts },
          'compile from cache: history blob download failed'
        )
        await setTimeout(100)
      }
    }
  }

  /**
   * Fetch the content of a blob and parse it as JSON.
   *
   * @param {string} hash
   * @return {Promise<any>}
   */
  async getObject(hash) {
    const string = await this.getString(hash)
    return JSON.parse(string)
  }
}

View File

@@ -31,7 +31,7 @@ const e2eCompileDurationSeconds = new prom.Histogram({
name: 'clsi_e2e_compile_duration_seconds',
help: 'Duration of the entire compile request in clsi (sync, latexmk, output)',
buckets: COMPILE_TIME_BUCKETS,
labelNames: ['compile', 'group'],
labelNames: ['compile', 'group', 'compileFromHistory'],
})
const e2eCompileDurationClsiPerfSeconds = new prom.Gauge({
@@ -68,6 +68,20 @@ const imageProcessingDurationSeconds = new prom.Histogram({
labelNames: ['group', 'type'],
})
// Time spent applying the pending changes on top of the base snapshot
// during a compile from history.
// Labels: the compile group and the source of the base snapshot.
const snapshotApplyAllDurationSeconds = new prom.Histogram({
name: 'clsi_snapshot_applyAll_duration_seconds',
help: 'Time spent applying snapshot changes',
buckets: [0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1, 2, 5, 10],
labelNames: ['group', 'source'],
})
// Time spent on the eager loading of the snapshot files during a compile
// from history. The buckets reach further up than the ones for applyAll.
const snapshotLoadEagerDurationSeconds = new prom.Histogram({
name: 'clsi_snapshot_load_eager_duration_seconds',
help: 'Time spent loading string blobs for snapshot',
buckets: [0.01, 0.1, 0.2, 0.5, 1, 2, 5, 10, 20, 50],
labelNames: ['group', 'source'],
})
function shouldSkipMetrics(request) {
return ['clsi-perf', 'health-check', 'clsi-cache-template'].includes(
request.metricsOpts.path
@@ -83,5 +97,7 @@ export default {
processOutputFilesDurationSeconds,
latexmkRuleDurationSeconds,
imageProcessingDurationSeconds,
snapshotApplyAllDurationSeconds,
snapshotLoadEagerDurationSeconds,
shouldSkipMetrics,
}

View File

@@ -93,10 +93,11 @@ export default {
)
return outputFiles.filter(
// Ignore the pdf, clsi-cache tar-ball and also ignore the files ignored by the frontend.
// Ignore the pdf, clsi-cache tar-ball, history snapshot blob and also ignore the files ignored by the frontend.
({ path }) =>
path !== 'output.pdf' &&
path !== 'output.tar.gz' &&
path !== 'history-resync.json.gz' &&
!ignoreFiles.includes(path)
)
} catch (error) {

View File

@@ -16,6 +16,7 @@ import Settings from '@overleaf/settings'
import { callbackify } from 'node:util'
import Path from 'node:path'
import fs from 'node:fs'
import * as HistoryResourceWriter from './HistoryResourceWriter.js'
let ProjectPersistenceManager
const oneDay = 24 * 60 * 60 * 1000
@@ -204,19 +205,15 @@ export default ProjectPersistenceManager = {
}
logger.debug({ projectId, userId }, 'clearing project for user')
return CompileManager.clearProject(projectId, userId, function (error) {
if (error != null) {
return callback(error)
}
return ProjectPersistenceManager.clearProjectFromCache(
projectId,
{ reason: 'cleared' },
function (error) {
if (error != null) {
return callback(error)
}
return callback()
}
)
if (error) return callback(error)
HistoryResourceWriter.clearCacheCb(projectId, userId, error => {
if (error) return callback(error)
ProjectPersistenceManager.clearProjectFromCache(
projectId,
{ reason: 'cleared' },
callback
)
})
})
},

View File

@@ -4,6 +4,7 @@ import OutputCacheManager from './OutputCacheManager.js'
const VALID_COMPILERS = ['pdflatex', 'latex', 'xelatex', 'lualatex']
const MAX_TIMEOUT = 600
const EDITOR_ID_REGEX = /^[a-f0-9-]{36}$/ // UUID
const HISTORY_ID_REGEX = /^([0-9a-f]{24}|[1-9][0-9]{0,9})$/ // mongo id or postgres id
function parse(body, callback) {
const response = {}
@@ -112,7 +113,12 @@ function parse(body, callback) {
// resources (full) or only those resources to be updated
// in-place (incremental).
response.syncType = _parseAttribute('syncType', compile.options.syncType, {
validValues: ['full', 'incremental'],
validValues: [
'full',
'incremental',
'history-full',
'history-incremental',
],
type: 'string',
})
@@ -139,6 +145,22 @@ function parse(body, callback) {
response.resources = (compile.resources || []).map(resource =>
_parseResource(resource)
)
response.historyId = _parseAttribute(
'historyId',
compile.options.historyId,
{ type: 'string', regex: HISTORY_ID_REGEX }
)
response.baseHistoryVersion = _parseAttribute(
'baseHistoryVersion',
compile.baseHistoryVersion,
{ type: 'number' }
)
response.globalBlobs = _parseAttribute('globalBlobs', compile.globalBlobs, {
type: 'array',
})
// The snapshot and changes are validated when loading them in editor-core.
response.rawSnapshot = compile.rawSnapshot
response.rawChangeOperations = compile.rawChangeOperations
const rootResourcePath = _parseAttribute(
'rootResourcePath',
@@ -216,7 +238,11 @@ function _parseAttribute(name, attribute, options) {
)
}
}
if (options.type != null) {
if (options.type === 'array') {
if (!Array.isArray(attribute)) {
throw new Error(`${name} attribute should be an array`)
}
} else if (options.type != null) {
// eslint-disable-next-line valid-typeof
if (typeof attribute !== options.type) {
throw new Error(`${name} attribute should be a ${options.type}`)

View File

@@ -62,6 +62,11 @@ module.exports = {
({ zone, readOnly }) => zone === process.env.ZONE && !readOnly
),
},
filestore: {
url:
process.env.FILESTORE_DOMAIN_OVERRIDE ||
`http://${process.env.FILESTORE_HOST || '127.0.0.1'}:3009`,
},
},
smokeTest: process.env.SMOKE_TEST || false,

View File

@@ -30,6 +30,7 @@
"dockerode": "^4.0.9",
"express": "4.22.1",
"lodash": "^4.17.21",
"overleaf-editor-core": "*",
"p-limit": "^3.1.0",
"request": "2.88.2",
"send": "^0.19.0",

View File

@@ -148,6 +148,7 @@ describe('CompileController', () => {
...file,
})),
clsiCacheShard: undefined,
baseHistoryVersion: undefined,
},
})
.should.equal(true)
@@ -176,6 +177,7 @@ describe('CompileController', () => {
...file,
})),
clsiCacheShard: undefined,
baseHistoryVersion: undefined,
},
})
.should.equal(true)
@@ -224,6 +226,7 @@ describe('CompileController', () => {
...file,
})),
clsiCacheShard: undefined,
baseHistoryVersion: undefined,
},
})
})
@@ -272,6 +275,7 @@ describe('CompileController', () => {
...file,
})),
clsiCacheShard: undefined,
baseHistoryVersion: undefined,
},
})
})
@@ -304,6 +308,7 @@ describe('CompileController', () => {
stats: ctx.stats,
timings: ctx.timings,
clsiCacheShard: undefined,
baseHistoryVersion: undefined,
},
})
.should.equal(true)
@@ -339,6 +344,7 @@ describe('CompileController', () => {
// JSON.stringify will omit these undefined values
buildId: undefined,
clsiCacheShard: undefined,
baseHistoryVersion: undefined,
},
})
.should.equal(true)
@@ -373,6 +379,7 @@ describe('CompileController', () => {
// JSON.stringify will omit these undefined values
buildId: undefined,
clsiCacheShard: undefined,
baseHistoryVersion: undefined,
},
})
.should.equal(true)
@@ -405,6 +412,7 @@ describe('CompileController', () => {
// JSON.stringify will omit these undefined values
buildId: undefined,
clsiCacheShard: undefined,
baseHistoryVersion: undefined,
},
})
.should.equal(true)

View File

@@ -21,6 +21,14 @@ describe('ProjectPersistenceManager', () => {
default: (ctx.UrlCache = {}),
}))
vi.doMock(
'../../../app/js/HistoryResourceWriter',
() =>
(ctx.HistoryResourceWriter = {
clearCacheCb: sinon.stub().yields(null),
})
)
vi.doMock('../../../app/js/CompileManager', () => ({
default: (ctx.CompileManager = {}),
}))
@@ -163,6 +171,13 @@ describe('ProjectPersistenceManager', () => {
.should.equal(true)
})
it('should clear the history cache', ctx => {
ctx.HistoryResourceWriter.clearCacheCb.should.have.been.calledWith(
ctx.project_id,
ctx.user_id
)
})
it('should clear all the cached Urls for the project', ctx => {
return ctx.UrlCache.clearProject
.calledWith(ctx.project_id)

View File

@@ -494,7 +494,8 @@ describe('RequestParser', () => {
it('should return an error', ctx => {
ctx.callback
.calledWithMatch({
message: 'syncType attribute should be one of: full, incremental',
message:
'syncType attribute should be one of: full, incremental, history-full, history-incremental',
})
.should.equal(true)
})