diff --git a/package-lock.json b/package-lock.json index bd3eb0b2e9..b0fb9ef9c2 100644 --- a/package-lock.json +++ b/package-lock.json @@ -12,6 +12,7 @@ "services/analytics", "services/chat", "services/clsi", + "services/clsi-cache", "services/clsi-perf", "services/contacts", "services/docstore", @@ -8708,6 +8709,10 @@ "resolved": "services/clsi", "link": true }, + "node_modules/@overleaf/clsi-cache": { + "resolved": "services/clsi-cache", + "link": true + }, "node_modules/@overleaf/clsi-perf": { "resolved": "services/clsi-perf", "link": true @@ -15467,6 +15472,12 @@ "deep-equal": "^2.0.5" } }, + "node_modules/b4a": { + "version": "1.6.7", + "resolved": "https://registry.npmjs.org/b4a/-/b4a-1.6.7.tgz", + "integrity": "sha512-OnAYlL5b7LEkALw87fUVafQw5rVR9RjwGd4KUwNQ6DrrNmaVaUCgLipfVlzrPQ4tWOR9P0IXGNOx50jYCCdSJg==", + "license": "Apache-2.0" + }, "node_modules/babel-core": { "version": "7.0.0-bridge.0", "resolved": "https://registry.npmjs.org/babel-core/-/babel-core-7.0.0-bridge.0.tgz", @@ -15829,6 +15840,70 @@ "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==" }, + "node_modules/bare-events": { + "version": "2.5.4", + "resolved": "https://registry.npmjs.org/bare-events/-/bare-events-2.5.4.tgz", + "integrity": "sha512-+gFfDkR8pj4/TrWCGUGWmJIkBwuxPS5F+a5yWjOHQt2hHvNZd5YLzadjmDUtFmMM4y429bnKLa8bYBMHcYdnQA==", + "license": "Apache-2.0", + "optional": true + }, + "node_modules/bare-fs": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/bare-fs/-/bare-fs-4.0.1.tgz", + "integrity": "sha512-ilQs4fm/l9eMfWY2dY0WCIUplSUp7U0CT1vrqMg1MUdeZl4fypu5UP0XcDBK5WBQPJAKP1b7XEodISmekH/CEg==", + "license": "Apache-2.0", + "optional": true, + "dependencies": { + "bare-events": "^2.0.0", + "bare-path": "^3.0.0", + "bare-stream": "^2.0.0" + }, + "engines": { + "bare": ">=1.7.0" + } + }, + "node_modules/bare-os": { + "version": 
"3.6.0", + "resolved": "https://registry.npmjs.org/bare-os/-/bare-os-3.6.0.tgz", + "integrity": "sha512-BUrFS5TqSBdA0LwHop4OjPJwisqxGy6JsWVqV6qaFoe965qqtaKfDzHY5T2YA1gUL0ZeeQeA+4BBc1FJTcHiPw==", + "license": "Apache-2.0", + "optional": true, + "engines": { + "bare": ">=1.14.0" + } + }, + "node_modules/bare-path": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/bare-path/-/bare-path-3.0.0.tgz", + "integrity": "sha512-tyfW2cQcB5NN8Saijrhqn0Zh7AnFNsnczRcuWODH0eYAXBsJ5gVxAUuNr7tsHSC6IZ77cA0SitzT+s47kot8Mw==", + "license": "Apache-2.0", + "optional": true, + "dependencies": { + "bare-os": "^3.0.1" + } + }, + "node_modules/bare-stream": { + "version": "2.6.5", + "resolved": "https://registry.npmjs.org/bare-stream/-/bare-stream-2.6.5.tgz", + "integrity": "sha512-jSmxKJNJmHySi6hC42zlZnq00rga4jjxcgNZjY9N5WlOe/iOoGRtdwGsHzQv2RlH2KOYMwGUXhf2zXd32BA9RA==", + "license": "Apache-2.0", + "optional": true, + "dependencies": { + "streamx": "^2.21.0" + }, + "peerDependencies": { + "bare-buffer": "*", + "bare-events": "*" + }, + "peerDependenciesMeta": { + "bare-buffer": { + "optional": true + }, + "bare-events": { + "optional": true + } + } + }, "node_modules/base": { "version": "0.11.2", "resolved": "https://registry.npmjs.org/base/-/base-0.11.2.tgz", @@ -22529,8 +22604,7 @@ "node_modules/fast-fifo": { "version": "1.3.2", "resolved": "https://registry.npmjs.org/fast-fifo/-/fast-fifo-1.3.2.tgz", - "integrity": "sha512-/d9sfos4yxzpwkDkuN7k2SqFKtYNmCTzgfEpz82x34IM9/zc8KGxQoXg1liNC/izpRM/MBdt44Nmx41ZWqk+FQ==", - "dev": true + "integrity": "sha512-/d9sfos4yxzpwkDkuN7k2SqFKtYNmCTzgfEpz82x34IM9/zc8KGxQoXg1liNC/izpRM/MBdt44Nmx41ZWqk+FQ==" }, "node_modules/fast-glob": { "version": "3.3.3", @@ -34466,12 +34540,6 @@ } ] }, - "node_modules/queue-tick": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/queue-tick/-/queue-tick-1.0.1.tgz", - "integrity": "sha512-kJt5qhMxoszgU/62PLP1CJytzd2NKetjSRnyuj31fDd3Rlcz3fzlFdFLD1SItunPwyqEOkca6GbV612BWfaBag==", - "dev": 
true - }, "node_modules/quick-lru": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/quick-lru/-/quick-lru-1.1.0.tgz", @@ -37523,13 +37591,16 @@ } }, "node_modules/streamx": { - "version": "2.15.1", - "resolved": "https://registry.npmjs.org/streamx/-/streamx-2.15.1.tgz", - "integrity": "sha512-fQMzy2O/Q47rgwErk/eGeLu/roaFWV0jVsogDmrszM9uIw8L5OA+t+V93MgYlufNptfjmYR1tOMWhei/Eh7TQA==", - "dev": true, + "version": "2.22.0", + "resolved": "https://registry.npmjs.org/streamx/-/streamx-2.22.0.tgz", + "integrity": "sha512-sLh1evHOzBy/iWRiR6d1zRcLao4gGZr3C1kzNz4fopCOKJb6xD9ub8Mpi9Mr1R6id5o43S+d93fI48UC5uM9aw==", + "license": "MIT", "dependencies": { - "fast-fifo": "^1.1.0", - "queue-tick": "^1.0.1" + "fast-fifo": "^1.3.2", + "text-decoder": "^1.1.0" + }, + "optionalDependencies": { + "bare-events": "^2.2.0" } }, "node_modules/string_decoder": { @@ -38605,6 +38676,31 @@ "node": ">=10" } }, + "node_modules/tar-fs": { + "version": "3.0.8", + "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-3.0.8.tgz", + "integrity": "sha512-ZoROL70jptorGAlgAYiLoBLItEKw/fUxg9BSYK/dF/GAGYFJOJJJMvjPAKDJraCXFwadD456FCuvLWgfhMsPwg==", + "license": "MIT", + "dependencies": { + "pump": "^3.0.0", + "tar-stream": "^3.1.5" + }, + "optionalDependencies": { + "bare-fs": "^4.0.1", + "bare-path": "^3.0.0" + } + }, + "node_modules/tar-fs/node_modules/tar-stream": { + "version": "3.1.7", + "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-3.1.7.tgz", + "integrity": "sha512-qJj60CXt7IU1Ffyc3NJMjh6EkuCFej46zUqJ4J7pqYlThyd9bO0XBTmcOIhSzZJVWfsLks0+nle/j538YAW9RQ==", + "license": "MIT", + "dependencies": { + "b4a": "^1.6.4", + "fast-fifo": "^1.2.0", + "streamx": "^2.15.0" + } + }, "node_modules/tar-stream": { "version": "2.2.0", "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-2.2.0.tgz", @@ -38864,6 +38960,15 @@ "node": ">=8" } }, + "node_modules/text-decoder": { + "version": "1.2.3", + "resolved": "https://registry.npmjs.org/text-decoder/-/text-decoder-1.2.3.tgz", 
+ "integrity": "sha512-3/o9z3X0X0fTupwsYvR03pJ/DjWuqqrfwBgTQzdWDiQSm9KitAyz/9WqsT2JQW7KV2m+bC2ol/zqpW37NHxLaA==", + "license": "Apache-2.0", + "dependencies": { + "b4a": "^1.6.4" + } + }, "node_modules/text-table": { "version": "0.2.0", "resolved": "https://registry.npmjs.org/text-table/-/text-table-0.2.0.tgz", @@ -41897,6 +42002,7 @@ "p-limit": "^3.1.0", "request": "^2.88.2", "send": "^0.19.0", + "tar-fs": "^3.0.4", "workerpool": "^6.1.5" }, "devDependencies": { @@ -41913,6 +42019,27 @@ "typescript": "^5.0.4" } }, + "services/clsi-cache": { + "name": "@overleaf/clsi-cache", + "dependencies": { + "@overleaf/fetch-utils": "*", + "@overleaf/logger": "*", + "@overleaf/metrics": "*", + "@overleaf/o-error": "*", + "@overleaf/promise-utils": "*", + "@overleaf/settings": "*", + "body-parser": "^1.20.3", + "bunyan": "^1.8.15", + "celebrate": "^15.0.3", + "diskusage": "^1.1.3", + "express": "^4.21.2" + }, + "devDependencies": { + "chai": "^4.3.6", + "chai-as-promised": "^7.1.1", + "mocha": "^11.1.0" + } + }, "services/clsi-perf": { "name": "@overleaf/clsi-perf", "dependencies": { @@ -42011,6 +42138,18 @@ "node": ">= 8.0" } }, + "services/clsi/node_modules/dockerode/node_modules/tar-fs": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-2.1.2.tgz", + "integrity": "sha512-EsaAXwxmx8UB7FRKqeozqEPop69DXcmYwTQwXvyAPF352HJsPdkVhvTaDPYqfNgruveJIJy3TA2l+2zj8LJIJA==", + "license": "MIT", + "dependencies": { + "chownr": "^1.1.1", + "mkdirp-classic": "^0.5.2", + "pump": "^3.0.0", + "tar-stream": "^2.1.4" + } + }, "services/clsi/node_modules/nan": { "version": "2.22.2", "resolved": "https://registry.npmjs.org/nan/-/nan-2.22.2.tgz", @@ -42090,18 +42229,6 @@ "node": ">=8" } }, - "services/clsi/node_modules/tar-fs": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-2.1.2.tgz", - "integrity": "sha512-EsaAXwxmx8UB7FRKqeozqEPop69DXcmYwTQwXvyAPF352HJsPdkVhvTaDPYqfNgruveJIJy3TA2l+2zj8LJIJA==", - "license": "MIT", - "dependencies": 
{ - "chownr": "^1.1.1", - "mkdirp-classic": "^0.5.2", - "pump": "^3.0.0", - "tar-stream": "^2.1.4" - } - }, "services/clsi/node_modules/uuid": { "version": "10.0.0", "resolved": "https://registry.npmjs.org/uuid/-/uuid-10.0.0.tgz", diff --git a/package.json b/package.json index ae25e19029..7dc9a63e29 100644 --- a/package.json +++ b/package.json @@ -55,6 +55,7 @@ "services/analytics", "services/chat", "services/clsi", + "services/clsi-cache", "services/clsi-perf", "services/contacts", "services/docstore", diff --git a/services/clsi/app.js b/services/clsi/app.js index 8715802a0e..8de9d89b9b 100644 --- a/services/clsi/app.js +++ b/services/clsi/app.js @@ -258,6 +258,8 @@ app.use(function (error, req, res, next) { if (error instanceof Errors.NotFoundError) { logger.debug({ err: error, url: req.url }, 'not found error') res.sendStatus(404) + } else if (error instanceof Errors.InvalidParameter) { + res.status(400).send(error.message) } else if (error.code === 'EPIPE') { // inspect container returns EPIPE when shutting down res.sendStatus(503) // send 503 Unavailable response diff --git a/services/clsi/app/js/CLSICacheHandler.js b/services/clsi/app/js/CLSICacheHandler.js new file mode 100644 index 0000000000..42d902c8e9 --- /dev/null +++ b/services/clsi/app/js/CLSICacheHandler.js @@ -0,0 +1,256 @@ +const fs = require('node:fs') +const Path = require('node:path') +const { pipeline } = require('node:stream/promises') +const { createGzip, createGunzip } = require('node:zlib') +const tarFs = require('tar-fs') +const _ = require('lodash') +const { + fetchNothing, + fetchStream, + RequestFailedError, +} = require('@overleaf/fetch-utils') +const logger = require('@overleaf/logger') +const Metrics = require('@overleaf/metrics') +const Settings = require('@overleaf/settings') +const { CACHE_SUBDIR } = require('./OutputCacheManager') +const { isExtraneousFile } = require('./ResourceWriter') + +const TIMING_BUCKETS = [ + 0, 10, 100, 1000, 2000, 5000, 10000, 15000, 20000, 30000, +] 
+const MAX_ENTRIES_IN_OUTPUT_TAR = 100 + +/** + * @param {string} projectId + * @param {string} userId + * @param {string} buildId + * @param {string} editorId + * @param {[{path: string}]} outputFiles + * @param {string} compileGroup + */ +function notifyCLSICacheAboutBuild({ + projectId, + userId, + buildId, + editorId, + outputFiles, + compileGroup, +}) { + if (!Settings.apis.clsiCache.enabled) return + + /** + * @param {[{path: string}]} files + */ + const enqueue = files => { + Metrics.count('clsi_cache_enqueue_files', files.length) + fetchNothing(`${Settings.apis.clsiCache.url}/enqueue`, { + method: 'POST', + json: { + projectId, + userId, + buildId, + editorId, + files, + downloadHost: Settings.apis.clsi.downloadHost, + clsiServerId: Settings.apis.clsi.clsiServerId, + compileGroup, + }, + signal: AbortSignal.timeout(15_000), + }).catch(err => { + logger.warn( + { err, projectId, userId, buildId }, + 'enqueue for clsi cache failed' + ) + }) + } + + // PDF preview + enqueue( + outputFiles + .filter( + f => + f.path === 'output.pdf' || + f.path === 'output.log' || + f.path.endsWith('.blg') + ) + .map(f => { + if (f.path === 'output.pdf') { + return _.pick(f, 'path', 'size', 'contentId', 'ranges') + } + return _.pick(f, 'path') + }) + ) + + // Compile Cache + buildTarball({ projectId, userId, buildId, outputFiles }) + .then(() => { + enqueue([{ path: 'output.tar.gz' }]) + }) + .catch(err => { + logger.warn( + { err, projectId, userId, buildId }, + 'build output.tar.gz for clsi cache failed' + ) + }) +} + +/** + * @param {string} projectId + * @param {string} userId + * @param {string} buildId + * @param {[{path: string}]} outputFiles + * @return {Promise} + */ +async function buildTarball({ projectId, userId, buildId, outputFiles }) { + const timer = new Metrics.Timer('clsi_cache_build', 1, {}, TIMING_BUCKETS) + const outputDir = Path.join( + Settings.path.outputDir, + userId ? 
`${projectId}-${userId}` : projectId, + CACHE_SUBDIR, + buildId + ) + + const files = outputFiles.filter( + f => f.path === 'output.synctex.gz' || !isExtraneousFile(f.path) + ) + if (files.length > MAX_ENTRIES_IN_OUTPUT_TAR) { + Metrics.inc('clsi_cache_build_too_many_entries') + throw new Error('too many output files for output.tar.gz') + } + Metrics.count('clsi_cache_build_files', files.length) + + const path = Path.join(outputDir, 'output.tar.gz') + try { + await pipeline( + tarFs.pack(outputDir, { entries: files.map(f => f.path) }), + createGzip(), + fs.createWriteStream(path) + ) + } catch (err) { + try { + await fs.promises.unlink(path) + } catch (e) {} + throw err + } finally { + timer.done() + } +} + +/** + * @param {string} projectId + * @param {string} userId + * @param {string} compileDir + * @return {Promise} + */ +async function downloadLatestCompileCache(projectId, userId, compileDir) { + return await _downloadCompileCache( + `${Settings.apis.clsiCache.url}/project/${projectId}/${ + userId ? `user/${userId}/` : '' + }latest/output/output.tar.gz`, + compileDir, + 'tar' + ) +} + +/** + * @param {string} projectId + * @param {string} userId + * @param {string} editorId + * @param {string} buildId + * @param {string} compileDir + * @return {Promise} + */ +async function downloadOldCompileCache( + projectId, + userId, + editorId, + buildId, + compileDir +) { + return await _downloadCompileCache( + `${Settings.apis.clsiCache.url}/project/${projectId}/${ + userId ? 
`user/${userId}/` : '' + }build/${editorId}-${buildId}/search/output/output.tar.gz`, + compileDir, + 'synctex' + ) +} + +/** + * @param {string} url + * @param {string} compileDir + * @param {string} method + * @return {Promise} + */ +async function _downloadCompileCache(url, compileDir, method) { + if (!Settings.apis.clsiCache.enabled) return false + + const timer = new Metrics.Timer( + 'clsi_cache_download', + 1, + { status: 'success', method }, + TIMING_BUCKETS + ) + let stream + try { + stream = await fetchStream(url, { + method: 'GET', + signal: AbortSignal.timeout(10_000), + }) + } catch (err) { + if (err instanceof RequestFailedError && err.response.status === 404) { + timer.labels.status = 'not-found' + timer.done() + return false + } + timer.labels.status = 'error' + timer.done() + throw err + } + let n = 0 + let abort = false + await pipeline( + stream, + createGunzip(), + tarFs.extract(compileDir, { + // use ignore hook for counting entries (files+folders) and validation. + // Include folders as they incur mkdir calls. 
+ ignore(_, header) { + if (abort) return true // log once + n++ + if (n > MAX_ENTRIES_IN_OUTPUT_TAR) { + abort = true + logger.warn( + { + url, + compileDir, + method, + }, + 'too many entries in tar-ball from clsi-cache' + ) + } else if (header.type !== 'file' && header.type !== 'directory') { + abort = true + logger.warn( + { + url, + compileDir, + method, + entryType: header.type, + }, + 'unexpected entry in tar-ball from clsi-cache' + ) + } + return abort + }, + }) + ) + Metrics.count('clsi_cache_download_entries', n) + timer.done() + return !abort +} + +module.exports = { + notifyCLSICacheAboutBuild, + downloadLatestCompileCache, + downloadOldCompileCache, +} diff --git a/services/clsi/app/js/CompileController.js b/services/clsi/app/js/CompileController.js index 8091055afe..0075e3fe4a 100644 --- a/services/clsi/app/js/CompileController.js +++ b/services/clsi/app/js/CompileController.js @@ -5,6 +5,7 @@ const Metrics = require('./Metrics') const ProjectPersistenceManager = require('./ProjectPersistenceManager') const logger = require('@overleaf/logger') const Errors = require('./Errors') +const { notifyCLSICacheAboutBuild } = require('./CLSICacheHandler') let lastSuccessfulCompileTimestamp = 0 @@ -104,6 +105,21 @@ function compile(req, res, next) { buildId = error.buildId } + if ( + status === 'success' && + request.editorId && + request.populateClsiCache + ) { + notifyCLSICacheAboutBuild({ + projectId: request.project_id, + userId: request.user_id, + buildId: outputFiles[0].build, + editorId: request.editorId, + outputFiles, + compileGroup: request.compileGroup, + }) + } + timer.done() res.status(code || 200).send({ compile: { @@ -153,24 +169,19 @@ function clearCache(req, res, next) { } function syncFromCode(req, res, next) { - const { file } = req.query + const { file, editorId, buildId, compileFromClsiCache } = req.query const line = parseInt(req.query.line, 10) const column = parseInt(req.query.column, 10) const { imageName } = req.query const projectId = 
req.params.project_id const userId = req.params.user_id - - if (imageName && !_isImageNameAllowed(imageName)) { - return res.status(400).send('invalid image') - } - CompileManager.syncFromCode( projectId, userId, file, line, column, - imageName, + { imageName, editorId, buildId, compileFromClsiCache }, function (error, pdfPositions) { if (error) { return next(error) @@ -186,20 +197,16 @@ function syncFromPdf(req, res, next) { const page = parseInt(req.query.page, 10) const h = parseFloat(req.query.h) const v = parseFloat(req.query.v) - const { imageName } = req.query + const { imageName, editorId, buildId, compileFromClsiCache } = req.query const projectId = req.params.project_id const userId = req.params.user_id - - if (imageName && !_isImageNameAllowed(imageName)) { - return res.status(400).send('invalid image') - } CompileManager.syncFromPdf( projectId, userId, page, h, v, - imageName, + { imageName, editorId, buildId, compileFromClsiCache }, function (error, codePositions) { if (error) { return next(error) @@ -216,9 +223,6 @@ function wordcount(req, res, next) { const projectId = req.params.project_id const userId = req.params.user_id const { image } = req.query - if (image && !_isImageNameAllowed(image)) { - return res.status(400).send('invalid image') - } logger.debug({ image, file, projectId }, 'word count request') CompileManager.wordcount( @@ -241,12 +245,6 @@ function status(req, res, next) { res.send('OK') } -function _isImageNameAllowed(imageName) { - const ALLOWED_IMAGES = - Settings.clsi && Settings.clsi.docker && Settings.clsi.docker.allowedImages - return !ALLOWED_IMAGES || ALLOWED_IMAGES.includes(imageName) -} - module.exports = { compile, stopCompile, diff --git a/services/clsi/app/js/CompileManager.js b/services/clsi/app/js/CompileManager.js index a98b138d0a..1e0dce1eef 100644 --- a/services/clsi/app/js/CompileManager.js +++ b/services/clsi/app/js/CompileManager.js @@ -19,6 +19,10 @@ const Errors = require('./Errors') const CommandRunner = 
require('./CommandRunner') const { emitPdfStats } = require('./ContentCacheMetrics') const SynctexOutputParser = require('./SynctexOutputParser') +const { + downloadLatestCompileCache, + downloadOldCompileCache, +} = require('./CLSICacheHandler') const COMPILE_TIME_BUCKETS = [ // NOTE: These buckets are locked in per metric name. @@ -44,7 +48,8 @@ function getOutputDir(projectId, userId) { async function doCompileWithLock(request) { const compileDir = getCompileDir(request.project_id, request.user_id) - await fsPromises.mkdir(compileDir, { recursive: true }) + request.isInitialCompile = + (await fsPromises.mkdir(compileDir, { recursive: true })) === compileDir // prevent simultaneous compiles const lock = LockManager.acquire(compileDir) try { @@ -55,6 +60,7 @@ async function doCompileWithLock(request) { } async function doCompile(request) { + const { project_id: projectId, user_id: userId } = request const compileDir = getCompileDir(request.project_id, request.user_id) const stats = {} const timings = {} @@ -65,6 +71,25 @@ async function doCompile(request) { request.metricsOpts, COMPILE_TIME_BUCKETS ) + if (request.isInitialCompile) { + stats.isInitialCompile = 1 + request.metricsOpts.compile = 'initial' + if (request.compileFromClsiCache) { + try { + if (await downloadLatestCompileCache(projectId, userId, compileDir)) { + stats.restoredClsiCache = 1 + request.metricsOpts.compile = 'from-clsi-cache' + } + } catch (err) { + logger.warn( + { err, projectId, userId }, + 'failed to populate compile dir from cache' + ) + } + } + } else { + request.metricsOpts.compile = 'recompile' + } const writeToDiskTimer = new Metrics.Timer( 'write-to-disk', 1, @@ -408,14 +433,7 @@ async function _checkDirectory(compileDir) { return true } -async function syncFromCode( - projectId, - userId, - filename, - line, - column, - imageName -) { +async function syncFromCode(projectId, userId, filename, line, column, opts) { // If LaTeX was run in a virtual environment, the file path that 
synctex expects // might not match the file path on the host. The .synctex.gz file however, will be accessed // wherever it is on the host. @@ -431,7 +449,7 @@ async function syncFromCode( '-o', outputFilePath, ] - const stdout = await _runSynctex(projectId, userId, command, imageName) + const stdout = await _runSynctex(projectId, userId, command, opts) logger.debug( { projectId, userId, filename, line, column, command, stdout }, 'synctex code output' @@ -439,7 +457,7 @@ async function syncFromCode( return SynctexOutputParser.parseViewOutput(stdout) } -async function syncFromPdf(projectId, userId, page, h, v, imageName) { +async function syncFromPdf(projectId, userId, page, h, v, opts) { const compileName = getCompileName(projectId, userId) const baseDir = Settings.path.synctexBaseDir(compileName) const outputFilePath = `${baseDir}/output.pdf` @@ -449,7 +467,7 @@ async function syncFromPdf(projectId, userId, page, h, v, imageName) { '-o', `${page}:${h}:${v}:${outputFilePath}`, ] - const stdout = await _runSynctex(projectId, userId, command, imageName) + const stdout = await _runSynctex(projectId, userId, command, opts) logger.debug({ projectId, userId, page, h, v, stdout }, 'synctex pdf output') return SynctexOutputParser.parseEditOutput(stdout, baseDir) } @@ -478,14 +496,53 @@ async function _checkFileExists(dir, filename) { } } -async function _runSynctex(projectId, userId, command, imageName) { +async function _runSynctex(projectId, userId, command, opts) { + const { imageName, editorId, buildId, compileFromClsiCache } = opts + + if (imageName && !_isImageNameAllowed(imageName)) { + throw new Errors.InvalidParameter('invalid image') + } + if (editorId && !/^[a-f0-9-]+$/.test(editorId)) { + throw new Errors.InvalidParameter('invalid editorId') + } + if (buildId && !OutputCacheManager.BUILD_REGEX.test(buildId)) { + throw new Errors.InvalidParameter('invalid buildId') + } + const directory = getCompileDir(projectId, userId) const timeout = 60 * 1000 // increased to 
allow for large projects const compileName = getCompileName(projectId, userId) const compileGroup = 'synctex' const defaultImageName = Settings.clsi && Settings.clsi.docker && Settings.clsi.docker.image - await _checkFileExists(directory, 'output.synctex.gz') + try { + await _checkFileExists(directory, 'output.synctex.gz') + } catch (err) { + if ( + err instanceof Errors.NotFoundError && + compileFromClsiCache && + editorId && + buildId + ) { + try { + await downloadOldCompileCache( + projectId, + userId, + editorId, + buildId, + directory + ) + } catch (err) { + logger.warn( + { err, projectId, userId, editorId, buildId }, + 'failed to populate compile dir for synctex using old output' + ) + } + await _checkFileExists(directory, 'output.synctex.gz') + } else { + throw err + } + } try { const output = await CommandRunner.promises.run( compileName, @@ -515,6 +572,10 @@ async function wordcount(projectId, userId, filename, image) { const compileName = getCompileName(projectId, userId) const compileGroup = 'wordcount' + if (image && !_isImageNameAllowed(image)) { + throw new Errors.InvalidParameter('invalid image') + } + try { await fsPromises.mkdir(compileDir, { recursive: true }) } catch (err) { @@ -602,6 +663,12 @@ function _parseWordcountFromOutput(output) { return results } +function _isImageNameAllowed(imageName) { + const ALLOWED_IMAGES = + Settings.clsi && Settings.clsi.docker && Settings.clsi.docker.allowedImages + return !ALLOWED_IMAGES || ALLOWED_IMAGES.includes(imageName) +} + module.exports = { doCompileWithLock: callbackify(doCompileWithLock), stopCompile: callbackify(stopCompile), diff --git a/services/clsi/app/js/Errors.js b/services/clsi/app/js/Errors.js index 5c5fd3745a..64c3c7b59a 100644 --- a/services/clsi/app/js/Errors.js +++ b/services/clsi/app/js/Errors.js @@ -35,6 +35,7 @@ class QueueLimitReachedError extends OError {} class TimedOutError extends OError {} class NoXrefTableError extends OError {} class TooManyCompileRequestsError extends OError 
{} +class InvalidParameter extends OError {} module.exports = Errors = { QueueLimitReachedError, @@ -44,4 +45,5 @@ module.exports = Errors = { AlreadyCompilingError, NoXrefTableError, TooManyCompileRequestsError, + InvalidParameter, } diff --git a/services/clsi/app/js/OutputFileArchiveManager.js b/services/clsi/app/js/OutputFileArchiveManager.js index a64d634e12..64c5198392 100644 --- a/services/clsi/app/js/OutputFileArchiveManager.js +++ b/services/clsi/app/js/OutputFileArchiveManager.js @@ -93,8 +93,11 @@ module.exports = { ) return outputFiles.filter( - // Ignore the pdf and also ignore the files ignored by the frontend. - ({ path }) => path !== 'output.pdf' && !ignoreFiles.includes(path) + // Ignore the pdf, clsi-cache tar-ball and also ignore the files ignored by the frontend. + ({ path }) => + path !== 'output.pdf' && + path !== 'output.tar.gz' && + !ignoreFiles.includes(path) ) } catch (error) { if ( diff --git a/services/clsi/app/js/RequestParser.js b/services/clsi/app/js/RequestParser.js index f5c07d3bcf..4e9d722921 100644 --- a/services/clsi/app/js/RequestParser.js +++ b/services/clsi/app/js/RequestParser.js @@ -3,6 +3,7 @@ const OutputCacheManager = require('./OutputCacheManager') const VALID_COMPILERS = ['pdflatex', 'latex', 'xelatex', 'lualatex'] const MAX_TIMEOUT = 600 +const EDITOR_ID_REGEX = /^[a-f0-9-]{36}$/ // UUID function parse(body, callback) { const response = {} @@ -28,12 +29,24 @@ function parse(body, callback) { default: '', type: 'string', }), + // Will be populated later. Must always be populated for prom library. 
+ compile: 'initial', } response.compiler = _parseAttribute('compiler', compile.options.compiler, { validValues: VALID_COMPILERS, default: 'pdflatex', type: 'string', }) + response.compileFromClsiCache = _parseAttribute( + 'compileFromClsiCache', + compile.options.compileFromClsiCache, + { default: false, type: 'boolean' } + ) + response.populateClsiCache = _parseAttribute( + 'populateClsiCache', + compile.options.populateClsiCache, + { default: false, type: 'boolean' } + ) response.enablePdfCaching = _parseAttribute( 'enablePdfCaching', compile.options.enablePdfCaching, @@ -137,6 +150,10 @@ function parse(body, callback) { ) response.rootResourcePath = _checkPath(rootResourcePath) + response.editorId = _parseAttribute('editorId', compile.options.editorId, { + type: 'string', + regex: EDITOR_ID_REGEX, + }) response.buildId = _parseAttribute('buildId', compile.options.buildId, { type: 'string', regex: OutputCacheManager.BUILD_REGEX, diff --git a/services/clsi/app/js/ResourceWriter.js b/services/clsi/app/js/ResourceWriter.js index 1db1c2baac..bf88538746 100644 --- a/services/clsi/app/js/ResourceWriter.js +++ b/services/clsi/app/js/ResourceWriter.js @@ -262,6 +262,7 @@ module.exports = ResourceWriter = { shouldDelete = false } if ( + path === 'output.tar.gz' || path === 'output.synctex.gz' || path === 'output.pdfxref' || path === 'output.pdf' || diff --git a/services/clsi/config/settings.defaults.js b/services/clsi/config/settings.defaults.js index 0f4111dc62..51d13f9c48 100644 --- a/services/clsi/config/settings.defaults.js +++ b/services/clsi/config/settings.defaults.js @@ -1,10 +1,12 @@ const Path = require('node:path') const http = require('node:http') const https = require('node:https') +const os = require('node:os') http.globalAgent.keepAlive = false https.globalAgent.keepAlive = false const isPreEmptible = process.env.PREEMPTIBLE === 'TRUE' +const CLSI_SERVER_ID = os.hostname().replace('-ctr', '') module.exports = { compileSizeLimit: 
process.env.COMPILE_SIZE_LIMIT || '7mb', @@ -48,12 +50,20 @@ module.exports = { url: `http://${process.env.CLSI_HOST || '127.0.0.1'}:3013`, // External url prefix for output files, e.g. for requests via load-balancers. outputUrlPrefix: `${process.env.ZONE ? `/zone/${process.env.ZONE}` : ''}`, + clsiServerId: process.env.CLSI_SERVER_ID || CLSI_SERVER_ID, + + downloadHost: process.env.DOWNLOAD_HOST || 'http://localhost:3013', }, clsiPerf: { host: `${process.env.CLSI_PERF_HOST || '127.0.0.1'}:${ process.env.CLSI_PERF_PORT || '3043' }`, }, + clsiCache: { + enabled: !!process.env.CLSI_CACHE_HOST, + url: `http://${process.env.CLSI_CACHE_HOST}:3044`, + downloadURL: `http://${process.env.CLSI_CACHE_NGINX_HOST || process.env.CLSI_CACHE_HOST}:8080`, + }, }, smokeTest: process.env.SMOKE_TEST || false, diff --git a/services/clsi/package.json b/services/clsi/package.json index 3f05ab543d..b3a64e35df 100644 --- a/services/clsi/package.json +++ b/services/clsi/package.json @@ -34,6 +34,7 @@ "p-limit": "^3.1.0", "request": "^2.88.2", "send": "^0.19.0", + "tar-fs": "^3.0.4", "workerpool": "^6.1.5" }, "devDependencies": { diff --git a/services/clsi/test/setup.js b/services/clsi/test/setup.js index 19e1ae7165..b17507bf92 100644 --- a/services/clsi/test/setup.js +++ b/services/clsi/test/setup.js @@ -20,7 +20,7 @@ SandboxedModule.configure({ err() {}, }, }, - globals: { Buffer, console, process, URL }, + globals: { Buffer, console, process, URL, Math }, sourceTransformers: { removeNodePrefix: function (source) { return source.replace(/require\(['"]node:/g, "require('") diff --git a/services/clsi/test/unit/js/CompileControllerTests.js b/services/clsi/test/unit/js/CompileControllerTests.js index d97e433f29..b06679d994 100644 --- a/services/clsi/test/unit/js/CompileControllerTests.js +++ b/services/clsi/test/unit/js/CompileControllerTests.js @@ -1,54 +1,11 @@ const SandboxedModule = require('sandboxed-module') const sinon = require('sinon') -const { expect } = require('chai') const 
modulePath = require('node:path').join( __dirname, '../../../app/js/CompileController' ) const Errors = require('../../../app/js/Errors') -function tryImageNameValidation(method, imageNameField) { - describe('when allowedImages is set', function () { - beforeEach(function () { - this.Settings.clsi = { docker: {} } - this.Settings.clsi.docker.allowedImages = [ - 'repo/image:tag1', - 'repo/image:tag2', - ] - this.res.send = sinon.stub() - this.res.status = sinon.stub().returns({ send: this.res.send }) - - this.CompileManager[method].reset() - }) - - describe('with an invalid image', function () { - beforeEach(function () { - this.req.query[imageNameField] = 'something/evil:1337' - this.CompileController[method](this.req, this.res, this.next) - }) - it('should return a 400', function () { - expect(this.res.status.calledWith(400)).to.equal(true) - }) - it('should not run the query', function () { - expect(this.CompileManager[method].called).to.equal(false) - }) - }) - - describe('with a valid image', function () { - beforeEach(function () { - this.req.query[imageNameField] = 'repo/image:tag1' - this.CompileController[method](this.req, this.res, this.next) - }) - it('should not return a 400', function () { - expect(this.res.status.calledWith(400)).to.equal(false) - }) - it('should run the query', function () { - expect(this.CompileManager[method].called).to.equal(true) - }) - }) - }) -} - describe('CompileController', function () { beforeEach(function () { this.buildId = 'build-id-123' @@ -61,6 +18,11 @@ describe('CompileController', function () { clsi: { url: 'http://clsi.example.com', outputUrlPrefix: '/zone/b', + downloadHost: 'http://localhost:3013', + }, + clsiCache: { + enabled: false, + url: 'http://localhost:3044', }, }, }), @@ -68,6 +30,11 @@ describe('CompileController', function () { Timer: sinon.stub().returns({ done: sinon.stub() }), }, './ProjectPersistenceManager': (this.ProjectPersistenceManager = {}), + './CLSICacheHandler': { + 
notifyCLSICacheAboutBuild: sinon.stub(), + downloadLatestCompileCache: sinon.stub().resolves(), + downloadOldCompileCache: sinon.stub().resolves(), + }, './Errors': (this.Erros = Errors), }, }) @@ -439,8 +406,6 @@ describe('CompileController', function () { }) .should.equal(true) }) - - tryImageNameValidation('syncFromCode', 'imageName') }) describe('syncFromPdf', function () { @@ -476,8 +441,6 @@ describe('CompileController', function () { }) .should.equal(true) }) - - tryImageNameValidation('syncFromPdf', 'imageName') }) describe('wordcount', function () { @@ -511,7 +474,5 @@ describe('CompileController', function () { }) .should.equal(true) }) - - tryImageNameValidation('wordcount', 'image') }) }) diff --git a/services/clsi/test/unit/js/CompileManagerTests.js b/services/clsi/test/unit/js/CompileManagerTests.js index 8d7aff4910..f332c3f568 100644 --- a/services/clsi/test/unit/js/CompileManagerTests.js +++ b/services/clsi/test/unit/js/CompileManagerTests.js @@ -160,6 +160,11 @@ describe('CompileManager', function () { './LockManager': this.LockManager, './SynctexOutputParser': this.SynctexOutputParser, 'fs/promises': this.fsPromises, + './CLSICacheHandler': { + notifyCLSICacheAboutBuild: sinon.stub(), + downloadLatestCompileCache: sinon.stub().resolves(), + downloadOldCompileCache: sinon.stub().resolves(), + }, }, }) }) @@ -177,6 +182,11 @@ describe('CompileManager', function () { flags: (this.flags = ['-file-line-error']), compileGroup: (this.compileGroup = 'compile-group'), stopOnFirstError: false, + metricsOpts: { + path: 'clsi-perf', + method: 'minimal', + compile: 'initial', + }, } this.env = { OVERLEAF_PROJECT_ID: this.projectId, @@ -455,7 +465,7 @@ describe('CompileManager', function () { this.filename, this.line, this.column, - customImageName + { imageName: customImageName } ) }) @@ -497,7 +507,7 @@ describe('CompileManager', function () { this.page, this.h, this.v, - '' + { imageName: '' } ) }) @@ -532,7 +542,7 @@ describe('CompileManager', function () { 
/**
 * Check that `filename` names an output file that may be requested from
 * clsi-cache.
 *
 * Accepted names are the fixed compile outputs listed below, plus any name
 * ending in `.blg`.
 *
 * @param {string} filename - name of the requested output file
 * @throws {InvalidNameError} for any other value, including non-strings
 */
function validateFilename(filename) {
  const isKnownOutput = [
    'output.blg',
    'output.log',
    'output.pdf',
    'output.overleaf.json',
    'output.tar.gz',
  ].includes(filename)
  // The `.blg` check has to widen the allow-list. OR-ing it onto the negated
  // lookup rejected every `.blg` name, the allow-listed `output.blg` included.
  // NOTE(review): assumes arbitrary `*.blg` names (presumably BibTeX logs) are
  // meant to be accepted — confirm against the clsi-cache routes.
  // The typeof guard keeps non-string input on the InvalidNameError path.
  const isBlg = typeof filename === 'string' && filename.endsWith('.blg')
  if (!isKnownOutput && !isBlg) {
    throw new InvalidNameError('bad filename')
  }
}
/**
 * Send an `import-from` request to the clsi-cache instance of the given zone.
 *
 * Issues `POST /project/:projectId/user/:userId/import-from` with a JSON body
 * of `{ sourceProjectId, lastUpdated, templateId, templateVersionId }`.
 * NOTE(review): presumably this makes clsi-cache seed the new project's cache
 * from the source project or template — confirm against the clsi-cache service.
 *
 * @param {string} projectId - project whose cache should be prepared
 * @param {string} userId
 * @param {Object} options
 * @param {string} [options.sourceProjectId]
 * @param {string} [options.templateId]
 * @param {string} [options.templateVersionId]
 * @param {*} [options.lastUpdated] - forwarded unchanged in the request body
 * @param {string} options.zone - zone of the clsi-cache instance to contact
 * @param {AbortSignal} [options.signal]
 * @throws {OError} when no clsi-cache instance is configured for `zone`
 * @throws {NotFoundError} when clsi-cache responds with a 404
 */
async function prepareCacheSource(
  projectId,
  userId,
  { sourceProjectId, templateId, templateVersionId, lastUpdated, zone, signal }
) {
  const instance = Settings.apis.clsiCache.instances.find(
    candidate => candidate.zone === zone
  )
  if (!instance) {
    // Without this guard a missing zone (e.g. an empty `instances` list)
    // surfaces as an opaque "cannot read properties of undefined" TypeError.
    throw new OError('no clsi-cache instance configured for zone', { zone })
  }
  const url = new URL(
    `/project/${projectId}/user/${userId}/import-from`,
    instance.url
  )
  try {
    await fetchNothing(url, {
      method: 'POST',
      json: {
        sourceProjectId,
        lastUpdated,
        templateId,
        templateVersionId,
      },
      signal,
    })
  } catch (err) {
    // Translate a 404 into the domain error; every other failure propagates
    // untouched so callers can log it.
    if (err instanceof RequestFailedError && err.response.status === 404) {
      throw new NotFoundError()
    }
    throw err
  }
}
/**
 * Prepare the clsi-cache of a newly created project from a source project or
 * a template, when the `copy-clsi-cache` split test is enabled for the user.
 *
 * With a `sourceProjectId`, the zone and `lastUpdated` of the source's latest
 * cached `output.tar.gz` are looked up first; otherwise zone `b` is used.
 * A NotFoundError from either step ends the preparation silently; any other
 * error propagates to the caller. Both steps share one 5 second timeout.
 *
 * @param {string} projectId - the new project
 * @param {string} userId
 * @param {Object} source
 * @param {string} [source.sourceProjectId]
 * @param {string} [source.templateId]
 * @param {string} [source.templateVersionId]
 * @returns {Promise<void>}
 */
async function prepareClsiCache(
  projectId,
  userId,
  { sourceProjectId, templateId, templateVersionId }
) {
  const assignment = await SplitTestHandler.promises.getAssignmentForUser(
    userId,
    'copy-clsi-cache'
  )
  if (assignment.variant !== 'enabled') return

  const signal = AbortSignal.timeout(5_000)
  // populate template data on zone b
  const origin = { zone: 'b', lastUpdated: undefined }

  try {
    if (sourceProjectId) {
      const latestBuild = await getLatestBuildFromCache(
        sourceProjectId,
        userId,
        'output.tar.gz',
        signal
      )
      origin.zone = latestBuild.internal.zone
      origin.lastUpdated = latestBuild.external.lastUpdated
    }
    await ClsiCacheHandler.prepareCacheSource(projectId, userId, {
      sourceProjectId,
      templateId,
      templateVersionId,
      zone: origin.zone,
      lastUpdated: origin.lastUpdated,
      signal,
    })
  } catch (err) {
    // nothing cached yet, or the cached entry expired.
    if (err instanceof NotFoundError) return
    throw err
  }
}
b/services/web/app/src/Features/Compile/CompileController.js index 327c7cc9a3..b7726f1d72 100644 --- a/services/web/app/src/Features/Compile/CompileController.js +++ b/services/web/app/src/Features/Compile/CompileController.js @@ -66,11 +66,31 @@ const getSplitTestOptions = callbackify(async function (req, res) { } catch (e) {} const editorReq = { ...req, query } + // Lookup the clsi-cache flag in the backend. + // We may need to turn off the feature on a short notice, without requiring + // all users to reload their editor page to disable the feature. + const { variant: compileFromClsiCacheVariant } = + await SplitTestHandler.promises.getAssignment( + editorReq, + res, + 'compile-from-clsi-cache' + ) + const compileFromClsiCache = compileFromClsiCacheVariant === 'enabled' + const { variant: populateClsiCacheVariant } = + await SplitTestHandler.promises.getAssignment( + editorReq, + res, + 'populate-clsi-cache' + ) + const populateClsiCache = populateClsiCacheVariant === 'enabled' + const pdfDownloadDomain = Settings.pdfDownloadDomain if (!req.query.enable_pdf_caching) { // The frontend does not want to do pdf caching. return { + compileFromClsiCache, + populateClsiCache, pdfDownloadDomain, enablePdfCaching: false, } @@ -88,12 +108,16 @@ const getSplitTestOptions = callbackify(async function (req, res) { if (!enablePdfCaching) { // Skip the lookup of the chunk size when caching is not enabled. 
return { + compileFromClsiCache, + populateClsiCache, pdfDownloadDomain, enablePdfCaching: false, } } const pdfCachingMinChunkSize = await getPdfCachingMinChunkSize(editorReq, res) return { + compileFromClsiCache, + populateClsiCache, pdfDownloadDomain, enablePdfCaching, pdfCachingMinChunkSize, @@ -112,6 +136,7 @@ module.exports = CompileController = { isAutoCompile, fileLineErrors, stopOnFirstError, + editorId: req.body.editorId, } if (req.body.rootDoc_id) { @@ -138,8 +163,15 @@ module.exports = CompileController = { getSplitTestOptions(req, res, (err, splitTestOptions) => { if (err) return next(err) - let { enablePdfCaching, pdfCachingMinChunkSize, pdfDownloadDomain } = - splitTestOptions + let { + compileFromClsiCache, + populateClsiCache, + enablePdfCaching, + pdfCachingMinChunkSize, + pdfDownloadDomain, + } = splitTestOptions + options.compileFromClsiCache = compileFromClsiCache + options.populateClsiCache = populateClsiCache options.enablePdfCaching = enablePdfCaching if (enablePdfCaching) { options.pdfCachingMinChunkSize = pdfCachingMinChunkSize @@ -193,6 +225,8 @@ module.exports = CompileController = { timeout: limits.timeout === 60 ? 'short' : 'long', server: clsiServerId?.includes('-c2d-') ? 
'faster' : 'normal', isAutoCompile, + isInitialCompile: stats.isInitialCompile === 1, + restoredClsiCache: stats.restoredClsiCache === 1, stopOnFirstError, } ) @@ -497,7 +531,7 @@ module.exports = CompileController = { proxySyncPdf(req, res, next) { const projectId = req.params.Project_id - const { page, h, v } = req.query + const { page, h, v, editorId, buildId } = req.query if (!page?.match(/^\d+$/)) { return next(new Error('invalid page parameter')) } @@ -515,23 +549,29 @@ module.exports = CompileController = { getImageNameForProject(projectId, (error, imageName) => { if (error) return next(error) - const url = CompileController._getUrl(projectId, userId, 'sync/pdf') - CompileController.proxyToClsi( - projectId, - 'sync-to-pdf', - url, - { page, h, v, imageName }, - req, - res, - next - ) + getSplitTestOptions(req, res, (error, splitTestOptions) => { + if (error) return next(error) + const { compileFromClsiCache } = splitTestOptions + + const url = CompileController._getUrl(projectId, userId, 'sync/pdf') + + CompileController.proxyToClsi( + projectId, + 'sync-to-pdf', + url, + { page, h, v, imageName, editorId, buildId, compileFromClsiCache }, + req, + res, + next + ) + }) }) }) }, proxySyncCode(req, res, next) { const projectId = req.params.Project_id - const { file, line, column } = req.query + const { file, line, column, editorId, buildId } = req.query if (file == null) { return next(new Error('missing file parameter')) } @@ -557,16 +597,29 @@ module.exports = CompileController = { getImageNameForProject(projectId, (error, imageName) => { if (error) return next(error) - const url = CompileController._getUrl(projectId, userId, 'sync/code') - CompileController.proxyToClsi( - projectId, - 'sync-to-code', - url, - { file, line, column, imageName }, - req, - res, - next - ) + getSplitTestOptions(req, res, (error, splitTestOptions) => { + if (error) return next(error) + const { compileFromClsiCache } = splitTestOptions + + const url = 
CompileController._getUrl(projectId, userId, 'sync/code') + CompileController.proxyToClsi( + projectId, + 'sync-to-code', + url, + { + file, + line, + column, + imageName, + editorId, + buildId, + compileFromClsiCache, + }, + req, + res, + next + ) + }) }) }) }, diff --git a/services/web/app/src/Features/Project/ProjectDuplicator.js b/services/web/app/src/Features/Project/ProjectDuplicator.js index da18c7e9b8..47c00ed3df 100644 --- a/services/web/app/src/Features/Project/ProjectDuplicator.js +++ b/services/web/app/src/Features/Project/ProjectDuplicator.js @@ -21,6 +21,7 @@ const TpdsProjectFlusher = require('../ThirdPartyDataStore/TpdsProjectFlusher') const _ = require('lodash') const TagsHandler = require('../Tags/TagsHandler') const Features = require('../../infrastructure/Features') +const ClsiCacheManager = require('../Compile/ClsiCacheManager') module.exports = { duplicate: callbackify(duplicate), @@ -35,6 +36,7 @@ async function duplicate(owner, originalProjectId, newProjectName, tags = []) { originalProjectId, { compiler: true, + imageName: true, rootFolder: true, rootDoc_id: true, fromV1TemplateId: true, @@ -73,6 +75,21 @@ async function duplicate(owner, originalProjectId, newProjectName, tags = []) { { segmentation } ) + let prepareClsiCacheInBackground = Promise.resolve() + if (originalProject.imageName === newProject.imageName) { + // Populate the clsi-cache unless the TeXLive release has changed. 
+ prepareClsiCacheInBackground = ClsiCacheManager.prepareClsiCache( + newProject._id, + owner._id, + { sourceProjectId: originalProjectId } + ).catch(err => { + logger.warn( + { err, originalProjectId, projectId: newProject._id }, + 'failed to prepare clsi-cache for cloned project' + ) + }) + } + try { await ProjectOptionsHandler.promises.setCompiler( newProject._id, @@ -120,6 +137,10 @@ async function duplicate(owner, originalProjectId, newProjectName, tags = []) { }) } + try { + await prepareClsiCacheInBackground + } catch {} + return newProject } diff --git a/services/web/app/src/Features/Templates/TemplatesManager.js b/services/web/app/src/Features/Templates/TemplatesManager.js index f105b6ae85..6a2b6207c1 100644 --- a/services/web/app/src/Features/Templates/TemplatesManager.js +++ b/services/web/app/src/Features/Templates/TemplatesManager.js @@ -17,6 +17,7 @@ const settings = require('@overleaf/settings') const crypto = require('crypto') const Errors = require('../Errors/Errors') const { pipeline } = require('stream/promises') +const ClsiCacheManager = require('../Compile/ClsiCacheManager') const TemplatesManager = { async createProjectFromV1Template( @@ -63,6 +64,17 @@ const TemplatesManager = { attributes ) + const prepareClsiCacheInBackground = ClsiCacheManager.prepareClsiCache( + project._id, + userId, + { templateId, templateVersionId } + ).catch(err => { + logger.warn( + { err, templateId, templateVersionId, projectId: project._id }, + 'failed to prepare clsi-cache from template' + ) + }) + await TemplatesManager._setCompiler(project._id, compiler) await TemplatesManager._setImage(project._id, imageName) await TemplatesManager._setMainFile(project._id, mainFile) @@ -74,6 +86,8 @@ const TemplatesManager = { } await Project.updateOne({ _id: project._id }, update, {}) + await prepareClsiCacheInBackground + return project } finally { await fs.promises.unlink(dumpPath) diff --git a/services/web/config/settings.defaults.js 
b/services/web/config/settings.defaults.js index 1f86da66a0..be567bf13e 100644 --- a/services/web/config/settings.defaults.js +++ b/services/web/config/settings.defaults.js @@ -242,6 +242,9 @@ module.exports = { submissionBackendClass: process.env.CLSI_SUBMISSION_BACKEND_CLASS || 'n2d', }, + clsiCache: { + instances: JSON.parse(process.env.CLSI_CACHE_INSTANCES || '[]'), + }, project_history: { sendProjectStructureOps: true, url: `http://${process.env.PROJECT_HISTORY_HOST || '127.0.0.1'}:3054`, diff --git a/services/web/frontend/js/features/pdf-preview/components/pdf-synctex-controls.tsx b/services/web/frontend/js/features/pdf-preview/components/pdf-synctex-controls.tsx index 5373d0975a..12013f48eb 100644 --- a/services/web/frontend/js/features/pdf-preview/components/pdf-synctex-controls.tsx +++ b/services/web/frontend/js/features/pdf-preview/components/pdf-synctex-controls.tsx @@ -144,6 +144,7 @@ function PdfSynctexControls() { const { clsiServerId, + pdfFile, pdfUrl, pdfViewer, position, @@ -239,6 +240,8 @@ function PdfSynctexControls() { if (clsiServerId) { params += `&clsiserverid=${clsiServerId}` } + if (pdfFile?.editorId) params += `&editorId=${pdfFile.editorId}` + if (pdfFile?.build) params += `&buildId=${pdfFile.build}` getJSON(`/project/${projectId}/sync/code?${params}`, { signal }) .then(data => { @@ -253,6 +256,7 @@ function PdfSynctexControls() { }) }, [ + pdfFile, clsiServerId, isMounted, projectId, @@ -344,6 +348,8 @@ function PdfSynctexControls() { if (clsiServerId) { params.set('clsiserverid', clsiServerId) } + if (pdfFile?.editorId) params.set('editorId', pdfFile.editorId) + if (pdfFile?.build) params.set('buildId', pdfFile.build) getJSON(`/project/${projectId}/sync/pdf?${params}`, { signal }) .then(data => { @@ -358,6 +364,7 @@ function PdfSynctexControls() { }) }, [ + pdfFile, clsiServerId, projectId, signal, diff --git a/services/web/frontend/js/features/pdf-preview/util/compiler.js b/services/web/frontend/js/features/pdf-preview/util/compiler.js 
index 495d0ea8d1..d938cb3893 100644 --- a/services/web/frontend/js/features/pdf-preview/util/compiler.js +++ b/services/web/frontend/js/features/pdf-preview/util/compiler.js @@ -2,7 +2,7 @@ import { isMainFile } from './editor-files' import getMeta from '../../../utils/meta' import { deleteJSON, postJSON } from '../../../infrastructure/fetch-json' import { debounce } from 'lodash' -import { trackPdfDownload } from './metrics' +import { EDITOR_SESSION_ID, trackPdfDownload } from './metrics' import { enablePdfCaching } from './pdf-caching-flags' import { debugConsole } from '@/utils/debugging' import { signalWithTimeout } from '@/utils/abort-signal' @@ -109,6 +109,7 @@ export default class DocumentCompiler { // if there was previously a server error incrementalCompilesEnabled: !this.error, stopOnFirstError: options.stopOnFirstError, + editorId: EDITOR_SESSION_ID, } const data = await postJSON( diff --git a/services/web/frontend/js/features/pdf-preview/util/metrics.js b/services/web/frontend/js/features/pdf-preview/util/metrics.js index e80c2138a5..435594cb6e 100644 --- a/services/web/frontend/js/features/pdf-preview/util/metrics.js +++ b/services/web/frontend/js/features/pdf-preview/util/metrics.js @@ -8,7 +8,7 @@ import { debugConsole } from '@/utils/debugging' const VERSION = 9 // editing session id -const EDITOR_SESSION_ID = uuid() +export const EDITOR_SESSION_ID = uuid() const pdfCachingMetrics = { viewerId: EDITOR_SESSION_ID, diff --git a/services/web/frontend/js/features/pdf-preview/util/output-files.js b/services/web/frontend/js/features/pdf-preview/util/output-files.js index 49724942fb..0a79786f64 100644 --- a/services/web/frontend/js/features/pdf-preview/util/output-files.js +++ b/services/web/frontend/js/features/pdf-preview/util/output-files.js @@ -4,6 +4,7 @@ import { enablePdfCaching } from './pdf-caching-flags' import { debugConsole } from '@/utils/debugging' import { dirname, findEntityByPath } from '@/features/file-tree/util/path' import 
'@/utils/readable-stream-async-iterator-polyfill' +import { EDITOR_SESSION_ID } from '@/features/pdf-preview/util/metrics' // Warnings that may disappear after a second LaTeX pass const TRANSIENT_WARNING_REGEX = /^(Reference|Citation).+undefined on input line/ @@ -15,6 +16,8 @@ export function handleOutputFiles(outputFiles, projectId, data) { const outputFile = outputFiles.get('output.pdf') if (!outputFile) return null + outputFile.editorId = outputFile.editorId || EDITOR_SESSION_ID + // build the URL for viewing the PDF in the preview UI const params = new URLSearchParams({ compileGroup: data.compileGroup, diff --git a/services/web/test/unit/src/Compile/ClsiManagerTests.js b/services/web/test/unit/src/Compile/ClsiManagerTests.js index ff50ac16bb..65c3d310d0 100644 --- a/services/web/test/unit/src/Compile/ClsiManagerTests.js +++ b/services/web/test/unit/src/Compile/ClsiManagerTests.js @@ -144,6 +144,9 @@ describe('ClsiManager', function () { enablePdfCaching: true, clsiCookie: { key: 'clsiserver' }, } + this.ClsiCacheHandler = { + clearCache: sinon.stub().resolves(), + } this.Features = { hasFeature: sinon.stub().withArgs('project-history-blobs').returns(true), } @@ -172,6 +175,7 @@ describe('ClsiManager', function () { this.DocumentUpdaterHandler, './ClsiCookieManager': () => this.ClsiCookieManager, './ClsiStateManager': this.ClsiStateManager, + './ClsiCacheHandler': this.ClsiCacheHandler, '@overleaf/fetch-utils': this.FetchUtils, './ClsiFormatChecker': this.ClsiFormatChecker, '@overleaf/metrics': this.Metrics, @@ -390,6 +394,8 @@ describe('ClsiManager', function () { incrementalCompilesEnabled: true, compileBackendClass: 'e2', compileGroup: 'priority', + compileFromClsiCache: true, + populateClsiCache: true, enablePdfCaching: true, pdfCachingMinChunkSize: 1337, } @@ -448,6 +454,8 @@ describe('ClsiManager', function () { syncType: 'incremental', syncState: '01234567890abcdef', compileGroup: 'priority', + compileFromClsiCache: true, + populateClsiCache: true, 
enablePdfCaching: true, pdfCachingMinChunkSize: 1337, metricsMethod: 'priority', @@ -945,6 +953,12 @@ describe('ClsiManager', function () { ) }) + it('should clear the output.tar.gz files in clsi-cache', function () { + this.ClsiCacheHandler.clearCache + .calledWith(this.project._id, this.user_id) + .should.equal(true) + }) + it('should clear the project state from the docupdater', function () { this.DocumentUpdaterHandler.promises.clearProjectState .calledWith(this.project._id) diff --git a/services/web/test/unit/src/Compile/CompileControllerTests.js b/services/web/test/unit/src/Compile/CompileControllerTests.js index 0e9c33f4c3..aefa197a17 100644 --- a/services/web/test/unit/src/Compile/CompileControllerTests.js +++ b/services/web/test/unit/src/Compile/CompileControllerTests.js @@ -244,9 +244,12 @@ describe('CompileController', function () { this.user_id, { isAutoCompile: false, + compileFromClsiCache: false, + populateClsiCache: false, enablePdfCaching: false, fileLineErrors: false, stopOnFirstError: false, + editorId: undefined, } ) }) @@ -284,9 +287,12 @@ describe('CompileController', function () { this.user_id, { isAutoCompile: true, + compileFromClsiCache: false, + populateClsiCache: false, enablePdfCaching: false, fileLineErrors: false, stopOnFirstError: false, + editorId: undefined, } ) }) @@ -305,10 +311,37 @@ describe('CompileController', function () { this.user_id, { isAutoCompile: false, + compileFromClsiCache: false, + populateClsiCache: false, enablePdfCaching: false, draft: true, fileLineErrors: false, stopOnFirstError: false, + editorId: undefined, + } + ) + }) + }) + + describe('with an editor id', function () { + beforeEach(function (done) { + this.res.callback = done + this.req.body = { editorId: 'the-editor-id' } + this.CompileController.compile(this.req, this.res, this.next) + }) + + it('should pass the editor id to the compiler', function () { + this.CompileManager.compile.should.have.been.calledWith( + this.projectId, + this.user_id, + { + 
isAutoCompile: false, + compileFromClsiCache: false, + populateClsiCache: false, + enablePdfCaching: false, + fileLineErrors: false, + stopOnFirstError: false, + editorId: 'the-editor-id', } ) }) @@ -542,14 +575,16 @@ describe('CompileController', function () { }) }) describe('proxySyncCode', function () { - let file, line, column, imageName + let file, line, column, imageName, editorId, buildId beforeEach(function (done) { this.req.params = { Project_id: this.projectId } file = 'main.tex' line = String(Date.now()) column = String(Date.now() + 1) - this.req.query = { file, line, column } + editorId = '172977cb-361e-4854-a4dc-a71cf11512e5' + buildId = '195b4a3f9e7-03e5be430a9e7796' + this.req.query = { file, line, column, editorId, buildId } imageName = 'foo/bar:tag-0' this.ProjectGetter.getProject = sinon.stub().yields(null, { imageName }) @@ -566,7 +601,15 @@ describe('CompileController', function () { this.projectId, 'sync-to-code', `/project/${this.projectId}/user/${this.user_id}/sync/code`, - { file, line, column, imageName }, + { + file, + line, + column, + imageName, + editorId, + buildId, + compileFromClsiCache: false, + }, this.req, this.res, this.next @@ -575,14 +618,16 @@ describe('CompileController', function () { }) describe('proxySyncPdf', function () { - let page, h, v, imageName + let page, h, v, imageName, editorId, buildId beforeEach(function (done) { this.req.params = { Project_id: this.projectId } page = String(Date.now()) h = String(Math.random()) v = String(Math.random()) - this.req.query = { page, h, v } + editorId = '172977cb-361e-4854-a4dc-a71cf11512e5' + buildId = '195b4a3f9e7-03e5be430a9e7796' + this.req.query = { page, h, v, editorId, buildId } imageName = 'foo/bar:tag-1' this.ProjectGetter.getProject = sinon.stub().yields(null, { imageName }) @@ -599,7 +644,15 @@ describe('CompileController', function () { this.projectId, 'sync-to-pdf', `/project/${this.projectId}/user/${this.user_id}/sync/pdf`, - { page, h, v, imageName }, + { + page, + 
h, + v, + imageName, + editorId, + buildId, + compileFromClsiCache: false, + }, this.req, this.res, this.next diff --git a/services/web/test/unit/src/Project/ProjectDuplicatorTests.js b/services/web/test/unit/src/Project/ProjectDuplicatorTests.js index 1a49171163..8a2006a907 100644 --- a/services/web/test/unit/src/Project/ProjectDuplicatorTests.js +++ b/services/web/test/unit/src/Project/ProjectDuplicatorTests.js @@ -245,6 +245,9 @@ describe('ProjectDuplicator', function () { '../Tags/TagsHandler': this.TagsHandler, '../History/HistoryManager': this.HistoryManager, '../../infrastructure/Features': this.Features, + '../Compile/ClsiCacheManager': { + prepareClsiCache: sinon.stub().rejects(new Error('ignore this')), + }, }, }) }) diff --git a/services/web/test/unit/src/Templates/TemplatesManagerTests.js b/services/web/test/unit/src/Templates/TemplatesManagerTests.js index f75827094d..2dcf821a05 100644 --- a/services/web/test/unit/src/Templates/TemplatesManagerTests.js +++ b/services/web/test/unit/src/Templates/TemplatesManagerTests.js @@ -121,6 +121,9 @@ describe('TemplatesManager', function () { fs: this.fs, '../../models/Project': { Project: this.Project }, 'stream/promises': { pipeline: this.pipeline }, + '../Compile/ClsiCacheManager': { + prepareClsiCache: sinon.stub().rejects(new Error('ignore this')), + }, }, }).promises return (this.zipUrl =