[clsi-cache] cache templates in GCS and create entries on-demand (#30614)

* [clsi-cache] cache templates in GCS and create entries on-demand

* [clsi-cache] add missing return

Co-authored-by: Daniel Kontsek <daniel.kontsek@overleaf.com>

* [clsi-cache] update build scripts

* [web] fix unit tests

* [web] run prettier (again)

* [clsi-cache] drop 1xx prefix from submissionId

* [clsi-cache] add bestEffortRmDir helper

Co-authored-by: Brian Gough <brian.gough@overleaf.com>

---------

Co-authored-by: Daniel Kontsek <daniel.kontsek@overleaf.com>
Co-authored-by: Brian Gough <brian.gough@overleaf.com>
GitOrigin-RevId: e4bd7dca5611f16d9a6e76f2e7cf83e5819fa610
This commit is contained in:
Jakob Ackermann
2026-01-12 13:13:50 +00:00
committed by Copybot
parent 69b12f3075
commit 9cc2a7f1e0
7 changed files with 178 additions and 22 deletions

4
package-lock.json generated
View File

@@ -54920,6 +54920,7 @@
"@overleaf/logger": "*",
"@overleaf/metrics": "*",
"@overleaf/o-error": "*",
"@overleaf/object-persistor": "*",
"@overleaf/promise-utils": "*",
"@overleaf/settings": "*",
"@overleaf/stream-utils": "*",
@@ -54927,7 +54928,8 @@
"body-parser": "^1.20.3",
"bunyan": "^1.8.15",
"express": "^4.21.2",
"p-limit": "^3.1.0"
"p-limit": "^3.1.0",
"tar-fs": "^3.1.1"
},
"devDependencies": {
"chai": "^4.3.6",

View File

@@ -9,6 +9,7 @@ import Settings from '@overleaf/settings'
import OError from '@overleaf/o-error'
import { NotFoundError, InvalidNameError } from '../Errors/Errors.js'
import Features from '../../infrastructure/Features.mjs'
import Path from 'node:path'
const TIMEOUT = 4_000
@@ -228,8 +229,8 @@ async function getRedirectWithFallback(
* @param projectId
* @param userId
* @param sourceProjectId
* @param templateId
* @param templateVersionId
* @param imageName
* @param lastUpdated
* @param shard
* @param signal
@@ -238,8 +239,9 @@ async function getRedirectWithFallback(
async function prepareCacheSource(
projectId,
userId,
{ sourceProjectId, templateId, templateVersionId, lastUpdated, shard, signal }
{ sourceProjectId, templateVersionId, imageName, lastUpdated, shard, signal }
) {
imageName = Path.basename(imageName)
const url = new URL(
`/project/${projectId}/user/${userId}/import-from`,
Settings.apis.clsiCache.instances.find(i => i.shard === shard).url
@@ -250,8 +252,8 @@ async function prepareCacheSource(
json: {
sourceProjectId,
lastUpdated,
templateId,
templateVersionId,
imageName,
},
signal,
})
@@ -263,6 +265,46 @@ async function prepareCacheSource(
}
}
/**
 * Ask clsi-cache to store the output of a submission build as the cache
 * entry for a template version.
 *
 * Only the base name of the image is sent to clsi-cache.
 *
 * @param clsiCacheShard shard of the clsi-cache instance to talk to
 * @param submissionId
 * @param buildId
 * @param templateVersionId
 * @param imageName
 * @return {Promise<void>}
 * @throws {NotFoundError} when clsi-cache responds with a 404
 */
async function exportSubmissionAsTemplate(
  clsiCacheShard,
  submissionId,
  buildId,
  templateVersionId,
  imageName
) {
  const imageBaseName = Path.basename(imageName)
  const exportPath = `/submission/${submissionId}/build/${buildId}/export-as-template`
  const instance = Settings.apis.clsiCache.instances.find(
    candidate => candidate.shard === clsiCacheShard
  )
  const url = new URL(exportPath, instance.url)

  try {
    await fetchNothing(url, {
      method: 'POST',
      json: { templateVersionId, imageName: imageBaseName },
      // clsi-cache will poll up to 15s for the output to be copied from
      // clsi, hence the generous request timeout.
      signal: AbortSignal.timeout(30_000),
    })
  } catch (err) {
    // Translate a 404 into the domain error; pass everything else through.
    const isNotFound =
      err instanceof RequestFailedError && err.response.status === 404
    throw isNotFound ? new NotFoundError() : err
  }
}
export default {
TIMEOUT,
getEgressLabel,
@@ -270,4 +312,5 @@ export default {
getOutputFile,
getLatestOutputFile,
prepareCacheSource,
exportSubmissionAsTemplate,
}

View File

@@ -5,9 +5,18 @@ import DocumentUpdaterHandler from '../DocumentUpdater/DocumentUpdaterHandler.mj
import ProjectGetter from '../Project/ProjectGetter.mjs'
import UserGetter from '../User/UserGetter.mjs'
import Settings from '@overleaf/settings'
import logger from '@overleaf/logger'
import { fetchJson, RequestFailedError } from '@overleaf/fetch-utils'
import Metrics from '@overleaf/metrics'
import Features from '../../infrastructure/Features.mjs'
import ClsiManager from './ClsiManager.mjs'
import Crypto from 'node:crypto'
import ClsiCookieManagerFactory from './ClsiCookieManager.mjs'
import { ObjectId } from '../../infrastructure/mongodb.mjs'
const ClsiCookieManager = ClsiCookieManagerFactory(
Settings.apis.clsi?.backendGroupName
)
/**
* Get the most recent build and metadata
@@ -167,21 +176,24 @@ async function tryGetLatestCompileResult(projectId, userId, signal) {
/**
* Collect metadata and prepare the clsi-cache for the given project.
*
* Returns true when downloaded; false when download failed; undefined when
* disabled for env/user;
*
* @param projectId
* @param userId
* @param sourceProjectId
* @param templateId
* @param templateVersionId
* @return {Promise<void>}
* @param imageName
* @return {Promise<boolean|undefined>}
*/
async function prepareClsiCache(
projectId,
userId,
{ sourceProjectId, templateId, templateVersionId }
{ sourceProjectId, templateVersionId, imageName }
) {
if (!Features.hasFeature('saas')) return
if (!Features.hasFeature('saas')) return undefined
const features = await UserGetter.promises.getUserFeatures(userId)
if (features.compileGroup !== 'priority') return
if (features.compileGroup !== 'priority') return undefined
const signal = AbortSignal.timeout(ClsiCacheHandler.TIMEOUT)
let lastUpdated
@@ -197,27 +209,106 @@ async function prepareClsiCache(
signal
))
} catch (err) {
if (err instanceof NotFoundError) return // nothing cached yet
if (err instanceof NotFoundError) return false // nothing cached yet
throw err
}
}
try {
await ClsiCacheHandler.prepareCacheSource(projectId, userId, {
sourceProjectId,
templateId,
templateVersionId,
imageName,
shard,
lastUpdated,
signal,
})
} catch (err) {
if (err instanceof NotFoundError) return // nothing cached yet/expired.
if (err instanceof NotFoundError) return false // nothing cached yet/expired.
throw err
}
return true
}
/**
 * Compile a freshly created template project as a one-off submission and
 * export the resulting build into clsi-cache for the given template version.
 *
 * Resolves without exporting when the compile status is not 'success' or when
 * the compile response carries no clsi-cache shard. The aux files of the
 * submission are deleted afterwards on every path.
 *
 * @param {Object} params
 * @param params.templateVersionId
 * @param params.project project record; `imageName` is read from it
 * @param {Array} params.fileEntries entries providing `path` and `file`
 * @param {Array} params.docEntries entries providing `path`, `doc._id` and
 *   `docLines` (a newline separated string)
 * @return {Promise<void>}
 */
async function createTemplateClsiCache({
  templateVersionId,
  project,
  fileEntries,
  docEntries,
}) {
  const compileGroup = Settings.defaultFeatures.compileGroup
  const compileBackendClass = Settings.apis.clsi.submissionBackendClass
  const submissionId = new ObjectId().toString()
  const editorId = Crypto.randomUUID()
  const options = {
    editorId,
    compileGroup,
    compileBackendClass,
    timeout: 60,
    syncType: 'full',
    compileFromClsiCache: false,
    populateClsiCache: true,
    enablePdfCaching: false,
    pdfCachingMinChunkSize: 0,
    // _finaliseRequest keys off this metricsPath to allow populateClsiCache
    // outside of the alpha/priority compile groups.
    metricsPath: 'clsi-cache-template',
  }
  const docs = Object.fromEntries(
    docEntries.map(doc => [
      doc.path,
      { _id: doc.doc._id, lines: doc.docLines.split('\n') },
    ])
  )
  const files = Object.fromEntries(
    fileEntries.map(file => [file.path, file.file])
  )
  const req = ClsiManager._finaliseRequest(
    submissionId,
    options,
    project,
    docs,
    files
  )
  let clsiServerId = await ClsiCookieManager.promises.getServerId(
    submissionId,
    undefined,
    compileGroup,
    compileBackendClass
  )
  const { imageName } = project

  // Tracks whether the compile/export step itself failed, so that a failing
  // cleanup cannot replace the root-cause error.
  let compileOrExportFailed = false
  try {
    const response = await ClsiManager.promises.sendExternalRequest(
      submissionId,
      req,
      options
    )
    const { status, buildId, clsiCacheShard } = response
    // Clean up on the server that actually handled the compile.
    clsiServerId = response.clsiServerId
    if (status !== 'success') {
      logger.warn(
        { status, templateVersionId, imageName },
        'compiling template failed'
      )
      return
    }
    if (!clsiCacheShard) {
      // The circuit breaker tripped for all clsi -> clsi-cache shards. Try again later.
      return
    }
    await ClsiCacheHandler.exportSubmissionAsTemplate(
      clsiCacheShard,
      submissionId,
      `${editorId}-${buildId}`,
      templateVersionId,
      imageName
    )
  } catch (err) {
    compileOrExportFailed = true
    throw err
  } finally {
    try {
      await ClsiManager.promises.deleteAuxFiles(
        submissionId,
        null,
        options,
        clsiServerId
      )
    } catch (cleanupErr) {
      // A cleanup failure is only surfaced when nothing else went wrong;
      // otherwise log it and let the original error propagate.
      if (!compileOrExportFailed) throw cleanupErr
      logger.warn(
        { err: cleanupErr, templateVersionId, submissionId },
        'failed to delete aux files after template clsi-cache failure'
      )
    }
  }
}
export default {
getLatestBuildFromCache,
getLatestCompileResult,
prepareClsiCache,
createTemplateClsiCache,
}

View File

@@ -904,7 +904,8 @@ function _finaliseRequest(projectId, options, project, docs, files) {
['alpha', 'priority'].includes(options.compileGroup) &&
options.compileFromClsiCache,
populateClsiCache:
['alpha', 'priority'].includes(options.compileGroup) &&
(['alpha', 'priority'].includes(options.compileGroup) ||
options.metricsPath === 'clsi-cache-template') &&
options.populateClsiCache,
enablePdfCaching:
(Settings.enablePdfCaching && options.enablePdfCaching) || false,
@@ -1010,6 +1011,7 @@ function _getClsiServerIdFromResponse(response) {
}
export default {
_finaliseRequest,
sendRequest: callbackifyMultiResult(sendRequest, [
'status',
'outputFiles',

View File

@@ -65,7 +65,7 @@ const TemplatesManager = {
)
throw new Error(`get zip failed: ${zipReq.response.status}`)
}
const project =
const { fileEntries, docEntries, project } =
await ProjectUploadManager.promises.createProjectFromZipArchiveWithName(
userId,
projectName,
@@ -76,17 +76,31 @@ const TemplatesManager = {
const prepareClsiCacheInBackground = ClsiCacheManager.prepareClsiCache(
project._id,
userId,
{ templateId, templateVersionId }
{ templateVersionId, imageName }
).catch(err => {
logger.warn(
{ err, templateId, templateVersionId, projectId: project._id },
{ err, templateVersionId, projectId: project._id },
'failed to prepare clsi-cache from template'
)
return undefined
})
await TemplatesManager._setMainFile(project, mainFile)
await prepareClsiCacheInBackground
const found = await prepareClsiCacheInBackground
if (found === false && project.rootDoc_id) {
ClsiCacheManager.createTemplateClsiCache({
templateVersionId,
project,
fileEntries,
docEntries,
}).catch(err => {
logger.error(
{ err, templateVersionId },
'failed to create template clsi-cache'
)
})
}
return project
} finally {

View File

@@ -86,12 +86,14 @@ async function createProjectFromZipArchiveWithName(
)
try {
await _initializeProjectWithZipContents(ownerId, project, contentsPath)
const { fileEntries, docEntries } =
await _initializeProjectWithZipContents(ownerId, project, contentsPath)
const rootDocId =
await ProjectRootDocManager.promises.setRootDocAutomatically(
project._id
)
if (rootDocId) project.rootDoc_id = rootDocId
return { fileEntries, docEntries, project }
} catch (err) {
// no need to wait for the cleanup here
ProjectDeleter.promises
@@ -104,7 +106,6 @@ async function createProjectFromZipArchiveWithName(
)
throw err
}
return project
} finally {
await fs.promises.rm(contentsPath, { recursive: true, force: true })
}
@@ -159,6 +160,7 @@ async function _initializeProjectWithZipContents(
newProject: { version: projectVersion },
})
await TpdsProjectFlusher.promises.flushProjectToTpds(project._id)
return { fileEntries, docEntries }
}
async function _createEntriesFromImports(project, importEntries) {

View File

@@ -37,9 +37,11 @@ describe('TemplatesManager', function () {
}
ctx.ProjectUploadManager = {
promises: {
createProjectFromZipArchiveWithName: sinon
.stub()
.resolves({ _id: ctx.project_id }),
createProjectFromZipArchiveWithName: sinon.stub().resolves({
project: { _id: ctx.project_id },
fileEntries: [],
docEntries: [],
}),
},
}
ctx.ProjectOptionsHandler = {