[clsi] minor fixes for clsi-cache (#30551)

* [clsi] fix circuit breaker for clsi-cache

* [clsi] enable ts-check for CLSICacheHandler

* [clsi] limit the number of .blg files in clsi-cache to 50

* [clsi-cache] limit the number of files per job to 100

* [clsi-cache] explain early registration of buildId

* [clsi-cache] lock down downloads via nginx to project folder

GitOrigin-RevId: 081d0c40b08db3a384c4d765b71a50b973f42151
This commit is contained in:
Jakob Ackermann
2026-01-07 08:49:20 +00:00
committed by Copybot
parent 0fc0e54cf5
commit 32ad596e54
2 changed files with 57 additions and 29 deletions

View File

@@ -97,6 +97,12 @@ function histogram(key, value, buckets, labels = {}) {
} }
class Timer { class Timer {
/**
* @param {string} key
* @param {number} sampleRate
* @param {Record<string, any>} labels
* @param {Array<number>} buckets
*/
constructor(key, sampleRate = 1, labels = {}, buckets = undefined) { constructor(key, sampleRate = 1, labels = {}, buckets = undefined) {
if (typeof sampleRate === 'object') { if (typeof sampleRate === 'object') {
// called with (key, labels, buckets) // called with (key, labels, buckets)

View File

@@ -1,3 +1,4 @@
// @ts-check
const crypto = require('node:crypto') const crypto = require('node:crypto')
const fs = require('node:fs') const fs = require('node:fs')
const Path = require('node:path') const Path = require('node:path')
@@ -26,6 +27,7 @@ const TIMING_BUCKETS = [
0, 10, 100, 1000, 2000, 5000, 10000, 15000, 20000, 30000, 0, 10, 100, 1000, 2000, 5000, 10000, 15000, 20000, 30000,
] ]
const MAX_ENTRIES_IN_OUTPUT_TAR = 100 const MAX_ENTRIES_IN_OUTPUT_TAR = 100
const MAX_BLG_FILES = 50
const OBJECT_ID_REGEX = /^[0-9a-f]{24}$/ const OBJECT_ID_REGEX = /^[0-9a-f]{24}$/
/** /**
@@ -40,6 +42,10 @@ function getShard(projectId) {
return Settings.apis.clsiCache.shards[idx] return Settings.apis.clsiCache.shards[idx]
} }
/**
* @param {string} url
* @return {boolean}
*/
function checkCircuitBreaker(url) { function checkCircuitBreaker(url) {
const lastFailure = lastFailures.get(url) ?? 0 const lastFailure = lastFailures.get(url) ?? 0
if (lastFailure) { if (lastFailure) {
@@ -52,23 +58,31 @@ function checkCircuitBreaker(url) {
return false return false
} }
/**
 * Record a failure for the given clsi-cache shard by storing the current
 * `performance.now()` timestamp under its URL in `lastFailures`.
 *
 * @param {string} url - key under which the failure timestamp is stored
 */
function tripCircuitBreaker(url) { function tripCircuitBreaker(url) {
lastFailures.set(url, performance.now()) // The shard is unhealthy. Refresh timestamp of last failure. lastFailures.set(url, performance.now()) // The shard is unhealthy. Refresh timestamp of last failure.
} }
/**
 * Forget any recorded failure for the given clsi-cache shard by deleting
 * its URL entry from `lastFailures`.
 *
 * @param {string} url - key whose failure timestamp is removed
 */
function closeCircuitBreaker(url) { function closeCircuitBreaker(url) {
lastFailures.delete(url) // The shard is back up. lastFailures.delete(url) // The shard is back up.
} }
/** /**
* @param {string} projectId * @param {Object} opts
* @param {string} userId * @param {string} opts.projectId
* @param {string} buildId * @param {string} opts.userId
* @param {string} editorId * @param {string} opts.buildId
* @param {[{path: string}]} outputFiles * @param {string} opts.editorId
* @param {string} compileGroup * @param {[{path: string}]} opts.outputFiles
* @param {Record<string, number>} stats * @param {string} opts.compileGroup
* @param {Record<string, number>} timings * @param {Record<string, number>} opts.stats
* @param {Record<string, any>} options * @param {Record<string, number>} opts.timings
* @param {Record<string, any>} opts.options
* @return {string | undefined} * @return {string | undefined}
*/ */
function notifyCLSICacheAboutBuild({ function notifyCLSICacheAboutBuild({
@@ -88,7 +102,7 @@ function notifyCLSICacheAboutBuild({
if (checkCircuitBreaker(url)) return undefined if (checkCircuitBreaker(url)) return undefined
/** /**
* @param {[{path: string}]} files * @param {{path: string}[]} files
*/ */
const enqueue = files => { const enqueue = files => {
const body = Buffer.from( const body = Buffer.from(
@@ -117,7 +131,11 @@ function notifyCLSICacheAboutBuild({
nFiles: files.length, nFiles: files.length,
outputPDFSize: outputPDFSize:
outputPDF && Buffer.from(JSON.stringify(outputPDF)).byteLength, outputPDF && Buffer.from(JSON.stringify(outputPDF)).byteLength,
nPDFCachingRanges: outputPDF?.ranges?.length, nPDFCachingRanges:
outputPDF &&
'ranges' in outputPDF &&
Array.isArray(outputPDF.ranges) &&
outputPDF.ranges.length,
}, },
'large clsi-cache request' 'large clsi-cache request'
) )
@@ -130,10 +148,10 @@ function notifyCLSICacheAboutBuild({
signal: AbortSignal.timeout(TIMEOUT), signal: AbortSignal.timeout(TIMEOUT),
}) })
.then(() => { .then(() => {
closeCircuitBreaker() closeCircuitBreaker(url)
}) })
.catch(err => { .catch(err => {
tripCircuitBreaker() tripCircuitBreaker(url)
logger.warn( logger.warn(
{ err, projectId, userId, buildId }, { err, projectId, userId, buildId },
'enqueue for clsi cache failed' 'enqueue for clsi cache failed'
@@ -148,14 +166,17 @@ function notifyCLSICacheAboutBuild({
f => f =>
f.path === 'output.pdf' || f.path === 'output.pdf' ||
f.path === 'output.log' || f.path === 'output.log' ||
f.path === 'output.synctex.gz' || f.path === 'output.synctex.gz'
f.path.endsWith('.blg') )
.concat(
outputFiles.filter(f => f.path.endsWith('.blg')).slice(0, MAX_BLG_FILES)
) )
.map(f => { .map(f => {
const lean = { path: f.path }
if (f.path === 'output.pdf') { if (f.path === 'output.pdf') {
return _.pick(f, 'path', 'size', 'contentId', 'ranges') Object.assign(lean, _.pick(f, 'path', 'size', 'contentId', 'ranges'))
} }
return _.pick(f, 'path') return lean
}) })
) )
@@ -175,10 +196,11 @@ function notifyCLSICacheAboutBuild({
} }
/** /**
* @param {string} projectId * @param {Object} opts
* @param {string} userId * @param {string} opts.projectId
* @param {string} buildId * @param {string} opts.userId
* @param {[{path: string}]} outputFiles * @param {string} opts.buildId
* @param {[{path: string}]} opts.outputFiles
* @return {Promise<void>} * @return {Promise<void>}
*/ */
async function buildTarball({ projectId, userId, buildId, outputFiles }) { async function buildTarball({ projectId, userId, buildId, outputFiles }) {
@@ -253,11 +275,11 @@ async function downloadOutputDotSynctexFromCompileCache(
) )
} catch (err) { } catch (err) {
if (err instanceof RequestFailedError && err.response.status === 404) { if (err instanceof RequestFailedError && err.response.status === 404) {
closeCircuitBreaker() closeCircuitBreaker(url)
timer.done({ status: 'not-found' }) timer.done({ status: 'not-found' })
return false return false
} }
tripCircuitBreaker() tripCircuitBreaker(url)
timer.done({ status: 'error' }) timer.done({ status: 'error' })
throw err throw err
} }
@@ -274,13 +296,13 @@ async function downloadOutputDotSynctexFromCompileCache(
) )
await fs.promises.rename(tmp, dst) await fs.promises.rename(tmp, dst)
} catch (err) { } catch (err) {
tripCircuitBreaker() tripCircuitBreaker(url)
try { try {
await fs.promises.unlink(tmp) await fs.promises.unlink(tmp)
} catch {} } catch {}
throw err throw err
} }
closeCircuitBreaker() closeCircuitBreaker(url)
timer.done({ status: 'success' }) timer.done({ status: 'success' })
return true return true
} }
@@ -316,11 +338,11 @@ async function downloadLatestCompileCache(projectId, userId, compileDir) {
) )
} catch (err) { } catch (err) {
if (err instanceof RequestFailedError && err.response.status === 404) { if (err instanceof RequestFailedError && err.response.status === 404) {
closeCircuitBreaker() closeCircuitBreaker(url)
timer.done({ status: 'not-found' }) timer.done({ status: 'not-found' })
return false return false
} }
tripCircuitBreaker() tripCircuitBreaker(url)
timer.done({ status: 'error' }) timer.done({ status: 'error' })
throw err throw err
} }
@@ -366,10 +388,10 @@ async function downloadLatestCompileCache(projectId, userId, compileDir) {
}) })
) )
} catch (err) { } catch (err) {
tripCircuitBreaker() tripCircuitBreaker(url)
throw err throw err
} }
closeCircuitBreaker() closeCircuitBreaker(url)
Metrics.count('clsi_cache_download_entries', n) Metrics.count('clsi_cache_download_entries', n)
timer.done({ status: 'success' }) timer.done({ status: 'success' })
return !abort return !abort