[clsi] consolidate metrics for clsi-perf (#30746)

* [clsi] remove all clsi-perf/health-check metrics

* [clsi] always emit E2E compile time metric

* [clsi] do not collect metrics for clsi-cache-template compiles

* [clsi] fix unit tests: request.metricsOpts always exists

* [clsi] use a gauge for the e2e compile time metric of clsi-perf

Co-authored-by: Eric Mc Sween <eric.mcsween@overleaf.com>

* [clsi] remove metrics for binary file downloads from clsi-perf

---------

Co-authored-by: Eric Mc Sween <eric.mcsween@overleaf.com>
GitOrigin-RevId: 7995512e57c802086350e3d1a0ec5213ecdf0a05
This commit is contained in:
Jakob Ackermann
2026-01-16 13:15:58 +00:00
committed by Copybot
parent e5a82e47ce
commit 3f9a7cf463
11 changed files with 105 additions and 38 deletions

View File

@@ -27,6 +27,7 @@ const StatsManager = require('./StatsManager')
const SafeReader = require('./SafeReader')
const { enableLatexMkMetrics, addLatexFdbMetrics } = require('./LatexMetrics')
const { callbackifyMultiResult } = require('@overleaf/promise-utils')
const { shouldSkipMetrics } = require('./Metrics')
const KNOWN_LATEXMK_RULES = new Set([
'biber',
@@ -247,7 +248,7 @@ async function doCompile(request, stats, timings) {
)
}
if (!_shouldSkipMetrics(request)) {
if (!shouldSkipMetrics(request)) {
const status = error.timedout
? 'timeout'
: error.terminated
@@ -284,7 +285,7 @@ async function doCompile(request, stats, timings) {
const status = stats['latexmk-errors'] ? 'error' : 'success'
_emitMetrics(request, status, stats, timings)
if (stats['pdf-size']) {
if (stats['pdf-size'] && !shouldSkipMetrics(request)) {
emitPdfStats(stats, timings, request)
}
@@ -726,12 +727,17 @@ function _isImageNameAllowed(imageName) {
return !ALLOWED_IMAGES || ALLOWED_IMAGES.includes(imageName)
}
function _shouldSkipMetrics(request) {
return ['clsi-perf', 'health-check'].includes(request.metricsOpts.path)
}
function _emitMetrics(request, status, stats, timings) {
if (_shouldSkipMetrics(request)) {
if (request.metricsOpts.path === 'clsi-perf') {
ClsiMetrics.e2eCompileDurationClsiPerfSeconds.set(
{
compile: request.metricsOpts.compile,
variant: request.metricsOpts.method,
},
timings.compileE2E / 1000
)
}
if (shouldSkipMetrics(request)) {
return
}
@@ -832,7 +838,13 @@ function _emitMetrics(request, status, stats, timings) {
}
if (timings.compileE2E != null) {
ClsiMetrics.e2eCompileDurationSeconds.observe(timings.compileE2E / 1000)
ClsiMetrics.e2eCompileDurationSeconds.observe(
{
compile: request.metricsOpts.compile,
group: request.compileGroup,
},
timings.compileE2E / 1000
)
}
}

View File

@@ -31,6 +31,13 @@ const e2eCompileDurationSeconds = new prom.Histogram({
name: 'clsi_e2e_compile_duration_seconds',
help: 'Duration of the entire compile request in clsi (sync, latexmk, output)',
buckets: COMPILE_TIME_BUCKETS,
labelNames: ['compile', 'group'],
})
const e2eCompileDurationClsiPerfSeconds = new prom.Gauge({
name: 'clsi_e2e_compile_duration_clsi_perf_seconds',
help: 'Duration of the entire compile request in clsi (sync, latexmk, output) for clsi-perf',
labelNames: ['compile', 'variant'],
})
const syncResourcesDurationSeconds = new prom.Histogram({
@@ -61,12 +68,20 @@ const imageProcessingDurationSeconds = new prom.Histogram({
labelNames: ['group', 'type'],
})
function shouldSkipMetrics(request) {
return ['clsi-perf', 'health-check', 'clsi-cache-template'].includes(
request.metricsOpts.path
)
}
module.exports = {
compilesTotal,
compileDurationSeconds,
e2eCompileDurationSeconds,
e2eCompileDurationClsiPerfSeconds,
syncResourcesDurationSeconds,
processOutputFilesDurationSeconds,
latexmkRuleDurationSeconds,
imageProcessingDurationSeconds,
shouldSkipMetrics,
}

View File

@@ -23,6 +23,7 @@ const ResourceStateManager = require('./ResourceStateManager')
const Metrics = require('@overleaf/metrics')
const logger = require('@overleaf/logger')
const settings = require('@overleaf/settings')
const { shouldSkipMetrics } = require('./Metrics')
const parallelFileDownloads = settings.parallelFileDownloads || 1
@@ -193,7 +194,7 @@ module.exports = ResourceWriter = {
request.metricsOpts
)
const callback = function (error, ...result) {
timer.done()
if (!shouldSkipMetrics(request)) timer.done()
return _callback(error, ...Array.from(result))
}

View File

@@ -1,4 +1,5 @@
const crypto = require('node:crypto')
const { shouldSkipMetrics } = require('./Metrics')
/**
* Consistently sample a keyspace with a given sample percentage.
@@ -21,8 +22,6 @@ function sampleByHash(key, samplePercentage) {
return percentile < samplePercentage
}
const EXCLUDED_METRICS_OPTS_PATHS = new Set(['health-check', 'clsi-perf'])
/**
* Determines whether a given request should be sampled based on user ID and sampling percentage.
* The request will not be sampled if it lacks a user_id, if its metrics path is in the exclusion list,
@@ -39,7 +38,7 @@ function sampleRequest(request, samplingPercentage) {
if (!request.user_id) {
return
}
if (EXCLUDED_METRICS_OPTS_PATHS.has(request.metricsOpts?.path)) {
if (shouldSkipMetrics(request)) {
return
}
if (samplingPercentage > 0) {

View File

@@ -85,16 +85,20 @@ async function pipeUrlToFile(url, fallbackURL, filePath) {
}
const source = inferSource(url)
Metrics.inc('url_source', 1, { path: source })
if (source !== 'clsi-perf') {
Metrics.inc('url_source', 1, { path: source })
}
const atomicWrite = filePath + '~'
try {
const output = fs.createWriteStream(atomicWrite)
await pipeline(stream, output)
await fs.promises.rename(atomicWrite, filePath)
Metrics.count('UrlFetcher.downloaded_bytes', output.bytesWritten, {
path: source,
})
if (source !== 'clsi-perf') {
Metrics.count('UrlFetcher.downloaded_bytes', output.bytesWritten, {
path: source,
})
}
} catch (err) {
try {
await fs.promises.unlink(atomicWrite)

View File

@@ -1,7 +1,7 @@
clsi
--data-dirs=cache,compiles,output
--dependencies=
--env-add=ENABLE_PDF_CACHING="true",PDF_CACHING_ENABLE_WORKER_POOL="true",ALLOWED_IMAGES=quay.io/sharelatex/texlive-full:2025.1,TEXLIVE_IMAGE=quay.io/sharelatex/texlive-full:2025.1,TEX_LIVE_IMAGE_NAME_OVERRIDE=us-east1-docker.pkg.dev/overleaf-ops/ol-docker,TEXLIVE_IMAGE_USER="tex",SANDBOXED_COMPILES="true",SANDBOXED_COMPILES_HOST_DIR_COMPILES=$PWD/compiles,SANDBOXED_COMPILES_HOST_DIR_OUTPUT=$PWD/output
--env-add=ALLOWED_COMPILE_GROUPS="clsi-perf simple-latex-file",ENABLE_PDF_CACHING="true",PDF_CACHING_ENABLE_WORKER_POOL="true",ALLOWED_IMAGES=quay.io/sharelatex/texlive-full:2025.1,TEXLIVE_IMAGE=quay.io/sharelatex/texlive-full:2025.1,TEX_LIVE_IMAGE_NAME_OVERRIDE=us-east1-docker.pkg.dev/overleaf-ops/ol-docker,TEXLIVE_IMAGE_USER="tex",SANDBOXED_COMPILES="true",SANDBOXED_COMPILES_HOST_DIR_COMPILES=$PWD/compiles,SANDBOXED_COMPILES_HOST_DIR_OUTPUT=$PWD/output
--env-pass-through=
--esmock-loader=False
--node-version=22.18.0

View File

@@ -26,6 +26,7 @@ services:
MOCHA_GREP: ${MOCHA_GREP}
NODE_ENV: test
NODE_OPTIONS: "--unhandled-rejections=strict"
ALLOWED_COMPILE_GROUPS: "clsi-perf simple-latex-file"
ENABLE_PDF_CACHING: "true"
PDF_CACHING_ENABLE_WORKER_POOL: "true"
ALLOWED_IMAGES: quay.io/sharelatex/texlive-full:2025.1

View File

@@ -40,6 +40,7 @@ services:
LOG_LEVEL: ${LOG_LEVEL:-}
NODE_ENV: test
NODE_OPTIONS: "--unhandled-rejections=strict"
ALLOWED_COMPILE_GROUPS: "clsi-perf simple-latex-file"
ENABLE_PDF_CACHING: "true"
PDF_CACHING_ENABLE_WORKER_POOL: "true"
ALLOWED_IMAGES: quay.io/sharelatex/texlive-full:2025.1

View File

@@ -1,7 +1,8 @@
const Client = require('./helpers/Client')
const { fetchNothing } = require('@overleaf/fetch-utils')
const { fetchNothing, fetchString } = require('@overleaf/fetch-utils')
const ClsiApp = require('./helpers/ClsiApp')
const { expect } = require('chai')
const Settings = require('@overleaf/settings')
describe('Simple LaTeX file', function () {
const content = `\
@@ -15,6 +16,9 @@ Hello world
description: 'simple file',
request: {
resources: [{ path: 'main.tex', content }],
options: {
compileGroup: 'simple-latex-file',
},
},
},
{
@@ -22,8 +26,10 @@ Hello world
request: {
resources: [{ path: 'main.tex', content }],
options: {
enablePdfCaching: false,
metricsPath: 'clsi-perf',
metricsMethod: 'priority',
metricsMethod: 'memoir-manual',
compileGroup: 'clsi-perf', // only used by tests, not by the service
},
},
},
@@ -65,8 +71,46 @@ Hello world
response.status.should.equal(200)
})
if (scenario.description === 'clsi-perf request') {
it('should only emit e2e compile time metric', async function () {
const metrics = await fetchString(`${Settings.apis.clsi.url}/metrics`)
const byPath = `path="${scenario.request.options.metricsPath}"`
const byMethod = `method="${scenario.request.options.metricsMethod}"`
const byVariant = `variant="${scenario.request.options.metricsMethod}"`
const byGroup = `group="${scenario.request.options.compileGroup}"`
expect(metrics).to.not.include(byPath)
expect(metrics).to.not.include(byMethod)
expect(metrics).to.not.include(byGroup)
expect(metrics).to.include(byVariant)
expect(metrics.match(new RegExp(byVariant, 'g'))).to.have.lengthOf(1)
})
} else {
it('should shard metrics by compileGroup', async function () {
const metrics = await fetchString(`${Settings.apis.clsi.url}/metrics`)
const byGroup = `group="${scenario.request.options.compileGroup}"`
expect(metrics).to.include(byGroup)
expect(metrics.match(new RegExp(byGroup, 'g'))).to.have.lengthOf(134)
})
}
it('should return only the expected keys for stats and timings', function () {
const { stats, timings } = this.body.compile
let pdfCachingStats = []
let pdfCachingTimings = []
if (scenario.request.options.enablePdfCaching !== false) {
pdfCachingStats = [
'pdf-caching-total-ranges-size',
'pdf-caching-reclaimed-space',
'pdf-caching-new-ranges-size',
'pdf-caching-n-ranges',
'pdf-caching-n-new-ranges',
]
pdfCachingTimings = [
'compute-pdf-caching',
'pdf-caching-overhead-delete-stale-hashes',
]
}
// Note: chai's all.keys assertion rejects extra keys
stats.should.have.all.keys(
'isInitialCompile',
@@ -75,20 +119,15 @@ Hello world
'latex-runs-with-errors',
'latex-runs-1',
'latex-runs-with-errors-1',
'pdf-caching-total-ranges-size',
'pdf-caching-reclaimed-space',
'pdf-caching-new-ranges-size',
'pdf-caching-n-ranges',
'pdf-caching-n-new-ranges',
'pdf-size'
'pdf-size',
...pdfCachingStats
)
timings.should.have.all.keys(
'sync',
'compile',
'output',
'compileE2E',
'compute-pdf-caching',
'pdf-caching-overhead-delete-stale-hashes'
...pdfCachingTimings
)
})
})

View File

@@ -246,6 +246,7 @@ describe('ResourceWriter', function () {
syncType: 'incremental',
syncState: (this.syncState = '1234567890abcdef'),
resources: this.resources,
metricsOpts: { path: 'foo' },
}
this.OutputFileFinder.findOutputFiles = sinon
.stub()

View File

@@ -85,7 +85,7 @@ describe('StatsManager', function () {
describe('sampleRequest', function () {
it('should return undefined if there is no user_id', function () {
const request = {}
const request = { metricsOpts: {} }
const percentage = 50
expect(sampleRequest(request, percentage)).to.be.undefined
})
@@ -127,23 +127,17 @@ describe('StatsManager', function () {
})
it('should return undefined if the sampling percentage is 0', function () {
const request = { user_id: 'some-user' }
const request = { user_id: 'some-user', metricsOpts: {} }
const percentage = 0
expect(sampleRequest(request, percentage)).to.be.undefined
})
it('should return undefined if the sampling percentage is negative', function () {
const request = { user_id: 'some-user' }
const request = { user_id: 'some-user', metricsOpts: {} }
const percentage = -10
expect(sampleRequest(request, percentage)).to.be.undefined
})
it('should sample if there are no metricsOpts', function () {
const request = { user_id: 'test-key-in' } // percentile 13
const percentage = 40
expect(sampleRequest(request, percentage)).to.be.true
})
it('should sample if metricsOpts has no path', function () {
const request = { user_id: 'test-key-in', metricsOpts: {} } // percentile 13
const percentage = 40
@@ -151,13 +145,13 @@ describe('StatsManager', function () {
})
it('should return true for a request that should be sampled', function () {
const request = { user_id: 'test-key-in' } // percentile 13
const request = { user_id: 'test-key-in', metricsOpts: {} } // percentile 13
const percentage = 40
expect(sampleRequest(request, percentage)).to.be.true
})
it('should return false for a request that should not be sampled', function () {
const request = { user_id: 'test-key-outer' } // percentile 47
const request = { user_id: 'test-key-outer', metricsOpts: {} } // percentile 47
const percentage = 40
expect(sampleRequest(request, percentage)).to.be.false
})