diff --git a/services/clsi/app.js b/services/clsi/app.js index 872f612d9c..080c533de9 100644 --- a/services/clsi/app.js +++ b/services/clsi/app.js @@ -6,6 +6,9 @@ const ContentController = require('./app/js/ContentController') const Settings = require('@overleaf/settings') const logger = require('@overleaf/logger') logger.initialize('clsi') +const LoggerSerializers = require('./app/js/LoggerSerializers') +logger.logger.serializers.clsiRequest = LoggerSerializers.clsiRequest + const Metrics = require('@overleaf/metrics') const smokeTest = require('./test/smoke/js/SmokeTests') diff --git a/services/clsi/app/js/CompileManager.js b/services/clsi/app/js/CompileManager.js index 1b66927412..2248623c52 100644 --- a/services/clsi/app/js/CompileManager.js +++ b/services/clsi/app/js/CompileManager.js @@ -23,6 +23,7 @@ const { downloadLatestCompileCache, downloadOutputDotSynctexFromCompileCache, } = require('./CLSICacheHandler') +const StatsManager = require('./StatsManager') const { callbackifyMultiResult } = require('@overleaf/promise-utils') const COMPILE_TIME_BUCKETS = [ @@ -191,6 +192,26 @@ async function doCompile(request, stats, timings) { Metrics.inc(`compiles-with-image.${tag}`, 1, request.metricsOpts) const compileName = getCompileName(request.project_id, request.user_id) + // Record latexmk -time stats for a subset of users + const recordPerformanceMetrics = + request.user_id != null && + Settings.performanceLogSamplingPercentage > 0 && + StatsManager.sampleByHash( + request.user_id, + Settings.performanceLogSamplingPercentage + ) + // For selected users, define a `latexmk` property on the stats object + // to collect latexmk -time stats. + if (recordPerformanceMetrics) { + // To prevent any changes to the existing compile responses being sent + // to web, exclude latexmk stats from being exported by marking them + // non-enumerable. This prevents them being serialised by JSON.stringify(). + Object.defineProperty(stats, 'latexmk', { + value: {}, + enumerable: false, + }) + } + try { await LatexRunner.promises.runLatex(compileName, { directory: compileDir, @@ -257,11 +278,15 @@ async function doCompile(request, stats, timings) { Metrics.inc('compiles-succeeded', 1, request.metricsOpts) for (const metricKey in stats) { const metricValue = stats[metricKey] - Metrics.count(metricKey, metricValue, 1, request.metricsOpts) + if (typeof metricValue === 'number') { + Metrics.count(metricKey, metricValue, 1, request.metricsOpts) + } } for (const metricKey in timings) { const metricValue = timings[metricKey] - Metrics.timing(metricKey, metricValue, 1, request.metricsOpts) + if (typeof metricValue === 'number') { + Metrics.timing(metricKey, metricValue, 1, request.metricsOpts) + } } const loadavg = typeof os.loadavg === 'function' ? os.loadavg() : undefined if (loadavg != null) { @@ -320,6 +345,27 @@ async function doCompile(request, stats, timings) { emitPdfStats(stats, timings, request) } + // Record compile performance for a subset of users + if (recordPerformanceMetrics) { + logger.info( + { + userId: request.user_id, + projectId: request.project_id, + timeTaken: ts, + clsiRequest: request, + stats, + timings, + // explicitly include latexmk stats to bypass the non-enumerable property + latexmk: stats.latexmk, + loadavg1m: loadavg?.[0], + loadavg5m: loadavg?.[1], + loadavg15m: loadavg?.[2], + samplingPercentage: Settings.performanceLogSamplingPercentage, + }, + 'sampled performance log' + ) + } + return { outputFiles, buildId } } diff --git a/services/clsi/app/js/LatexMetrics.js b/services/clsi/app/js/LatexMetrics.js new file mode 100644 index 0000000000..e24fc96528 --- /dev/null +++ b/services/clsi/app/js/LatexMetrics.js @@ -0,0 +1,109 @@ +/** + * Converts a time string in seconds to an integer number of milliseconds. + * + * @param {string} timeStr - The time duration in seconds, represented as a string. + * @returns {number} The equivalent time in milliseconds, rounded down to the nearest integer. + */ +function convertToMs(timeStr) { + return Math.floor(parseFloat(timeStr, 10) * 1000) +} + +/* An array of metric parsers for `latexmk` time output (`-time` flag). + * Each entry is a tuple containing a metric name and a function to parse that + * metric from the `latexmk` log output. + * + * The parser functions generally take the log string as their first argument. + * Some may take additional arguments, such as the already computed stats, + * to derive new metrics. + * + * There are different formats of `latexmk` output depending on the version. + * The parsers attempt to handle these variations gracefully. + */ +const LATEX_MK_METRICS = [ + // Extract individual latexmk rule times as an array of objects, each with 'rule' + // and 'time_ms' properties + [ + 'latexmk-rule-times', + s => { + // Each line looks like: 'pdflatex ... options ...': time = 12.34 + // Take the command up to the first space as the rule name + const matches = s.matchAll(/^'([^' ]+).*': time = (\d+\.\d+)/gm) + return Array.from(matches, match => ({ + rule: match[1], + time_ms: convertToMs(match[2]), + })) + }, + ], + // Extract a comma-separated signature of rule names from the rule times above + [ + 'latexmk-rule-signature', + (s, latexmkStats) => { + const times = latexmkStats['latexmk-rule-times'] + if (!times) return null + // Example output: 'pdflatex,bibtex,pdflatex,pdflatex' + return times.map(t => t.rule).join(',') + }, + ], + // Total latexmk processing time, invoked processes time, and other time in ms + [ + 'latexmk-time', + s => { + const match = s.match( + /^Processing time = (\d+\.\d+), of which invoked processes = (\d+\.\d+), other = (\d+\.\d+)/m + ) + if (match) { + return { + total: convertToMs(match[1]), + invoked: convertToMs(match[2]), + other: convertToMs(match[3]), + } + } + // Older format + const fallbackMatch = s.match( + /^Accumulated processing time = (\d+\.\d+)/m + ) + if (fallbackMatch) { + return { total: convertToMs(fallbackMatch[1]) } + } + return null + }, + ], + // Total elapsed clock time in ms for latexmk + [ + 'latexmk-clock-time', + s => { + const match = s.match(/^Elapsed clock time = (\d+\.\d+)/m) + if (match) { + return convertToMs(match[1]) + } + // not present in older versions + return null + }, + ], + // Number of rules run by latexmk + [ + 'latexmk-rules-run', + (s, latexmkStats) => { + const match = s.match(/^Number of rules run = (\d+)/m) + if (match) { + return parseInt(match[1], 10) + } + // Fallback: count number of entries in rule times if available + if (latexmkStats['latexmk-rule-times']) { + return latexmkStats['latexmk-rule-times'].length + } + return null + }, + ], +] + +function addLatexMkMetrics(output, stats) { + for (const [stat, matcher] of LATEX_MK_METRICS) { + const match = matcher(output?.stdout || '', stats.latexmk) + if (match) { + stats.latexmk[stat] = match + } + } +} + +module.exports = { addLatexMkMetrics } diff --git a/services/clsi/app/js/LatexRunner.js b/services/clsi/app/js/LatexRunner.js index beefa002ab..7b8098711e 100644 --- a/services/clsi/app/js/LatexRunner.js +++ b/services/clsi/app/js/LatexRunner.js @@ -3,15 +3,16 @@ const { promisify } = require('node:util') const Settings = require('@overleaf/settings') const logger = require('@overleaf/logger') const CommandRunner = require('./CommandRunner') +const { addLatexMkMetrics } = require('./LatexMetrics') const fs = require('node:fs') const ProcessTable = {} // table of currently running jobs (pids or docker container names) -const TIME_V_METRICS = Object.entries({ - 'cpu-percent': /Percent of CPU this job got: (\d+)/m, - 'cpu-time': /User time.*: (\d+.\d+)/m, - 'sys-time': /System time.*: (\d+.\d+)/m, -}) +const TIME_V_METRICS = [ + ['cpu-percent', /Percent of CPU this job got: (\d+)/m], + ['cpu-time', /User time.*: (\d+.\d+)/m], + ['sys-time', /System time.*: (\d+.\d+)/m], +] const COMPILER_FLAGS = { latex: '-pdfdvi', @@ -75,6 +76,14 @@ function runLatex(projectId, options, callback) { if (error) { return callback(error) } + if (stats.latexmk) { + try { + addLatexMkMetrics(output, stats) + } catch (err) { + logger.error({ err, projectId }, 'error adding latexmk metrics') + } + } + // number of latex runs and whether there were errors const runs = output?.stderr?.match(/^Run number \d+ of .*latex/gm)?.length || 0 const failed = output?.stdout?.match(/^Latexmk: Errors/m) != null ? 1 : 0 @@ -161,7 +170,8 @@ function _buildLatexCommand(mainFile, opts = {}) { '-auxdir=$COMPILE_DIR', '-outdir=$COMPILE_DIR', '-synctex=1', - '-interaction=batchmode' + '-interaction=batchmode', + '-time' ) // Stop on first error option diff --git a/services/clsi/app/js/LoggerSerializers.js b/services/clsi/app/js/LoggerSerializers.js new file mode 100644 index 0000000000..20b4330b52 --- /dev/null +++ b/services/clsi/app/js/LoggerSerializers.js @@ -0,0 +1,40 @@ +const Path = require('node:path') + +const CLSI_REQUEST_SERIALIZED_PROPERTIES = [ + 'compiler', + 'compileFromClsiCache', + 'populateClsiCache', + 'enablePdfCaching', + 'pdfCachingMinChunkSize', + 'timeout', + 'imageName', + 'draft', + 'stopOnFirstError', + 'check', + 'flags', + 'compileGroup', + 'syncType', +] + +module.exports = { + /** + * Serializer for a CLSI request object. + * Only includes properties useful for logging. + * Excludes large, sensitive, or irrelevant properties (e.g., 'syncState', 'resources'). + * To add more properties, update the allowed properties above. + * + * @param {object} clsiRequest - The original CLSI request object. + * @returns {object} A summarized version of the request object for logging. + */ + clsiRequest(clsiRequest) { + const summary = {} + for (const key of CLSI_REQUEST_SERIALIZED_PROPERTIES) { + if (key === 'imageName' && clsiRequest.imageName) { + summary.imageName = Path.basename(clsiRequest.imageName) + } else if (clsiRequest[key] !== undefined) { + summary[key] = clsiRequest[key] + } + } + return summary + }, +} diff --git a/services/clsi/app/js/StatsManager.js b/services/clsi/app/js/StatsManager.js new file mode 100644 index 0000000000..9d46b6bbb1 --- /dev/null +++ b/services/clsi/app/js/StatsManager.js @@ -0,0 +1,24 @@ +const crypto = require('node:crypto') + +/** + * Consistently sample a keyspace with a given sample percentage. + * The same key will always produce a consistent percentile value that + * can be compared against the sample percentage. + * Example: if key is the userId and the samplePercentage is 10, then + * we see all the activity for the 10% of users who are selected. + * + * @param {string} key - The unique identifier to be hashed and checked. + * @param {number} samplePercentage - The percentage (0-100) of keys that should return true. + * @returns {boolean} - True if the key is within the sample, false otherwise. + */ +function sampleByHash(key, samplePercentage) { + if (samplePercentage <= 0) { + return false + } + const hash = crypto.createHash('md5').update(key).digest() + const hashValue = hash.readUInt32BE(0) + const percentile = Math.floor((hashValue / 0xffffffff) * 100) + return percentile < samplePercentage +} + +module.exports = { sampleByHash } diff --git a/services/clsi/config/settings.defaults.js b/services/clsi/config/settings.defaults.js index 1d82258a8e..3f0b77cbd0 100644 --- a/services/clsi/config/settings.defaults.js +++ b/services/clsi/config/settings.defaults.js @@ -81,6 +81,8 @@ module.exports = { pdfCachingWorkerPoolBackLogLimit: parseInt(process.env.PDF_CACHING_WORKER_POOL_BACK_LOG_LIMIT, 10) || 40, compileConcurrencyLimit: isPreEmptible ? 32 : 64, + performanceLogSamplingPercentage: + parseFloat(process.env.CLSI_PERFORMANCE_LOG_SAMPLING, 10) || 0, } if (process.env.ALLOWED_COMPILE_GROUPS) { diff --git a/services/clsi/test/acceptance/js/SimpleLatexFileTests.js b/services/clsi/test/acceptance/js/SimpleLatexFileTests.js index b8e2724197..315873b575 100644 --- a/services/clsi/test/acceptance/js/SimpleLatexFileTests.js +++ b/services/clsi/test/acceptance/js/SimpleLatexFileTests.js @@ -67,4 +67,31 @@ Hello world }) .should.equal(true) }) + + it('should return only the expected keys for stats and timings', function () { + const { stats, timings } = this.body.compile + // Note: chai's all.keys assertion rejects extra keys + stats.should.have.all.keys( + 'isInitialCompile', + 'latexmk-errors', + 'latex-runs', + 'latex-runs-with-errors', + 'latex-runs-2', + 'latex-runs-with-errors-2', + 'pdf-caching-total-ranges-size', + 'pdf-caching-reclaimed-space', + 'pdf-caching-new-ranges-size', + 'pdf-caching-n-ranges', + 'pdf-caching-n-new-ranges', + 'pdf-size' + ) + timings.should.have.all.keys( + 'sync', + 'compile', + 'output', + 'compileE2E', + 'compute-pdf-caching', + 'pdf-caching-overhead-delete-stale-hashes' + ) + }) }) diff --git a/services/clsi/test/unit/js/LatexRunnerTests.js b/services/clsi/test/unit/js/LatexRunnerTests.js index 0d250dd517..0314800d85 100644 --- a/services/clsi/test/unit/js/LatexRunnerTests.js +++ b/services/clsi/test/unit/js/LatexRunnerTests.js @@ -1,6 +1,8 @@ const SandboxedModule = require('sandboxed-module') const sinon = require('sinon') const { expect } = require('chai') +const fs = require('node:fs') +const path = require('node:path') const MODULE_PATH = require('node:path').join( __dirname, @@ -87,6 +89,7 @@ describe('LatexRunner', function () { '-outdir=$COMPILE_DIR', '-synctex=1', '-interaction=batchmode', + '-time', '-f', '-pdf', '$COMPILE_DIR/main-file.tex', @@ -140,6 +143,7 @@ describe('LatexRunner', function () { '-outdir=$COMPILE_DIR', '-synctex=1', '-interaction=batchmode', + '-time', '-f', '-lualatex', '$COMPILE_DIR/main-file.tex', @@ -209,11 +213,75 @@ describe('LatexRunner', function () { '-outdir=$COMPILE_DIR', '-synctex=1', '-interaction=batchmode', + '-time', '-halt-on-error', '-pdf', '$COMPILE_DIR/main-file.tex', ]) }) }) + + describe('with old latexmk timing output', function () { + beforeEach(function (done) { + this.commandRunnerOutput.stdout = fs.readFileSync( + path.join(__dirname, 'fixtures', 'latexmk1.txt'), + 'utf-8' + ) + // pass in the `latexmk` property to signal that we want to receive parsed stats + this.stats.latexmk = {} + this.call(done) + }) + + it('should parse latexmk 4.52c (2017) timing information', function () { + expect(this.stats.latexmk).to.deep.equal({ + 'latexmk-rule-times': [ + { rule: 'makeindex', time_ms: 30 }, + { rule: 'bibtex', time_ms: 40 }, + { rule: 'latex', time_ms: 690 }, + { rule: 'makeindex', time_ms: 40 }, + { rule: 'bibtex', time_ms: 39 }, + { rule: 'latex', time_ms: 750 }, + { rule: 'makeindex', time_ms: 39 }, + { rule: 'bibtex', time_ms: 20 }, + { rule: 'latex', time_ms: 770 }, + ], + 'latexmk-rule-signature': + 'makeindex,bibtex,latex,makeindex,bibtex,latex,makeindex,bibtex,latex', + 'latexmk-rules-run': 9, + 'latexmk-time': { total: 2930 }, + }) + }) + }) + + describe('with modern latexmk timing output', function () { + beforeEach(function (done) { + this.commandRunnerOutput.stdout = fs.readFileSync( + path.join(__dirname, 'fixtures', 'latexmk2.txt'), + 'utf-8' + ) + // pass in the `latexmk` property to signal that we want to receive parsed stats + this.stats.latexmk = {} + this.call(done) + }) + + it('should parse latexmk 4.83 (2024) timing information', function () { + expect(this.stats.latexmk).to.deep.equal({ + 'latexmk-rule-times': [ + { rule: 'latex', time_ms: 1880 }, + { rule: 'makeindex', time_ms: 50 }, + { rule: 'bibtex', time_ms: 50 }, + { rule: 'latex', time_ms: 2180 }, + ], + 'latexmk-rule-signature': 'latex,makeindex,bibtex,latex', + 'latexmk-time': { + total: 4770, + invoked: 4160, + other: 610, + }, + 'latexmk-clock-time': 4870, + 'latexmk-rules-run': 4, + }) + }) + }) }) }) diff --git a/services/clsi/test/unit/js/StatsManagerTests.js b/services/clsi/test/unit/js/StatsManagerTests.js new file mode 100644 index 0000000000..7fb520fe94 --- /dev/null +++ b/services/clsi/test/unit/js/StatsManagerTests.js @@ -0,0 +1,85 @@ +const { expect } = require('chai') +const { sampleByHash } = require('../../../app/js/StatsManager') + +describe('StatsManager', function () { + describe('sampleByHash', function () { + it('should always return false for a sample percentage of 0', function () { + for (let i = 0; i < 100; i++) { + const key = `test-key-${i}` + expect(sampleByHash(key, 0), `key ${key} should be false`).to.be.false + } + }) + + it('should always return false for a negative sample percentage', function () { + for (let i = 0; i < 100; i++) { + const key = `test-key-${i}` + expect(sampleByHash(key, -10), `key ${key} should be false`).to.be.false + } + }) + + it('should always return true for a sample percentage of 100', function () { + // This isn't strictly true, if the hash is exactly 0xffffffff, then the percentile is 100 + // and 100 < 100 is false. But the chances of that are 1 in 4 billion. + for (let i = 0; i < 100; i++) { + const key = `test-key-${i}` + expect(sampleByHash(key, 100), `key ${key} should be true`).to.be.true + } + }) + + it('should return the expected number of results for a sample percentage of 75', function () { + // This isn't strictly true, if the hash is exactly 0xffffffff, then the percentile is 100 + // and 100 < 100 is false. But the chances of that are 1 in 4 billion. + let count = 0 + for (let i = 0; i < 100; i++) { + const key = `test-key-${i}` + count += sampleByHash(key, 75) ? 1 : 0 + } + // Actual result is 74, it's deterministic but the test allows the algorithm to change + expect(count).to.be.within(70, 80) + }) + + it('should return true when the hash is within the sample percentage', function () { + // The MD5 hash of 'test-key-in' gives a percentile of 13 + const key = 'test-key-in' + const percentage = 40 + expect(sampleByHash(key, percentage)).to.be.true + }) + + it('should return false when the hash is outside the sample percentage', function () { + // The MD5 hash of 'test-key-outer' gives a percentile of 47 + const key = 'test-key-outer' + const percentage = 40 + expect(sampleByHash(key, percentage)).to.be.false + }) + + it('should produce consistent results for the same key', function () { + const key = 'consistent-key' + const percentage = 50 + const result1 = sampleByHash(key, percentage) + const result2 = sampleByHash(key, percentage) + expect(result1).to.equal(result2) + }) + + it('should handle different keys correctly', function () { + // MD5('key1') => percentile 76 + // MD5('key2') => percentile 47 + expect(sampleByHash('key1', 80)).to.be.true + expect(sampleByHash('key1', 70)).to.be.false + expect(sampleByHash('key2', 50)).to.be.true + expect(sampleByHash('key2', 40)).to.be.false + }) + + it('should be monotonic with respect to percentage', function () { + const key = 'test-key' + const percentile = 32 + for (let i = 0; i <= 100; i++) { + const result = sampleByHash(key, i) + if (i <= percentile) { + expect(result, `percentage ${i} should be false`).to.be.false + } else { + expect(result, `percentage ${i} should be true`).to.be.true + } + } + }) + }) +}) diff --git a/services/clsi/test/unit/js/fixtures/latexmk1.txt b/services/clsi/test/unit/js/fixtures/latexmk1.txt new file mode 100644 index 0000000000..a3b9a179c7 --- /dev/null +++ b/services/clsi/test/unit/js/fixtures/latexmk1.txt @@ -0,0 +1,12 @@ +Latexmk: Found bibliography file(s) [tex.bib] +Latexmk: All targets (TeXbyTopic.dvi) are up-to-date +'makeindex -o "TeXbyTopic.ind" "TeXbyTopic.idx"': time = 0.03 +'bibtex "TeXbyTopic"': time = 0.04 +'latex -recorder "TeXbyTopic.tex"': time = 0.69 +'makeindex -o "TeXbyTopic.ind" "TeXbyTopic.idx"': time = 0.04 +'bibtex "TeXbyTopic"': time = 0.0399999999999998 +'latex -recorder "TeXbyTopic.tex"': time = 0.75 +'makeindex -o "TeXbyTopic.ind" "TeXbyTopic.idx"': time = 0.0399999999999996 +'bibtex "TeXbyTopic"': time = 0.02 +'latex -recorder "TeXbyTopic.tex"': time = 0.77 +Accumulated processing time = 2.93 diff --git a/services/clsi/test/unit/js/fixtures/latexmk2.txt b/services/clsi/test/unit/js/fixtures/latexmk2.txt new file mode 100644 index 0000000000..bdb78e442a --- /dev/null +++ b/services/clsi/test/unit/js/fixtures/latexmk2.txt @@ -0,0 +1,7 @@ +'latex': time = 1.88 +'makeindex TeXbyTopic.idx': time = 0.05 +'bibtex TeXbyTopic': time = 0.05 +'latex': time = 2.18 +Processing time = 4.77, of which invoked processes = 4.16, other = 0.61. +Elapsed clock time = 4.87. +Number of rules run = 4