Merge pull request #28880 from overleaf/bg-add-time-option-to-clsi

add latexmk `-time` option to clsi and record performance logs GitOrigin-RevId: 467473859359913da73f83e10b63b45603ea175c
2026-05-23 17:19:37 +02:00 · 2025-10-08 10:37:18 +01:00
parent 3913008e02
commit d24f37d3a4
12 changed files with 441 additions and 8 deletions
--- a/services/clsi/app.js
+++ b/services/clsi/app.js
@@ -6,6 +6,9 @@ const ContentController = require('./app/js/ContentController')
 const Settings = require('@overleaf/settings')
 const logger = require('@overleaf/logger')
 logger.initialize('clsi')
+const LoggerSerializers = require('./app/js/LoggerSerializers')
+logger.logger.serializers.clsiRequest = LoggerSerializers.clsiRequest
+
 const Metrics = require('@overleaf/metrics')

 const smokeTest = require('./test/smoke/js/SmokeTests')
--- a/services/clsi/app/js/CompileManager.js
+++ b/services/clsi/app/js/CompileManager.js
@@ -23,6 +23,7 @@ const {
  downloadLatestCompileCache,
  downloadOutputDotSynctexFromCompileCache,
 } = require('./CLSICacheHandler')
+const StatsManager = require('./StatsManager')
 const { callbackifyMultiResult } = require('@overleaf/promise-utils')

 const COMPILE_TIME_BUCKETS = [
@@ -191,6 +192,26 @@ async function doCompile(request, stats, timings) {
  Metrics.inc(`compiles-with-image.${tag}`, 1, request.metricsOpts)
  const compileName = getCompileName(request.project_id, request.user_id)

+  // Record latexmk -time stats for a subset of users
+  const recordPerformanceMetrics =
+    request.user_id != null &&
+    Settings.performanceLogSamplingPercentage > 0 &&
+    StatsManager.sampleByHash(
+      request.user_id,
+      Settings.performanceLogSamplingPercentage
+    )
+  // For selected users, define a `latexmk` property on the stats object
+  // to collect latexmk -time stats.
+  if (recordPerformanceMetrics) {
+    // To prevent any changes to the existing compile responses being sent
+    // to web, exclude latexmk stats from being exported by marking them
+    // non-enumerable. This prevents them being serialised by JSON.stringify().
+    Object.defineProperty(stats, 'latexmk', {
+      value: {},
+      enumerable: false,
+    })
+  }
+
  try {
    await LatexRunner.promises.runLatex(compileName, {
      directory: compileDir,
@@ -257,11 +278,15 @@ async function doCompile(request, stats, timings) {
  Metrics.inc('compiles-succeeded', 1, request.metricsOpts)
  for (const metricKey in stats) {
    const metricValue = stats[metricKey]
-    Metrics.count(metricKey, metricValue, 1, request.metricsOpts)
+    if (typeof metricValue === 'number') {
+      Metrics.count(metricKey, metricValue, 1, request.metricsOpts)
+    }
  }
  for (const metricKey in timings) {
    const metricValue = timings[metricKey]
-    Metrics.timing(metricKey, metricValue, 1, request.metricsOpts)
+    if (typeof metricValue === 'number') {
+      Metrics.timing(metricKey, metricValue, 1, request.metricsOpts)
+    }
  }
  const loadavg = typeof os.loadavg === 'function' ? os.loadavg() : undefined
  if (loadavg != null) {
@@ -320,6 +345,27 @@ async function doCompile(request, stats, timings) {
    emitPdfStats(stats, timings, request)
  }

+  // Record compile performance for a subset of users
+  if (recordPerformanceMetrics) {
+    logger.info(
+      {
+        userId: request.user_id,
+        projectId: request.project_id,
+        timeTaken: ts,
+        clsiRequest: request,
+        stats,
+        timings,
+        // explicitly include latexmk stats to bypass the non-enumerable property
+        latexmk: stats.latexmk,
+        loadavg1m: loadavg?.[0],
+        loadavg5m: loadavg?.[1],
+        loadavg15m: loadavg?.[2],
+        samplingPercentage: Settings.performanceLogSamplingPercentage,
+      },
+      'sampled performance log'
+    )
+  }
+
  return { outputFiles, buildId }
 }

--- a/services/clsi/app/js/LatexMetrics.js
+++ b/services/clsi/app/js/LatexMetrics.js
@@ -0,0 +1,109 @@
+/**
+ * Converts a time string in seconds to an integer number of milliseconds.
+ *
+ * @param {string} timeStr - The time duration in seconds, represented as a string.
+ * @returns {number} The equivalent time in milliseconds, rounded down to the nearest integer.
+ */
+function convertToMs(timeStr) {
+  return Math.floor(parseFloat(timeStr, 10) * 1000)
+}
+
+/* An array of metric parsers for `latexmk` time output (`-time` flag).
+ * Each entry is a tuple containing a metric name and a function to parse that
+ * metric from the `latexmk` log output.
+ *
+ * The parser functions generally take the log string as their first argument.
+ * Some may take additional arguments, such as the already computed stats,
+ * to derive new metrics.
+ *
+ * There are different formats of `latexmk` output depending on the version.
+ * The parsers attempt to handle these variations gracefully.
+ */
+const LATEX_MK_METRICS = [
+  // Extract individual latexmk rule times as an array of objects, each with 'rule'
+  // and 'time_ms' properties
+  [
+    'latexmk-rule-times',
+    s => {
+      // Each line looks like:  'pdflatex ... options ...': time = 12.34
+      // Take the command up to the first space as the rule name
+      const matches = s.matchAll(/^'([^' ]+).*': time = (\d+\.\d+)/gm)
+      return Array.from(matches, match => ({
+        rule: match[1],
+        time_ms: convertToMs(match[2]),
+      }))
+    },
+  ],
+  // Extract a comma-separated signature of rule names from the rule times above
+  [
+    'latexmk-rule-signature',
+    (s, latexmkStats) => {
+      const times = latexmkStats['latexmk-rule-times']
+      if (!times) return null
+      // Example output: 'pdflatex,bibtex,pdflatex,pdflatex'
+      return times.map(t => t.rule).join(',')
+    },
+  ],
+  // Total latexmk processing time, invoked processes time, and other time in ms
+  [
+    'latexmk-time',
+    s => {
+      const match = s.match(
+        /^Processing time = (\d+\.\d+), of which invoked processes = (\d+\.\d+), other = (\d+\.\d+)/m
+      )
+      if (match) {
+        return {
+          total: convertToMs(match[1]),
+          invoked: convertToMs(match[2]),
+          other: convertToMs(match[3]),
+        }
+      }
+      // Older format
+      const fallbackMatch = s.match(
+        /^Accumulated processing time = (\d+\.\d+)/m
+      )
+      if (fallbackMatch) {
+        return { total: convertToMs(fallbackMatch[1]) }
+      }
+      return null
+    },
+  ],
+  // Total elapsed clock time in ms for latexmk
+  [
+    'latexmk-clock-time',
+    s => {
+      const match = s.match(/^Elapsed clock time = (\d+\.\d+)/m)
+      if (match) {
+        return convertToMs(match[1])
+      }
+      // not present in older versions
+      return null
+    },
+  ],
+  // Number of rules run by latexmk
+  [
+    'latexmk-rules-run',
+    (s, latexmkStats) => {
+      const match = s.match(/^Number of rules run = (\d+)/m)
+      if (match) {
+        return parseInt(match[1], 10)
+      }
+      // Fallback: count number of entries in rule times if available
+      if (latexmkStats['latexmk-rule-times']) {
+        return latexmkStats['latexmk-rule-times'].length
+      }
+      return null
+    },
+  ],
+]
+
+function addLatexMkMetrics(output, stats) {
+  for (const [stat, matcher] of LATEX_MK_METRICS) {
+    const match = matcher(output?.stdout || '', stats.latexmk)
+    if (match) {
+      stats.latexmk[stat] = match
+    }
+  }
+}
+
+module.exports = { addLatexMkMetrics }
--- a/services/clsi/app/js/LatexRunner.js
+++ b/services/clsi/app/js/LatexRunner.js
@@ -3,15 +3,16 @@ const { promisify } = require('node:util')
 const Settings = require('@overleaf/settings')
 const logger = require('@overleaf/logger')
 const CommandRunner = require('./CommandRunner')
+const { addLatexMkMetrics } = require('./LatexMetrics')
 const fs = require('node:fs')

 const ProcessTable = {} // table of currently running jobs (pids or docker container names)

-const TIME_V_METRICS = Object.entries({
-  'cpu-percent': /Percent of CPU this job got: (\d+)/m,
-  'cpu-time': /User time.*: (\d+.\d+)/m,
-  'sys-time': /System time.*: (\d+.\d+)/m,
-})
+const TIME_V_METRICS = [
+  ['cpu-percent', /Percent of CPU this job got: (\d+)/m],
+  ['cpu-time', /User time.*: (\d+.\d+)/m],
+  ['sys-time', /System time.*: (\d+.\d+)/m],
+]

 const COMPILER_FLAGS = {
  latex: '-pdfdvi',
@@ -75,6 +76,14 @@ function runLatex(projectId, options, callback) {
      if (error) {
        return callback(error)
      }
+      if (stats.latexmk) {
+        try {
+          addLatexMkMetrics(output, stats)
+        } catch (err) {
+          logger.error({ err, projectId }, 'error adding latexmk metrics')
+        }
+      }
+      // number of latex runs and whether there were errors
      const runs =
        output?.stderr?.match(/^Run number \d+ of .*latex/gm)?.length || 0
      const failed = output?.stdout?.match(/^Latexmk: Errors/m) != null ? 1 : 0
@@ -161,7 +170,8 @@ function _buildLatexCommand(mainFile, opts = {}) {
    '-auxdir=$COMPILE_DIR',
    '-outdir=$COMPILE_DIR',
    '-synctex=1',
-    '-interaction=batchmode'
+    '-interaction=batchmode',
+    '-time'
  )

  // Stop on first error option
--- a/services/clsi/app/js/LoggerSerializers.js
+++ b/services/clsi/app/js/LoggerSerializers.js
@@ -0,0 +1,40 @@
+const Path = require('node:path')
+
+const CLSI_REQUEST_SERIALIZED_PROPERTIES = [
+  'compiler',
+  'compileFromClsiCache',
+  'populateClsiCache',
+  'enablePdfCaching',
+  'pdfCachingMinChunkSize',
+  'timeout',
+  'imageName',
+  'draft',
+  'stopOnFirstError',
+  'check',
+  'flags',
+  'compileGroup',
+  'syncType',
+]
+
+module.exports = {
+  /**
+   * Serializer for a CLSI request object.
+   * Only includes properties useful for logging.
+   * Excludes large, sensitive, or irrelevant properties (e.g., 'syncState', 'resources').
+   * To add more properties, update the allowed properties above.
+   *
+   * @param {object} clsiRequest - The original CLSI request object.
+   * @returns {object} A summarized version of the request object for logging.
+   */
+  clsiRequest(clsiRequest) {
+    const summary = {}
+    for (const key of CLSI_REQUEST_SERIALIZED_PROPERTIES) {
+      if (key === 'imageName' && clsiRequest.imageName) {
+        summary.imageName = Path.basename(clsiRequest.imageName)
+      } else if (clsiRequest[key] !== undefined) {
+        summary[key] = clsiRequest[key]
+      }
+    }
+    return summary
+  },
+}
--- a/services/clsi/app/js/StatsManager.js
+++ b/services/clsi/app/js/StatsManager.js
@@ -0,0 +1,24 @@
+const crypto = require('node:crypto')
+
+/**
+ * Consistently sample a keyspace with a given sample percentage.
+ * The same key will always produce a consistent percentile value that
+ * can be compared against the sample percentage.
+ * Example: if key is the userId and the samplePercentage is 10, then
+ * we see all the activity for the 10% of users who are selected.
+ *
+ * @param {string} key - The unique identifier to be hashed and checked.
+ * @param {number} samplePercentage - The percentage (0-100) of keys that should return true.
+ * @returns {boolean} - True if the key is within the sample, false otherwise.
+ */
+function sampleByHash(key, samplePercentage) {
+  if (samplePercentage <= 0) {
+    return false
+  }
+  const hash = crypto.createHash('md5').update(key).digest()
+  const hashValue = hash.readUInt32BE(0)
+  const percentile = Math.floor((hashValue / 0xffffffff) * 100)
+  return percentile < samplePercentage
+}
+
+module.exports = { sampleByHash }
--- a/services/clsi/config/settings.defaults.js
+++ b/services/clsi/config/settings.defaults.js
@@ -81,6 +81,8 @@ module.exports = {
  pdfCachingWorkerPoolBackLogLimit:
    parseInt(process.env.PDF_CACHING_WORKER_POOL_BACK_LOG_LIMIT, 10) || 40,
  compileConcurrencyLimit: isPreEmptible ? 32 : 64,
+  performanceLogSamplingPercentage:
+    parseFloat(process.env.CLSI_PERFORMANCE_LOG_SAMPLING, 10) || 0,
 }

 if (process.env.ALLOWED_COMPILE_GROUPS) {
--- a/services/clsi/test/acceptance/js/SimpleLatexFileTests.js
+++ b/services/clsi/test/acceptance/js/SimpleLatexFileTests.js
@@ -67,4 +67,31 @@ Hello world
      })
      .should.equal(true)
  })
+
+  it('should return only the expected keys for stats and timings', function () {
+    const { stats, timings } = this.body.compile
+    // Note: chai's all.keys assertion rejects extra keys
+    stats.should.have.all.keys(
+      'isInitialCompile',
+      'latexmk-errors',
+      'latex-runs',
+      'latex-runs-with-errors',
+      'latex-runs-2',
+      'latex-runs-with-errors-2',
+      'pdf-caching-total-ranges-size',
+      'pdf-caching-reclaimed-space',
+      'pdf-caching-new-ranges-size',
+      'pdf-caching-n-ranges',
+      'pdf-caching-n-new-ranges',
+      'pdf-size'
+    )
+    timings.should.have.all.keys(
+      'sync',
+      'compile',
+      'output',
+      'compileE2E',
+      'compute-pdf-caching',
+      'pdf-caching-overhead-delete-stale-hashes'
+    )
+  })
 })
--- a/services/clsi/test/unit/js/LatexRunnerTests.js
+++ b/services/clsi/test/unit/js/LatexRunnerTests.js
@@ -1,6 +1,8 @@
 const SandboxedModule = require('sandboxed-module')
 const sinon = require('sinon')
 const { expect } = require('chai')
+const fs = require('node:fs')
+const path = require('node:path')

 const MODULE_PATH = require('node:path').join(
  __dirname,
@@ -87,6 +89,7 @@ describe('LatexRunner', function () {
            '-outdir=$COMPILE_DIR',
            '-synctex=1',
            '-interaction=batchmode',
+            '-time',
            '-f',
            '-pdf',
            '$COMPILE_DIR/main-file.tex',
@@ -140,6 +143,7 @@ describe('LatexRunner', function () {
          '-outdir=$COMPILE_DIR',
          '-synctex=1',
          '-interaction=batchmode',
+          '-time',
          '-f',
          '-lualatex',
          '$COMPILE_DIR/main-file.tex',
@@ -209,11 +213,75 @@ describe('LatexRunner', function () {
          '-outdir=$COMPILE_DIR',
          '-synctex=1',
          '-interaction=batchmode',
+          '-time',
          '-halt-on-error',
          '-pdf',
          '$COMPILE_DIR/main-file.tex',
        ])
      })
    })
+
+    describe('with old latexmk timing output', function () {
+      beforeEach(function (done) {
+        this.commandRunnerOutput.stdout = fs.readFileSync(
+          path.join(__dirname, 'fixtures', 'latexmk1.txt'),
+          'utf-8'
+        )
+        // pass in the `latexmk` property to signal that we want to receive parsed stats
+        this.stats.latexmk = {}
+        this.call(done)
+      })
+
+      it('should parse latexmk 4.52c (2017) timing information', function () {
+        expect(this.stats.latexmk).to.deep.equal({
+          'latexmk-rule-times': [
+            { rule: 'makeindex', time_ms: 30 },
+            { rule: 'bibtex', time_ms: 40 },
+            { rule: 'latex', time_ms: 690 },
+            { rule: 'makeindex', time_ms: 40 },
+            { rule: 'bibtex', time_ms: 39 },
+            { rule: 'latex', time_ms: 750 },
+            { rule: 'makeindex', time_ms: 39 },
+            { rule: 'bibtex', time_ms: 20 },
+            { rule: 'latex', time_ms: 770 },
+          ],
+          'latexmk-rule-signature':
+            'makeindex,bibtex,latex,makeindex,bibtex,latex,makeindex,bibtex,latex',
+          'latexmk-rules-run': 9,
+          'latexmk-time': { total: 2930 },
+        })
+      })
+    })
+
+    describe('with modern latexmk timing output', function () {
+      beforeEach(function (done) {
+        this.commandRunnerOutput.stdout = fs.readFileSync(
+          path.join(__dirname, 'fixtures', 'latexmk2.txt'),
+          'utf-8'
+        )
+        // pass in the `latexmk` property to signal that we want to receive parsed stats
+        this.stats.latexmk = {}
+        this.call(done)
+      })
+
+      it('should parse latexmk 4.83 (2024) timing information', function () {
+        expect(this.stats.latexmk).to.deep.equal({
+          'latexmk-rule-times': [
+            { rule: 'latex', time_ms: 1880 },
+            { rule: 'makeindex', time_ms: 50 },
+            { rule: 'bibtex', time_ms: 50 },
+            { rule: 'latex', time_ms: 2180 },
+          ],
+          'latexmk-rule-signature': 'latex,makeindex,bibtex,latex',
+          'latexmk-time': {
+            total: 4770,
+            invoked: 4160,
+            other: 610,
+          },
+          'latexmk-clock-time': 4870,
+          'latexmk-rules-run': 4,
+        })
+      })
+    })
  })
 })
--- a/services/clsi/test/unit/js/StatsManagerTests.js
+++ b/services/clsi/test/unit/js/StatsManagerTests.js
@@ -0,0 +1,85 @@
+const { expect } = require('chai')
+const { sampleByHash } = require('../../../app/js/StatsManager')
+
+describe('StatsManager', function () {
+  describe('sampleByHash', function () {
+    it('should always return false for a sample percentage of 0', function () {
+      for (let i = 0; i < 100; i++) {
+        const key = `test-key-${i}`
+        expect(sampleByHash(key, 0), `key ${key} should be false`).to.be.false
+      }
+    })
+
+    it('should always return false for a negative sample percentage', function () {
+      for (let i = 0; i < 100; i++) {
+        const key = `test-key-${i}`
+        expect(sampleByHash(key, -10), `key ${key} should be false`).to.be.false
+      }
+    })
+
+    it('should always return true for a sample percentage of 100', function () {
+      // This isn't strictly true, if the hash is exactly 0xffffffff, then the percentile is 100
+      // and 100 < 100 is false. But the chances of that are 1 in 4 billion.
+      for (let i = 0; i < 100; i++) {
+        const key = `test-key-${i}`
+        expect(sampleByHash(key, 100), `key ${key} should be true`).to.be.true
+      }
+    })
+
+    it('should return the expected number of results for a sample percentage of 75', function () {
+      // This isn't strictly true, if the hash is exactly 0xffffffff, then the percentile is 100
+      // and 100 < 100 is false. But the chances of that are 1 in 4 billion.
+      let count = 0
+      for (let i = 0; i < 100; i++) {
+        const key = `test-key-${i}`
+        count += sampleByHash(key, 75) ? 1 : 0
+      }
+      // Actual result is 74, it's deterministic but the test allows the algorithm to change
+      expect(count).to.be.within(70, 80)
+    })
+
+    it('should return true when the hash is within the sample percentage', function () {
+      // The MD5 hash of 'test-key-in' gives a percentile of 13
+      const key = 'test-key-in'
+      const percentage = 40
+      expect(sampleByHash(key, percentage)).to.be.true
+    })
+
+    it('should return false when the hash is outside the sample percentage', function () {
+      // The MD5 hash of 'test-key-outer' gives a percentile of 47
+      const key = 'test-key-outer'
+      const percentage = 40
+      expect(sampleByHash(key, percentage)).to.be.false
+    })
+
+    it('should produce consistent results for the same key', function () {
+      const key = 'consistent-key'
+      const percentage = 50
+      const result1 = sampleByHash(key, percentage)
+      const result2 = sampleByHash(key, percentage)
+      expect(result1).to.equal(result2)
+    })
+
+    it('should handle different keys correctly', function () {
+      // MD5('key1') => percentile 76
+      // MD5('key2') => percentile 47
+      expect(sampleByHash('key1', 80)).to.be.true
+      expect(sampleByHash('key1', 70)).to.be.false
+      expect(sampleByHash('key2', 50)).to.be.true
+      expect(sampleByHash('key2', 40)).to.be.false
+    })
+
+    it('should be monotonic with respect to percentage', function () {
+      const key = 'test-key'
+      const percentile = 32
+      for (let i = 0; i <= 100; i++) {
+        const result = sampleByHash(key, i)
+        if (i <= percentile) {
+          expect(result, `percentage ${i} should be false`).to.be.false
+        } else {
+          expect(result, `percentage ${i} should be true`).to.be.true
+        }
+      }
+    })
+  })
+})
--- a/services/clsi/test/unit/js/fixtures/latexmk1.txt
+++ b/services/clsi/test/unit/js/fixtures/latexmk1.txt
@@ -0,0 +1,12 @@
+Latexmk: Found bibliography file(s) [tex.bib]
+Latexmk: All targets (TeXbyTopic.dvi) are up-to-date
+'makeindex  -o "TeXbyTopic.ind" "TeXbyTopic.idx"': time = 0.03
+'bibtex  "TeXbyTopic"': time = 0.04
+'latex  -recorder  "TeXbyTopic.tex"': time = 0.69
+'makeindex  -o "TeXbyTopic.ind" "TeXbyTopic.idx"': time = 0.04
+'bibtex  "TeXbyTopic"': time = 0.0399999999999998
+'latex  -recorder  "TeXbyTopic.tex"': time = 0.75
+'makeindex  -o "TeXbyTopic.ind" "TeXbyTopic.idx"': time = 0.0399999999999996
+'bibtex  "TeXbyTopic"': time = 0.02
+'latex  -recorder  "TeXbyTopic.tex"': time = 0.77
+Accumulated processing time = 2.93
--- a/services/clsi/test/unit/js/fixtures/latexmk2.txt
+++ b/services/clsi/test/unit/js/fixtures/latexmk2.txt
@@ -0,0 +1,7 @@
+'latex': time = 1.88
+'makeindex TeXbyTopic.idx': time = 0.05
+'bibtex TeXbyTopic': time = 0.05
+'latex': time = 2.18
+Processing time = 4.77, of which invoked processes = 4.16, other = 0.61.
+Elapsed clock time = 4.87.
+Number of rules run = 4