Files
overleaf-cep/services/clsi/app/js/LatexMetrics.js
Andrew Rumble cd7da983d1 Merge pull request #30232 from overleaf/ar/convert-clsi-to-es-modules
[clsi] convert to ES modules

GitOrigin-RevId: fb7fa52cc8f678ee31be352e62a5dff95e88008b
2026-01-22 09:06:23 +00:00

343 lines
10 KiB
JavaScript

/**
 * Converts a duration given as a decimal string in seconds into a whole
 * number of milliseconds.
 *
 * The value is first rounded to the nearest microsecond so that binary
 * floating-point error cannot pull an exact millisecond value just below its
 * integer (for example `1.005 * 1000` evaluates to `1004.9999999999999`, which
 * a plain `Math.floor` would turn into 1004). Remaining sub-millisecond digits
 * are then truncated.
 *
 * @param {string} timeStr - The time duration in seconds, represented as a string.
 * @returns {number} The equivalent time in milliseconds, rounded down to the
 *   nearest integer, or `NaN` when `timeStr` does not start with a number.
 */
function convertToMs(timeStr) {
  // parseFloat accepts a single argument; unlike parseInt it has no radix.
  const seconds = Number.parseFloat(timeStr)
  const microseconds = Math.round(seconds * 1e6)
  return Math.floor(microseconds / 1000)
}
// Matches a path that ends in `/` followed by one of beamer.cls, tikz.sty,
// microtype.sty or minted.sty. Capture group 1 holds the matched file name.
const NOTEWORTHY_DEPENDENCIES_REGEXP =
/\/(beamer\.cls|tikz\.sty|microtype\.sty|minted\.sty)$/
/**
 * Metric parsers for the stdout that `latexmk` prints when run with `-time`.
 *
 * Every entry is a `[metricName, parser]` tuple. A parser receives the stdout
 * text as its first argument and the metrics collected so far as its second,
 * which lets later entries derive values from earlier ones. A parser returns
 * `null` when its metric cannot be determined.
 *
 * The timing summary differs between `latexmk` versions, so parsers fall back
 * to older line formats where one is known.
 */
const LATEX_MK_METRICS_STDOUT = [
  // One { rule, time_ms } object per timed rule, in log order.
  [
    'latexmk-rule-times',
    stdout => {
      // Lines look like: 'pdflatex ... options ...': time = 12.34
      // The rule name is the quoted command up to its first space.
      const ruleTimes = []
      const timedRules = stdout.matchAll(
        /^'([^' ]+).*': time = (\d+\.\d+)/gm
      )
      for (const [, rule, seconds] of timedRules) {
        ruleTimes.push({ rule, time_ms: convertToMs(seconds) })
      }
      return ruleTimes
    },
  ],
  // Comma-separated rule names derived from the rule times above,
  // e.g. 'pdflatex,bibtex,pdflatex,pdflatex'
  [
    'latexmk-rule-signature',
    (stdout, latexmkStats) => {
      const ruleTimes = latexmkStats['latexmk-rule-times']
      return ruleTimes ? ruleTimes.map(({ rule }) => rule).join(',') : null
    },
  ],
  // Total processing time in ms, split into invoked-process and other time
  // when the log provides that breakdown
  [
    'latexmk-time',
    stdout => {
      const breakdown = stdout.match(
        /^Processing time = (\d+\.\d+), of which invoked processes = (\d+\.\d+), other = (\d+\.\d+)/m
      )
      if (breakdown) {
        const [total, invoked, other] = breakdown
          .slice(1)
          .map(seconds => convertToMs(seconds))
        return { total, invoked, other }
      }
      // Older format only reports the accumulated total
      const legacy = stdout.match(/^Accumulated processing time = (\d+\.\d+)/m)
      return legacy ? { total: convertToMs(legacy[1]) } : null
    },
  ],
  // Elapsed wall-clock time in ms; older versions do not print this line
  [
    'latexmk-clock-time',
    stdout => {
      const elapsed = stdout.match(/^Elapsed clock time = (\d+\.\d+)/m)
      return elapsed ? convertToMs(elapsed[1]) : null
    },
  ],
  // Number of rules run, as reported or else counted from the rule times
  [
    'latexmk-rules-run',
    (stdout, latexmkStats) => {
      const reported = stdout.match(/^Number of rules run = (\d+)/m)
      if (reported) {
        return parseInt(reported[1], 10)
      }
      const ruleTimes = latexmkStats['latexmk-rule-times']
      return ruleTimes ? ruleTimes.length : null
    },
  ],
]
/**
 * Metric parsers for the stderr of the compile, in the same
 * `[metricName, parser]` tuple layout as the stdout parsers.
 */
const LATEX_MK_METRICS_STDERR = [
  // Total image writing time in ms per image type, as { type, time_ms }
  // objects ordered by first appearance in the log. PNG timings are split
  // further by how the PNG was handled when the log says so.
  [
    'latexmk-img-times',
    stderr => {
      // file name -> 'fast-copy' or the reason the fast copy was skipped
      const pngHandling = new Map()
      for (const [, file] of stderr.matchAll(/^PNG copy: (.*)$/gm)) {
        pngHandling.set(file, 'fast-copy')
      }
      const skipped = stderr.matchAll(
        /^PNG copy skipped \((alpha|gamma|palette|interlaced)\): (.*)$/gm
      )
      for (const [, reason, file] of skipped) {
        pngHandling.set(file, reason)
      }

      const totals = new Map()
      const written = stderr.matchAll(
        /^Image written \((PNG|JPG|JBIG2|PDF), (\d+) ms\): (.*)$/gm
      )
      for (const [, format, ms, file] of written) {
        const handling = format === 'PNG' ? pngHandling.get(file) : undefined
        const type = handling == null ? format : `PNG-${handling}`
        totals.set(type, (totals.get(type) ?? 0) + parseInt(ms, 10))
      }
      return [...totals].map(([type, total]) => ({ type, time_ms: total }))
    },
  ],
]
/**
 * Extracts metrics from the latexmk process output and records them on
 * `stats.latexmk`.
 *
 * The parsers in LATEX_MK_METRICS_STDOUT are run against `output.stdout`, then
 * those in LATEX_MK_METRICS_STDERR against `output.stderr`; a missing stream is
 * treated as an empty string. Only truthy parser results are stored, each
 * under its metric name.
 *
 * @param {{stdout?: string, stderr?: string}} output - The output from the latexmk process.
 * @param {{latexmk: object}} stats - The statistics object to update. This object is mutated.
 */
function addLatexMkMetrics(output, stats) {
  const collect = (parsers, text) => {
    for (const [metricName, parse] of parsers) {
      const value = parse(text, stats.latexmk)
      if (value) {
        stats.latexmk[metricName] = value
      }
    }
  }
  collect(LATEX_MK_METRICS_STDOUT, output?.stdout || '')
  collect(LATEX_MK_METRICS_STDERR, output?.stderr || '')
}
/**
 * Attaches an empty, non-enumerable `latexmk` object to the compile stats.
 *
 * The latexmk statistics are collected on this property. Because it is
 * non-enumerable, `JSON.stringify()` skips it, so it is not sent in the
 * compile response to web, where it would be added to the analytics events.
 *
 * @param {object} stats - The compile stats object to be modified.
 */
function enableLatexMkMetrics(stats) {
  const latexmk = { value: {}, enumerable: false }
  Object.defineProperties(stats, { latexmk })
}
/**
 * Records metrics about the files listed in a latexmk .fdb file.
 *
 * Files are split into 'system' files (paths starting with '/') and 'user'
 * files, and each group is aggregated by file extension into a count and a
 * total size. When at least one file was found, the per-extension lists and a
 * set of headline totals are stored under `stats.latexmk['fdb-file-types']`.
 * Any noteworthy dependencies that were found are stored under
 * `stats.latexmk['fdb-dependencies']`.
 *
 * A file that appears several times in the .fdb content is counted once, and
 * files without a valid extension are counted as 'other'. Nothing is recorded
 * when `fdbContent` is empty or missing.
 *
 * @param {string | null | undefined} fdbContent - The content of the .fdb_latexmk file.
 * @param {object} stats - The statistics object to be populated. The metrics will be added to `stats.latexmk`.
 * @returns {void}
 */
function addLatexFdbMetrics(fdbContent, stats) {
  if (!fdbContent) return

  const parsed = parseFdbContent(fdbContent)
  const { systemFileTypes, userFileTypes, dependencies } = parsed
  const sawSystemFiles = Object.keys(systemFileTypes).length > 0
  const sawUserFiles = Object.keys(userFileTypes).length > 0

  if (sawSystemFiles || sawUserFiles) {
    const { image, text, font, other } = summarizeFileTypes(userFileTypes)
    const { total: system } = summarizeFileTypes(systemFileTypes)
    stats.latexmk['fdb-file-types'] = {
      total: {
        systemFileCount: system.count,
        systemFileSize: system.size,
        imageFileCount: image.count,
        imageFileSize: image.size,
        textFileCount: text.count,
        textFileSize: text.size,
        fontFileCount: font.count,
        fontFileSize: font.size,
        otherFileCount: other.count,
        otherFileSize: other.size,
      },
      system: convertToArray(systemFileTypes),
      user: convertToArray(userFileTypes),
    }
  }

  if (dependencies.length > 0) {
    stats.latexmk['fdb-dependencies'] = dependencies
  }
}
/**
 * Scans .fdb_latexmk content for file entries and tallies them by extension.
 *
 * A leading `/compile/` is stripped from each path. Paths that still start
 * with '/' are tallied as system files and all others as user files. Each
 * distinct path is counted once.
 *
 * @param {string} fdbContent - The content of the .fdb_latexmk file.
 * @returns {{systemFileTypes: object, userFileTypes: object, dependencies: string[]}}
 *   Per-extension `{ count, size }` tallies for each group, plus the distinct
 *   noteworthy dependency file names in order of first appearance.
 */
function parseFdbContent(fdbContent) {
  const tallies = { system: {}, user: {} }
  const visitedPaths = new Set()
  const noteworthy = new Set()
  // File entries look like:
  //   FILENAME TIMESTAMP SIZE CHECKSUM ...
  //   "main.tex" 1760016467 6147 9da336eb1132ecb1d61cd1f5a70cfa62 ""
  // where the timestamp may be an integer or a float
  const entryPattern = /^\s*"([^"]+)"\s+\d+(?:\.\d*)?\s+(\d+)/gm
  for (const [, rawPath, rawSize] of fdbContent.matchAll(entryPattern)) {
    const path = rawPath.replace(/^\/compile\//, '')
    if (visitedPaths.has(path)) {
      continue
    }
    visitedPaths.add(path)

    const group = path.startsWith('/') ? tallies.system : tallies.user
    const ext = path.match(/\.([a-z]{1,4})$/i)?.[1].toLowerCase() ?? 'other'
    if (!group[ext]) {
      group[ext] = { count: 0, size: 0 }
    }
    const tally = group[ext]
    tally.count += 1
    tally.size += parseInt(rawSize, 10)

    const dependency = path.match(NOTEWORTHY_DEPENDENCIES_REGEXP)
    if (dependency) {
      noteworthy.add(dependency[1])
    }
  }
  return {
    systemFileTypes: tallies.system,
    userFileTypes: tallies.user,
    dependencies: [...noteworthy],
  }
}
// Lower-case file extension -> summary category, for every extension that is
// not simply 'other'.
const FILE_TYPE_CATEGORY_BY_EXT = new Map(
  Object.entries({
    image: ['png', 'jpg', 'jpeg', 'tif', 'tiff', 'bmp', 'gif', 'svg', 'eps'],
    text: ['tex', 'sty', 'cls', 'bib'],
    font: ['ttf', 'otf', 'pfb'],
  }).flatMap(([category, extensions]) =>
    extensions.map(ext => [ext, category])
  )
)

/**
 * Classifies a file extension as 'image', 'text', 'font' or 'other'.
 *
 * The comparison is exact, so the extension must already be lower-case.
 *
 * @param {string} ext - File extension without the leading dot.
 * @returns {'image' | 'text' | 'font' | 'other'} The category for `ext`.
 */
function getFileTypeCategory(ext) {
  return FILE_TYPE_CATEGORY_BY_EXT.get(ext) ?? 'other'
}
/**
 * Rolls per-extension tallies up into per-category tallies and a grand total.
 *
 * @param {object} fileTypes - Map of extension to `{ count, size }`.
 * @returns {object} `{ count, size }` tallies under the keys `image`, `text`,
 *   `font`, `other` and `total`.
 */
function summarizeFileTypes(fileTypes) {
  const emptyTally = () => ({ count: 0, size: 0 })
  const summary = {
    image: emptyTally(),
    text: emptyTally(),
    font: emptyTally(),
    other: emptyTally(),
    total: emptyTally(),
  }
  Object.entries(fileTypes).forEach(([ext, info]) => {
    // Every extension contributes to its own category and to the total.
    const targets = [summary[getFileTypeCategory(ext)], summary.total]
    for (const tally of targets) {
      tally.count += info.count
      tally.size += info.size
    }
  })
  return summary
}
/**
 * Flattens a map of extension to `{ count, size }` into a list of
 * `{ ext, count, size }` rows, largest total size first.
 *
 * @param {object} object - Map of extension to `{ count, size }`.
 * @returns {Array<{ext: string, count: number, size: number}>} The sorted rows.
 */
function convertToArray(object) {
  const rows = []
  for (const [ext, { count, size }] of Object.entries(object)) {
    rows.push({ ext, count, size })
  }
  // Descending by size
  rows.sort((left, right) => right.size - left.size)
  return rows
}
// The module's default export: one object exposing these three functions.
export default { enableLatexMkMetrics, addLatexMkMetrics, addLatexFdbMetrics }