From 812a2704fa2f52a61bff19678e2ea315bc7903ad Mon Sep 17 00:00:00 2001 From: Jakob Ackermann Date: Tue, 15 Jul 2025 18:24:57 +0200 Subject: [PATCH] [clsi-cache] meter ingress and egress bandwidth (#27143) * [misc] fix "app" label in clsi-cache metrics in dev-env * [clsi-cache] validate filePath when processing file * [clsi-cache] meter ingress and egress bandwidth Files are downloaded directly from nginx, hence we cannot meter egress in clsi-cache easily. GitOrigin-RevId: 24de8c41728f0e9c984113c1470dec6153e75f20 --- libraries/stream-utils/index.js | 19 +++++++++++++++++++ package-lock.json | 3 +++ services/clsi/app/js/CLSICacheHandler.js | 10 +++++++++- services/clsi/package.json | 1 + .../Features/Compile/ClsiCacheController.js | 10 +++++++++- .../src/Features/Compile/ClsiCacheHandler.js | 16 ++++++++++++++++ .../src/Features/Compile/ClsiCacheManager.js | 12 +++++++++++- services/web/package.json | 1 + 8 files changed, 69 insertions(+), 3 deletions(-) diff --git a/libraries/stream-utils/index.js b/libraries/stream-utils/index.js index e4c7d60c94..7719d409a4 100644 --- a/libraries/stream-utils/index.js +++ b/libraries/stream-utils/index.js @@ -145,6 +145,24 @@ class LoggerStream extends Transform { } } +class MeteredStream extends Transform { + #Metrics + #metric + #labels + + constructor(Metrics, metric, labels) { + super() + this.#Metrics = Metrics + this.#metric = metric + this.#labels = labels + } + + _transform(chunk, encoding, callback) { + this.#Metrics.count(this.#metric, chunk.byteLength, 1, this.#labels) + callback(null, chunk) + } +} + // Export our classes module.exports = { @@ -153,6 +171,7 @@ module.exports = { LoggerStream, LimitedStream, TimeoutStream, + MeteredStream, SizeExceededError, AbortError, } diff --git a/package-lock.json b/package-lock.json index d615e38f9a..bb23eb3f78 100644 --- a/package-lock.json +++ b/package-lock.json @@ -42590,6 +42590,7 @@ "@overleaf/o-error": "*", "@overleaf/promise-utils": "*", "@overleaf/settings": "*", + 
"@overleaf/stream-utils": "*", "archiver": "5.3.2", "async": "^3.2.5", "body-parser": "^1.20.3", @@ -42625,6 +42626,7 @@ "@overleaf/o-error": "*", "@overleaf/promise-utils": "*", "@overleaf/settings": "*", + "@overleaf/stream-utils": "*", "body-parser": "^1.20.3", "bunyan": "^1.8.15", "celebrate": "^15.0.3", @@ -44807,6 +44809,7 @@ "@overleaf/promise-utils": "*", "@overleaf/redis-wrapper": "*", "@overleaf/settings": "*", + "@overleaf/stream-utils": "*", "@phosphor-icons/react": "^2.1.7", "@slack/webhook": "^7.0.2", "@stripe/stripe-js": "^7.3.0", diff --git a/services/clsi/app/js/CLSICacheHandler.js b/services/clsi/app/js/CLSICacheHandler.js index b9415ae3ec..26acd221f9 100644 --- a/services/clsi/app/js/CLSICacheHandler.js +++ b/services/clsi/app/js/CLSICacheHandler.js @@ -13,6 +13,7 @@ const { const logger = require('@overleaf/logger') const Metrics = require('@overleaf/metrics') const Settings = require('@overleaf/settings') +const { MeteredStream } = require('@overleaf/stream-utils') const { CACHE_SUBDIR } = require('./OutputCacheManager') const { isExtraneousFile } = require('./ResourceWriter') @@ -204,7 +205,13 @@ async function downloadOutputDotSynctexFromCompileCache( const dst = Path.join(outputDir, 'output.synctex.gz') const tmp = dst + crypto.randomUUID() try { - await pipeline(stream, fs.createWriteStream(tmp)) + await pipeline( + stream, + new MeteredStream(Metrics, 'clsi_cache_egress', { + path: 'output.synctex.gz', + }), + fs.createWriteStream(tmp) + ) await fs.promises.rename(tmp, dst) } catch (err) { try { @@ -253,6 +260,7 @@ async function downloadLatestCompileCache(projectId, userId, compileDir) { let abort = false await pipeline( stream, + new MeteredStream(Metrics, 'clsi_cache_egress', { path: 'output.tar.gz' }), createGunzip(), tarFs.extract(compileDir, { // use ignore hook for counting entries (files+folders) and validation. 
diff --git a/services/clsi/package.json b/services/clsi/package.json index f6fa7aff01..fe31c430bd 100644 --- a/services/clsi/package.json +++ b/services/clsi/package.json @@ -23,6 +23,7 @@ "@overleaf/o-error": "*", "@overleaf/promise-utils": "*", "@overleaf/settings": "*", + "@overleaf/stream-utils": "*", "archiver": "5.3.2", "async": "^3.2.5", "body-parser": "^1.20.3", diff --git a/services/web/app/src/Features/Compile/ClsiCacheController.js b/services/web/app/src/Features/Compile/ClsiCacheController.js index d76f0a02bd..6065b25578 100644 --- a/services/web/app/src/Features/Compile/ClsiCacheController.js +++ b/services/web/app/src/Features/Compile/ClsiCacheController.js @@ -11,6 +11,8 @@ const CompileController = require('./CompileController') const { expressify } = require('@overleaf/promise-utils') const ClsiCacheHandler = require('./ClsiCacheHandler') const ProjectGetter = require('../Project/ProjectGetter') +const { MeteredStream } = require('@overleaf/stream-utils') +const Metrics = require('@overleaf/metrics') /** * Download a file from a specific build on the clsi-cache. 
@@ -64,7 +66,13 @@ async function downloadFromCache(req, res) { ) try { res.writeHead(response.status) - await pipeline(stream, res) + await pipeline( + stream, + new MeteredStream(Metrics, 'clsi_cache_egress', { + path: ClsiCacheHandler.getEgressLabel(filename), + }), + res + ) } catch (err) { const reqAborted = Boolean(req.destroyed) const streamingStarted = Boolean(res.headersSent) diff --git a/services/web/app/src/Features/Compile/ClsiCacheHandler.js b/services/web/app/src/Features/Compile/ClsiCacheHandler.js index bb0414bf03..4f74d237d6 100644 --- a/services/web/app/src/Features/Compile/ClsiCacheHandler.js +++ b/services/web/app/src/Features/Compile/ClsiCacheHandler.js @@ -34,6 +34,21 @@ function validateFilename(filename) { } } +/** + * Keep in sync with getIngressLabel in services/clsi-cache/app/js/utils.js + * + * @param {string} fsPath + * @return {string} + */ +function getEgressLabel(fsPath) { + if (fsPath.endsWith('.blg')) { + // .blg files may have custom names and can be in nested folders. + return 'output.blg' + } + // The rest is limited to 5 file names via validateFilename: output.pdf, etc. + return fsPath +} + /** * Clear the cache on all clsi-cache instances. 
* @@ -224,6 +239,7 @@ async function prepareCacheSource( } module.exports = { + getEgressLabel, clearCache, getOutputFile, getLatestOutputFile, diff --git a/services/web/app/src/Features/Compile/ClsiCacheManager.js b/services/web/app/src/Features/Compile/ClsiCacheManager.js index b1ef2a46ac..0c172642ee 100644 --- a/services/web/app/src/Features/Compile/ClsiCacheManager.js +++ b/services/web/app/src/Features/Compile/ClsiCacheManager.js @@ -7,6 +7,7 @@ const SplitTestHandler = require('../SplitTests/SplitTestHandler') const UserGetter = require('../User/UserGetter') const Settings = require('@overleaf/settings') const { fetchJson, RequestFailedError } = require('@overleaf/fetch-utils') +const Metrics = require('@overleaf/metrics') /** * Get the most recent build and metadata @@ -71,7 +72,13 @@ async function getLatestCompileResult(projectId, userId) { async function tryGetLatestCompileResult(projectId, userId, signal) { const { internal: { location: metaLocation }, - external: { isUpToDate, allFiles, zone, shard: clsiCacheShard }, + external: { + isUpToDate, + allFiles, + zone, + shard: clsiCacheShard, + size: jsonSize, + }, } = await getLatestBuildFromCache( projectId, userId, @@ -93,6 +100,9 @@ async function tryGetLatestCompileResult(projectId, userId, signal) { } throw err } + Metrics.count('clsi_cache_egress', jsonSize, 1, { + path: ClsiCacheHandler.getEgressLabel('output.overleaf.json'), + }) const [, editorId, buildId] = metaLocation.match( /\/build\/([a-f0-9-]+?)-([a-f0-9]+-[a-f0-9]+)\// diff --git a/services/web/package.json b/services/web/package.json index 45f8453242..b0cee1af06 100644 --- a/services/web/package.json +++ b/services/web/package.json @@ -91,6 +91,7 @@ "@overleaf/promise-utils": "*", "@overleaf/redis-wrapper": "*", "@overleaf/settings": "*", + "@overleaf/stream-utils": "*", "@phosphor-icons/react": "^2.1.7", "@slack/webhook": "^7.0.2", "@stripe/stripe-js": "^7.3.0",