From 07e4eb4dfb9b044a58aff277ca97cd5c5d65c201 Mon Sep 17 00:00:00 2001 From: Simon Detheridge Date: Wed, 23 Oct 2019 17:07:23 +0100 Subject: [PATCH 1/3] Add mechanism to expire old prometheus metrics Adds a wrapper around the prometheus client, which keeps track of the last time a metric was accessed, and removes old ones once they have not been accessed for a period of time. --- libraries/metrics/metrics.coffee | 63 +++----------- libraries/metrics/package.json | 2 +- libraries/metrics/prom_wrapper.coffee | 114 ++++++++++++++++++++++++++ 3 files changed, 128 insertions(+), 51 deletions(-) create mode 100644 libraries/metrics/prom_wrapper.coffee diff --git a/libraries/metrics/metrics.coffee b/libraries/metrics/metrics.coffee index 0f65c7b1ea..03e492be8e 100644 --- a/libraries/metrics/metrics.coffee +++ b/libraries/metrics/metrics.coffee @@ -3,30 +3,25 @@ if process.env["USE_PROM_METRICS"] != "true" else console.log("using prometheus") - -prom = require('prom-client') -Register = require('prom-client').register +prom = require('./prom_wrapper') collectDefaultMetrics = prom.collectDefaultMetrics appname = "unknown" hostname = require('os').hostname() -buildKey = (key)-> "#{name}.#{hostname}.#{key}" -buildGlobalKey = (key)-> "#{name}.global.#{key}" - -promMetrics = {} - destructors = [] require "./uv_threadpool_size" module.exports = Metrics = - register:Register - initialize: (_name) -> + register: prom.registry + + initialize: (_name, opts = {}) -> appname = _name collectDefaultMetrics({ timeout: 5000, prefix: Metrics.buildPromKey()}) - + if opts.ttlInMinutes + prom.ttlInMinutes = opts.ttlInMinutes console.log("ENABLE_TRACE_AGENT set to #{process.env['ENABLE_TRACE_AGENT']}") if process.env['ENABLE_TRACE_AGENT'] == "true" @@ -67,8 +62,8 @@ module.exports = Metrics = injectMetricsRoute: (app) -> app.get('/metrics', (req, res) -> - res.set('Content-Type', Register.contentType) - res.end(Register.metrics()) + res.set('Content-Type', prom.registry.contentType) + 
res.end(prom.registry.metrics()) ) buildPromKey: (key = "")-> @@ -82,43 +77,23 @@ module.exports = Metrics = inc : (key, sampleRate = 1, opts = {})-> key = Metrics.buildPromKey(key) - if !promMetrics[key]? - promMetrics[key] = new prom.Counter({ - name: key, - help: key, - labelNames: ['app','host','status','method', 'path'] - }) opts.app = appname opts.host = hostname - promMetrics[key].inc(opts) + prom.metric('counter', key).inc(opts) if process.env['DEBUG_METRICS'] console.log("doing inc", key, opts) count : (key, count, sampleRate = 1)-> key = Metrics.buildPromKey(key) - if !promMetrics[key]? - promMetrics[key] = new prom.Counter({ - name: key, - help: key, - labelNames: ['app','host'] - }) - promMetrics[key].inc({app: appname, host: hostname}, count) + prom.metric('counter', key).inc({app: appname, host: hostname}, count) if process.env['DEBUG_METRICS'] console.log("doing count/inc", key, opts) timing: (key, timeSpan, sampleRate, opts = {})-> key = Metrics.buildPromKey("timer_" + key) - if !promMetrics[key]? - promMetrics[key] = new prom.Summary({ - name: key, - help: key, - maxAgeSeconds: 600, - ageBuckets: 10, - labelNames: ['app', 'host', 'path', 'status_code', 'method', 'collection', 'query'] - }) opts.app = appname opts.host = hostname - promMetrics[key].observe(opts, timeSpan) + prom.metric('summary', key).observe(opts, timeSpan) if process.env['DEBUG_METRICS'] console.log("doing timing", key, opts) @@ -137,25 +112,13 @@ module.exports = Metrics = gauge : (key, value, sampleRate = 1, opts)-> key = Metrics.buildPromKey(key) - if !promMetrics[key]? 
- promMetrics[key] = new prom.Gauge({ - name: key, - help: key, - labelNames: ['app','host', 'status'] - }) - promMetrics[key].set({app: appname, host: hostname, status: opts?.status}, this.sanitizeValue(value)) + prom.metric('gague', key).set({app: appname, host: hostname, status: opts?.status}, this.sanitizeValue(value)) if process.env['DEBUG_METRICS'] console.log("doing gauge", key, opts) globalGauge: (key, value, sampleRate = 1, opts)-> key = Metrics.buildPromKey(key) - if !promMetrics[key]? - promMetrics[key] = new prom.Gauge({ - name: key, - help: key, - labelNames: ['app','host', 'status'] - }) - promMetrics[key].set({app: appname, status: opts?.status},this.sanitizeValue(value)) + prom.metric('gague', key).set({app: appname, status: opts?.status},this.sanitizeValue(value)) mongodb: require "./mongodb" http: require "./http" diff --git a/libraries/metrics/package.json b/libraries/metrics/package.json index 24daa5da46..86f2e39a9e 100644 --- a/libraries/metrics/package.json +++ b/libraries/metrics/package.json @@ -1,6 +1,6 @@ { "name": "metrics-sharelatex", - "version": "2.2.0", + "version": "2.3.0", "description": "A drop-in metrics and monitoring module for node.js apps", "repository": { "type": "git", diff --git a/libraries/metrics/prom_wrapper.coffee b/libraries/metrics/prom_wrapper.coffee new file mode 100644 index 0000000000..eb2c132314 --- /dev/null +++ b/libraries/metrics/prom_wrapper.coffee @@ -0,0 +1,114 @@ +prom = require('prom-client') +registry = require('prom-client').register +metrics = {} + + +optsKey = (opts) -> + keys = Object.keys(opts) + return '' if keys.length == 0 + + keys = keys.sort() + + hash = ''; + for key in keys + hash += "," if hash.length + hash += "#{key}:#{opts[key]}" + + return hash + +extendOpts = (opts, labelNames) -> + for label in labelNames + opts[label] ||= '' + return opts + +optsAsArgs = (opts, labelNames) -> + args = [] + for label in labelNames + args.push(opts[label] || '') + return args + + +PromWrapper = + 
ttlInMinutes: 0 + registry: registry + + metric: (type, name) -> + registry.getSingleMetric(name) || new MetricWrapper(type, name) + + collectDefaultMetrics: prom.collectDefaultMetrics + + +class MetricWrapper + constructor: (type, name) -> + metrics[name] = this + @name = name + @instances = {} + @lastAccess = new Date() + @metric = switch type + when "counter" + new prom.Counter({ + name: name, + help: name, + labelNames: ['app','host','status','method', 'path'] + }) + when "summary" + new prom.Summary({ + name: name, + help: name, + maxAgeSeconds: 600, + ageBuckets: 10, + labelNames: ['app', 'host', 'path', 'status_code', 'method', 'collection', 'query'] + }) + when "gauge" + new prom.Gauge({ + name: name, + help: name, + labelNames: ['app','host', 'status'] + }) + + inc: (opts, value) -> + @_execMethod 'inc', opts, value + + observe: (opts, value) -> + @_execMethod 'observe', opts, value + + set: (opts, value) -> + @_execMethod 'set', opts, value + + sweep: () -> + thresh = new Date(Date.now() - 1000 * 60 * PromWrapper.ttlInMinutes) + for key in Object.keys(@instances) + if thresh > @instances[key].time + if process.env['DEBUG_METRICS'] + console.log("Sweeping stale metric instance", @name, opts: @instances[key].opts, key) + @metric.remove(optsAsArgs(@instances[key].opts, @metric.labelNames)...) 
+ + if thresh > @lastAccess + if process.env['DEBUG_METRICS'] + console.log("Sweeping stale metric", @name) + delete metrics[@name] + registry.removeSingleMetric(@name) + + _execMethod: (method, opts, value) -> + opts = extendOpts(opts, @metric.labelNames) + key = optsKey(opts) + @instances[key] = { time: new Date(), opts } unless key == '' + @lastAccess = new Date() + @metric[method](opts, value) + + +unless PromWrapper.sweepRegistered + if process.env['DEBUG_METRICS'] + console.log("Registering sweep method") + PromWrapper.sweepRegistered = true + setInterval( + () -> + if PromWrapper.ttlInMinutes + if process.env['DEBUG_METRICS'] + console.log("Sweeping metrics") + for key in Object.keys(metrics) + metrics[key].sweep() + 60000) + + +module.exports = PromWrapper From e0cf10a88684434dcb2004d8ede293ce000293b2 Mon Sep 17 00:00:00 2001 From: Simon Detheridge Date: Mon, 28 Oct 2019 12:34:04 +0000 Subject: [PATCH 2/3] Fix typo, gague -> gauge --- libraries/metrics/metrics.coffee | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libraries/metrics/metrics.coffee b/libraries/metrics/metrics.coffee index 03e492be8e..256eb3a854 100644 --- a/libraries/metrics/metrics.coffee +++ b/libraries/metrics/metrics.coffee @@ -112,13 +112,13 @@ module.exports = Metrics = gauge : (key, value, sampleRate = 1, opts)-> key = Metrics.buildPromKey(key) - prom.metric('gague', key).set({app: appname, host: hostname, status: opts?.status}, this.sanitizeValue(value)) + prom.metric('gauge', key).set({app: appname, host: hostname, status: opts?.status}, this.sanitizeValue(value)) if process.env['DEBUG_METRICS'] console.log("doing gauge", key, opts) globalGauge: (key, value, sampleRate = 1, opts)-> key = Metrics.buildPromKey(key) - prom.metric('gague', key).set({app: appname, status: opts?.status},this.sanitizeValue(value)) + prom.metric('gauge', key).set({app: appname, status: opts?.status},this.sanitizeValue(value)) mongodb: require "./mongodb" http: require "./http" From 
feecda8ea8efa71e3bac8aac0ca2e402500e6873 Mon Sep 17 00:00:00 2001 From: Simon Detheridge Date: Mon, 28 Oct 2019 12:40:12 +0000 Subject: [PATCH 3/3] Use map instead of hash for metrics --- libraries/metrics/prom_wrapper.coffee | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/libraries/metrics/prom_wrapper.coffee b/libraries/metrics/prom_wrapper.coffee index eb2c132314..cfcca4c8e4 100644 --- a/libraries/metrics/prom_wrapper.coffee +++ b/libraries/metrics/prom_wrapper.coffee @@ -1,6 +1,6 @@ prom = require('prom-client') registry = require('prom-client').register -metrics = {} +metrics = new Map() optsKey = (opts) -> @@ -33,16 +33,16 @@ PromWrapper = registry: registry metric: (type, name) -> - registry.getSingleMetric(name) || new MetricWrapper(type, name) + metrics.get(name) || new MetricWrapper(type, name) collectDefaultMetrics: prom.collectDefaultMetrics class MetricWrapper constructor: (type, name) -> - metrics[name] = this + metrics.set(name, this) @name = name - @instances = {} + @instances = new Map() @lastAccess = new Date() @metric = switch type when "counter" @@ -77,22 +77,22 @@ class MetricWrapper sweep: () -> thresh = new Date(Date.now() - 1000 * 60 * PromWrapper.ttlInMinutes) - for key in Object.keys(@instances) - if thresh > @instances[key].time + @instances.forEach (instance, key) => + if thresh > instance.time if process.env['DEBUG_METRICS'] - console.log("Sweeping stale metric instance", @name, opts: @instances[key].opts, key) - @metric.remove(optsAsArgs(@instances[key].opts, @metric.labelNames)...) + console.log("Sweeping stale metric instance", @name, opts: instance.opts, key) + @metric.remove(optsAsArgs(instance.opts, @metric.labelNames)...) 
if thresh > @lastAccess if process.env['DEBUG_METRICS'] - console.log("Sweeping stale metric", @name) - delete metrics[@name] + console.log("Sweeping stale metric", @name, thresh, @lastAccess) + metrics.delete(@name) registry.removeSingleMetric(@name) _execMethod: (method, opts, value) -> opts = extendOpts(opts, @metric.labelNames) key = optsKey(opts) - @instances[key] = { time: new Date(), opts } unless key == '' + @instances.set(key, { time: new Date(), opts }) unless key == '' @lastAccess = new Date() @metric[method](opts, value) @@ -106,8 +106,8 @@ unless PromWrapper.sweepRegistered if PromWrapper.ttlInMinutes if process.env['DEBUG_METRICS'] console.log("Sweeping metrics") - for key in Object.keys(metrics) - metrics[key].sweep() + metrics.forEach (metric, key) => + metric.sweep() 60000)