Files
overleaf-cep/services/clsi/app.js
Mathias Jakobsen 9c97876268 [web]+clsi] Allow docx import via pandoc (#32004)
Co-authored-by: Jakob Ackermann <jakob.ackermann@overleaf.com>
GitOrigin-RevId: 246b3290ec04867f71545b1a7c5d95d0f68379ff
2026-03-27 09:06:23 +00:00

332 lines
9.9 KiB
JavaScript

// Metrics must be initialized before importing anything else
import '@overleaf/metrics/initialize.js'
import CompileController from './app/js/CompileController.js'
import Settings from '@overleaf/settings'
import logger from '@overleaf/logger'
import LoggerSerializers from './app/js/LoggerSerializers.js'
import Metrics from '@overleaf/metrics'
import smokeTest from './test/smoke/js/SmokeTests.js'
import Errors from './app/js/Errors.js'
import OutputController from './app/js/OutputController.js'
import ProjectPersistenceManager from './app/js/ProjectPersistenceManager.js'
import OutputCacheManager from './app/js/OutputCacheManager.js'
import express from 'express'
import bodyParser from 'body-parser'
import net from 'node:net'
import os from 'node:os'
import OError from '@overleaf/o-error'
import ConversionController from './app/js/ConversionController.js'
import FileUploadMiddleware from './app/js/FileUploadMiddleware.js'
logger.initialize('clsi')
logger.logger.serializers.clsiRequest = LoggerSerializers.clsiRequest
Metrics.open_sockets.monitor(true)
Metrics.memory.monitor(logger)
Metrics.leaked_sockets.monitor(logger)
ProjectPersistenceManager.init()
OutputCacheManager.init()
const app = express()
Metrics.injectMetricsRoute(app)
app.use(Metrics.http.monitor(logger))
// Compile requests can take longer than the default two
// minutes (including file download time), so bump up the
// timeout a bit.
const TIMEOUT = 630 * 1000 // 10.5 minutes - 30 seconds download allowance
app.use(function (req, res, next) {
req.setTimeout(TIMEOUT)
res.setTimeout(TIMEOUT)
res.removeHeader('X-Powered-By')
next()
})
app.param('project_id', function (req, res, next, projectId) {
if (projectId?.match(/^[a-zA-Z0-9_-]+$/)) {
next()
} else {
next(new Error('invalid project id'))
}
})
app.param('user_id', function (req, res, next, userId) {
if (userId?.match(/^[0-9a-f]{24}$/)) {
next()
} else {
next(new Error('invalid user id'))
}
})
app.param('build_id', function (req, res, next, buildId) {
if (buildId?.match(OutputCacheManager.BUILD_REGEX)) {
next()
} else {
next(new OError('invalid build id', { buildId }))
}
})
app.post(
'/project/:project_id/compile',
bodyParser.json({ limit: Settings.compileSizeLimit }),
CompileController.compile
)
app.post('/project/:project_id/compile/stop', CompileController.stopCompile)
app.delete('/project/:project_id', CompileController.clearCache)
app.get('/project/:project_id/sync/code', CompileController.syncFromCode)
app.get('/project/:project_id/sync/pdf', CompileController.syncFromPdf)
app.get('/project/:project_id/wordcount', CompileController.wordcount)
app.get('/project/:project_id/status', CompileController.status)
app.post('/project/:project_id/status', CompileController.status)
// Per-user containers
app.post(
'/project/:project_id/user/:user_id/compile',
bodyParser.json({ limit: Settings.compileSizeLimit }),
CompileController.compile
)
app.post(
'/project/:project_id/user/:user_id/compile/stop',
CompileController.stopCompile
)
app.delete('/project/:project_id/user/:user_id', CompileController.clearCache)
app.get(
'/project/:project_id/user/:user_id/sync/code',
CompileController.syncFromCode
)
app.get(
'/project/:project_id/user/:user_id/sync/pdf',
CompileController.syncFromPdf
)
app.get(
'/project/:project_id/user/:user_id/wordcount',
CompileController.wordcount
)
// This needs to be before GET /project/:project_id/build/:build_id/output/*
app.get(
'/project/:project_id/build/:build_id/output/output.zip',
bodyParser.json(),
OutputController.createOutputZip
)
// This needs to be before GET /project/:project_id/user/:user_id/build/:build_id/output/*
app.get(
'/project/:project_id/user/:user_id/build/:build_id/output/output.zip',
bodyParser.json(),
OutputController.createOutputZip
)
// Conversion endpoints
app.post(
'/convert/docx-to-latex',
FileUploadMiddleware.multerMiddleware,
ConversionController.convertDocxToLaTeX
)
if (process.env.NODE_ENV === 'development' && global.__coverage__) {
app.get('/coverage', (req, res) => {
const coverage = {}
for (const [key, value] of Object.entries(global.__coverage__)) {
coverage[key] = {
...value,
path: value.path.replace('/overleaf/', '/workspace/'),
}
}
res.json({ coverage })
})
}
app.get('/status', (req, res, next) => res.send('CLSI is alive\n'))
Settings.processTooOld = false
if (Settings.processLifespanLimitMs) {
// Pre-emp instances have a maximum lifespan of 24h after which they will be
// shutdown, with a 30s grace period.
// Spread cycling of VMs by up-to 2.4h _before_ their limit to avoid large
// numbers of VMs that are temporarily unavailable (while they reboot).
Settings.processLifespanLimitMs -=
Settings.processLifespanLimitMs * (Math.random() / 10)
logger.info(
{ target: new Date(Date.now() + Settings.processLifespanLimitMs) },
'Lifespan limited'
)
setTimeout(() => {
logger.info({}, 'shutting down, process is too old')
Settings.processTooOld = true
}, Settings.processLifespanLimitMs)
}
function runSmokeTest() {
if (Settings.processTooOld) return
const INTERVAL = 30 * 1000
if (
smokeTest.lastRunSuccessful() &&
CompileController.timeSinceLastSuccessfulCompile() < INTERVAL / 2
) {
logger.debug('skipping smoke tests, got recent successful user compile')
return setTimeout(runSmokeTest, INTERVAL / 2)
}
logger.debug('running smoke tests')
smokeTest.triggerRun(err => {
if (err) logger.error({ err }, 'smoke tests failed')
setTimeout(runSmokeTest, INTERVAL)
})
}
if (Settings.smokeTest) {
runSmokeTest()
}
app.get('/health_check', function (req, res) {
if (Settings.processTooOld) {
return res.status(500).json({ processTooOld: true })
}
if (ProjectPersistenceManager.isAnyDiskCriticalLow()) {
return res.status(500).json({ diskCritical: true })
}
smokeTest.sendLastResult(res)
})
app.get('/smoke_test_force', (req, res) => smokeTest.sendNewResult(res))
app.use(function (error, req, res, next) {
if (error instanceof Errors.NotFoundError) {
logger.debug({ err: error, url: req.url }, 'not found error')
res.sendStatus(404)
} else if (error instanceof Errors.InvalidParameter) {
res.status(400).send(error.message)
} else if (error.code === 'EPIPE') {
// inspect container returns EPIPE when shutting down
res.sendStatus(503) // send 503 Unavailable response
} else {
logger.error({ err: error, url: req.url }, 'server error')
res.sendStatus(error.statusCode || 500)
}
})
let STATE = 'up'
const loadTcpServer = net.createServer(function (socket) {
socket.on('error', function (err) {
if (err.code === 'ECONNRESET') {
// this always comes up, we don't know why
return
}
logger.err({ err }, 'error with socket on load check')
socket.destroy()
})
if (STATE === 'up' && Settings.internal.load_balancer_agent.report_load) {
let availableWorkingCpus
const currentLoad = os.loadavg()[0]
// staging clis's have 1 cpu core only
if (os.cpus().length === 1) {
availableWorkingCpus = 1
} else {
availableWorkingCpus = os.cpus().length - 1
}
const freeLoad = availableWorkingCpus - currentLoad
let freeLoadPercentage = Math.round((freeLoad / availableWorkingCpus) * 100)
if (ProjectPersistenceManager.isAnyDiskCriticalLow()) {
freeLoadPercentage = 0
}
if (ProjectPersistenceManager.isAnyDiskLow()) {
freeLoadPercentage = freeLoadPercentage / 2
}
if (
Settings.internal.load_balancer_agent.allow_maintenance &&
freeLoadPercentage <= 0
) {
// When its 0 the server is set to drain implicitly.
// Drain will move new projects to different servers.
// Drain will keep existing projects assigned to the same server.
// Maint will more existing and new projects to different servers.
socket.write(`maint, 0%\n`, 'ASCII')
} else {
// Ready will cancel the maint state.
socket.write(`up, ready, ${Math.max(freeLoadPercentage, 1)}%\n`, 'ASCII')
if (freeLoadPercentage <= 0) {
// This metric records how often we would have gone into maintenance mode.
Metrics.inc('clsi-prevented-maint')
}
}
socket.end()
} else {
socket.write(`${STATE}\n`, 'ASCII')
socket.end()
}
})
const loadHttpServer = express()
loadHttpServer.post('/state/up', function (req, res, next) {
STATE = 'up'
logger.debug('getting message to set server to down')
res.sendStatus(204)
})
loadHttpServer.post('/state/down', function (req, res, next) {
STATE = 'down'
logger.debug('getting message to set server to down')
res.sendStatus(204)
})
loadHttpServer.post('/state/maint', function (req, res, next) {
STATE = 'maint'
logger.debug('getting message to set server to maint')
res.sendStatus(204)
})
const port = Settings.internal.clsi.port
const host = Settings.internal.clsi.host
const loadTcpPort = Settings.internal.load_balancer_agent.load_port
const loadHttpPort = Settings.internal.load_balancer_agent.local_port
if (import.meta.main) {
// Called directly
// handle uncaught exceptions when running in production
if (Settings.catchErrors) {
process.removeAllListeners('uncaughtException')
process.on('uncaughtException', error =>
logger.error({ err: error }, 'uncaughtException')
)
}
app.listen(port, host, error => {
if (error) {
logger.fatal({ error }, `Error starting CLSI on ${host}:${port}`)
} else {
logger.debug(`CLSI starting up, listening on ${host}:${port}`)
}
})
loadTcpServer.listen(loadTcpPort, host, function (error) {
if (error != null) {
throw error
}
logger.debug(`Load tcp agent listening on load port ${loadTcpPort}`)
})
loadHttpServer.listen(loadHttpPort, host, function (error) {
if (error != null) {
throw error
}
logger.debug(`Load http agent listening on load port ${loadHttpPort}`)
})
}
export default app