mirror of
https://github.com/yu-i-i/overleaf-cep.git
synced 2026-06-02 21:59:00 +02:00
8c39add865
* [mics] fix "app" label in clsi-cache metrics in dev-env * [clsi-cache] validate filePath when processing file * [clsi-cache] meter ingress and egress bandwidth Files are downloaded directly from nginx, hence we cannot meter egress in clsi-cache easily. GitOrigin-RevId: 24de8c41728f0e9c984113c1470dec6153e75f20
248 lines
6.4 KiB
JavaScript
248 lines
6.4 KiB
JavaScript
const _ = require('lodash')
|
|
const {
|
|
fetchNothing,
|
|
fetchRedirectWithResponse,
|
|
RequestFailedError,
|
|
} = require('@overleaf/fetch-utils')
|
|
const logger = require('@overleaf/logger')
|
|
const Settings = require('@overleaf/settings')
|
|
const OError = require('@overleaf/o-error')
|
|
const { NotFoundError, InvalidNameError } = require('../Errors/Errors')
|
|
|
|
/**
 * Reject file names that must never be requested from clsi-cache.
 *
 * Keep in sync with validateFilename in services/clsi-cache/app/js/utils.js
 *
 * A name is accepted when it is one of the fixed compile outputs listed in
 * the body, or when it ends in `.blg` (such files may have custom names and
 * can be in nested folders).
 *
 * @param {string} filename
 * @throws {InvalidNameError} 'path traversal' when any `/`-separated segment
 *   is `..`; 'bad filename' when the name is not on the allow-list.
 */
function validateFilename(filename) {
  // Check traversal first so that e.g. "../x.blg" is reported as traversal
  // rather than slipping through the ".blg" suffix rule below.
  if (filename.split('/').includes('..')) {
    throw new InvalidNameError('path traversal')
  }

  const allowedNames = [
    'output.blg',
    'output.log',
    'output.pdf',
    'output.synctex.gz',
    'output.overleaf.json',
    'output.tar.gz',
  ]
  if (!allowedNames.includes(filename) && !filename.endsWith('.blg')) {
    throw new InvalidNameError('bad filename')
  }
}
|
|
|
|
/**
 * Collapse an output file path into a label drawn from a small, fixed set.
 *
 * Keep in sync with getIngressLabel in services/clsi-cache/app/js/utils.js
 *
 * NOTE(review): presumably the label is used for egress metrics, where
 * unbounded label values must be avoided -- confirm against the callers.
 *
 * @param {string} fsPath
 * @return {string} 'output.blg' for any path ending in `.blg`, otherwise the
 *   path itself, unchanged.
 */
function getEgressLabel(fsPath) {
  // .blg files may have custom names and can be in nested folders.
  // The rest is limited to 5 file names via validateFilename: output.pdf, etc.
  return fsPath.endsWith('.blg') ? 'output.blg' : fsPath
}
|
|
|
|
/**
 * Clear the cache on all clsi-cache instances.
 *
 * Sends a DELETE for `/project/:projectId[/user/:userId]/output` to every
 * instance in `Settings.apis.clsiCache.instances` in parallel. Each request
 * gets its own 15s timeout.
 *
 * @param projectId
 * @param userId - optional; the `/user/...` segment is omitted when falsy
 * @return {Promise<void>} rejects as soon as any instance fails; the error is
 *   tagged with the `url` and `shard` of the failing instance.
 */
async function clearCache(projectId, userId) {
  const userSegment = userId ? `/user/${userId}` : ''
  const path = `/project/${projectId}${userSegment}/output`

  await Promise.all(
    Settings.apis.clsiCache.instances.map(async ({ url, shard }) => {
      const u = new URL(url)
      u.pathname = path
      try {
        await fetchNothing(u, {
          method: 'DELETE',
          signal: AbortSignal.timeout(15_000),
        })
      } catch (err) {
        throw OError.tag(err, 'clear clsi-cache', { url, shard })
      }
    })
  )
}
|
|
|
|
/**
 * Get an output file from a specific build.
 *
 * Validates the file name and build id, then looks up
 * `/project/:projectId[/user/:userId]/build/:buildId/search/output/:filename`
 * via getRedirectWithFallback.
 *
 * @param projectId
 * @param userId - optional; the `/user/...` segment is omitted when falsy
 * @param buildId - must consist of lowercase hex digits and dashes only
 * @param filename - must pass validateFilename
 * @param signal - abort signal for the lookup; defaults to a 15s timeout
 * @return {Promise<{size: number, zone: string, shard: string, location: string, lastModified: Date, allFiles: string[]}>}
 * @throws {InvalidNameError} for a rejected filename or a malformed buildId
 */
async function getOutputFile(
  projectId,
  userId,
  buildId,
  filename,
  signal = AbortSignal.timeout(15_000)
) {
  validateFilename(filename)
  // The build id is interpolated into the request path below, so only allow
  // the characters a build id is expected to contain.
  if (!/^[a-f0-9-]+$/.test(buildId)) {
    throw new InvalidNameError('bad buildId')
  }

  const userSegment = userId ? `/user/${userId}` : ''
  const path = `/project/${projectId}${userSegment}/build/${buildId}/search/output/${filename}`
  return getRedirectWithFallback(projectId, userId, path, signal)
}
|
|
|
|
/**
 * Get an output file from the most recent build.
 *
 * Validates the file name, then looks up
 * `/project/:projectId[/user/:userId]/latest/output/:filename` via
 * getRedirectWithFallback.
 *
 * @param projectId
 * @param userId - optional; the `/user/...` segment is omitted when falsy
 * @param filename - must pass validateFilename
 * @param signal - abort signal for the lookup; defaults to a 15s timeout
 * @return {Promise<{size: number, zone: string, shard: string, location: string, lastModified: Date, allFiles: string[]}>}
 * @throws {InvalidNameError} for a rejected filename
 */
async function getLatestOutputFile(
  projectId,
  userId,
  filename,
  signal = AbortSignal.timeout(15_000)
) {
  validateFilename(filename)

  const userSegment = userId ? `/user/${userId}` : ''
  const path = `/project/${projectId}${userSegment}/latest/output/${filename}`
  return getRedirectWithFallback(projectId, userId, path, signal)
}
|
|
|
|
/**
 * Request the given path from any of the clsi-cache instances.
 *
 * Some of them might be down temporarily. Try the next one until we receive a redirect or 404.
 *
 * This function is similar to the Coordinator in the clsi-cache, notable differences:
 * - all the logic for sorting builds is in clsi-cache (re-used by clsi and web)
 * - fan-out (1 client performs lookup on many clsi-cache instances) is "central" in clsi-cache, resulting in better connection re-use
 * - we only cross the k8s cluster boundary via an internal GCLB once ($$$)
 *
 * The cache entry is assembled from the redirect location and the
 * `X-Zone`, `X-Shard`, `X-Last-Modified`, `X-Content-Length` and
 * `X-All-Files` response headers.
 *
 * @param projectId - only used for logging
 * @param userId - only used for logging
 * @param path - request path, set on each instance URL
 * @param signal - abort signal shared by all attempts; defaults to a 15s timeout
 * @return {Promise<{size: number, zone: string, shard: string, location: string, lastModified: Date, allFiles: string[]}>}
 * @throws {NotFoundError} when an instance answers 404 or every instance failed
 */
async function getRedirectWithFallback(
  projectId,
  userId,
  path,
  signal = AbortSignal.timeout(15_000)
) {
  // Avoid hitting the same instance first all the time.
  const instances = _.shuffle(Settings.apis.clsiCache.instances)
  for (const { url, shard } of instances) {
    const u = new URL(url)
    u.pathname = path
    try {
      const { location, response } = await fetchRedirectWithResponse(u, {
        signal,
      })
      const { headers } = response

      // The file list is either plain JSON (starts with "[") or the same JSON
      // encoded as base64url.
      let allFilesRaw = headers.get('X-All-Files')
      if (!allFilesRaw.startsWith('[')) {
        allFilesRaw = Buffer.from(allFilesRaw, 'base64url').toString()
      }

      // Success, return the cache entry.
      return {
        location,
        zone: headers.get('X-Zone'),
        shard: headers.get('X-Shard') || 'cache',
        lastModified: new Date(headers.get('X-Last-Modified')),
        size: parseInt(headers.get('X-Content-Length'), 10),
        allFiles: JSON.parse(allFilesRaw),
      }
    } catch (err) {
      if (err instanceof RequestFailedError && err.response.status === 404) {
        break // No clsi-cache instance has cached something for this project/user.
      }
      logger.warn(
        { err, projectId, userId, url, shard },
        'getLatestOutputFile from clsi-cache failed'
      )
      // This clsi-cache instance is down, try the next backend.
    }
  }
  throw new NotFoundError('nothing cached yet')
}
|
|
|
|
/**
 * Populate the clsi-cache for the given project/user with the provided source
 *
 * This is either another project, or a template (id+version).
 *
 * Sends a POST to `/project/:projectId/user/:userId/import-from` on the
 * clsi-cache instance whose `shard` matches `options.shard`.
 *
 * @param projectId
 * @param userId
 * @param {Object} options
 * @param options.sourceProjectId
 * @param options.templateId
 * @param options.templateVersionId
 * @param options.lastUpdated
 * @param options.shard - selects the instance in Settings.apis.clsiCache.instances
 * @param options.signal - abort signal passed to the request
 * @return {Promise<void>}
 * @throws {NotFoundError} when the instance answers 404
 * @throws {OError} when no configured instance matches `options.shard`
 */
async function prepareCacheSource(
  projectId,
  userId,
  { sourceProjectId, templateId, templateVersionId, lastUpdated, shard, signal }
) {
  const instance = Settings.apis.clsiCache.instances.find(
    i => i.shard === shard
  )
  if (!instance) {
    // Fail with a descriptive error instead of a TypeError on `undefined.url`.
    throw new OError('unknown clsi-cache shard', { shard })
  }
  const url = new URL(
    `/project/${projectId}/user/${userId}/import-from`,
    instance.url
  )
  try {
    await fetchNothing(url, {
      method: 'POST',
      json: {
        sourceProjectId,
        lastUpdated,
        templateId,
        templateVersionId,
      },
      signal,
    })
  } catch (err) {
    if (err instanceof RequestFailedError && err.response.status === 404) {
      throw new NotFoundError()
    }
    throw err
  }
}
|
|
|
|
module.exports = {
|
|
getEgressLabel,
|
|
clearCache,
|
|
getOutputFile,
|
|
getLatestOutputFile,
|
|
prepareCacheSource,
|
|
}
|