Merge pull request #22915 from overleaf/em-blob-caching

Improved caching for blobs

GitOrigin-RevId: c5113106ef239b201ae8f66bb3539a52c65ddb60
This commit is contained in:
Eric Mc Sween
2025-01-27 09:29:08 -05:00
committed by Copybot
parent 15ca1d003a
commit 77bf734d06
5 changed files with 136 additions and 51 deletions

View File

@@ -3,9 +3,12 @@
import { pipeline } from 'node:stream/promises'
import logger from '@overleaf/logger'
import { expressify } from '@overleaf/promise-utils'
import Metrics from '@overleaf/metrics'
import FileStoreHandler from './FileStoreHandler.js'
import ProjectLocator from '../Project/ProjectLocator.js'
import HistoryManager from '../History/HistoryManager.js'
import Errors from '../Errors/Errors.js'
import Features from '../../infrastructure/Features.js'
import { preparePlainTextResponse } from '../../infrastructure/Response.js'
async function getFile(req, res) {
@@ -44,16 +47,51 @@ async function getFile(req, res) {
}
}
const stream = await FileStoreHandler.promises.getFileStream(
projectId,
fileId,
queryString
)
// This metric has this name because it used to be recorded in a middleware.
// It tracks how many files have a hash and can be served by the history
// system.
Metrics.inc('fileToBlobRedirectMiddleware', 1, {
method: 'GET',
status: Boolean(file?.hash),
})
let stream, contentLength
try {
if (Features.hasFeature('project-history-blobs') && file?.hash) {
// Get the file from history
;({ stream, contentLength } =
await HistoryManager.promises.requestBlobWithFallback(
projectId,
file.hash,
fileId
))
} else {
// The file-hash is missing. Fall back to filestore.
stream = await FileStoreHandler.promises.getFileStream(
projectId,
fileId,
queryString
)
}
} catch (err) {
if (err instanceof Errors.NotFoundError) {
return res.status(404).end()
} else {
logger.err(
{ err, projectId, fileId, queryString },
'error finding element for downloading file'
)
return res.status(500).end()
}
}
// mobile safari will try to render html files, prevent this
if (isMobileSafari(userAgent) && isHtml(file)) {
preparePlainTextResponse(res)
}
if (contentLength) {
res.setHeader('Content-Length', contentLength)
}
res.setContentDisposition('attachment', { filename: file.name })
// allow the browser to cache these immutable files
// note: both "private" and "max-age" appear to be required for caching
@@ -77,11 +115,17 @@ async function getFileHead(req, res) {
const projectId = req.params.Project_id
const fileId = req.params.File_id
let fileSize
let file
try {
fileSize = await FileStoreHandler.promises.getFileSize(projectId, fileId)
file = await ProjectLocator.promises.findElement({
project_id: projectId,
element_id: fileId,
type: 'file',
})
} catch (err) {
if (err instanceof Errors.NotFoundError) {
// res.sendStatus() sends a description of the status as body.
// Using res.status().end() avoids sending that fake body.
return res.status(404).end()
} else {
// Instead of using the global error handler, we send an empty response in
@@ -96,6 +140,37 @@ async function getFileHead(req, res) {
}
}
// This metric has this name because it used to be recorded in a middleware.
// It tracks how many files have a hash and can be served by the history
// system.
Metrics.inc('fileToBlobRedirectMiddleware', 1, {
method: 'HEAD',
status: Boolean(file?.hash),
})
let fileSize
try {
if (Features.hasFeature('project-history-blobs') && file?.hash) {
const { contentLength } =
await HistoryManager.promises.requestBlobWithFallback(
projectId,
file.hash,
fileId,
'HEAD'
)
fileSize = contentLength
} else {
fileSize = await FileStoreHandler.promises.getFileSize(projectId, fileId)
}
} catch (err) {
if (err instanceof Errors.NotFoundError) {
return res.status(404).end()
} else {
logger.err({ err, projectId, fileId }, 'error obtaining file size')
return res.status(500).end()
}
}
res.setHeader('Content-Length', fileSize)
res.status(200).end()
}

View File

@@ -2,7 +2,6 @@ let HistoryController
const OError = require('@overleaf/o-error')
const async = require('async')
const logger = require('@overleaf/logger')
const metrics = require('@overleaf/metrics')
const request = require('request')
const settings = require('@overleaf/settings')
const SessionManager = require('../Authentication/SessionManager')
@@ -12,7 +11,6 @@ const Errors = require('../Errors/Errors')
const HistoryManager = require('./HistoryManager')
const ProjectDetailsHandler = require('../Project/ProjectDetailsHandler')
const ProjectEntityUpdateHandler = require('../Project/ProjectEntityUpdateHandler')
const ProjectLocator = require('../Project/ProjectLocator')
const RestoreManager = require('./RestoreManager')
const { pipeline } = require('stream')
const Stream = require('stream')
@@ -20,6 +18,14 @@ const { prepareZipAttachment } = require('../../infrastructure/Response')
const Features = require('../../infrastructure/Features')
const { expressify } = require('@overleaf/promise-utils')
// Number of seconds after which the browser should send a request to revalidate
// blobs
const REVALIDATE_BLOB_AFTER_SECONDS = 86400 // 1 day
// Number of seconds during which the browser can serve a stale response while
// revalidating
const STALE_WHILE_REVALIDATE_SECONDS = 365 * 86400 // 1 year
async function getBlob(req, res) {
await requestBlob('GET', req, res)
}
@@ -30,6 +36,13 @@ async function headBlob(req, res) {
async function requestBlob(method, req, res) {
const { project_id: projectId, hash } = req.params
// Handle conditional GET request
if (req.get('If-None-Match') === hash) {
setBlobCacheHeaders(res, hash)
return res.status(304).end()
}
const range = req.get('Range')
let url, stream, source, contentLength
try {
@@ -47,12 +60,9 @@ async function requestBlob(method, req, res) {
}
res.appendHeader('X-Served-By', source)
// allow the browser to cache these immutable files
// note: both "private" and "max-age" appear to be required for caching
res.setHeader('Cache-Control', 'private, max-age=3600')
if (contentLength) res.setHeader('Content-Length', contentLength) // set on HEAD
res.setHeader('Content-Type', 'application/octet-stream')
setBlobCacheHeaders(res, hash)
try {
await Stream.promises.pipeline(stream, res)
@@ -66,42 +76,22 @@ async function requestBlob(method, req, res) {
}
}
function setBlobCacheHeaders(res, etag) {
// Blobs are immutable, so they can in principle be cached indefinitely. Here,
// we ask the browser to cache them for some time, but then check back
// regularly in case they changed (even though they shouldn't). This is a
// precaution in case a bug makes us send bad data through that endpoint.
res.set(
'Cache-Control',
`private, max-age=${REVALIDATE_BLOB_AFTER_SECONDS}, stale-while-revalidate=${STALE_WHILE_REVALIDATE_SECONDS}`
)
res.set('ETag', etag)
}
module.exports = HistoryController = {
getBlob: expressify(getBlob),
headBlob: expressify(headBlob),
/** Middleware to translate fileId requests to use the blob API.
*
* e.g. incoming requests to /project/:project_id/file/:file_id are
* internally redirected to /project/:project_id/blob/:hash if the file
* has a hash.
* */
fileToBlobRedirectMiddleware(req, res, next) {
if (!Features.hasFeature('project-history-blobs')) {
return next()
}
const projectId = req.params.Project_id
const fileId = req.params.File_id
ProjectLocator.findElement(
{ project_id: projectId, element_id: fileId, type: 'file' },
(err, file) => {
if (err) {
return next(err)
}
metrics.inc('fileToBlobRedirectMiddleware', 1, {
method: req.method,
status: Boolean(file?.hash),
})
if (file?.hash) {
req.url = `/project/${projectId}/blob/${file.hash}`
next('route') // redirect to blob route
} else {
next() // continue with file route
}
}
)
},
proxyToHistoryApi(req, res, next) {
const userId = SessionManager.getLoggedInUserId(req.session)
const url = settings.apis.project_history.url + req.url

View File

@@ -31,7 +31,6 @@ import HealthCheckController from './Features/HealthCheck/HealthCheckController.
import ProjectDownloadsController from './Features/Downloads/ProjectDownloadsController.mjs'
import FileStoreController from './Features/FileStore/FileStoreController.mjs'
import DocumentUpdaterController from './Features/DocumentUpdater/DocumentUpdaterController.mjs'
import HistoryController from './Features/History/HistoryController.js'
import HistoryRouter from './Features/History/HistoryRouter.mjs'
import ExportsController from './Features/Exports/ExportsController.mjs'
import PasswordResetRouter from './Features/PasswordReset/PasswordResetRouter.mjs'
@@ -279,6 +278,7 @@ async function initialize(webRouter, privateApiRouter, publicApiRouter) {
TemplatesRouter.apply(webRouter)
UserMembershipRouter.apply(webRouter)
TokenAccessRouter.apply(webRouter)
HistoryRouter.apply(webRouter, privateApiRouter)
await Modules.applyRouter(webRouter, privateApiRouter, publicApiRouter)
@@ -533,19 +533,14 @@ async function initialize(webRouter, privateApiRouter, publicApiRouter) {
webRouter.head(
'/Project/:Project_id/file/:File_id',
AuthorizationMiddleware.ensureUserCanReadProject,
HistoryController.fileToBlobRedirectMiddleware,
FileStoreController.getFileHead
)
webRouter.get(
'/Project/:Project_id/file/:File_id',
AuthorizationMiddleware.ensureUserCanReadProject,
HistoryController.fileToBlobRedirectMiddleware,
FileStoreController.getFile
)
// Has to be applied after any route using fileToBlobRedirectMiddleware
HistoryRouter.apply(webRouter, privateApiRouter)
webRouter.get(
'/Project/:Project_id/doc/:Doc_id/download', // "download" suffix to avoid conflict with private API route at doc/:doc_id
AuthorizationMiddleware.ensureUserCanReadProject,

View File

@@ -146,6 +146,22 @@ describe('HistoryTests', function () {
expect(body).to.equal(fileContent)
await expectHistoryV1Hit()
})
it('should set cache headers', async function () {
const { response } = await user.doRequest('GET', fileURL)
expect(response.headers['cache-control']).to.equal(
'private, max-age=86400, stale-while-revalidate=31536000'
)
expect(response.headers.etag).to.equal(fileHash)
})
it('should return a 304 when revalidating', async function () {
const { response, body } = await user.doRequest('GET', {
url: fileURL,
headers: { 'If-None-Match': fileHash },
})
expect(response.statusCode).to.equal(304)
expect(response.headers.etag).to.equal(fileHash)
expect(body).to.equal('')
})
}
it('should return 404 without fallback', async function () {
MockV1HistoryApi.reset()
@@ -153,6 +169,12 @@ describe('HistoryTests', function () {
expect(response.statusCode).to.equal(404)
await expectNoIncrement()
})
it('should not set cache headers on 404', async function () {
MockV1HistoryApi.reset()
const { response } = await user.doRequest('GET', fileURL)
expect(response.headers).not.to.have.property('cache-control')
expect(response.headers).not.to.have.property('etag')
})
it('should fetch the file size from filestore when missing in history-v1', async function () {
MockV1HistoryApi.reset()
const { response, body } = await user.doRequest(

View File

@@ -21,6 +21,7 @@ describe('FileStoreController', function () {
}
this.ProjectLocator = { promises: { findElement: sinon.stub() } }
this.Stream = { pipeline: sinon.stub().resolves() }
this.HistoryManager = {}
this.controller = await esmock.strict(MODULE_PATH, {
'node:stream/promises': this.Stream,
'@overleaf/settings': this.settings,
@@ -28,6 +29,8 @@ describe('FileStoreController', function () {
this.ProjectLocator,
'../../../../app/src/Features/FileStore/FileStoreHandler':
this.FileStoreHandler,
'../../../../app/src/Features/History/HistoryManager':
this.HistoryManager,
})
this.stream = {}
this.projectId = '2k3j1lk3j21lk3j'