mirror of
https://github.com/yu-i-i/overleaf-cep.git
synced 2026-06-01 21:31:36 +02:00
Merge pull request #22915 from overleaf/em-blob-caching
Improved caching for blobs GitOrigin-RevId: c5113106ef239b201ae8f66bb3539a52c65ddb60
This commit is contained in:
@@ -3,9 +3,12 @@
|
||||
import { pipeline } from 'node:stream/promises'
|
||||
import logger from '@overleaf/logger'
|
||||
import { expressify } from '@overleaf/promise-utils'
|
||||
import Metrics from '@overleaf/metrics'
|
||||
import FileStoreHandler from './FileStoreHandler.js'
|
||||
import ProjectLocator from '../Project/ProjectLocator.js'
|
||||
import HistoryManager from '../History/HistoryManager.js'
|
||||
import Errors from '../Errors/Errors.js'
|
||||
import Features from '../../infrastructure/Features.js'
|
||||
import { preparePlainTextResponse } from '../../infrastructure/Response.js'
|
||||
|
||||
async function getFile(req, res) {
|
||||
@@ -44,16 +47,51 @@ async function getFile(req, res) {
|
||||
}
|
||||
}
|
||||
|
||||
const stream = await FileStoreHandler.promises.getFileStream(
|
||||
projectId,
|
||||
fileId,
|
||||
queryString
|
||||
)
|
||||
// This metric has this name because it used to be recorded in a middleware.
|
||||
// It tracks how many files have a hash and can be served by the history
|
||||
// system.
|
||||
Metrics.inc('fileToBlobRedirectMiddleware', 1, {
|
||||
method: 'GET',
|
||||
status: Boolean(file?.hash),
|
||||
})
|
||||
|
||||
let stream, contentLength
|
||||
try {
|
||||
if (Features.hasFeature('project-history-blobs') && file?.hash) {
|
||||
// Get the file from history
|
||||
;({ stream, contentLength } =
|
||||
await HistoryManager.promises.requestBlobWithFallback(
|
||||
projectId,
|
||||
file.hash,
|
||||
fileId
|
||||
))
|
||||
} else {
|
||||
// The file-hash is missing. Fall back to filestore.
|
||||
stream = await FileStoreHandler.promises.getFileStream(
|
||||
projectId,
|
||||
fileId,
|
||||
queryString
|
||||
)
|
||||
}
|
||||
} catch (err) {
|
||||
if (err instanceof Errors.NotFoundError) {
|
||||
return res.status(404).end()
|
||||
} else {
|
||||
logger.err(
|
||||
{ err, projectId, fileId, queryString },
|
||||
'error finding element for downloading file'
|
||||
)
|
||||
return res.status(500).end()
|
||||
}
|
||||
}
|
||||
|
||||
// mobile safari will try to render html files, prevent this
|
||||
if (isMobileSafari(userAgent) && isHtml(file)) {
|
||||
preparePlainTextResponse(res)
|
||||
}
|
||||
if (contentLength) {
|
||||
res.setHeader('Content-Length', contentLength)
|
||||
}
|
||||
res.setContentDisposition('attachment', { filename: file.name })
|
||||
// allow the browser to cache these immutable files
|
||||
// note: both "private" and "max-age" appear to be required for caching
|
||||
@@ -77,11 +115,17 @@ async function getFileHead(req, res) {
|
||||
const projectId = req.params.Project_id
|
||||
const fileId = req.params.File_id
|
||||
|
||||
let fileSize
|
||||
let file
|
||||
try {
|
||||
fileSize = await FileStoreHandler.promises.getFileSize(projectId, fileId)
|
||||
file = await ProjectLocator.promises.findElement({
|
||||
project_id: projectId,
|
||||
element_id: fileId,
|
||||
type: 'file',
|
||||
})
|
||||
} catch (err) {
|
||||
if (err instanceof Errors.NotFoundError) {
|
||||
// res.sendStatus() sends a description of the status as body.
|
||||
// Using res.status().end() avoids sending that fake body.
|
||||
return res.status(404).end()
|
||||
} else {
|
||||
// Instead of using the global error handler, we send an empty response in
|
||||
@@ -96,6 +140,37 @@ async function getFileHead(req, res) {
|
||||
}
|
||||
}
|
||||
|
||||
// This metric has this name because it used to be recorded in a middleware.
|
||||
// It tracks how many files have a hash and can be served by the history
|
||||
// system.
|
||||
Metrics.inc('fileToBlobRedirectMiddleware', 1, {
|
||||
method: 'HEAD',
|
||||
status: Boolean(file?.hash),
|
||||
})
|
||||
|
||||
let fileSize
|
||||
try {
|
||||
if (Features.hasFeature('project-history-blobs') && file?.hash) {
|
||||
const { contentLength } =
|
||||
await HistoryManager.promises.requestBlobWithFallback(
|
||||
projectId,
|
||||
file.hash,
|
||||
fileId,
|
||||
'HEAD'
|
||||
)
|
||||
fileSize = contentLength
|
||||
} else {
|
||||
fileSize = await FileStoreHandler.promises.getFileSize(projectId, fileId)
|
||||
}
|
||||
} catch (err) {
|
||||
if (err instanceof Errors.NotFoundError) {
|
||||
return res.status(404).end()
|
||||
} else {
|
||||
logger.err({ err, projectId, fileId }, 'error obtaining file size')
|
||||
return res.status(500).end()
|
||||
}
|
||||
}
|
||||
|
||||
res.setHeader('Content-Length', fileSize)
|
||||
res.status(200).end()
|
||||
}
|
||||
|
||||
@@ -2,7 +2,6 @@ let HistoryController
|
||||
const OError = require('@overleaf/o-error')
|
||||
const async = require('async')
|
||||
const logger = require('@overleaf/logger')
|
||||
const metrics = require('@overleaf/metrics')
|
||||
const request = require('request')
|
||||
const settings = require('@overleaf/settings')
|
||||
const SessionManager = require('../Authentication/SessionManager')
|
||||
@@ -12,7 +11,6 @@ const Errors = require('../Errors/Errors')
|
||||
const HistoryManager = require('./HistoryManager')
|
||||
const ProjectDetailsHandler = require('../Project/ProjectDetailsHandler')
|
||||
const ProjectEntityUpdateHandler = require('../Project/ProjectEntityUpdateHandler')
|
||||
const ProjectLocator = require('../Project/ProjectLocator')
|
||||
const RestoreManager = require('./RestoreManager')
|
||||
const { pipeline } = require('stream')
|
||||
const Stream = require('stream')
|
||||
@@ -20,6 +18,14 @@ const { prepareZipAttachment } = require('../../infrastructure/Response')
|
||||
const Features = require('../../infrastructure/Features')
|
||||
const { expressify } = require('@overleaf/promise-utils')
|
||||
|
||||
// Number of seconds after which the browser should send a request to revalidate
|
||||
// blobs
|
||||
const REVALIDATE_BLOB_AFTER_SECONDS = 86400 // 1 day
|
||||
|
||||
// Number of seconds during which the browser can serve a stale response while
|
||||
// revalidating
|
||||
const STALE_WHILE_REVALIDATE_SECONDS = 365 * 86400 // 1 year
|
||||
|
||||
async function getBlob(req, res) {
|
||||
await requestBlob('GET', req, res)
|
||||
}
|
||||
@@ -30,6 +36,13 @@ async function headBlob(req, res) {
|
||||
|
||||
async function requestBlob(method, req, res) {
|
||||
const { project_id: projectId, hash } = req.params
|
||||
|
||||
// Handle conditional GET request
|
||||
if (req.get('If-None-Match') === hash) {
|
||||
setBlobCacheHeaders(res, hash)
|
||||
return res.status(304).end()
|
||||
}
|
||||
|
||||
const range = req.get('Range')
|
||||
let url, stream, source, contentLength
|
||||
try {
|
||||
@@ -47,12 +60,9 @@ async function requestBlob(method, req, res) {
|
||||
}
|
||||
res.appendHeader('X-Served-By', source)
|
||||
|
||||
// allow the browser to cache these immutable files
|
||||
// note: both "private" and "max-age" appear to be required for caching
|
||||
res.setHeader('Cache-Control', 'private, max-age=3600')
|
||||
|
||||
if (contentLength) res.setHeader('Content-Length', contentLength) // set on HEAD
|
||||
res.setHeader('Content-Type', 'application/octet-stream')
|
||||
setBlobCacheHeaders(res, hash)
|
||||
|
||||
try {
|
||||
await Stream.promises.pipeline(stream, res)
|
||||
@@ -66,42 +76,22 @@ async function requestBlob(method, req, res) {
|
||||
}
|
||||
}
|
||||
|
||||
function setBlobCacheHeaders(res, etag) {
|
||||
// Blobs are immutable, so they can in principle be cached indefinitely. Here,
|
||||
// we ask the browser to cache them for some time, but then check back
|
||||
// regularly in case they changed (even though they shouldn't). This is a
|
||||
// precaution in case a bug makes us send bad data through that endpoint.
|
||||
res.set(
|
||||
'Cache-Control',
|
||||
`private, max-age=${REVALIDATE_BLOB_AFTER_SECONDS}, stale-while-revalidate=${STALE_WHILE_REVALIDATE_SECONDS}`
|
||||
)
|
||||
res.set('ETag', etag)
|
||||
}
|
||||
|
||||
module.exports = HistoryController = {
|
||||
getBlob: expressify(getBlob),
|
||||
headBlob: expressify(headBlob),
|
||||
|
||||
/** Middleware to translate fileId requests to use the blob API.
|
||||
*
|
||||
* e.g. incoming requests to /project/:project_id/file/:file_id are
|
||||
* internally redirected to /project/:project_id/blob/:hash if the file
|
||||
* has a hash.
|
||||
* */
|
||||
fileToBlobRedirectMiddleware(req, res, next) {
|
||||
if (!Features.hasFeature('project-history-blobs')) {
|
||||
return next()
|
||||
}
|
||||
const projectId = req.params.Project_id
|
||||
const fileId = req.params.File_id
|
||||
ProjectLocator.findElement(
|
||||
{ project_id: projectId, element_id: fileId, type: 'file' },
|
||||
(err, file) => {
|
||||
if (err) {
|
||||
return next(err)
|
||||
}
|
||||
metrics.inc('fileToBlobRedirectMiddleware', 1, {
|
||||
method: req.method,
|
||||
status: Boolean(file?.hash),
|
||||
})
|
||||
if (file?.hash) {
|
||||
req.url = `/project/${projectId}/blob/${file.hash}`
|
||||
next('route') // redirect to blob route
|
||||
} else {
|
||||
next() // continue with file route
|
||||
}
|
||||
}
|
||||
)
|
||||
},
|
||||
|
||||
proxyToHistoryApi(req, res, next) {
|
||||
const userId = SessionManager.getLoggedInUserId(req.session)
|
||||
const url = settings.apis.project_history.url + req.url
|
||||
|
||||
@@ -31,7 +31,6 @@ import HealthCheckController from './Features/HealthCheck/HealthCheckController.
|
||||
import ProjectDownloadsController from './Features/Downloads/ProjectDownloadsController.mjs'
|
||||
import FileStoreController from './Features/FileStore/FileStoreController.mjs'
|
||||
import DocumentUpdaterController from './Features/DocumentUpdater/DocumentUpdaterController.mjs'
|
||||
import HistoryController from './Features/History/HistoryController.js'
|
||||
import HistoryRouter from './Features/History/HistoryRouter.mjs'
|
||||
import ExportsController from './Features/Exports/ExportsController.mjs'
|
||||
import PasswordResetRouter from './Features/PasswordReset/PasswordResetRouter.mjs'
|
||||
@@ -279,6 +278,7 @@ async function initialize(webRouter, privateApiRouter, publicApiRouter) {
|
||||
TemplatesRouter.apply(webRouter)
|
||||
UserMembershipRouter.apply(webRouter)
|
||||
TokenAccessRouter.apply(webRouter)
|
||||
HistoryRouter.apply(webRouter, privateApiRouter)
|
||||
|
||||
await Modules.applyRouter(webRouter, privateApiRouter, publicApiRouter)
|
||||
|
||||
@@ -533,19 +533,14 @@ async function initialize(webRouter, privateApiRouter, publicApiRouter) {
|
||||
webRouter.head(
|
||||
'/Project/:Project_id/file/:File_id',
|
||||
AuthorizationMiddleware.ensureUserCanReadProject,
|
||||
HistoryController.fileToBlobRedirectMiddleware,
|
||||
FileStoreController.getFileHead
|
||||
)
|
||||
webRouter.get(
|
||||
'/Project/:Project_id/file/:File_id',
|
||||
AuthorizationMiddleware.ensureUserCanReadProject,
|
||||
HistoryController.fileToBlobRedirectMiddleware,
|
||||
FileStoreController.getFile
|
||||
)
|
||||
|
||||
// Has to be applied after any route using fileToBlobRedirectMiddleware
|
||||
HistoryRouter.apply(webRouter, privateApiRouter)
|
||||
|
||||
webRouter.get(
|
||||
'/Project/:Project_id/doc/:Doc_id/download', // "download" suffix to avoid conflict with private API route at doc/:doc_id
|
||||
AuthorizationMiddleware.ensureUserCanReadProject,
|
||||
|
||||
@@ -146,6 +146,22 @@ describe('HistoryTests', function () {
|
||||
expect(body).to.equal(fileContent)
|
||||
await expectHistoryV1Hit()
|
||||
})
|
||||
it('should set cache headers', async function () {
|
||||
const { response } = await user.doRequest('GET', fileURL)
|
||||
expect(response.headers['cache-control']).to.equal(
|
||||
'private, max-age=86400, stale-while-revalidate=31536000'
|
||||
)
|
||||
expect(response.headers.etag).to.equal(fileHash)
|
||||
})
|
||||
it('should return a 304 when revalidating', async function () {
|
||||
const { response, body } = await user.doRequest('GET', {
|
||||
url: fileURL,
|
||||
headers: { 'If-None-Match': fileHash },
|
||||
})
|
||||
expect(response.statusCode).to.equal(304)
|
||||
expect(response.headers.etag).to.equal(fileHash)
|
||||
expect(body).to.equal('')
|
||||
})
|
||||
}
|
||||
it('should return 404 without fallback', async function () {
|
||||
MockV1HistoryApi.reset()
|
||||
@@ -153,6 +169,12 @@ describe('HistoryTests', function () {
|
||||
expect(response.statusCode).to.equal(404)
|
||||
await expectNoIncrement()
|
||||
})
|
||||
it('should not set cache headers on 404', async function () {
|
||||
MockV1HistoryApi.reset()
|
||||
const { response } = await user.doRequest('GET', fileURL)
|
||||
expect(response.headers).not.to.have.property('cache-control')
|
||||
expect(response.headers).not.to.have.property('etag')
|
||||
})
|
||||
it('should fetch the file size from filestore when missing in history-v1', async function () {
|
||||
MockV1HistoryApi.reset()
|
||||
const { response, body } = await user.doRequest(
|
||||
|
||||
@@ -21,6 +21,7 @@ describe('FileStoreController', function () {
|
||||
}
|
||||
this.ProjectLocator = { promises: { findElement: sinon.stub() } }
|
||||
this.Stream = { pipeline: sinon.stub().resolves() }
|
||||
this.HistoryManager = {}
|
||||
this.controller = await esmock.strict(MODULE_PATH, {
|
||||
'node:stream/promises': this.Stream,
|
||||
'@overleaf/settings': this.settings,
|
||||
@@ -28,6 +29,8 @@ describe('FileStoreController', function () {
|
||||
this.ProjectLocator,
|
||||
'../../../../app/src/Features/FileStore/FileStoreHandler':
|
||||
this.FileStoreHandler,
|
||||
'../../../../app/src/Features/History/HistoryManager':
|
||||
this.HistoryManager,
|
||||
})
|
||||
this.stream = {}
|
||||
this.projectId = '2k3j1lk3j21lk3j'
|
||||
|
||||
Reference in New Issue
Block a user