[clsi-cache] frontend (#24389)

* [clsi-lb] forward ?clsiserverid=cache requests to clsi-cache

* [web] use clsi-cache in frontend

* [web] upgrade compile from cache to full compile when triggered inflight

* [web] fix pdf-js-viewer.spec.tsx tests -- add ?clsiserverid=foo to url

* [web] fix renamed reference after merge

* [web] fix download of other output files and use specific build

* [web] consolidate validation of filename into ClsiCacheHandler

* [web] remove unused projectName from getLatestBuildFromCache

* [web] avoid hitting the same clsi-cache instance first all the time

* [web] update documentation

GitOrigin-RevId: d48265a7ba89d6731092640e1492bc9f103f5c33
This commit is contained in:
Jakob Ackermann
2025-04-09 13:39:45 +01:00
committed by Copybot
parent ddc15e4c8d
commit e6edb4bf23
20 changed files with 737 additions and 93 deletions
@@ -0,0 +1,191 @@
const { NotFoundError } = require('../Errors/Errors')
const {
fetchStreamWithResponse,
RequestFailedError,
fetchJson,
} = require('@overleaf/fetch-utils')
const Path = require('path')
const { pipeline } = require('stream/promises')
const logger = require('@overleaf/logger')
const ClsiCacheManager = require('./ClsiCacheManager')
const CompileController = require('./CompileController')
const { expressify } = require('@overleaf/promise-utils')
const ClsiCacheHandler = require('./ClsiCacheHandler')
const ProjectGetter = require('../Project/ProjectGetter')
/**
* Download a file from a specific build on the clsi-cache.
*
* @param req
* @param res
* @return {Promise<*>}
*/
async function downloadFromCache(req, res) {
const { Project_id: projectId, buildId, filename } = req.params
const userId = CompileController._getUserIdForCompile(req)
const signal = AbortSignal.timeout(60 * 1000)
let location, projectName
try {
;[{ location }, { name: projectName }] = await Promise.all([
ClsiCacheHandler.getOutputFile(
projectId,
userId,
buildId,
filename,
signal
),
ProjectGetter.promises.getProject(projectId, { name: 1 }),
])
} catch (err) {
if (err instanceof NotFoundError) {
// res.sendStatus() sends a description of the status as body.
// Using res.status().end() avoids sending that fake body.
return res.status(404).end()
} else {
throw err
}
}
const { stream, response } = await fetchStreamWithResponse(location, {
signal,
})
if (req.destroyed) {
// The client has disconnected already, avoid trying to write into the broken connection.
return
}
for (const key of ['Content-Length', 'Content-Type']) {
if (response.headers.has(key)) res.setHeader(key, response.headers.get(key))
}
const ext = Path.extname(filename)
res.attachment(
ext === '.pdf'
? `${CompileController._getSafeProjectName({ name: projectName })}.pdf`
: filename
)
try {
res.writeHead(response.status)
await pipeline(stream, res)
} catch (err) {
const reqAborted = Boolean(req.destroyed)
const streamingStarted = Boolean(res.headersSent)
if (!streamingStarted) {
if (err instanceof RequestFailedError) {
res.sendStatus(err.response.status)
} else {
res.sendStatus(500)
}
}
if (
streamingStarted &&
reqAborted &&
err.code === 'ERR_STREAM_PREMATURE_CLOSE'
) {
// Ignore noisy spurious error
return
}
logger.warn(
{
err,
projectId,
location,
filename,
reqAborted,
streamingStarted,
},
'CLSI-cache proxy error'
)
}
}
/**
* Prepare a compile response from the clsi-cache.
*
* @param req
* @param res
* @return {Promise<void>}
*/
async function getLatestBuildFromCache(req, res) {
const { Project_id: projectId } = req.params
const userId = CompileController._getUserIdForCompile(req)
try {
const {
internal: { location: metaLocation, zone },
external: { isUpToDate, allFiles },
} = await ClsiCacheManager.getLatestBuildFromCache(
projectId,
userId,
'output.overleaf.json'
)
if (!isUpToDate) return res.sendStatus(410)
const meta = await fetchJson(metaLocation, {
signal: AbortSignal.timeout(5 * 1000),
})
const [, editorId, buildId] = metaLocation.match(
/\/build\/([a-f0-9-]+?)-([a-f0-9]+-[a-f0-9]+)\//
)
let baseURL = `/project/${projectId}`
if (userId) {
baseURL += `/user/${userId}`
}
const { ranges, contentId, clsiServerId, compileGroup, size } = meta
const outputFiles = allFiles
.filter(
path => path !== 'output.overleaf.json' && path !== 'output.tar.gz'
)
.map(path => {
const f = {
url: `${baseURL}/build/${editorId}-${buildId}/output/${path}`,
downloadURL: `/download/project/${projectId}/build/${editorId}-${buildId}/output/cached/${path}`,
build: buildId,
path,
type: path.split('.').pop(),
}
if (path === 'output.pdf') {
Object.assign(f, {
size,
editorId,
})
if (clsiServerId !== 'cache') {
// Enable PDF caching and attempt to download from VM first.
// (clsi VMs do not have the editorId in the path on disk, omit it).
Object.assign(f, {
url: `${baseURL}/build/${buildId}/output/output.pdf`,
ranges,
contentId,
})
}
}
return f
})
let { pdfCachingMinChunkSize, pdfDownloadDomain } =
await CompileController._getSplitTestOptions(req, res)
pdfDownloadDomain += `/zone/${zone}`
res.json({
fromCache: true,
status: 'success',
outputFiles,
compileGroup,
clsiServerId,
pdfDownloadDomain,
pdfCachingMinChunkSize,
})
} catch (err) {
if (err instanceof NotFoundError) {
res.sendStatus(404)
} else {
throw err
}
}
}
module.exports = {
downloadFromCache: expressify(downloadFromCache),
getLatestBuildFromCache: expressify(getLatestBuildFromCache),
}
@@ -1,3 +1,4 @@
const _ = require('lodash')
const {
fetchNothing,
fetchRedirectWithResponse,
@@ -23,6 +24,13 @@ function validateFilename(filename) {
}
}
/**
* Clear the cache on all clsi-cache instances.
*
* @param projectId
* @param userId
* @return {Promise<void>}
*/
async function clearCache(projectId, userId) {
let path = `/project/${projectId}`
if (userId) {
@@ -46,6 +54,45 @@ async function clearCache(projectId, userId) {
)
}
/**
* Get an output file from a specific build.
*
* @param projectId
* @param userId
* @param buildId
* @param filename
* @param signal
* @return {Promise<{size: number, zone: string, location: string, lastModified: Date, allFiles: string[]}>}
*/
async function getOutputFile(
projectId,
userId,
buildId,
filename,
signal = AbortSignal.timeout(15_000)
) {
validateFilename(filename)
if (!/^[a-f0-9-]+$/.test(buildId)) {
throw new InvalidNameError('bad buildId')
}
let path = `/project/${projectId}`
if (userId) {
path += `/user/${userId}`
}
path += `/build/${buildId}/search/output/${filename}`
return getRedirectWithFallback(projectId, userId, path, signal)
}
/**
* Get an output file from the most recent build.
*
* @param projectId
* @param userId
* @param filename
* @param signal
* @return {Promise<{size: number, zone: string, location: string, lastModified: Date, allFiles: string[]}>}
*/
async function getLatestOutputFile(
projectId,
userId,
@@ -59,8 +106,34 @@ async function getLatestOutputFile(
path += `/user/${userId}`
}
path += `/latest/output/${filename}`
return getRedirectWithFallback(projectId, userId, path, signal)
}
for (const { url, zone } of Settings.apis.clsiCache.instances) {
/**
* Request the given path from any of the clsi-cache instances.
*
* Some of them might be down temporarily. Try the next one until we receive a redirect or 404.
*
* This function is similar to the Coordinator in the clsi-cache, notable differences:
* - all the logic for sorting builds is in clsi-cache (re-used by clsi and web)
* - fan-out (1 client performs lookup on many clsi-cache instances) is "central" in clsi-cache, resulting in better connection re-use
* - we only cross the k8s cluster boundary via an internal GCLB once ($$$)
*
* @param projectId
* @param userId
* @param path
* @param signal
* @return {Promise<{size: number, zone: string, location: string, lastModified: Date, allFiles: string[]}>}
*/
async function getRedirectWithFallback(
projectId,
userId,
path,
signal = AbortSignal.timeout(15_000)
) {
// Avoid hitting the same instance first all the time.
const instances = _.shuffle(Settings.apis.clsiCache.instances)
for (const { url, zone } of instances) {
const u = new URL(url)
u.pathname = path
try {
@@ -92,6 +165,21 @@ async function getLatestOutputFile(
throw new NotFoundError('nothing cached yet')
}
/**
* Populate the clsi-cache for the given project/user with the provided source
*
* This is either another project, or a template (id+version).
*
* @param projectId
* @param userId
* @param sourceProjectId
* @param templateId
* @param templateVersionId
* @param lastUpdated
* @param zone
* @param signal
* @return {Promise<void>}
*/
async function prepareCacheSource(
projectId,
userId,
@@ -122,6 +210,7 @@ async function prepareCacheSource(
module.exports = {
clearCache,
getOutputFile,
getLatestOutputFile,
prepareCacheSource,
}
@@ -4,15 +4,26 @@ const DocumentUpdaterHandler = require('../DocumentUpdater/DocumentUpdaterHandle
const ProjectGetter = require('../Project/ProjectGetter')
const SplitTestHandler = require('../SplitTests/SplitTestHandler')
/**
* Get the most recent build and metadata
*
* Internal: internal metadata; External: fine to send to user as-is.
*
* @param projectId
* @param userId
* @param filename
* @param signal
* @return {Promise<{internal: {zone: string, location: string}, external: {isUpToDate: boolean, lastUpdated: Date, size: number, allFiles: string[]}}>}
*/
async function getLatestBuildFromCache(projectId, userId, filename, signal) {
const [
{ location, lastModified: lastCompiled, zone, size, allFiles },
lastUpdatedInRedis,
{ lastUpdated: lastUpdatedInMongo, name: projectName },
{ lastUpdated: lastUpdatedInMongo },
] = await Promise.all([
ClsiCacheHandler.getLatestOutputFile(projectId, userId, filename, signal),
DocumentUpdaterHandler.promises.getProjectLastUpdatedAt(projectId),
ProjectGetter.promises.getProject(projectId, { lastUpdated: 1, name: 1 }),
ProjectGetter.promises.getProject(projectId, { lastUpdated: 1 }),
])
const lastUpdated =
@@ -25,7 +36,6 @@ async function getLatestBuildFromCache(projectId, userId, filename, signal) {
internal: {
location,
zone,
projectName,
},
external: {
isUpToDate,
@@ -36,6 +46,16 @@ async function getLatestBuildFromCache(projectId, userId, filename, signal) {
}
}
/**
* Collect metadata and prepare the clsi-cache for the given project.
*
* @param projectId
* @param userId
* @param sourceProjectId
* @param templateId
* @param templateVersionId
* @return {Promise<void>}
*/
async function prepareClsiCache(
projectId,
userId,
@@ -57,7 +57,7 @@ async function getPdfCachingMinChunkSize(req, res) {
return parseInt(variant, 10)
}
const getSplitTestOptions = callbackify(async function (req, res) {
async function _getSplitTestOptions(req, res) {
// Use the query flags from the editor request for overriding the split test.
let query = {}
try {
@@ -122,7 +122,8 @@ const getSplitTestOptions = callbackify(async function (req, res) {
enablePdfCaching,
pdfCachingMinChunkSize,
}
})
}
const getSplitTestOptionsCb = callbackify(_getSplitTestOptions)
module.exports = CompileController = {
compile(req, res, next) {
@@ -161,7 +162,7 @@ module.exports = CompileController = {
options.incrementalCompilesEnabled = true
}
getSplitTestOptions(req, res, (err, splitTestOptions) => {
getSplitTestOptionsCb(req, res, (err, splitTestOptions) => {
if (err) return next(err)
let {
compileFromClsiCache,
@@ -305,25 +306,20 @@ module.exports = CompileController = {
)
},
_compileAsUser(req, callback) {
// callback with userId if per-user, undefined otherwise
if (!Settings.disablePerUserCompiles) {
const userId = SessionManager.getLoggedInUserId(req.session)
callback(null, userId)
} else {
callback()
}
}, // do a per-project compile, not per-user
_getSplitTestOptions,
_downloadAsUser(req, callback) {
// callback with userId if per-user, undefined otherwise
_getUserIdForCompile(req) {
if (!Settings.disablePerUserCompiles) {
const userId = SessionManager.getLoggedInUserId(req.session)
callback(null, userId)
} else {
callback()
return SessionManager.getLoggedInUserId(req.session)
}
}, // do a per-project compile, not per-user
return null
},
_compileAsUser(req, callback) {
callback(null, CompileController._getUserIdForCompile(req))
},
_downloadAsUser(req, callback) {
callback(null, CompileController._getUserIdForCompile(req))
},
downloadPdf(req, res, next) {
Metrics.inc('pdf-downloads')
@@ -549,7 +545,7 @@ module.exports = CompileController = {
getImageNameForProject(projectId, (error, imageName) => {
if (error) return next(error)
getSplitTestOptions(req, res, (error, splitTestOptions) => {
getSplitTestOptionsCb(req, res, (error, splitTestOptions) => {
if (error) return next(error)
const { compileFromClsiCache } = splitTestOptions
@@ -597,7 +593,7 @@ module.exports = CompileController = {
getImageNameForProject(projectId, (error, imageName) => {
if (error) return next(error)
getSplitTestOptions(req, res, (error, splitTestOptions) => {
getSplitTestOptionsCb(req, res, (error, splitTestOptions) => {
if (error) return next(error)
const { compileFromClsiCache } = splitTestOptions
@@ -338,6 +338,8 @@ const _ProjectController = {
'external-socket-heartbeat',
'full-project-search',
'null-test-share-modal',
'fall-back-to-clsi-cache',
'initial-compile-from-clsi-cache',
'pdf-caching-cached-url-lookup',
'pdf-caching-mode',
'pdf-caching-prefetch-large',
+13
View File
@@ -66,6 +66,7 @@ import _ from 'lodash'
import { plainTextResponse } from './infrastructure/Response.js'
import PublicAccessLevels from './Features/Authorization/PublicAccessLevels.js'
import SocketDiagnostics from './Features/SocketDiagnostics/SocketDiagnostics.mjs'
import ClsiCacheController from './Features/Compile/ClsiCacheController.js'
const ClsiCookieManager = ClsiCookieManagerFactory(
Settings.apis.clsi != null ? Settings.apis.clsi.backendGroupName : undefined
)
@@ -625,6 +626,18 @@ async function initialize(webRouter, privateApiRouter, publicApiRouter) {
CompileController.downloadPdf
)
webRouter.get(
'/project/:Project_id/output/cached/output.overleaf.json',
AuthorizationMiddleware.ensureUserCanReadProject,
ClsiCacheController.getLatestBuildFromCache
)
webRouter.get(
'/download/project/:Project_id/build/:buildId/output/cached/:filename',
AuthorizationMiddleware.ensureUserCanReadProject,
ClsiCacheController.downloadFromCache
)
// PDF Download button for specific build
webRouter.get(
/^\/download\/project\/([^/]*)\/build\/([0-9a-f-]+)\/output\/output\.pdf$/,
@@ -43,23 +43,60 @@ const outputFiles = () => {
]
}
export const interceptCompile = (prefix = 'compile', times = 1) => {
cy.intercept(
{ method: 'POST', pathname: '/project/*/compile', times },
{
body: {
status: 'success',
clsiServerId: 'foo',
compileGroup: 'priority',
pdfDownloadDomain: 'https://clsi.test-overleaf.com',
outputFiles: outputFiles(),
},
}
).as(`${prefix}`)
export const interceptCompile = ({
prefix = 'compile',
times = 1,
cached = false,
outputPDFFixture = 'output.pdf',
} = {}) => {
if (cached) {
cy.intercept(
{ pathname: '/project/*/output/cached/output.overleaf.json', times },
{
body: {
fromCache: true,
status: 'success',
clsiServerId: 'foo',
compileGroup: 'priority',
pdfDownloadDomain: 'https://clsi.test-overleaf.com',
outputFiles: outputFiles(),
},
}
).as(`${prefix}-cached`)
cy.intercept(
{ method: 'POST', pathname: '/project/*/compile', times },
{
body: {
status: 'unavailable',
clsiServerId: 'foo',
compileGroup: 'priority',
pdfDownloadDomain: 'https://clsi.test-overleaf.com',
outputFiles: [],
},
}
).as(`${prefix}`)
} else {
cy.intercept(
{ pathname: '/project/*/output/cached/output.overleaf.json', times },
{ statusCode: 404 }
).as(`${prefix}-cached`)
cy.intercept(
{ method: 'POST', pathname: '/project/*/compile', times },
{
body: {
status: 'success',
clsiServerId: 'foo',
compileGroup: 'priority',
pdfDownloadDomain: 'https://clsi.test-overleaf.com',
outputFiles: outputFiles(),
},
}
).as(`${prefix}`)
}
cy.intercept(
{ pathname: '/build/*/output.pdf', times },
{ fixture: 'build/output.pdf,null' }
{ fixture: `build/${outputPDFFixture},null` }
).as(`${prefix}-pdf`)
cy.intercept(
@@ -73,12 +110,26 @@ export const interceptCompile = (prefix = 'compile', times = 1) => {
).as(`${prefix}-blg`)
}
export const waitForCompile = ({ prefix = 'compile', pdf = false } = {}) => {
cy.wait(`@${prefix}`)
export const waitForCompile = ({
prefix = 'compile',
pdf = false,
cached = false,
} = {}) => {
if (cached) {
cy.wait(`@${prefix}-cached`)
} else {
cy.wait(`@${prefix}`)
}
cy.wait(`@${prefix}-log`)
.its('request.query.clsiserverid')
.should('eq', cached ? 'cache' : 'foo') // straight from cache if cached
cy.wait(`@${prefix}-blg`)
.its('request.query.clsiserverid')
.should('eq', cached ? 'cache' : 'foo') // straight from cache if cached
if (pdf) {
cy.wait(`@${prefix}-pdf`)
.its('request.query.clsiserverid')
.should('eq', 'foo') // always from VM first
}
return cy.wrap(null)
}
@@ -24,7 +24,11 @@ function PdfFileList({ fileList }: { fileList: PdfFileDataList }) {
{fileList.top.map(file => (
<li key={file.path} role="menuitem">
<DropdownItem role="link" download={basename(file)} href={file.url}>
<DropdownItem
role="link"
download={basename(file)}
href={file.downloadURL || file.url}
>
{file.path}
</DropdownItem>
</li>
@@ -36,7 +40,11 @@ function PdfFileList({ fileList }: { fileList: PdfFileDataList }) {
{fileList.other.map(file => (
<li key={file.path} role="menuitem">
<DropdownItem role="link" download={basename(file)} href={file.url}>
<DropdownItem
role="link"
download={basename(file)}
href={file.downloadURL || file.url}
>
{file.path}
</DropdownItem>
</li>
@@ -11,14 +11,21 @@ const ignoreFiles = ['output.fls', 'output.fdb_latexmk']
export function buildFileList(
outputFiles: Map<string, CompileOutputFile>,
{ clsiServerId, compileGroup, outputFilesArchive }: CompileResponseData
{
clsiServerId,
compileGroup,
outputFilesArchive,
fromCache = false,
}: CompileResponseData
): PdfFileDataList {
const files: PdfFileDataList = { top: [], other: [] }
if (outputFiles) {
const params = new URLSearchParams()
if (clsiServerId) {
if (fromCache) {
params.set('clsiserverid', 'cache')
} else if (clsiServerId) {
params.set('clsiserverid', clsiServerId)
}
if (compileGroup) {
@@ -19,9 +19,10 @@ export function handleOutputFiles(outputFiles, projectId, data) {
outputFile.editorId = outputFile.editorId || EDITOR_SESSION_ID
// build the URL for viewing the PDF in the preview UI
const params = new URLSearchParams({
compileGroup: data.compileGroup,
})
const params = new URLSearchParams()
if (data.compileGroup) {
params.set('compileGroup', data.compileGroup)
}
if (data.clsiServerId) {
params.set('clsiserverid', data.clsiServerId)
@@ -37,10 +38,14 @@ export function handleOutputFiles(outputFiles, projectId, data) {
data.pdfDownloadDomain
)}?${params}`
// build the URL for downloading the PDF
params.set('popupDownload', 'true') // save PDF download as file
if (data.fromCache) {
outputFile.pdfDownloadUrl = outputFile.downloadURL
} else {
// build the URL for downloading the PDF
params.set('popupDownload', 'true') // save PDF download as file
outputFile.pdfDownloadUrl = `/download/project/${projectId}/build/${outputFile.build}/output/output.pdf?${params}`
outputFile.pdfDownloadUrl = `/download/project/${projectId}/build/${outputFile.build}/output/output.pdf?${params}`
}
return outputFile
}
@@ -26,3 +26,4 @@ export const prefetchingEnabled = isFlagEnabled('pdf-caching-prefetching')
export const prefetchLargeEnabled = isFlagEnabled('pdf-caching-prefetch-large')
export const enablePdfCaching = isFlagEnabled('pdf-caching-mode')
export const trackPdfDownloadEnabled = isFlagEnabled('track-pdf-download')
export const useClsiCache = isFlagEnabled('fall-back-to-clsi-cache')
@@ -1,13 +1,14 @@
import OError from '@overleaf/o-error'
import { fallbackRequest, fetchRange } from './pdf-caching'
import { captureException } from '@/infrastructure/error-reporter'
import { getPdfCachingMetrics } from './metrics'
import { EDITOR_SESSION_ID, getPdfCachingMetrics } from './metrics'
import {
cachedUrlLookupEnabled,
enablePdfCaching,
prefetchingEnabled,
prefetchLargeEnabled,
trackPdfDownloadEnabled,
useClsiCache,
} from './pdf-caching-flags'
import { isNetworkError } from '@/utils/is-network-error'
import { debugConsole } from '@/utils/debugging'
@@ -50,12 +51,20 @@ export function generatePdfCachingTransportFactory() {
constructor({ url, pdfFile, abortController, handleFetchError }) {
super(pdfFile.size, new Uint8Array())
this.url = url
pdfFile.ranges = pdfFile.ranges || []
pdfFile.editorId = pdfFile.editorId || EDITOR_SESSION_ID
this.pdfFile = pdfFile
// Clone the chunks as the objectId field is encoded to a Uint8Array.
this.pdfRanges = pdfFile.ranges.map(r => Object.assign({}, r))
this.leanPdfRanges = pdfFile.ranges.map(r => Object.assign({}, r))
this.handleFetchError = handleFetchError
this.abortController = abortController
this.startTime = performance.now()
const params = new URL(url).searchParams
// drop no needed params
params.delete('enable_pdf_caching')
params.delete('verify_chunks')
this.queryForChunks = params.toString()
}
abort() {
@@ -67,7 +76,7 @@ export function generatePdfCachingTransportFactory() {
const getDebugInfo = () => ({
// Sentry does not serialize objects in twice nested objects.
// Move the ranges to the root level to see them in Sentry.
pdfRanges: this.pdfRanges,
pdfRanges: this.leanPdfRanges,
pdfFile: Object.assign({}, this.pdfFile, {
ranges: '[extracted]',
// Hide prefetched chunks as these include binary blobs.
@@ -83,7 +92,7 @@ export function generatePdfCachingTransportFactory() {
performance.now() - this.startTime > STALE_OUTPUT_REQUEST_THRESHOLD_MS
const is404 = err => OError.getFullInfo(err).statusCode === 404
const isFromOutputPDFRequest = err =>
OError.getFullInfo(err).url === this.url
OError.getFullInfo(err).url?.includes?.('/output.pdf') === true
// Do not consider "expected 404s" and network errors as pdf caching
// failures.
@@ -96,11 +105,68 @@ export function generatePdfCachingTransportFactory() {
(is404(err) || isNetworkError(err)) &&
(isStaleOutputRequest() || isFromOutputPDFRequest(err))
const usesCache = url => {
if (!url) return false
const u = new URL(url)
return (
u.pathname.endsWith(
`build/${this.pdfFile.editorId}-${this.pdfFile.build}/output/output.pdf`
) && u.searchParams.get('clsiserverid') === 'cache'
)
}
const canTryFromCache = err => {
if (!useClsiCache) return false
if (!is404(err)) return false
return !usesCache(OError.getFullInfo(err).url)
}
const getOutputPDFURLFromCache = () => {
if (usesCache(this.url)) return this.url
const u = new URL(this.url)
u.searchParams.set('clsiserverid', 'cache')
u.pathname = u.pathname.replace(
/build\/[a-f0-9-]+\//,
`build/${this.pdfFile.editorId}-${this.pdfFile.build}/`
)
return u.href
}
const fetchFromCache = async () => {
// Try fetching the chunk from clsi-cache
const url = getOutputPDFURLFromCache()
return fallbackRequest({
file: this.pdfFile,
url,
start,
end,
abortSignal,
})
.then(blob => {
// Send the next output.pdf request directly to the cache.
this.url = url
// Only try downloading chunks that were cached previously
this.pdfFile.ranges = this.pdfFile.ranges.filter(r =>
cachedUrls.has(r.hash)
)
return blob
})
.catch(err => {
throw OError.tag(
new PDFJS.MissingPDFException(),
'cache-fallback',
{
statusCode: OError.getFullInfo(err).statusCode,
url: OError.getFullInfo(err).url,
err,
}
)
})
}
fetchRange({
url: this.url,
start,
end,
file: this.pdfFile,
queryForChunks: this.queryForChunks,
metrics,
usageScore,
cachedUrls,
@@ -109,9 +175,11 @@ export function generatePdfCachingTransportFactory() {
prefetchLargeEnabled,
cachedUrlLookupEnabled,
abortSignal,
fallbackToCacheURL: getOutputPDFURLFromCache(),
})
.catch(err => {
if (abortSignal.aborted) return
if (canTryFromCache(err)) return fetchFromCache()
if (isExpectedError(err)) {
if (is404(err)) {
// A regular pdf-js request would have seen this 404 as well.
@@ -140,11 +208,13 @@ export function generatePdfCachingTransportFactory() {
},
})
return fallbackRequest({
file: this.pdfFile,
url: this.url,
start,
end,
abortSignal,
}).catch(err => {
if (canTryFromCache(err)) return fetchFromCache()
if (isExpectedError(err)) {
throw OError.tag(new PDFJS.MissingPDFException(), 'fallback', {
statusCode: OError.getFullInfo(err).statusCode,
@@ -21,6 +21,28 @@ const CHUNK_USAGE_THRESHOLD_CACHED = 42
// 42 * 0.7^11 < 1, aka we keep stale entries around for 11 compiles.
const CHUNK_USAGE_STALE_DECAY_RATE = 0.7
let cacheFlag = 'default'
// Work around a Chrome bug: https://issues.chromium.org/issues/40542704
// Multiple simultaneous requests to same URL with Range header cause failure (block backend returns ERR_CACHE_OPERATION_NOT_SUPPORTED)
const CACHE_NO_STORE = 'no-store'
/**
* @param {string} url
* @param {RequestInit} init
*/
async function fetchWithCacheFallback(url, init) {
try {
return await fetch(url, init)
} catch (err) {
if (init.headers?.has('Range') && init.cache !== CACHE_NO_STORE) {
cacheFlag = CACHE_NO_STORE
init.cache = CACHE_NO_STORE
return await fetch(url, init)
}
throw err
}
}
/**
* @param {Object} file
*/
@@ -470,21 +492,30 @@ export function checkChunkResponse(response, estimatedSize, init) {
}
}
function getDynamicChunkInit({ file, start, end, signal }) {
// Avoid making range request when downloading the PDF file in full.
const isFullFile = start === 0 && end === file.size
return {
cache: cacheFlag,
headers: new Headers(
isFullFile ? {} : { Range: `bytes=${start}-${end - 1}` }
),
signal,
}
}
/**
*
* @param {Object} file
* @param {string} url
* @param {number} start
* @param {number} end
* @param {AbortSignal} abortSignal
*/
export async function fallbackRequest({ url, start, end, abortSignal }) {
export async function fallbackRequest({ file, url, start, end, abortSignal }) {
try {
const init = {
cache: 'no-store',
headers: { Range: `bytes=${start}-${end - 1}` },
signal: abortSignal,
}
const response = await fetch(url, init)
const init = getDynamicChunkInit({ file, start, end, signal: abortSignal })
const response = await fetchWithCacheFallback(url, init)
checkChunkResponse(response, end - start, init)
return await response.arrayBuffer()
} catch (e) {
@@ -494,6 +525,7 @@ export async function fallbackRequest({ url, start, end, abortSignal }) {
/**
*
* @param {Object} file
* @param {string} url
* @param {number} start
* @param {number} end
@@ -501,10 +533,24 @@ export async function fallbackRequest({ url, start, end, abortSignal }) {
* @param {Uint8Array} actual
* @param {AbortSignal} abortSignal
*/
async function verifyRange({ url, start, end, metrics, actual, abortSignal }) {
async function verifyRange({
file,
url,
start,
end,
metrics,
actual,
abortSignal,
}) {
let expectedRaw
try {
expectedRaw = await fallbackRequest({ url, start, end, abortSignal })
expectedRaw = await fallbackRequest({
file,
url,
start,
end,
abortSignal,
})
} catch (error) {
throw OError.tag(error, 'cannot verify range', { url, start, end })
}
@@ -541,9 +587,11 @@ function skipPrefetched(chunks, prefetched, start, end) {
* @param {Object|Object[]} chunk
* @param {string} url
* @param {RequestInit} init
* @param {Map<string, string>} cachedUrls
* @param {Map<string, {url:string,init:RequestInit}>} cachedUrls
* @param {Object} metrics
* @param {boolean} cachedUrlLookupEnabled
* @param {string} fallbackToCacheURL
* @param {Object} file
*/
async function fetchChunk({
chunk,
@@ -552,18 +600,21 @@ async function fetchChunk({
cachedUrls,
metrics,
cachedUrlLookupEnabled,
fallbackToCacheURL,
file,
}) {
const estimatedSize = Array.isArray(chunk)
? estimateSizeOfMultipartResponse(chunk)
: chunk.end - chunk.start
const oldUrl = cachedUrls.get(chunk.hash)
if (cachedUrlLookupEnabled && chunk.hash && oldUrl && oldUrl !== url) {
if (cachedUrlLookupEnabled && chunk.hash && oldUrl && oldUrl.url !== url) {
// When the clsi server id changes, the content id changes too and as a
// result all the browser cache keys (aka urls) get invalidated.
// We memorize the previous browser cache keys in `cachedUrls`.
try {
const response = await fetch(oldUrl, init)
oldUrl.init.signal = init.signal
const response = await fetchWithCacheFallback(oldUrl.url, oldUrl.init)
if (response.status === 200) {
checkChunkResponse(response, estimatedSize, init)
metrics.oldUrlHitCount += 1
@@ -577,10 +628,46 @@ async function fetchChunk({
} catch (e) {
// Fallback to the latest url.
}
cachedUrls.delete(chunk.hash) // clear cached state
}
let response
try {
response = await fetchWithCacheFallback(url, init)
checkChunkResponse(response, estimatedSize, init)
if (chunk.hash) {
delete init.signal // omit the signal from the cache
cachedUrls.set(chunk.hash, { url, init })
}
} catch (err1) {
if (chunk.hash) {
cachedUrls.delete(chunk.hash)
}
const isMissing = response?.status === 404 || response?.status === 416
const hasOthersCached = cachedUrls.size > 0
if (isMissing && hasOthersCached) {
// Only try downloading chunks that were cached previously
file.ranges = file.ranges.filter(r => cachedUrls.has(r.hash))
// Try harder at fetching the chunk, fallback to cache
url = fallbackToCacheURL
if (chunk.hash) {
init = getDynamicChunkInit({
file,
// skip object id prefix
start: chunk.start + chunk.objectId.byteLength,
end: chunk.end,
signal: init.signal,
})
}
try {
response = await fetchWithCacheFallback(url, init)
checkChunkResponse(response, estimatedSize, init)
} catch (err2) {
throw err1
}
} else {
throw err1
}
}
const response = await fetch(url, init)
checkChunkResponse(response, estimatedSize, init)
if (chunk.hash) cachedUrls.set(chunk.hash, url)
return response
}
@@ -720,6 +807,7 @@ class Timer {
* @param {number} start
* @param {number} end
* @param {Object} file
* @param {queryForChunks} start
* @param {Object} metrics
* @param {Map} usageScore
* @param {Map} cachedUrls
@@ -728,12 +816,14 @@ class Timer {
* @param {boolean} prefetchLargeEnabled
* @param {boolean} tryOldCachedUrlEnabled
* @param {AbortSignal} abortSignal
* @param {string} fallbackToCacheURL
*/
export async function fetchRange({
url,
start,
end,
file,
queryForChunks,
metrics,
usageScore,
cachedUrls,
@@ -742,6 +832,7 @@ export async function fetchRange({
prefetchLargeEnabled,
cachedUrlLookupEnabled,
abortSignal,
fallbackToCacheURL,
}) {
const timer = new Timer()
timer.startBlockingCompute()
@@ -793,7 +884,7 @@ export async function fetchRange({
fetchedCount: 1,
fetchedBytes: size,
})
return fallbackRequest({ url, start, end, abortSignal })
return fallbackRequest({ file, url, start, end, abortSignal })
}
if (prefetchingEnabled) {
@@ -819,8 +910,8 @@ export async function fetchRange({
chunk: dynamicChunks[0],
url,
init: {
cache: 'no-store',
headers: { Range: `bytes=${byteRanges}` },
cache: cacheFlag,
headers: new Headers({ Range: `bytes=${byteRanges}` }),
},
})
break
@@ -833,8 +924,10 @@ export async function fetchRange({
chunk,
url,
init: {
cache: 'no-store',
headers: { Range: `bytes=${chunk.start}-${chunk.end - 1}` },
cache: cacheFlag,
headers: new Headers({
Range: `bytes=${chunk.start}-${chunk.end - 1}`,
}),
},
})
})
@@ -844,17 +937,12 @@ export async function fetchRange({
chunk: dynamicChunks,
url,
init: {
cache: 'no-store',
headers: { Range: `bytes=${byteRanges}` },
cache: cacheFlag,
headers: new Headers({ Range: `bytes=${byteRanges}` }),
},
})
}
const params = new URL(url).searchParams
// drop no needed params
params.delete('enable_pdf_caching')
params.delete('verify_chunks')
const query = params.toString()
// The schema of `url` is https://domain/project/:id/user/:id/build/... for
// authenticated and https://domain/project/:id/build/... for
// unauthenticated users. Cut it before /build/.
@@ -863,7 +951,7 @@ export async function fetchRange({
const requests = chunks
.map(chunk => ({
chunk,
url: `${perUserPrefix}/content/${file.contentId}/${chunk.hash}?${query}`,
url: `${perUserPrefix}/content/${file.contentId}/${chunk.hash}?${queryForChunks}`,
init: {},
}))
.concat(coalescedDynamicChunks)
@@ -886,6 +974,8 @@ export async function fetchRange({
cachedUrls,
metrics,
cachedUrlLookupEnabled,
fallbackToCacheURL,
file,
})
timer.startBlockingCompute()
const boundary = getMultipartBoundary(response, chunk)
@@ -38,12 +38,14 @@ import { useFileTreePathContext } from '@/features/file-tree/contexts/file-tree-
import { useUserSettingsContext } from '@/shared/context/user-settings-context'
import { useFeatureFlag } from '@/shared/context/split-test-context'
import { useEditorManagerContext } from '@/features/ide-react/context/editor-manager-context'
import { getJSON } from '@/infrastructure/fetch-json'
import { CompileResponseData } from '../../../../types/compile'
import {
PdfScrollPosition,
usePdfScrollPosition,
} from '@/shared/hooks/use-pdf-scroll-position'
import { PdfFileDataList } from '@/features/pdf-preview/util/types'
import { isSplitTestEnabled } from '@/utils/splitTestUtils'
type PdfFile = Record<string, any>
@@ -116,7 +118,7 @@ export const LocalCompileProvider: FC = ({ children }) => {
const { hasPremiumCompile, isProjectOwner } = useEditorContext()
const { openDocWithId, openDocs, currentDocument } = useEditorManagerContext()
const { _id: projectId, rootDocId } = useProjectContext()
const { _id: projectId, rootDocId, joinedOnce } = useProjectContext()
const { pdfPreviewOpen } = useLayoutContext()
@@ -186,6 +188,12 @@ export const LocalCompileProvider: FC = ({ children }) => {
// whether the project has been compiled yet
const [compiledOnce, setCompiledOnce] = useState(false)
// fetch initial compile response from cache
const [initialCompileFromCache, setInitialCompileFromCache] = useState(
isSplitTestEnabled('initial-compile-from-clsi-cache')
)
// Compile triggered while fetching the initial compile from cache
const upgradeInitialCompileFromCacheRef = useRef(false)
// whether the cache is being cleared
const [clearingCache, setClearingCache] = useState(false)
@@ -327,13 +335,43 @@ export const LocalCompileProvider: FC = ({ children }) => {
setEditedSinceCompileStarted(changedAt > 0)
}, [setEditedSinceCompileStarted, changedAt])
// try to fetch the last compile result after opening the project, potentially before joining the project.
useEffect(() => {
if (initialCompileFromCache) {
setInitialCompileFromCache(false)
setCompiling(true)
setCompiledOnce(true)
getJSON(`/project/${projectId}/output/cached/output.overleaf.json`)
.then((data: any) => {
setCompiling(false)
setData({
...data,
options: compiler.defaultOptions,
})
if (upgradeInitialCompileFromCacheRef.current) {
compilingRef.current = false
compiler.compile() // trigger regular compile
}
})
.catch(() => {
setCompiling(false)
if (upgradeInitialCompileFromCacheRef.current) {
compilingRef.current = false
compiler.compile() // trigger regular compile
} else {
setCompiledOnce(false) // trigger auto compile
}
})
}
}, [projectId, initialCompileFromCache, compiler])
// always compile the PDF once after opening the project, after the doc has loaded
useEffect(() => {
if (!compiledOnce && currentDocument) {
if (!compiledOnce && currentDocument && !initialCompileFromCache) {
setCompiledOnce(true)
compiler.compile({ isAutoCompileOnLoad: true })
}
}, [compiledOnce, currentDocument, compiler])
}, [compiledOnce, currentDocument, initialCompileFromCache, compiler])
useEffect(() => {
setHasShortCompileTimeout(
@@ -370,6 +408,7 @@ export const LocalCompileProvider: FC = ({ children }) => {
// note: this should _only_ run when `data` changes,
// the other dependencies must all be static
useEffect(() => {
if (!joinedOnce) return // wait for joinProject, it populates the premium flags.
const abortController = new AbortController()
const recordedActions = recordedActionsRef.current
@@ -519,6 +558,7 @@ export const LocalCompileProvider: FC = ({ children }) => {
abortController.abort()
}
}, [
joinedOnce,
data,
alphaProgram,
labsProgram,
@@ -579,9 +619,10 @@ export const LocalCompileProvider: FC = ({ children }) => {
// start a compile manually
const startCompile = useCallback(
options => {
upgradeInitialCompileFromCacheRef.current = true
compiler.compile(options)
},
[compiler]
[compiler, upgradeInitialCompileFromCacheRef]
)
// stop a compile manually
@@ -29,6 +29,7 @@ const projectFallback = {
export const ProjectProvider: FC = ({ children }) => {
const [project] = useScopeValue('project')
const joinedOnce = !!project
const {
_id,
@@ -69,6 +70,7 @@ export const ProjectProvider: FC = ({ children }) => {
trackChangesState,
mainBibliographyDocId,
projectSnapshot,
joinedOnce,
}
}, [
_id,
@@ -84,6 +86,7 @@ export const ProjectProvider: FC = ({ children }) => {
trackChangesState,
mainBibliographyDocId,
projectSnapshot,
joinedOnce,
])
return (
@@ -49,6 +49,7 @@ export type ProjectContextValue = {
}[]
trackChangesState: boolean | Record<UserId | '__guests__', boolean>
projectSnapshot: ProjectSnapshot
joinedOnce: boolean
}
export type ProjectContextUpdateValue = Partial<ProjectContextValue>
@@ -23,7 +23,7 @@ describe('<PdfJSViewer/>', function () {
<EditorProviders scope={scope}>
<PdfPreviewProvider>
<div className="pdf-viewer">
<PdfJsViewer url="/build/123/output.pdf" />
<PdfJsViewer url="/build/123/output.pdf?clsiserverid=foo" />
</div>
</PdfPreviewProvider>
</EditorProviders>
@@ -68,7 +68,7 @@ describe('<PdfJSViewer/>', function () {
<EditorProviders scope={scope}>
<PdfPreviewProvider>
<div className="pdf-viewer">
<PdfJsViewer url="/build/123/output.pdf" />
<PdfJsViewer url="/build/123/output.pdf?clsiserverid=foo" />
</div>
</PdfPreviewProvider>
</EditorProviders>
@@ -88,7 +88,7 @@ describe('<PdfJSViewer/>', function () {
<EditorProviders scope={scope}>
<PdfPreviewProvider>
<div className="pdf-viewer">
<PdfJsViewer url="/build/123/output.pdf" />
<PdfJsViewer url="/build/123/output.pdf?clsiserverid=foo" />
</div>
</PdfPreviewProvider>
</EditorProviders>
@@ -34,12 +34,15 @@ describe('<PdfPreview/>', function () {
'ol-compilesUserContentDomain',
'https://compiles-user.dev-overleaf.com'
)
window.metaAttributesCache.set('ol-splitTestVariants', {
'initial-compile-from-clsi-cache': 'enabled',
})
cy.interceptEvents()
})
it('renders the PDF preview', function () {
window.metaAttributesCache.set('ol-preventCompileOnLoad', false)
cy.interceptCompile('compile')
cy.interceptCompile()
const scope = mockScope()
@@ -57,6 +60,58 @@ describe('<PdfPreview/>', function () {
cy.findByRole('button', { name: 'Recompile' })
})
it('uses the cache when available', function () {
cy.interceptCompile({ prefix: 'compile', times: 1, cached: true })
const scope = mockScope()
cy.mount(
<EditorProviders scope={scope}>
<div className="pdf-viewer">
<PdfPreview />
</div>
</EditorProviders>
)
// wait for "compile from cache on load" to finish
cy.waitForCompile({ pdf: true, cached: true })
cy.contains('Your Paper')
})
it('uses the cache when available then compiles', function () {
cy.interceptCompile({ prefix: 'compile', times: 1, cached: true })
const scope = mockScope()
cy.mount(
<EditorProviders scope={scope}>
<div className="pdf-viewer">
<PdfPreview />
</div>
</EditorProviders>
)
// wait for "compile from cache on load" to finish
cy.waitForCompile({ pdf: true, cached: true })
cy.contains('Your Paper')
// Then trigger a new compile
cy.interceptCompile({
prefix: 'recompile',
times: 1,
cached: false,
outputPDFFixture: 'output-2.pdf',
})
// press the Recompile button => compile
cy.findByRole('button', { name: 'Recompile' }).click()
// wait for compile to finish
cy.waitForCompile({ prefix: 'recompile', pdf: true })
cy.contains('Modern Authoring Tools for Science')
})
it('runs a compile when the Recompile button is pressed', function () {
cy.interceptCompile()
@@ -479,7 +534,7 @@ describe('<PdfPreview/>', function () {
cy.findByRole('button', { name: 'Recompile' }).click()
cy.waitForCompile({ pdf: true })
cy.interceptCompile('recompile')
cy.interceptCompile({ prefix: 'recompile' })
cy.intercept('DELETE', '/project/*/output*', {
statusCode: 204,
@@ -108,7 +108,7 @@ describe('<CodeMirrorEditor/> Table editor', function () {
cy.interceptEvents()
cy.interceptMathJax()
cy.interceptCompile('compile', Number.MAX_SAFE_INTEGER)
cy.interceptCompile({ prefix: 'compile', times: Number.MAX_SAFE_INTEGER })
cy.intercept('/project/*/doc/*/metadata', { body: {} })
window.metaAttributesCache.set('ol-preventCompileOnLoad', true)
})
+1
View File
@@ -1,6 +1,7 @@
export type CompileOutputFile = {
path: string
url: string
downloadURL?: string
type: string
build: string
ranges?: {