diff --git a/services/web/app/src/Features/Compile/ClsiCacheController.js b/services/web/app/src/Features/Compile/ClsiCacheController.js new file mode 100644 index 0000000000..a8c99a830b --- /dev/null +++ b/services/web/app/src/Features/Compile/ClsiCacheController.js @@ -0,0 +1,191 @@ +const { NotFoundError } = require('../Errors/Errors') +const { + fetchStreamWithResponse, + RequestFailedError, + fetchJson, +} = require('@overleaf/fetch-utils') +const Path = require('path') +const { pipeline } = require('stream/promises') +const logger = require('@overleaf/logger') +const ClsiCacheManager = require('./ClsiCacheManager') +const CompileController = require('./CompileController') +const { expressify } = require('@overleaf/promise-utils') +const ClsiCacheHandler = require('./ClsiCacheHandler') +const ProjectGetter = require('../Project/ProjectGetter') + +/** + * Download a file from a specific build on the clsi-cache. + * + * @param req + * @param res + * @return {Promise<*>} + */ +async function downloadFromCache(req, res) { + const { Project_id: projectId, buildId, filename } = req.params + const userId = CompileController._getUserIdForCompile(req) + const signal = AbortSignal.timeout(60 * 1000) + let location, projectName + try { + ;[{ location }, { name: projectName }] = await Promise.all([ + ClsiCacheHandler.getOutputFile( + projectId, + userId, + buildId, + filename, + signal + ), + ProjectGetter.promises.getProject(projectId, { name: 1 }), + ]) + } catch (err) { + if (err instanceof NotFoundError) { + // res.sendStatus() sends a description of the status as body. + // Using res.status().end() avoids sending that fake body. + return res.status(404).end() + } else { + throw err + } + } + + const { stream, response } = await fetchStreamWithResponse(location, { + signal, + }) + if (req.destroyed) { + // The client has disconnected already, avoid trying to write into the broken connection. + return + } + + for (const key of ['Content-Length', 'Content-Type']) { + if (response.headers.has(key)) res.setHeader(key, response.headers.get(key)) + } + const ext = Path.extname(filename) + res.attachment( + ext === '.pdf' + ? `${CompileController._getSafeProjectName({ name: projectName })}.pdf` + : filename + ) + try { + res.writeHead(response.status) + await pipeline(stream, res) + } catch (err) { + const reqAborted = Boolean(req.destroyed) + const streamingStarted = Boolean(res.headersSent) + if (!streamingStarted) { + if (err instanceof RequestFailedError) { + res.sendStatus(err.response.status) + } else { + res.sendStatus(500) + } + } + if ( + streamingStarted && + reqAborted && + err.code === 'ERR_STREAM_PREMATURE_CLOSE' + ) { + // Ignore noisy spurious error + return + } + logger.warn( + { + err, + projectId, + location, + filename, + reqAborted, + streamingStarted, + }, + 'CLSI-cache proxy error' + ) + } +} + +/** + * Prepare a compile response from the clsi-cache. + * + * @param req + * @param res + * @return {Promise} + */ +async function getLatestBuildFromCache(req, res) { + const { Project_id: projectId } = req.params + const userId = CompileController._getUserIdForCompile(req) + try { + const { + internal: { location: metaLocation, zone }, + external: { isUpToDate, allFiles }, + } = await ClsiCacheManager.getLatestBuildFromCache( + projectId, + userId, + 'output.overleaf.json' + ) + + if (!isUpToDate) return res.sendStatus(410) + + const meta = await fetchJson(metaLocation, { + signal: AbortSignal.timeout(5 * 1000), + }) + + const [, editorId, buildId] = metaLocation.match( + /\/build\/([a-f0-9-]+?)-([a-f0-9]+-[a-f0-9]+)\// + ) + + let baseURL = `/project/${projectId}` + if (userId) { + baseURL += `/user/${userId}` + } + + const { ranges, contentId, clsiServerId, compileGroup, size } = meta + + const outputFiles = allFiles + .filter( + path => path !== 'output.overleaf.json' && path !== 'output.tar.gz' + ) + .map(path => { + const f = { + url: `${baseURL}/build/${editorId}-${buildId}/output/${path}`, + downloadURL: `/download/project/${projectId}/build/${editorId}-${buildId}/output/cached/${path}`, + build: buildId, + path, + type: path.split('.').pop(), + } + if (path === 'output.pdf') { + Object.assign(f, { + size, + editorId, + }) + if (clsiServerId !== 'cache') { + // Enable PDF caching and attempt to download from VM first. + // (clsi VMs do not have the editorId in the path on disk, omit it). + Object.assign(f, { + url: `${baseURL}/build/${buildId}/output/output.pdf`, + ranges, + contentId, + }) + } + } + return f + }) + let { pdfCachingMinChunkSize, pdfDownloadDomain } = + await CompileController._getSplitTestOptions(req, res) + pdfDownloadDomain += `/zone/${zone}` + res.json({ + fromCache: true, + status: 'success', + outputFiles, + compileGroup, + clsiServerId, + pdfDownloadDomain, + pdfCachingMinChunkSize, + }) + } catch (err) { + if (err instanceof NotFoundError) { + res.sendStatus(404) + } else { + throw err + } + } +} + +module.exports = { + downloadFromCache: expressify(downloadFromCache), + getLatestBuildFromCache: expressify(getLatestBuildFromCache), +} diff --git a/services/web/app/src/Features/Compile/ClsiCacheHandler.js b/services/web/app/src/Features/Compile/ClsiCacheHandler.js index de6cde7edc..80296f7f8d 100644 --- a/services/web/app/src/Features/Compile/ClsiCacheHandler.js +++ b/services/web/app/src/Features/Compile/ClsiCacheHandler.js @@ -1,3 +1,4 @@ +const _ = require('lodash') const { fetchNothing, fetchRedirectWithResponse, @@ -23,6 +24,13 @@ function validateFilename(filename) { } } +/** + * Clear the cache on all clsi-cache instances. + * + * @param projectId + * @param userId + * @return {Promise} + */ async function clearCache(projectId, userId) { let path = `/project/${projectId}` if (userId) { @@ -46,6 +54,45 @@ async function clearCache(projectId, userId) { ) } +/** + * Get an output file from a specific build. + * + * @param projectId + * @param userId + * @param buildId + * @param filename + * @param signal + * @return {Promise<{size: number, zone: string, location: string, lastModified: Date, allFiles: string[]}>} + */ +async function getOutputFile( + projectId, + userId, + buildId, + filename, + signal = AbortSignal.timeout(15_000) +) { + validateFilename(filename) + if (!/^[a-f0-9-]+$/.test(buildId)) { + throw new InvalidNameError('bad buildId') + } + + let path = `/project/${projectId}` + if (userId) { + path += `/user/${userId}` + } + path += `/build/${buildId}/search/output/${filename}` + return getRedirectWithFallback(projectId, userId, path, signal) +} + +/** + * Get an output file from the most recent build. + * + * @param projectId + * @param userId + * @param filename + * @param signal + * @return {Promise<{size: number, zone: string, location: string, lastModified: Date, allFiles: string[]}>} + */ async function getLatestOutputFile( projectId, userId, @@ -59,8 +106,34 @@ async function getLatestOutputFile( path += `/user/${userId}` } path += `/latest/output/${filename}` + return getRedirectWithFallback(projectId, userId, path, signal) +} - for (const { url, zone } of Settings.apis.clsiCache.instances) { +/** + * Request the given path from any of the clsi-cache instances. + * + * Some of them might be down temporarily. Try the next one until we receive a redirect or 404. + * + * This function is similar to the Coordinator in the clsi-cache, notable differences: + * - all the logic for sorting builds is in clsi-cache (re-used by clsi and web) + * - fan-out (1 client performs lookup on many clsi-cache instances) is "central" in clsi-cache, resulting in better connection re-use + * - we only cross the k8s cluster boundary via an internal GCLB once ($$$) + * + * @param projectId + * @param userId + * @param path + * @param signal + * @return {Promise<{size: number, zone: string, location: string, lastModified: Date, allFiles: string[]}>} + */ +async function getRedirectWithFallback( + projectId, + userId, + path, + signal = AbortSignal.timeout(15_000) +) { + // Avoid hitting the same instance first all the time. + const instances = _.shuffle(Settings.apis.clsiCache.instances) + for (const { url, zone } of instances) { const u = new URL(url) u.pathname = path try { @@ -92,6 +165,21 @@ async function getLatestOutputFile( throw new NotFoundError('nothing cached yet') } +/** + * Populate the clsi-cache for the given project/user with the provided source + * + * This is either another project, or a template (id+version). + * + * @param projectId + * @param userId + * @param sourceProjectId + * @param templateId + * @param templateVersionId + * @param lastUpdated + * @param zone + * @param signal + * @return {Promise} + */ async function prepareCacheSource( projectId, userId, @@ -122,6 +210,7 @@ async function prepareCacheSource( module.exports = { clearCache, + getOutputFile, getLatestOutputFile, prepareCacheSource, } diff --git a/services/web/app/src/Features/Compile/ClsiCacheManager.js b/services/web/app/src/Features/Compile/ClsiCacheManager.js index 7c1811dd67..1090b9f2d0 100644 --- a/services/web/app/src/Features/Compile/ClsiCacheManager.js +++ b/services/web/app/src/Features/Compile/ClsiCacheManager.js @@ -4,15 +4,26 @@ const DocumentUpdaterHandler = require('../DocumentUpdater/DocumentUpdaterHandle const ProjectGetter = require('../Project/ProjectGetter') const SplitTestHandler = require('../SplitTests/SplitTestHandler') +/** + * Get the most recent build and metadata + * + * Internal: internal metadata; External: fine to send to user as-is. + * + * @param projectId + * @param userId + * @param filename + * @param signal + * @return {Promise<{internal: {zone: string, location: string}, external: {isUpToDate: boolean, lastUpdated: Date, size: number, allFiles: string[]}}>} + */ async function getLatestBuildFromCache(projectId, userId, filename, signal) { const [ { location, lastModified: lastCompiled, zone, size, allFiles }, lastUpdatedInRedis, - { lastUpdated: lastUpdatedInMongo, name: projectName }, + { lastUpdated: lastUpdatedInMongo }, ] = await Promise.all([ ClsiCacheHandler.getLatestOutputFile(projectId, userId, filename, signal), DocumentUpdaterHandler.promises.getProjectLastUpdatedAt(projectId), - ProjectGetter.promises.getProject(projectId, { lastUpdated: 1, name: 1 }), + ProjectGetter.promises.getProject(projectId, { lastUpdated: 1 }), ]) const lastUpdated = @@ -25,7 +36,6 @@ async function getLatestBuildFromCache(projectId, userId, filename, signal) { internal: { location, zone, - projectName, }, external: { isUpToDate, @@ -36,6 +46,16 @@ async function getLatestBuildFromCache(projectId, userId, filename, signal) { } } +/** + * Collect metadata and prepare the clsi-cache for the given project. + * + * @param projectId + * @param userId + * @param sourceProjectId + * @param templateId + * @param templateVersionId + * @return {Promise} + */ async function prepareClsiCache( projectId, userId, diff --git a/services/web/app/src/Features/Compile/CompileController.js b/services/web/app/src/Features/Compile/CompileController.js index b7726f1d72..bb225c6b79 100644 --- a/services/web/app/src/Features/Compile/CompileController.js +++ b/services/web/app/src/Features/Compile/CompileController.js @@ -57,7 +57,7 @@ async function getPdfCachingMinChunkSize(req, res) { return parseInt(variant, 10) } -const getSplitTestOptions = callbackify(async function (req, res) { +async function _getSplitTestOptions(req, res) { // Use the query flags from the editor request for overriding the split test. let query = {} try { @@ -122,7 +122,8 @@ const getSplitTestOptions = callbackify(async function (req, res) { enablePdfCaching, pdfCachingMinChunkSize, } -}) +} +const getSplitTestOptionsCb = callbackify(_getSplitTestOptions) module.exports = CompileController = { compile(req, res, next) { @@ -161,7 +162,7 @@ module.exports = CompileController = { options.incrementalCompilesEnabled = true } - getSplitTestOptions(req, res, (err, splitTestOptions) => { + getSplitTestOptionsCb(req, res, (err, splitTestOptions) => { if (err) return next(err) let { compileFromClsiCache, @@ -305,25 +306,20 @@ module.exports = CompileController = { ) }, - _compileAsUser(req, callback) { - // callback with userId if per-user, undefined otherwise - if (!Settings.disablePerUserCompiles) { - const userId = SessionManager.getLoggedInUserId(req.session) - callback(null, userId) - } else { - callback() - } - }, // do a per-project compile, not per-user + _getSplitTestOptions, - _downloadAsUser(req, callback) { - // callback with userId if per-user, undefined otherwise + _getUserIdForCompile(req) { if (!Settings.disablePerUserCompiles) { - const userId = SessionManager.getLoggedInUserId(req.session) - callback(null, userId) - } else { - callback() + return SessionManager.getLoggedInUserId(req.session) } - }, // do a per-project compile, not per-user + return null + }, + _compileAsUser(req, callback) { + callback(null, CompileController._getUserIdForCompile(req)) + }, + _downloadAsUser(req, callback) { + callback(null, CompileController._getUserIdForCompile(req)) + }, downloadPdf(req, res, next) { Metrics.inc('pdf-downloads') @@ -549,7 +545,7 @@ module.exports = CompileController = { getImageNameForProject(projectId, (error, imageName) => { if (error) return next(error) - getSplitTestOptions(req, res, (error, splitTestOptions) => { + getSplitTestOptionsCb(req, res, (error, splitTestOptions) => { if (error) return next(error) const { compileFromClsiCache } = splitTestOptions @@ -597,7 +593,7 @@ module.exports = CompileController = { getImageNameForProject(projectId, (error, imageName) => { if (error) return next(error) - getSplitTestOptions(req, res, (error, splitTestOptions) => { + getSplitTestOptionsCb(req, res, (error, splitTestOptions) => { if (error) return next(error) const { compileFromClsiCache } = splitTestOptions diff --git a/services/web/app/src/Features/Project/ProjectController.js b/services/web/app/src/Features/Project/ProjectController.js index 8fd4668468..8d1f59348b 100644 --- a/services/web/app/src/Features/Project/ProjectController.js +++ b/services/web/app/src/Features/Project/ProjectController.js @@ -338,6 +338,8 @@ const _ProjectController = { 'external-socket-heartbeat', 'full-project-search', 'null-test-share-modal', + 'fall-back-to-clsi-cache', + 'initial-compile-from-clsi-cache', 'pdf-caching-cached-url-lookup', 'pdf-caching-mode', 'pdf-caching-prefetch-large', diff --git a/services/web/app/src/router.mjs b/services/web/app/src/router.mjs index 830620506a..1c024fe7ab 100644 --- a/services/web/app/src/router.mjs +++ b/services/web/app/src/router.mjs @@ -66,6 +66,7 @@ import _ from 'lodash' import { plainTextResponse } from './infrastructure/Response.js' import PublicAccessLevels from './Features/Authorization/PublicAccessLevels.js' import SocketDiagnostics from './Features/SocketDiagnostics/SocketDiagnostics.mjs' +import ClsiCacheController from './Features/Compile/ClsiCacheController.js' const ClsiCookieManager = ClsiCookieManagerFactory( Settings.apis.clsi != null ? Settings.apis.clsi.backendGroupName : undefined ) @@ -625,6 +626,18 @@ async function initialize(webRouter, privateApiRouter, publicApiRouter) { CompileController.downloadPdf ) + webRouter.get( + '/project/:Project_id/output/cached/output.overleaf.json', + AuthorizationMiddleware.ensureUserCanReadProject, + ClsiCacheController.getLatestBuildFromCache + ) + + webRouter.get( + '/download/project/:Project_id/build/:buildId/output/cached/:filename', + AuthorizationMiddleware.ensureUserCanReadProject, + ClsiCacheController.downloadFromCache + ) + // PDF Download button for specific build webRouter.get( /^\/download\/project\/([^/]*)\/build\/([0-9a-f-]+)\/output\/output\.pdf$/, diff --git a/services/web/cypress/support/shared/commands/compile.ts b/services/web/cypress/support/shared/commands/compile.ts index 90567d8e74..e6ea7f70dd 100644 --- a/services/web/cypress/support/shared/commands/compile.ts +++ b/services/web/cypress/support/shared/commands/compile.ts @@ -43,23 +43,60 @@ const outputFiles = () => { ] } -export const interceptCompile = (prefix = 'compile', times = 1) => { - cy.intercept( - { method: 'POST', pathname: '/project/*/compile', times }, - { - body: { - status: 'success', - clsiServerId: 'foo', - compileGroup: 'priority', - pdfDownloadDomain: 'https://clsi.test-overleaf.com', - outputFiles: outputFiles(), - }, - } - ).as(`${prefix}`) +export const interceptCompile = ({ + prefix = 'compile', + times = 1, + cached = false, + outputPDFFixture = 'output.pdf', +} = {}) => { + if (cached) { + cy.intercept( + { pathname: '/project/*/output/cached/output.overleaf.json', times }, + { + body: { + fromCache: true, + status: 'success', + clsiServerId: 'foo', + compileGroup: 'priority', + pdfDownloadDomain: 'https://clsi.test-overleaf.com', + outputFiles: outputFiles(), + }, + } + ).as(`${prefix}-cached`) + cy.intercept( + { method: 'POST', pathname: '/project/*/compile', times }, + { + body: { + status: 'unavailable', + clsiServerId: 'foo', + compileGroup: 'priority', + pdfDownloadDomain: 'https://clsi.test-overleaf.com', + outputFiles: [], + }, + } + ).as(`${prefix}`) + } else { + cy.intercept( + { pathname: '/project/*/output/cached/output.overleaf.json', times }, + { statusCode: 404 } + ).as(`${prefix}-cached`) + cy.intercept( + { method: 'POST', pathname: '/project/*/compile', times }, + { + body: { + status: 'success', + clsiServerId: 'foo', + compileGroup: 'priority', + pdfDownloadDomain: 'https://clsi.test-overleaf.com', + outputFiles: outputFiles(), + }, + } + ).as(`${prefix}`) + } cy.intercept( { pathname: '/build/*/output.pdf', times }, - { fixture: 'build/output.pdf,null' } + { fixture: `build/${outputPDFFixture},null` } ).as(`${prefix}-pdf`) cy.intercept( @@ -73,12 +110,26 @@ export const interceptCompile = (prefix = 'compile', times = 1) => { ).as(`${prefix}-blg`) } -export const waitForCompile = ({ prefix = 'compile', pdf = false } = {}) => { - cy.wait(`@${prefix}`) +export const waitForCompile = ({ + prefix = 'compile', + pdf = false, + cached = false, +} = {}) => { + if (cached) { + cy.wait(`@${prefix}-cached`) + } else { + cy.wait(`@${prefix}`) + } cy.wait(`@${prefix}-log`) + .its('request.query.clsiserverid') + .should('eq', cached ? 'cache' : 'foo') // straight from cache if cached cy.wait(`@${prefix}-blg`) + .its('request.query.clsiserverid') + .should('eq', cached ? 'cache' : 'foo') // straight from cache if cached if (pdf) { cy.wait(`@${prefix}-pdf`) + .its('request.query.clsiserverid') + .should('eq', 'foo') // always from VM first } return cy.wrap(null) } diff --git a/services/web/frontend/js/features/pdf-preview/components/pdf-file-list.tsx b/services/web/frontend/js/features/pdf-preview/components/pdf-file-list.tsx index 8ed0b86d9b..753aa32805 100644 --- a/services/web/frontend/js/features/pdf-preview/components/pdf-file-list.tsx +++ b/services/web/frontend/js/features/pdf-preview/components/pdf-file-list.tsx @@ -24,7 +24,11 @@ function PdfFileList({ fileList }: { fileList: PdfFileDataList }) { {fileList.top.map(file => (
  • - + {file.path}
  • @@ -36,7 +40,11 @@ function PdfFileList({ fileList }: { fileList: PdfFileDataList }) { {fileList.other.map(file => (
  • - + {file.path}
  • diff --git a/services/web/frontend/js/features/pdf-preview/util/file-list.ts b/services/web/frontend/js/features/pdf-preview/util/file-list.ts index 106234f1fe..310fbb55fb 100644 --- a/services/web/frontend/js/features/pdf-preview/util/file-list.ts +++ b/services/web/frontend/js/features/pdf-preview/util/file-list.ts @@ -11,14 +11,21 @@ const ignoreFiles = ['output.fls', 'output.fdb_latexmk'] export function buildFileList( outputFiles: Map, - { clsiServerId, compileGroup, outputFilesArchive }: CompileResponseData + { + clsiServerId, + compileGroup, + outputFilesArchive, + fromCache = false, + }: CompileResponseData ): PdfFileDataList { const files: PdfFileDataList = { top: [], other: [] } if (outputFiles) { const params = new URLSearchParams() - if (clsiServerId) { + if (fromCache) { + params.set('clsiserverid', 'cache') + } else if (clsiServerId) { params.set('clsiserverid', clsiServerId) } if (compileGroup) { diff --git a/services/web/frontend/js/features/pdf-preview/util/output-files.js b/services/web/frontend/js/features/pdf-preview/util/output-files.js index 0a79786f64..6c93a02368 100644 --- a/services/web/frontend/js/features/pdf-preview/util/output-files.js +++ b/services/web/frontend/js/features/pdf-preview/util/output-files.js @@ -19,9 +19,10 @@ export function handleOutputFiles(outputFiles, projectId, data) { outputFile.editorId = outputFile.editorId || EDITOR_SESSION_ID // build the URL for viewing the PDF in the preview UI - const params = new URLSearchParams({ - compileGroup: data.compileGroup, - }) + const params = new URLSearchParams() + if (data.compileGroup) { + params.set('compileGroup', data.compileGroup) + } if (data.clsiServerId) { params.set('clsiserverid', data.clsiServerId) @@ -37,10 +38,14 @@ export function handleOutputFiles(outputFiles, projectId, data) { data.pdfDownloadDomain )}?${params}` - // build the URL for downloading the PDF - params.set('popupDownload', 'true') // save PDF download as file + if (data.fromCache) { + outputFile.pdfDownloadUrl = outputFile.downloadURL + } else { + // build the URL for downloading the PDF + params.set('popupDownload', 'true') // save PDF download as file - outputFile.pdfDownloadUrl = `/download/project/${projectId}/build/${outputFile.build}/output/output.pdf?${params}` + outputFile.pdfDownloadUrl = `/download/project/${projectId}/build/${outputFile.build}/output/output.pdf?${params}` + } return outputFile } diff --git a/services/web/frontend/js/features/pdf-preview/util/pdf-caching-flags.js b/services/web/frontend/js/features/pdf-preview/util/pdf-caching-flags.js index 8c15dc4550..d0a5a57ae9 100644 --- a/services/web/frontend/js/features/pdf-preview/util/pdf-caching-flags.js +++ b/services/web/frontend/js/features/pdf-preview/util/pdf-caching-flags.js @@ -26,3 +26,4 @@ export const prefetchingEnabled = isFlagEnabled('pdf-caching-prefetching') export const prefetchLargeEnabled = isFlagEnabled('pdf-caching-prefetch-large') export const enablePdfCaching = isFlagEnabled('pdf-caching-mode') export const trackPdfDownloadEnabled = isFlagEnabled('track-pdf-download') +export const useClsiCache = isFlagEnabled('fall-back-to-clsi-cache') diff --git a/services/web/frontend/js/features/pdf-preview/util/pdf-caching-transport.js b/services/web/frontend/js/features/pdf-preview/util/pdf-caching-transport.js index cb7433ee52..3bebc980ae 100644 --- a/services/web/frontend/js/features/pdf-preview/util/pdf-caching-transport.js +++ b/services/web/frontend/js/features/pdf-preview/util/pdf-caching-transport.js @@ -1,13 +1,14 @@ import OError from '@overleaf/o-error' import { fallbackRequest, fetchRange } from './pdf-caching' import { captureException } from '@/infrastructure/error-reporter' -import { getPdfCachingMetrics } from './metrics' +import { EDITOR_SESSION_ID, getPdfCachingMetrics } from './metrics' import { cachedUrlLookupEnabled, enablePdfCaching, prefetchingEnabled, prefetchLargeEnabled, trackPdfDownloadEnabled, + useClsiCache, } from './pdf-caching-flags' import { isNetworkError } from '@/utils/is-network-error' import { debugConsole } from '@/utils/debugging' @@ -50,12 +51,20 @@ export function generatePdfCachingTransportFactory() { constructor({ url, pdfFile, abortController, handleFetchError }) { super(pdfFile.size, new Uint8Array()) this.url = url + pdfFile.ranges = pdfFile.ranges || [] + pdfFile.editorId = pdfFile.editorId || EDITOR_SESSION_ID this.pdfFile = pdfFile // Clone the chunks as the objectId field is encoded to a Uint8Array. - this.pdfRanges = pdfFile.ranges.map(r => Object.assign({}, r)) + this.leanPdfRanges = pdfFile.ranges.map(r => Object.assign({}, r)) this.handleFetchError = handleFetchError this.abortController = abortController this.startTime = performance.now() + + const params = new URL(url).searchParams + // drop no needed params + params.delete('enable_pdf_caching') + params.delete('verify_chunks') + this.queryForChunks = params.toString() } abort() { @@ -67,7 +76,7 @@ export function generatePdfCachingTransportFactory() { const getDebugInfo = () => ({ // Sentry does not serialize objects in twice nested objects. // Move the ranges to the root level to see them in Sentry. - pdfRanges: this.pdfRanges, + pdfRanges: this.leanPdfRanges, pdfFile: Object.assign({}, this.pdfFile, { ranges: '[extracted]', // Hide prefetched chunks as these include binary blobs. @@ -83,7 +92,7 @@ export function generatePdfCachingTransportFactory() { performance.now() - this.startTime > STALE_OUTPUT_REQUEST_THRESHOLD_MS const is404 = err => OError.getFullInfo(err).statusCode === 404 const isFromOutputPDFRequest = err => - OError.getFullInfo(err).url === this.url + OError.getFullInfo(err).url?.includes?.('/output.pdf') === true // Do not consider "expected 404s" and network errors as pdf caching // failures. @@ -96,11 +105,68 @@ export function generatePdfCachingTransportFactory() { (is404(err) || isNetworkError(err)) && (isStaleOutputRequest() || isFromOutputPDFRequest(err)) + const usesCache = url => { + if (!url) return false + const u = new URL(url) + return ( + u.pathname.endsWith( + `build/${this.pdfFile.editorId}-${this.pdfFile.build}/output/output.pdf` + ) && u.searchParams.get('clsiserverid') === 'cache' + ) + } + const canTryFromCache = err => { + if (!useClsiCache) return false + if (!is404(err)) return false + return !usesCache(OError.getFullInfo(err).url) + } + const getOutputPDFURLFromCache = () => { + if (usesCache(this.url)) return this.url + const u = new URL(this.url) + u.searchParams.set('clsiserverid', 'cache') + u.pathname = u.pathname.replace( + /build\/[a-f0-9-]+\//, + `build/${this.pdfFile.editorId}-${this.pdfFile.build}/` + ) + return u.href + } + const fetchFromCache = async () => { + // Try fetching the chunk from clsi-cache + const url = getOutputPDFURLFromCache() + return fallbackRequest({ + file: this.pdfFile, + url, + start, + end, + abortSignal, + }) + .then(blob => { + // Send the next output.pdf request directly to the cache. + this.url = url + // Only try downloading chunks that were cached previously + this.pdfFile.ranges = this.pdfFile.ranges.filter(r => + cachedUrls.has(r.hash) + ) + return blob + }) + .catch(err => { + throw OError.tag( + new PDFJS.MissingPDFException(), + 'cache-fallback', + { + statusCode: OError.getFullInfo(err).statusCode, + url: OError.getFullInfo(err).url, + err, + } + ) + }) + } + fetchRange({ url: this.url, start, end, file: this.pdfFile, + queryForChunks: this.queryForChunks, metrics, usageScore, cachedUrls, @@ -109,9 +175,11 @@ export function generatePdfCachingTransportFactory() { prefetchLargeEnabled, cachedUrlLookupEnabled, abortSignal, + fallbackToCacheURL: getOutputPDFURLFromCache(), }) .catch(err => { if (abortSignal.aborted) return + if (canTryFromCache(err)) return fetchFromCache() if (isExpectedError(err)) { if (is404(err)) { // A regular pdf-js request would have seen this 404 as well. @@ -140,11 +208,13 @@ export function generatePdfCachingTransportFactory() { }, }) return fallbackRequest({ + file: this.pdfFile, url: this.url, start, end, abortSignal, }).catch(err => { + if (canTryFromCache(err)) return fetchFromCache() if (isExpectedError(err)) { throw OError.tag(new PDFJS.MissingPDFException(), 'fallback', { statusCode: OError.getFullInfo(err).statusCode, diff --git a/services/web/frontend/js/features/pdf-preview/util/pdf-caching.js b/services/web/frontend/js/features/pdf-preview/util/pdf-caching.js index 0e3a40d730..77e7d35a2b 100644 --- a/services/web/frontend/js/features/pdf-preview/util/pdf-caching.js +++ b/services/web/frontend/js/features/pdf-preview/util/pdf-caching.js @@ -21,6 +21,28 @@ const CHUNK_USAGE_THRESHOLD_CACHED = 42 // 42 * 0.7^11 < 1, aka we keep stale entries around for 11 compiles. const CHUNK_USAGE_STALE_DECAY_RATE = 0.7 +let cacheFlag = 'default' +// Work around a Chrome bug: https://issues.chromium.org/issues/40542704 +// Multiple simultaneous requests to same URL with Range header cause failure (block backend returns ERR_CACHE_OPERATION_NOT_SUPPORTED) +const CACHE_NO_STORE = 'no-store' + +/** + * @param {string} url + * @param {RequestInit} init + */ +async function fetchWithCacheFallback(url, init) { + try { + return await fetch(url, init) + } catch (err) { + if (init.headers?.has('Range') && init.cache !== CACHE_NO_STORE) { + cacheFlag = CACHE_NO_STORE + init.cache = CACHE_NO_STORE + return await fetch(url, init) + } + throw err + } +} + /** * @param {Object} file */ @@ -470,21 +492,30 @@ export function checkChunkResponse(response, estimatedSize, init) { } } +function getDynamicChunkInit({ file, start, end, signal }) { + // Avoid making range request when downloading the PDF file in full. + const isFullFile = start === 0 && end === file.size + return { + cache: cacheFlag, + headers: new Headers( + isFullFile ? {} : { Range: `bytes=${start}-${end - 1}` } + ), + signal, + } +} + /** * + * @param {Object} file * @param {string} url * @param {number} start * @param {number} end * @param {AbortSignal} abortSignal */ -export async function fallbackRequest({ url, start, end, abortSignal }) { +export async function fallbackRequest({ file, url, start, end, abortSignal }) { try { - const init = { - cache: 'no-store', - headers: { Range: `bytes=${start}-${end - 1}` }, - signal: abortSignal, - } - const response = await fetch(url, init) + const init = getDynamicChunkInit({ file, start, end, signal: abortSignal }) + const response = await fetchWithCacheFallback(url, init) checkChunkResponse(response, end - start, init) return await response.arrayBuffer() } catch (e) { @@ -494,6 +525,7 @@ export async function fallbackRequest({ url, start, end, abortSignal }) { /** * + * @param {Object} file * @param {string} url * @param {number} start * @param {number} end @@ -501,10 +533,24 @@ export async function fallbackRequest({ url, start, end, abortSignal }) { * @param {Uint8Array} actual * @param {AbortSignal} abortSignal */ -async function verifyRange({ url, start, end, metrics, actual, abortSignal }) { +async function verifyRange({ + file, + url, + start, + end, + metrics, + actual, + abortSignal, +}) { let expectedRaw try { - expectedRaw = await fallbackRequest({ url, start, end, abortSignal }) + expectedRaw = await fallbackRequest({ + file, + url, + start, + end, + abortSignal, + }) } catch (error) { throw OError.tag(error, 'cannot verify range', { url, start, end }) } @@ -541,9 +587,11 @@ function skipPrefetched(chunks, prefetched, start, end) { * @param {Object|Object[]} chunk * @param {string} url * @param {RequestInit} init - * @param {Map} cachedUrls + * @param {Map} cachedUrls * @param {Object} metrics * @param {boolean} cachedUrlLookupEnabled + * @param {string} fallbackToCacheURL + * @param {Object} file */ async function fetchChunk({ chunk, @@ -552,18 +600,21 @@ async function fetchChunk({ cachedUrls, metrics, cachedUrlLookupEnabled, + fallbackToCacheURL, + file, }) { const estimatedSize = Array.isArray(chunk) ? estimateSizeOfMultipartResponse(chunk) : chunk.end - chunk.start const oldUrl = cachedUrls.get(chunk.hash) - if (cachedUrlLookupEnabled && chunk.hash && oldUrl && oldUrl !== url) { + if (cachedUrlLookupEnabled && chunk.hash && oldUrl && oldUrl.url !== url) { // When the clsi server id changes, the content id changes too and as a // result all the browser cache keys (aka urls) get invalidated. // We memorize the previous browser cache keys in `cachedUrls`. try { - const response = await fetch(oldUrl, init) + oldUrl.init.signal = init.signal + const response = await fetchWithCacheFallback(oldUrl.url, oldUrl.init) if (response.status === 200) { checkChunkResponse(response, estimatedSize, init) metrics.oldUrlHitCount += 1 @@ -577,10 +628,46 @@ async function fetchChunk({ } catch (e) { // Fallback to the latest url. } + cachedUrls.delete(chunk.hash) // clear cached state + } + let response + try { + response = await fetchWithCacheFallback(url, init) + checkChunkResponse(response, estimatedSize, init) + if (chunk.hash) { + delete init.signal // omit the signal from the cache + cachedUrls.set(chunk.hash, { url, init }) + } + } catch (err1) { + if (chunk.hash) { + cachedUrls.delete(chunk.hash) + } + const isMissing = response?.status === 404 || response?.status === 416 + const hasOthersCached = cachedUrls.size > 0 + if (isMissing && hasOthersCached) { + // Only try downloading chunks that were cached previously + file.ranges = file.ranges.filter(r => cachedUrls.has(r.hash)) + // Try harder at fetching the chunk, fallback to cache + url = fallbackToCacheURL + if (chunk.hash) { + init = getDynamicChunkInit({ + file, + // skip object id prefix + start: chunk.start + chunk.objectId.byteLength, + end: chunk.end, + signal: init.signal, + }) + } + try { + response = await fetchWithCacheFallback(url, init) + checkChunkResponse(response, estimatedSize, init) + } catch (err2) { + throw err1 + } + } else { + throw err1 + } } - const response = await fetch(url, init) - checkChunkResponse(response, estimatedSize, init) - if (chunk.hash) cachedUrls.set(chunk.hash, url) return response } @@ -720,6 +807,7 @@ class Timer { * @param {number} start * @param {number} end * @param {Object} file + * @param {queryForChunks} start * @param {Object} metrics * @param {Map} usageScore * @param {Map} cachedUrls @@ -728,12 +816,14 @@ class Timer { * @param {boolean} prefetchLargeEnabled * @param {boolean} tryOldCachedUrlEnabled * @param {AbortSignal} abortSignal + * @param {string} fallbackToCacheURL */ export async function fetchRange({ url, start, end, file, + queryForChunks, metrics, usageScore, cachedUrls, @@ -742,6 +832,7 @@ export async function fetchRange({ prefetchLargeEnabled, cachedUrlLookupEnabled, abortSignal, + fallbackToCacheURL, }) { const timer = new Timer() timer.startBlockingCompute() @@ -793,7 +884,7 @@ export async function fetchRange({ fetchedCount: 1, fetchedBytes: size, }) - return fallbackRequest({ url, start, end, abortSignal }) + return fallbackRequest({ file, url, start, end, abortSignal }) } if (prefetchingEnabled) { @@ -819,8 +910,8 @@ export async function fetchRange({ chunk: dynamicChunks[0], url, init: { - cache: 'no-store', - headers: { Range: `bytes=${byteRanges}` }, + cache: cacheFlag, + headers: new Headers({ Range: `bytes=${byteRanges}` }), }, }) break @@ -833,8 +924,10 @@ export async function fetchRange({ chunk, url, init: { - cache: 'no-store', - headers: { Range: `bytes=${chunk.start}-${chunk.end - 1}` }, + cache: cacheFlag, + headers: new Headers({ + Range: `bytes=${chunk.start}-${chunk.end - 1}`, + }), }, }) }) @@ -844,17 +937,12 @@ export async function fetchRange({ chunk: dynamicChunks, url, init: { - cache: 'no-store', - headers: { Range: `bytes=${byteRanges}` }, + cache: cacheFlag, + headers: new Headers({ Range: `bytes=${byteRanges}` }), }, }) } - const params = new URL(url).searchParams - // drop no needed params - params.delete('enable_pdf_caching') - params.delete('verify_chunks') - const query = params.toString() // The schema of `url` is https://domain/project/:id/user/:id/build/... for // authenticated and https://domain/project/:id/build/... for // unauthenticated users. Cut it before /build/. @@ -863,7 +951,7 @@ export async function fetchRange({ const requests = chunks .map(chunk => ({ chunk, - url: `${perUserPrefix}/content/${file.contentId}/${chunk.hash}?${query}`, + url: `${perUserPrefix}/content/${file.contentId}/${chunk.hash}?${queryForChunks}`, init: {}, })) .concat(coalescedDynamicChunks) @@ -886,6 +974,8 @@ export async function fetchRange({ cachedUrls, metrics, cachedUrlLookupEnabled, + fallbackToCacheURL, + file, }) timer.startBlockingCompute() const boundary = getMultipartBoundary(response, chunk) diff --git a/services/web/frontend/js/shared/context/local-compile-context.tsx b/services/web/frontend/js/shared/context/local-compile-context.tsx index e673ba237d..4ecb465382 100644 --- a/services/web/frontend/js/shared/context/local-compile-context.tsx +++ b/services/web/frontend/js/shared/context/local-compile-context.tsx @@ -38,12 +38,14 @@ import { useFileTreePathContext } from '@/features/file-tree/contexts/file-tree- import { useUserSettingsContext } from '@/shared/context/user-settings-context' import { useFeatureFlag } from '@/shared/context/split-test-context' import { useEditorManagerContext } from '@/features/ide-react/context/editor-manager-context' +import { getJSON } from '@/infrastructure/fetch-json' import { CompileResponseData } from '../../../../types/compile' import { PdfScrollPosition, usePdfScrollPosition, } from '@/shared/hooks/use-pdf-scroll-position' import { PdfFileDataList } from '@/features/pdf-preview/util/types' +import { isSplitTestEnabled } from '@/utils/splitTestUtils' type PdfFile = Record @@ -116,7 +118,7 @@ export const LocalCompileProvider: FC = ({ children }) => { const { hasPremiumCompile, isProjectOwner } = useEditorContext() const { openDocWithId, openDocs, currentDocument } = useEditorManagerContext() - const { _id: projectId, rootDocId } = useProjectContext() + const { _id: projectId, rootDocId, joinedOnce } = useProjectContext() const { pdfPreviewOpen } = useLayoutContext() @@ -186,6 +188,12 @@ export const LocalCompileProvider: FC = ({ children }) => { // whether the project has been compiled yet const [compiledOnce, setCompiledOnce] = useState(false) + // fetch initial compile response from cache + const [initialCompileFromCache, setInitialCompileFromCache] = useState( + isSplitTestEnabled('initial-compile-from-clsi-cache') + ) + // Compile triggered while fetching the initial compile from cache + const upgradeInitialCompileFromCacheRef = useRef(false) // whether the cache is being cleared const [clearingCache, setClearingCache] = useState(false) @@ -327,13 +335,43 @@ export const LocalCompileProvider: FC = ({ children }) => { setEditedSinceCompileStarted(changedAt > 0) }, [setEditedSinceCompileStarted, changedAt]) + // try to fetch the last compile result after opening the project, potentially before joining the project. + useEffect(() => { + if (initialCompileFromCache) { + setInitialCompileFromCache(false) + setCompiling(true) + setCompiledOnce(true) + getJSON(`/project/${projectId}/output/cached/output.overleaf.json`) + .then((data: any) => { + setCompiling(false) + setData({ + ...data, + options: compiler.defaultOptions, + }) + if (upgradeInitialCompileFromCacheRef.current) { + compilingRef.current = false + compiler.compile() // trigger regular compile + } + }) + .catch(() => { + setCompiling(false) + if (upgradeInitialCompileFromCacheRef.current) { + compilingRef.current = false + compiler.compile() // trigger regular compile + } else { + setCompiledOnce(false) // trigger auto compile + } + }) + } + }, [projectId, initialCompileFromCache, compiler]) + // always compile the PDF once after opening the project, after the doc has loaded useEffect(() => { - if (!compiledOnce && currentDocument) { + if (!compiledOnce && currentDocument && !initialCompileFromCache) { setCompiledOnce(true) compiler.compile({ isAutoCompileOnLoad: true }) } - }, [compiledOnce, currentDocument, compiler]) + }, [compiledOnce, currentDocument, initialCompileFromCache, compiler]) useEffect(() => { setHasShortCompileTimeout( @@ -370,6 +408,7 @@ export const LocalCompileProvider: FC = ({ children }) => { // note: this should _only_ run when `data` changes, // the other dependencies must all be static useEffect(() => { + if (!joinedOnce) return // wait for joinProject, it populates the premium flags. const abortController = new AbortController() const recordedActions = recordedActionsRef.current @@ -519,6 +558,7 @@ export const LocalCompileProvider: FC = ({ children }) => { abortController.abort() } }, [ + joinedOnce, data, alphaProgram, labsProgram, @@ -579,9 +619,10 @@ export const LocalCompileProvider: FC = ({ children }) => { // start a compile manually const startCompile = useCallback( options => { + upgradeInitialCompileFromCacheRef.current = true compiler.compile(options) }, - [compiler] + [compiler, upgradeInitialCompileFromCacheRef] ) // stop a compile manually diff --git a/services/web/frontend/js/shared/context/project-context.tsx b/services/web/frontend/js/shared/context/project-context.tsx index 4e54acaeed..48532e821c 100644 --- a/services/web/frontend/js/shared/context/project-context.tsx +++ b/services/web/frontend/js/shared/context/project-context.tsx @@ -29,6 +29,7 @@ const projectFallback = { export const ProjectProvider: FC = ({ children }) => { const [project] = useScopeValue('project') + const joinedOnce = !!project const { _id, @@ -69,6 +70,7 @@ export const ProjectProvider: FC = ({ children }) => { trackChangesState, mainBibliographyDocId, projectSnapshot, + joinedOnce, } }, [ _id, @@ -84,6 +86,7 @@ export const ProjectProvider: FC = ({ children }) => { trackChangesState, mainBibliographyDocId, projectSnapshot, + joinedOnce, ]) return ( diff --git a/services/web/frontend/js/shared/context/types/project-context.tsx b/services/web/frontend/js/shared/context/types/project-context.tsx index 91419ed06f..aa045d693f 100644 --- a/services/web/frontend/js/shared/context/types/project-context.tsx +++ b/services/web/frontend/js/shared/context/types/project-context.tsx @@ -49,6 +49,7 @@ export type ProjectContextValue = { }[] trackChangesState: boolean | Record projectSnapshot: ProjectSnapshot + joinedOnce: boolean } export type ProjectContextUpdateValue = Partial diff --git a/services/web/test/frontend/components/pdf-preview/pdf-js-viewer.spec.tsx b/services/web/test/frontend/components/pdf-preview/pdf-js-viewer.spec.tsx index 12ba83cb48..fda113ef4a 100644 --- a/services/web/test/frontend/components/pdf-preview/pdf-js-viewer.spec.tsx +++ b/services/web/test/frontend/components/pdf-preview/pdf-js-viewer.spec.tsx @@ -23,7 +23,7 @@ describe('', function () {
    - +
    @@ -68,7 +68,7 @@ describe('', function () {
    - +
    @@ -88,7 +88,7 @@ describe('', function () {
    - +
    diff --git a/services/web/test/frontend/components/pdf-preview/pdf-preview.spec.tsx b/services/web/test/frontend/components/pdf-preview/pdf-preview.spec.tsx index 245009de74..3f5222f0e0 100644 --- a/services/web/test/frontend/components/pdf-preview/pdf-preview.spec.tsx +++ b/services/web/test/frontend/components/pdf-preview/pdf-preview.spec.tsx @@ -34,12 +34,15 @@ describe('', function () { 'ol-compilesUserContentDomain', 'https://compiles-user.dev-overleaf.com' ) + window.metaAttributesCache.set('ol-splitTestVariants', { + 'initial-compile-from-clsi-cache': 'enabled', + }) cy.interceptEvents() }) it('renders the PDF preview', function () { window.metaAttributesCache.set('ol-preventCompileOnLoad', false) - cy.interceptCompile('compile') + cy.interceptCompile() const scope = mockScope() @@ -57,6 +60,58 @@ describe('', function () { cy.findByRole('button', { name: 'Recompile' }) }) + it('uses the cache when available', function () { + cy.interceptCompile({ prefix: 'compile', times: 1, cached: true }) + + const scope = mockScope() + + cy.mount( + +
    + +
    +
    + ) + + // wait for "compile from cache on load" to finish + cy.waitForCompile({ pdf: true, cached: true }) + + cy.contains('Your Paper') + }) + + it('uses the cache when available then compiles', function () { + cy.interceptCompile({ prefix: 'compile', times: 1, cached: true }) + + const scope = mockScope() + + cy.mount( + +
    + +
    +
    + ) + + // wait for "compile from cache on load" to finish + cy.waitForCompile({ pdf: true, cached: true }) + cy.contains('Your Paper') + + // Then trigger a new compile + cy.interceptCompile({ + prefix: 'recompile', + times: 1, + cached: false, + outputPDFFixture: 'output-2.pdf', + }) + + // press the Recompile button => compile + cy.findByRole('button', { name: 'Recompile' }).click() + + // wait for compile to finish + cy.waitForCompile({ prefix: 'recompile', pdf: true }) + cy.contains('Modern Authoring Tools for Science') + }) + it('runs a compile when the Recompile button is pressed', function () { cy.interceptCompile() @@ -479,7 +534,7 @@ describe('', function () { cy.findByRole('button', { name: 'Recompile' }).click() cy.waitForCompile({ pdf: true }) - cy.interceptCompile('recompile') + cy.interceptCompile({ prefix: 'recompile' }) cy.intercept('DELETE', '/project/*/output*', { statusCode: 204, diff --git a/services/web/test/frontend/features/source-editor/components/codemirror-editor-table-generator.spec.tsx b/services/web/test/frontend/features/source-editor/components/codemirror-editor-table-generator.spec.tsx index b391a85222..25b205ed4d 100644 --- a/services/web/test/frontend/features/source-editor/components/codemirror-editor-table-generator.spec.tsx +++ b/services/web/test/frontend/features/source-editor/components/codemirror-editor-table-generator.spec.tsx @@ -108,7 +108,7 @@ describe(' Table editor', function () { cy.interceptEvents() cy.interceptMathJax() - cy.interceptCompile('compile', Number.MAX_SAFE_INTEGER) + cy.interceptCompile({ prefix: 'compile', times: Number.MAX_SAFE_INTEGER }) cy.intercept('/project/*/doc/*/metadata', { body: {} }) window.metaAttributesCache.set('ol-preventCompileOnLoad', true) }) diff --git a/services/web/types/compile.ts b/services/web/types/compile.ts index 11767f36ef..541d03149c 100644 --- a/services/web/types/compile.ts +++ b/services/web/types/compile.ts @@ -1,6 +1,7 @@ export type CompileOutputFile = { path: string url: string + downloadURL?: string type: string build: string ranges?: {