diff --git a/services/clsi/app/js/ContentCacheManager.js b/services/clsi/app/js/ContentCacheManager.js index 758848d710..44288077bd 100644 --- a/services/clsi/app/js/ContentCacheManager.js +++ b/services/clsi/app/js/ContentCacheManager.js @@ -49,6 +49,14 @@ if (Settings.pdfCachingEnableWorkerPool && workerpool.isMainThread) { * @param {number} compileTime */ async function update(contentDir, filePath, size, compileTime) { + if (size < Settings.pdfCachingMinChunkSize) { + return { + contentRanges: [], + newContentRanges: [], + reclaimedSpace: 0, + startXRefTable: undefined, + } + } if (Settings.pdfCachingEnableWorkerPool) { return await updateOtherEventLoop(contentDir, filePath, size, compileTime) } else { @@ -64,18 +72,21 @@ async function update(contentDir, filePath, size, compileTime) { * @param {number} compileTime */ async function updateOtherEventLoop(contentDir, filePath, size, compileTime) { - const timeout = getMaxOverhead(compileTime) + const workerLatencyInMs = 20 + // Prefer getting the timeout error from the worker vs timing out the worker. + const timeout = getMaxOverhead(compileTime) + workerLatencyInMs try { - return await WORKER_POOL.exec('doUpdateInternalNoDeadline', [ + return await WORKER_POOL.exec('updateSameEventLoop', [ contentDir, filePath, size, + compileTime, ]).timeout(timeout) } catch (e) { if (e instanceof workerpool.Promise.TimeoutError) { - throw new TimedOutError('context-lost-in-worker') + throw new TimedOutError('context-lost-in-worker', { timeout }) } - if (e.message.includes('Max queue size of ')) { + if (e.message?.includes?.('Max queue size of ')) { throw new QueueLimitReachedError() } throw e @@ -91,28 +102,8 @@ async function updateOtherEventLoop(contentDir, filePath, size, compileTime) { */ async function updateSameEventLoop(contentDir, filePath, size, compileTime) { const checkDeadline = getDeadlineChecker(compileTime) - return doUpdateInternal(contentDir, filePath, size, checkDeadline) -} - -/** - * - * @param {String} contentDir path to directory where content hash files are cached - * @param {String} filePath the pdf file to scan for streams - * @param {number} size the pdf size - */ -async function doUpdateInternalNoDeadline(contentDir, filePath, size) { - return doUpdateInternal(contentDir, filePath, size, () => {}) -} -/** - * - * @param {String} contentDir path to directory where content hash files are cached - * @param {String} filePath the pdf file to scan for streams - * @param {number} size the pdf size - * @param {function} checkDeadline - */ -async function doUpdateInternal(contentDir, filePath, size, checkDeadline) { - const ranges = [] - const newRanges = [] + const contentRanges = [] + const newContentRanges = [] // keep track of hashes expire old ones when they reach a generation > N. const tracker = await HashFileTracker.from(contentDir) tracker.updateAge() @@ -191,14 +182,14 @@ async function doUpdateInternal(contentDir, filePath, size, checkDeadline) { end: object.endOffset, hash, } - ranges.push(range) + contentRanges.push(range) // Optimization: Skip writing of duplicate streams. if (tracker.track(range)) continue await writePdfStream(contentDir, hash, buffer) checkDeadline('after write ' + idx) - newRanges.push(range) + newContentRanges.push(range) } } finally { await handle.close() @@ -208,7 +199,7 @@ async function doUpdateInternal(contentDir, filePath, size, checkDeadline) { // Let the next compile use the already written ranges. const reclaimedSpace = await tracker.deleteStaleHashes(5) await tracker.flush() - return [ranges, newRanges, reclaimedSpace, startXRefTable] + return { contentRanges, newContentRanges, reclaimedSpace, startXRefTable } } function getStatePath(contentDir) { @@ -334,15 +325,16 @@ function getMaxOverhead(compileTime) { } function getDeadlineChecker(compileTime) { - const maxOverhead = getMaxOverhead(compileTime) + const timeout = getMaxOverhead(compileTime) - const deadline = Date.now() + maxOverhead + const deadline = Date.now() + timeout let lastStage = { stage: 'start', now: Date.now() } let completedStages = 0 return function (stage) { const now = Date.now() if (now > deadline) { throw new TimedOutError(stage, { + timeout, completedStages, lastStage: lastStage.stage, diffToLastStage: now - lastStage.now, @@ -363,6 +355,6 @@ module.exports = { update: callbackify(update), promises: { update, - doUpdateInternalNoDeadline, + updateSameEventLoop, }, } diff --git a/services/clsi/app/js/OutputCacheManager.js b/services/clsi/app/js/OutputCacheManager.js index 2e1448b836..c92a119d2a 100644 --- a/services/clsi/app/js/OutputCacheManager.js +++ b/services/clsi/app/js/OutputCacheManager.js @@ -411,12 +411,12 @@ module.exports = OutputCacheManager = { return callback(null, 'timed-out') } if (err) return callback(err, 'failed') - const [ + const { contentRanges, newContentRanges, reclaimedSpace, startXRefTable, - ] = result + } = result if (enablePdfCachingDark) { // In dark mode we are doing the computation only and do not emit diff --git a/services/clsi/app/lib/pdfjs/parseXrefTable.js b/services/clsi/app/lib/pdfjs/parseXrefTable.js index 6030afc6da..f24788f63b 100644 --- a/services/clsi/app/lib/pdfjs/parseXrefTable.js +++ b/services/clsi/app/lib/pdfjs/parseXrefTable.js @@ -16,7 +16,7 @@ async function parseXrefTable(path, size, checkDeadline) { checkDeadline('pdfjs: after parseStartXRef') await manager.ensureDoc('parse') checkDeadline('pdfjs: after parse') - const xRefEntries = manager.pdfDocument.xref.entries + const xRefEntries = manager.pdfDocument.xref.entries || [] const startXRefTable = manager.pdfDocument.xref.topDict?.get('Prev') return { xRefEntries, startXRefTable } } finally { diff --git a/services/clsi/test/unit/js/ContentCacheManagerTests.js b/services/clsi/test/unit/js/ContentCacheManagerTests.js index bfb7179587..a30fe76758 100644 --- a/services/clsi/test/unit/js/ContentCacheManagerTests.js +++ b/services/clsi/test/unit/js/ContentCacheManagerTests.js @@ -19,7 +19,11 @@ describe('ContentCacheManager', function () { size ) let newlyReclaimed - ;[contentRanges, newContentRanges, newlyReclaimed] = result + ;({ + contentRanges, + newContentRanges, + reclaimedSpace: newlyReclaimed, + } = result) reclaimed += newlyReclaimed const fileNames = await fs.promises.readdir(contentDir)