Merge pull request #27928 from overleaf/dp-pdf-caching-typescript-2

Convert pdf-caching file to typescript GitOrigin-RevId: 9acd2fc0697490008d82abfad0994df362814bad
2026-05-29 12:01:32 +02:00 · 2025-09-08 15:26:51 +01:00
parent 0f8b9d611d
commit e4574effbf
5 changed files with 408 additions and 314 deletions
--- a/services/web/frontend/js/features/pdf-preview/util/metrics.ts
+++ b/services/web/frontend/js/features/pdf-preview/util/metrics.ts
@@ -2,7 +2,7 @@ import { v4 as uuid } from 'uuid'
 import { sendMB } from '../../../infrastructure/event-tracking'
 import { trackPdfDownloadEnabled } from './pdf-caching-flags'
 import { debugConsole } from '@/utils/debugging'
-import { DeliveryLatencies } from './types'
+import { DeliveryLatencies, PdfCachingMetrics } from './types'
 import { CompileResponseData } from '@ol-types/compile'

 // VERSION should get incremented when making changes to caching behavior or
@@ -12,10 +12,6 @@ const VERSION = 9
 // editing session id
 export const EDITOR_SESSION_ID = uuid()

-type PdfCachingMetrics = {
-  viewerId: string
-}
-
 const pdfCachingMetrics: PdfCachingMetrics = {
  viewerId: EDITOR_SESSION_ID,
 }
--- a/services/web/frontend/js/features/pdf-preview/util/pdf-caching-transport.ts
+++ b/services/web/frontend/js/features/pdf-preview/util/pdf-caching-transport.ts
@@ -1,5 +1,5 @@
 import OError from '@overleaf/o-error'
-import { fallbackRequest, fetchRange } from './pdf-caching'
+import { fallbackRequest, fetchRange, preprocessFileOnce } from './pdf-caching'
 import { captureException } from '@/infrastructure/error-reporter'
 import { EDITOR_SESSION_ID, getPdfCachingMetrics } from './metrics'
 import {
@@ -15,7 +15,8 @@ import { debugConsole } from '@/utils/debugging'
 import { PDFJS } from './pdf-js'
 import { sendMB } from '@/infrastructure/event-tracking'
 import getMeta from '@/utils/meta'
-import { PDFFile, PDFRange } from '@ol-types/compile'
+import { PDFFile, PDFRange, ProcessedPDFFile } from '@ol-types/compile'
+import { PdfCachingMetricsFull } from './types'

 // 30 seconds: The shutdown grace period of a clsi pre-emp instance.
 const STALE_OUTPUT_REQUEST_THRESHOLD_MS = 30 * 1000
@@ -28,7 +29,7 @@ export function generatePdfCachingTransportFactory() {
  const projectId = getMeta('ol-project_id')
  const usageScore = new Map()
  const cachedUrls = new Map()
-  const metrics = Object.assign(getPdfCachingMetrics(), {
+  const metrics: PdfCachingMetricsFull = Object.assign(getPdfCachingMetrics(), {
    failedCount: 0,
    failedOnce: false,
    tooMuchBandwidthCount: 0,
@@ -53,7 +54,7 @@ export function generatePdfCachingTransportFactory() {

  class PDFDataRangeTransport extends PDFJS.PDFDataRangeTransport {
    url: string
-    pdfFile: PDFFile
+    pdfFile: ProcessedPDFFile
    abortController: AbortController
    leanPdfRanges: PDFRange[]
    handleFetchError: (error: any) => void
@@ -76,7 +77,13 @@ export function generatePdfCachingTransportFactory() {
      this.url = url
      pdfFile.ranges = pdfFile.ranges || []
      pdfFile.editorId = pdfFile.editorId || EDITOR_SESSION_ID
-      this.pdfFile = pdfFile
+      preprocessFileOnce({
+        file: pdfFile,
+        usageScore,
+        cachedUrls,
+      })
+      // We can safely cast as preprocessFileOnce mutates the file object into a ProcessedPDFFile
+      this.pdfFile = pdfFile as unknown as ProcessedPDFFile
      // Clone the chunks as the objectId field is encoded to a Uint8Array.
      this.leanPdfRanges = pdfFile.ranges.map(r => Object.assign({}, r))
      this.handleFetchError = handleFetchError
@@ -192,8 +199,6 @@ export function generatePdfCachingTransportFactory() {
          })
      }

-      // @ts-ignore is incorrectly inferring the type of fetchRange.
-      // Remove this when we convert pdf-caching.js to typescript.
      fetchRange({
        url: this.url,
        start,
@@ -248,6 +253,7 @@ export function generatePdfCachingTransportFactory() {
              isFromOutputPDFRequest: isFromOutputPDFRequest(err),
            },
          })
+
          return fallbackRequest({
            file: this.pdfFile,
            url: this.url,
--- a/services/web/frontend/js/features/pdf-preview/util/pdf-caching.ts
+++ b/services/web/frontend/js/features/pdf-preview/util/pdf-caching.ts
@@ -1,4 +1,12 @@
+import {
+  Chunk,
+  PartiallyProcessedPDFFile,
+  PDFRange,
+  PrefetchedChunk,
+  ProcessedPDFFile,
+} from '@ol-types/compile'
 import OError from '@overleaf/o-error'
+import { PdfCachingMetricsFull } from './types'

 const PDF_JS_CHUNK_SIZE = 128 * 1024
 const MAX_SUB_REQUEST_COUNT = 4
@@ -21,20 +29,19 @@ const CHUNK_USAGE_THRESHOLD_CACHED = 42
 // 42 * 0.7^11 < 1, aka we keep stale entries around for 11 compiles.
 const CHUNK_USAGE_STALE_DECAY_RATE = 0.7

-let cacheFlag = 'default'
+let cacheFlag: RequestCache = 'default'
 // Work around a Chrome bug: https://issues.chromium.org/issues/40542704
 // Multiple simultaneous requests to same URL with Range header cause failure (block backend returns ERR_CACHE_OPERATION_NOT_SUPPORTED)
 const CACHE_NO_STORE = 'no-store'

-/**
- * @param {string} url
- * @param {RequestInit} init
- */
-async function fetchWithBrowserCacheFallback(url, init) {
+async function fetchWithBrowserCacheFallback(url: string, init: RequestInit) {
  try {
    return await fetch(url, init)
  } catch (err) {
-    if (init.headers?.has('Range') && init.cache !== CACHE_NO_STORE) {
+    if (
+      (init.headers as Headers | undefined)?.has('Range') &&
+      init.cache !== CACHE_NO_STORE
+    ) {
      cacheFlag = CACHE_NO_STORE
      init.cache = CACHE_NO_STORE
      return await fetch(url, init)
@@ -43,30 +50,29 @@ async function fetchWithBrowserCacheFallback(url, init) {
  }
 }

-/**
- * @param {Object} file
- */
-function backfillEdgeBounds(file) {
+function backfillEdgeBounds(file: PartiallyProcessedPDFFile) {
  const encoder = new TextEncoder()
  for (const chunk of file.ranges) {
    if (chunk.objectId) {
-      chunk.objectId = encoder.encode(chunk.objectId)
+      chunk.objectId = encoder.encode(chunk.objectId as string)
      chunk.start -= chunk.objectId.byteLength
      chunk.size = chunk.end - chunk.start
    }
  }
 }

-/**
- * @param {Map} usageScore
- * @param {Map} cachedUrls
- */
-function trimState({ usageScore, cachedUrls }) {
-  for (const hash of usageScore) {
+function trimState({
+  usageScore,
+  cachedUrls,
+}: {
+  usageScore: Map<string, number>
+  cachedUrls: Map<string, { url: string; init: RequestInit }>
+}) {
+  for (const [hash, score] of usageScore) {
    if (usageScore.size < INCREMENTAL_CACHE_SIZE) {
      break
    }
-    const score = usageScore.get(hash)
+
    if (score >= CHUNK_USAGE_THRESHOLD_TRIGGER_PREFERRED) {
      // Keep entries that are worth caching around for longer.
      usageScore.set(hash, score * CHUNK_USAGE_STALE_DECAY_RATE)
@@ -77,25 +83,25 @@ function trimState({ usageScore, cachedUrls }) {
  }
 }

-/**
- * @param {Object} file
- * @param {Map} usageScore
- * @param {Map} cachedUrls
- */
-function preprocessFileOnce({ file, usageScore, cachedUrls }) {
-  if (file.preprocessed) return
+export function preprocessFileOnce({
+  file,
+  usageScore,
+  cachedUrls,
+}: {
+  file: PartiallyProcessedPDFFile
+  usageScore: Map<string, number>
+  cachedUrls: Map<string, { url: string; init: RequestInit }>
+}) {
+  if ('preprocessed' in file && file.preprocessed) return file
  file.preprocessed = true

-  file.createdAt = new Date(file.createdAt)
+  file.createdAt = new Date(file.createdAt || '')
  file.prefetched = file.prefetched || []
  trimState({ usageScore, cachedUrls })
  backfillEdgeBounds(file)
 }

-/**
- * @param {Array} chunks
- */
-export function estimateSizeOfMultipartResponse(chunks) {
+function estimateSizeOfMultipartResponse(chunks: Chunk[]) {
  /*
  --boundary
  HEADER
@@ -116,18 +122,21 @@ export function estimateSizeOfMultipartResponse(chunks) {
  )
 }

-/**
- *
- * @param {Object} metrics
- * @param {number} size
- * @param {number} cachedCount
- * @param {number} cachedBytes
- * @param {number} fetchedCount
- * @param {number} fetchedBytes
- */
 function trackDownloadStats(
-  metrics,
-  { size, cachedCount, cachedBytes, fetchedCount, fetchedBytes }
+  metrics: PdfCachingMetricsFull,
+  {
+    size,
+    cachedCount,
+    cachedBytes,
+    fetchedCount,
+    fetchedBytes,
+  }: {
+    size: number
+    cachedCount: number
+    cachedBytes: number
+    fetchedCount: number
+    fetchedBytes: number
+  }
 ) {
  metrics.cachedCount += cachedCount
  metrics.cachedBytes += cachedBytes
@@ -137,34 +146,34 @@ function trackDownloadStats(
  metrics.requestedBytes += size
 }

-/**
- * @param {Object} metrics
- * @param {boolean} sizeDiffers
- * @param {boolean} mismatch
- * @param {boolean} success
- */
-function trackChunkVerify(metrics, { sizeDiffers, mismatch, success }) {
+function trackChunkVerify(
+  metrics: PdfCachingMetricsFull,
+  {
+    sizeDiffers,
+    mismatch,
+    success,
+  }: {
+    sizeDiffers: boolean
+    mismatch: boolean
+    success: boolean
+  }
+) {
  if (sizeDiffers) {
-    metrics.chunkVerifySizeDiffers |= 0
-    metrics.chunkVerifySizeDiffers += 1
+    incrementMetric(metrics, 'chunkVerifySizeDiffers')
  }
  if (mismatch) {
-    metrics.chunkVerifyMismatch |= 0
-    metrics.chunkVerifyMismatch += 1
+    incrementMetric(metrics, 'chunkVerifyMismatch')
  }
  if (success) {
-    metrics.chunkVerifySuccess |= 0
-    metrics.chunkVerifySuccess += 1
+    incrementMetric(metrics, 'chunkVerifySuccess')
  }
 }

-/**
- * @param chunk
- * @param {ArrayBuffer} arrayBuffer
- * @return {Uint8Array}
- */
-function backFillObjectContext(chunk, arrayBuffer) {
-  if (!chunk.objectId) {
+function backFillObjectContext(
+  chunk: Chunk | Chunk[] | PDFRange<Uint8Array>,
+  arrayBuffer: ArrayBuffer
+) {
+  if (!('objectId' in chunk)) {
    // This is a dynamic chunk
    return new Uint8Array(arrayBuffer)
  }
@@ -185,14 +194,12 @@ function backFillObjectContext(chunk, arrayBuffer) {
  return fullBuffer
 }

-/**
- * @param {Array} chunks
- * @param {number} start
- * @param {number} end
- * @returns {Array}
- */
-function getMatchingChunks(chunks, start, end) {
-  const matchingChunks = []
+function getMatchingChunks<ChunkType extends Chunk>(
+  chunks: ChunkType[],
+  start: number,
+  end: number
+) {
+  const matchingChunks: ChunkType[] = []
  for (const chunk of chunks) {
    if (chunk.end <= start) {
      // no overlap:
@@ -211,39 +218,19 @@ function getMatchingChunks(chunks, start, end) {
  return matchingChunks
 }

-/**
- * @param {Object} a
- * @param {Object} b
- */
-function sortBySizeDESC(a, b) {
+function sortBySizeDESC(a: { size: number }, b: { size: number }) {
  return a.size > b.size ? -1 : 1
 }

-/**
- * @param {Object} a
- * @param {Object} b
- */
-function sortByStartASC(a, b) {
+function sortByStartASC(a: { start: number }, b: { start: number }) {
  return a.start > b.start ? 1 : -1
 }

-/**
- * @param {Object} chunk
- */
-function usageAboveThreshold(chunk) {
+function usageAboveThreshold(chunk: PDFRange) {
  // We fetched enough shards of this chunk. Cache it in full now.
  return chunk.totalUsage > CHUNK_USAGE_THRESHOLD_TRIGGER_PREFERRED
 }

-/**
- * @param {Array} potentialChunks
- * @param {Map} usageScore
- * @param {Map} cachedUrls
- * @param {Object} metrics
- * @param {number} start
- * @param {number} end
- * @param {boolean} prefetchLargeEnabled
- */
 function cutRequestAmplification({
  potentialChunks,
  usageScore,
@@ -251,12 +238,19 @@ function cutRequestAmplification({
  start,
  end,
  prefetchLargeEnabled,
+}: {
+  potentialChunks: PDFRange[]
+  usageScore: Map<string, number>
+  metrics: PdfCachingMetricsFull
+  start: number
+  end: number
+  prefetchLargeEnabled: boolean
 }) {
  // NOTE: Map keys are stored in insertion order.
  // We re-insert keys on cache hit and turn 'usageScore' into a cheap LRU.

-  const chunks = []
-  const skipAlreadyAdded = chunk => !chunks.includes(chunk)
+  const chunks: PDFRange[] = []
+  const skipAlreadyAdded = (chunk: PDFRange) => !chunks.includes(chunk)
  let tooManyRequests = false
  let tooMuchBandwidth = false
  let newChunks = 0
@@ -332,14 +326,12 @@ function cutRequestAmplification({
  return chunks
 }

-/**
- * @param {Array} chunks
- * @param {number} start
- * @param {number} end
- * @returns {Array}
- */
-function getInterleavingDynamicChunks(chunks, start, end) {
-  const dynamicChunks = []
+function getInterleavingDynamicChunks(
+  chunks: Chunk[],
+  start: number,
+  end: number
+) {
+  const dynamicChunks: Chunk[] = []
  for (const chunk of chunks) {
    if (start < chunk.start) {
      dynamicChunks.push({ start, end: chunk.start })
@@ -353,36 +345,24 @@ function getInterleavingDynamicChunks(chunks, start, end) {
  return dynamicChunks
 }

-/**
- *
- * @param {Response} response
- */
-function getServerTime(response) {
+function getServerTime(response: Response) {
  const raw = response.headers.get('Date')
  if (!raw) return new Date()
  return new Date(raw)
 }

-/**
- *
- * @param {Response} response
- */
-function getResponseSize(response) {
+function getResponseSize(response: Response) {
  const raw = response.headers.get('Content-Length')
  if (!raw) return 0
  return parseInt(raw, 10)
 }

-/**
- *
- * @param {Response} response
- * @param chunk
- */
-export function getMultipartBoundary(response, chunk) {
+function getMultipartBoundary(response: Response, chunk: Chunk | Chunk[]) {
  if (!Array.isArray(chunk)) return ''

  const raw = response.headers.get('Content-Type')
-  if (raw.includes('multipart/byteranges')) {
+
+  if (raw?.includes('multipart/byteranges')) {
    const idx = raw.indexOf('boundary=')
    if (idx !== -1) return raw.slice(idx + 'boundary='.length)
  }
@@ -393,32 +373,45 @@ export function getMultipartBoundary(response, chunk) {
  })
 }

-/**
- * @param {string} boundary
- * @param {number} start
- * @param {number} end
- * @param {number} size
- * @return {string}
- */
-function composeMultipartHeader({ boundary, start, end, size }) {
+function composeMultipartHeader({
+  boundary,
+  start,
+  end,
+  size,
+}: {
+  boundary: string
+  start: number
+  end: number
+  size: number
+}) {
  return `\r\n--${boundary}\r\nContent-Type: application/pdf\r\nContent-Range: bytes ${start}-${
    end - 1
  }/${size}\r\n\r\n`
 }

-/**
- * @param {Object} file
- * @param {Array} chunks
- * @param {Uint8Array} data
- * @param {string} boundary
- * @param {Object} metrics
- */
-export function resolveMultiPartResponses({
+function incrementMetric(
+  metrics: PdfCachingMetricsFull,
+  key: keyof PdfCachingMetricsFull
+) {
+  if (key in metrics) {
+    metrics[key]++
+  } else {
+    ;(metrics[key] as number) = 1
+  }
+}
+
+function resolveMultiPartResponses({
  file,
  chunks,
  data,
  boundary,
  metrics,
+}: {
+  file: ProcessedPDFFile
+  chunks: Chunk[]
+  data: Uint8Array
+  boundary: string
+  metrics: PdfCachingMetricsFull
 }) {
  const responses = []
  let offsetStart = 0
@@ -439,8 +432,7 @@ export function resolveMultiPartResponses({
        .subarray(offsetStart, offsetStart + headerSize)
        .every((v, idx) => v === headerRaw[idx])
    ) {
-      metrics.headerVerifyFailure |= 0
-      metrics.headerVerifyFailure++
+      incrementMetric(metrics, 'headerVerifyFailure')
      throw new OError('multipart response header does not match', {
        actual: new TextDecoder().decode(
          data.subarray(offsetStart, offsetStart + headerSize)
@@ -457,16 +449,15 @@ export function resolveMultiPartResponses({
    })
    offsetStart += chunkSize
  }
+
  return responses
 }

-/**
- *
- * @param {Response} response
- * @param {number} estimatedSize
- * @param {RequestInit} init
- */
-export function checkChunkResponse(response, estimatedSize, init) {
+function checkChunkResponse(
+  response: Response,
+  estimatedSize: number,
+  init: RequestInit
+) {
  if (!(response.status === 206 || response.status === 200)) {
    throw new OError('non successful response status: ' + response.status, {
      statusCode: response.status,
@@ -491,7 +482,17 @@ export function checkChunkResponse(response, estimatedSize, init) {
  }
 }

-function getDynamicChunkInit({ file, start, end, signal }) {
+function getDynamicChunkInit({
+  file,
+  start,
+  end,
+  signal,
+}: {
+  file: ProcessedPDFFile
+  start: number
+  end: number
+  signal?: AbortSignal | null
+}): RequestInit {
  // Avoid making range request when downloading the PDF file in full.
  const isFullFile = start === 0 && end === file.size
  return {
@@ -503,15 +504,19 @@ function getDynamicChunkInit({ file, start, end, signal }) {
  }
 }

-/**
- *
- * @param {Object} file
- * @param {string} url
- * @param {number} start
- * @param {number} end
- * @param {AbortSignal} abortSignal
- */
-export async function fallbackRequest({ file, url, start, end, abortSignal }) {
+export async function fallbackRequest({
+  file,
+  url,
+  start,
+  end,
+  abortSignal,
+}: {
+  file: ProcessedPDFFile
+  url: string
+  start: number
+  end: number
+  abortSignal: AbortSignal
+}) {
  try {
    const init = getDynamicChunkInit({ file, start, end, signal: abortSignal })
    const response = await fetchWithBrowserCacheFallback(url, init)
@@ -522,16 +527,6 @@ export async function fallbackRequest({ file, url, start, end, abortSignal }) {
  }
 }

-/**
- *
- * @param {Object} file
- * @param {string} url
- * @param {number} start
- * @param {number} end
- * @param {Object} metrics
- * @param {Uint8Array} actual
- * @param {AbortSignal} abortSignal
- */
 async function verifyRange({
  file,
  url,
@@ -540,6 +535,14 @@ async function verifyRange({
  metrics,
  actual,
  abortSignal,
+}: {
+  file: ProcessedPDFFile
+  url: string
+  start: number
+  end: number
+  metrics: PdfCachingMetricsFull
+  actual: Uint8Array
+  abortSignal: AbortSignal
 }) {
  let expectedRaw
  try {
@@ -554,7 +557,11 @@ async function verifyRange({
    throw OError.tag(error, 'cannot verify range', { url, start, end })
  }
  const expected = new Uint8Array(expectedRaw)
-  const stats = {}
+  const stats = {
+    sizeDiffers: false,
+    mismatch: false,
+    success: false,
+  }
  if (actual.byteLength !== expected.byteLength) {
    stats.sizeDiffers = true
  } else if (!expected.every((v, idx) => v === actual[idx])) {
@@ -566,13 +573,12 @@ async function verifyRange({
  return expected
 }

-/**
- * @param {Array} chunks
- * @param {Array} prefetched
- * @param {number} start
- * @param {number} end
- */
-function skipPrefetched(chunks, prefetched, start, end) {
+function skipPrefetched<ChunkType extends Chunk>(
+  chunks: ChunkType[],
+  prefetched: PDFRange[],
+  start: number,
+  end: number
+): ChunkType[] {
  return chunks.filter(chunk => {
    return !prefetched.find(
      c =>
@@ -582,18 +588,6 @@ function skipPrefetched(chunks, prefetched, start, end) {
  })
 }

-/**
- * @param {Object|Object[]} chunk
- * @param {string} url
- * @param {RequestInit} init
- * @param {Map<string, {url:string,init:RequestInit}>} cachedUrls
- * @param {Object} metrics
- * @param {boolean} cachedUrlLookupEnabled
- * @param {() => boolean} canTryFromCache
- * @param {string} fallbackToCacheURL
- * @param {Object} file
- * @param {() => void} recordFallbackToClsiCache
- */
 async function fetchChunk({
  chunk,
  url,
@@ -605,47 +599,61 @@ async function fetchChunk({
  fallbackToCacheURL,
  file,
  recordFallbackToClsiCache,
+}: {
+  chunk: Chunk | PDFRange<Uint8Array> | Chunk[]
+  url: string
+  init: RequestInit
+  cachedUrls: Map<string, { url: string; init: RequestInit }>
+  metrics: PdfCachingMetricsFull
+  cachedUrlLookupEnabled: boolean
+  canTryFromCache: (error: any) => boolean
+  fallbackToCacheURL: string
+  file: ProcessedPDFFile
+  recordFallbackToClsiCache: () => void
 }) {
  const estimatedSize = Array.isArray(chunk)
    ? estimateSizeOfMultipartResponse(chunk)
    : chunk.end - chunk.start

-  const oldUrl = cachedUrls.get(chunk.hash)
-  if (cachedUrlLookupEnabled && chunk.hash && oldUrl && oldUrl.url !== url) {
-    // When the clsi server id changes, the content id changes too and as a
-    //  result all the browser cache keys (aka urls) get invalidated.
-    // We memorize the previous browser cache keys in `cachedUrls`.
-    try {
-      oldUrl.init.signal = init.signal
-      const response = await fetchWithBrowserCacheFallback(
-        oldUrl.url,
-        oldUrl.init
-      )
-      if (response.status === 200) {
-        checkChunkResponse(response, estimatedSize, init)
-        metrics.oldUrlHitCount += 1
-        return response
+  if ('hash' in chunk) {
+    const oldUrl = cachedUrls.get(chunk.hash)
+    if (cachedUrlLookupEnabled && chunk.hash && oldUrl && oldUrl.url !== url) {
+      // When the clsi server id changes, the content id changes too and as a
+      //  result all the browser cache keys (aka urls) get invalidated.
+      // We memorize the previous browser cache keys in `cachedUrls`.
+      try {
+        oldUrl.init.signal = init.signal
+        const response = await fetchWithBrowserCacheFallback(
+          oldUrl.url,
+          oldUrl.init
+        )
+        if (response.status === 200) {
+          checkChunkResponse(response, estimatedSize, init)
+          metrics.oldUrlHitCount += 1
+          return response
+        }
+        if (response.status === 404) {
+          // The old browser cache entry is gone and the old file is gone too.
+          metrics.oldUrlMissCount += 1
+        }
+        // Fallback to the latest url.
+      } catch (e) {
+        // Fallback to the latest url.
      }
-      if (response.status === 404) {
-        // The old browser cache entry is gone and the old file is gone too.
-        metrics.oldUrlMissCount += 1
-      }
-      // Fallback to the latest url.
-    } catch (e) {
-      // Fallback to the latest url.
+      cachedUrls.delete(chunk.hash) // clear cached state
    }
-    cachedUrls.delete(chunk.hash) // clear cached state
  }
+
  let response
  try {
    response = await fetchWithBrowserCacheFallback(url, init)
    checkChunkResponse(response, estimatedSize, init)
-    if (chunk.hash) {
+    if ('hash' in chunk && chunk.hash) {
      delete init.signal // omit the signal from the cache
      cachedUrls.set(chunk.hash, { url, init })
    }
  } catch (err1) {
-    if (chunk.hash) {
+    if ('hash' in chunk && chunk.hash) {
      cachedUrls.delete(chunk.hash)
    }
    const hasOthersCached = cachedUrls.size > 0
@@ -656,7 +664,7 @@ async function fetchChunk({
      file.ranges = file.ranges.filter(r => cachedUrls.has(r.hash))
      // Try harder at fetching the chunk, fallback to cache
      url = fallbackToCacheURL
-      if (chunk.hash) {
+      if ('hash' in chunk && chunk.hash) {
        init = getDynamicChunkInit({
          file,
          // skip object id prefix
@@ -679,14 +687,6 @@ async function fetchChunk({
  return response
 }

-/**
- * @param {Object} file
- * @param {number} start
- * @param {number} end
- * @param {Array} dynamicChunks
- * @param {boolean} prefetchXRefTable
- * @param {number} startXRefTableRange
- */
 function addPrefetchingChunks({
  file,
  start,
@@ -694,6 +694,13 @@ function addPrefetchingChunks({
  dynamicChunks,
  prefetchXRefTable,
  startXRefTableRange,
+}: {
+  file: ProcessedPDFFile
+  start: number
+  end: number
+  dynamicChunks: Chunk[]
+  prefetchXRefTable: boolean
+  startXRefTableRange: number
 }) {
  // Prefetch in case this is the first range, or we are fetching dynamic
  //  chunks anyway (so we can ride-share the round trip).
@@ -703,7 +710,7 @@ function addPrefetchingChunks({
    return
  }

-  let extraChunks = []
+  let extraChunks: Chunk[] = []
  if (prefetchXRefTable) {
    // Prefetch the dynamic chunks around the xref table.
    extraChunks = skipPrefetched(
@@ -779,6 +786,10 @@ function addPrefetchingChunks({
 }

 class Timer {
+  max: number
+  total: number
+  lastStart: number
+
  constructor() {
    this.max = 0
    this.total = 0
@@ -799,7 +810,7 @@ class Timer {
    this.lastStart = 0
  }

-  reportInto(metrics) {
+  reportInto(metrics: PdfCachingMetricsFull) {
    const max = Math.ceil(this.max)
    const total = Math.ceil(this.total)
    if (max > metrics.latencyComputeMax) {
@@ -809,25 +820,6 @@ class Timer {
  }
 }

-/**
- *
- * @param {string} url
- * @param {number} start
- * @param {number} end
- * @param {Object} file
- * @param {queryForChunks} start
- * @param {Object} metrics
- * @param {Map} usageScore
- * @param {Map} cachedUrls
- * @param {boolean} verifyChunks
- * @param {boolean} prefetchingEnabled
- * @param {boolean} prefetchLargeEnabled
- * @param {boolean} tryOldCachedUrlEnabled
- * @param {AbortSignal} abortSignal
- * @param {() => boolean} canTryFromCache
- * @param {string} fallbackToCacheURL
- * @param {() => void} recordFallbackToClsiCache
- */
 export async function fetchRange({
  url,
  start,
@@ -845,19 +837,39 @@ export async function fetchRange({
  canTryFromCache,
  fallbackToCacheURL,
  recordFallbackToClsiCache,
+}: {
+  url: string
+  start: number
+  end: number
+  file: ProcessedPDFFile
+  queryForChunks: string
+  metrics: PdfCachingMetricsFull
+  usageScore: Map<string, number>
+  cachedUrls: Map<string, { url: string; init: RequestInit }>
+  verifyChunks: boolean
+  prefetchingEnabled: boolean
+  prefetchLargeEnabled: boolean
+  cachedUrlLookupEnabled: boolean
+  abortSignal: AbortSignal
+  canTryFromCache: (error: any) => boolean
+  fallbackToCacheURL: string
+  recordFallbackToClsiCache: () => void
 }) {
  const timer = new Timer()
  timer.startBlockingCompute()
-  preprocessFileOnce({ file, usageScore, cachedUrls })
+
  const startXRefTableRange =
-    Math.floor(file.startXRefTable / PDF_JS_CHUNK_SIZE) * PDF_JS_CHUNK_SIZE
+    file.startXRefTable !== undefined
+      ? Math.floor(file.startXRefTable / PDF_JS_CHUNK_SIZE) * PDF_JS_CHUNK_SIZE
+      : 0
+
  const prefetchXRefTable =
    prefetchingEnabled && startXRefTableRange > 0 && start === 0
  const prefetched = getMatchingChunks(file.prefetched, start, end)

  // Check that handling the range request won't trigger excessive sub-requests,
  // (to avoid unwanted latency compared to the original request).
-  const chunks = cutRequestAmplification({
+  const chunks: PDFRange[] = cutRequestAmplification({
    potentialChunks: skipPrefetched(
      getMatchingChunks(file.ranges, start, end),
      prefetched,
@@ -865,7 +877,6 @@ export async function fetchRange({
      end
    ),
    usageScore,
-    cachedUrls,
    metrics,
    start,
    end,
@@ -913,7 +924,13 @@ export async function fetchRange({
  const byteRanges = dynamicChunks
    .map(chunk => `${chunk.start}-${chunk.end - 1}`)
    .join(',')
-  const coalescedDynamicChunks = []
+
+  const coalescedDynamicChunks: {
+    chunk: Chunk | Chunk[]
+    url: string
+    init: RequestInit
+  }[] = []
+
  switch (true) {
    case dynamicChunks.length === 0:
      break
@@ -962,11 +979,16 @@ export async function fetchRange({
  const perUserPrefix = url.slice(0, url.indexOf('/build/'))
  const requests = chunks
    .map(chunk => ({
-      chunk,
+      chunk: chunk as Chunk | Chunk[],
      url: `${perUserPrefix}/content/${file.contentId}/${chunk.hash}?${queryForChunks}`,
      init: {},
    }))
-    .concat(coalescedDynamicChunks)
+    .concat(coalescedDynamicChunks) as {
+    chunk: PDFRange | Chunk | Chunk[]
+    url: string
+    init: RequestInit
+  }[]
+
  let cachedCount = 0
  let cachedBytes = 0
  let fetchedCount = 0
@@ -1000,7 +1022,7 @@ export async function fetchRange({
          //     | pdf.js chunk |
          //   | A BIG IMAGE BLOB |
          // |     THE     FULL     PDF     |
-          if (chunk.hash && blobFetchDate < file.createdAt) {
+          if ('hash' in chunk && chunk.hash && blobFetchDate < file.createdAt) {
            const usedChunkSection =
              Math.min(end, chunk.end) - Math.max(start, chunk.start)
            cachedCount++
@@ -1021,6 +1043,7 @@ export async function fetchRange({
        if (!Array.isArray(chunk)) {
          return [{ chunk, data }]
        }
+
        return resolveMultiPartResponses({
          file,
          chunks: chunk,
@@ -1041,52 +1064,65 @@ export async function fetchRange({
  rawResponses
    .flat() // flatten after splitting multipart responses
    .concat(prefetched.map(chunk => ({ chunk, data: chunk.buffer })))
-    .forEach(({ chunk, data }) => {
-      if (!chunk.hash && chunk.end > end) {
-        // This is a (partially) prefetched chunk.
-        chunk.buffer = data
-        file.prefetched.push(chunk)
-        if (chunk.start > end) return // This is a fully prefetched chunk.
-      }
-      // overlap:
-      //     | REQUESTED_RANGE |
-      //  | CHUNK |
-      const offsetStart = Math.max(start - chunk.start, 0)
-      // overlap:
-      //     | REQUESTED_RANGE |
-      //                   | CHUNK |
-      const offsetEnd = Math.max(chunk.end - end, 0)
-      const oldDataLength = data.length
-      if (offsetStart > 0 || offsetEnd > 0) {
-        // compute index positions for slice to handle case where offsetEnd=0
-        const chunkSize = chunk.end - chunk.start
-        data = data.subarray(offsetStart, chunkSize - offsetEnd)
-      }
-      const newDataLength = data.length
-      const insertPosition = Math.max(chunk.start - start, 0)
-      try {
-        reassembledBlob.set(data, insertPosition)
-      } catch (err) {
-        const reassembledBlobLength = reassembledBlob.length
-        const trimmedChunk = {
-          start: chunk.start,
-          end: chunk.end,
-          hash: chunk.hash,
-          objectId: new TextDecoder().decode(chunk.objectId),
+    .forEach(
+      ({
+        chunk,
+        data,
+      }: {
+        chunk: Chunk | PrefetchedChunk<Chunk | PDFRange<Uint8Array>>
+        data: Uint8Array
+      }) => {
+        if ('hash' in chunk && chunk.end > end) {
+          // This is a (partially) prefetched chunk.
+          chunk.buffer = data
+          file.prefetched.push(chunk)
+          if (chunk.start > end) return // This is a fully prefetched chunk.
+        }
+        // overlap:
+        //     | REQUESTED_RANGE |
+        //  | CHUNK |
+        const offsetStart = Math.max(start - chunk.start, 0)
+        // overlap:
+        //     | REQUESTED_RANGE |
+        //                   | CHUNK |
+        const offsetEnd = Math.max(chunk.end - end, 0)
+        const oldDataLength = data.length
+        if (offsetStart > 0 || offsetEnd > 0) {
+          // compute index positions for slice to handle case where offsetEnd=0
+          const chunkSize = chunk.end - chunk.start
+          data = data.subarray(offsetStart, chunkSize - offsetEnd)
+        }
+        const newDataLength = data.length
+        const insertPosition = Math.max(chunk.start - start, 0)
+
+        try {
+          reassembledBlob.set(data, insertPosition)
+        } catch (err) {
+          const reassembledBlobLength = reassembledBlob.length
+
+          const trimmedChunk = {
+            start: chunk.start,
+            end: chunk.end,
+            hash: 'hash' in chunk ? chunk.hash : undefined,
+            objectId:
+              'objectId' in chunk
+                ? new TextDecoder().decode(chunk.objectId)
+                : undefined,
+          }
+          throw OError.tag(err, 'broken reassembly', {
+            start,
+            end,
+            chunk: trimmedChunk,
+            oldDataLength,
+            newDataLength,
+            offsetStart,
+            offsetEnd,
+            insertPosition,
+            reassembledBlobLength,
+          })
        }
-        throw OError.tag(err, 'broken reassembly', {
-          start,
-          end,
-          chunk: trimmedChunk,
-          oldDataLength,
-          newDataLength,
-          offsetStart,
-          offsetEnd,
-          insertPosition,
-          reassembledBlobLength,
-        })
      }
-    })
+    )

  timer.finishBlockingCompute()
  timer.reportInto(metrics)
@@ -1100,6 +1136,7 @@ export async function fetchRange({

  if (verifyChunks) {
    return await verifyRange({
+      file,
      url,
      start,
      end,
--- a/services/web/frontend/js/features/pdf-preview/util/types.ts
+++ b/services/web/frontend/js/features/pdf-preview/util/types.ts
@@ -56,3 +56,32 @@ export type DeliveryLatencies = {
  latencyFetch?: number
  latencyRender?: number
 }
+
+export type PdfCachingMetrics = {
+  viewerId: string
+}
+
+export type PdfCachingMetricsFull = PdfCachingMetrics & {
+  failedCount: number
+  failedOnce: boolean
+  tooMuchBandwidthCount: number
+  tooManyRequestsCount: number
+  cachedCount: number
+  cachedBytes: number
+  fetchedCount: number
+  fetchedBytes: number
+  latencyComputeMax: number
+  latencyComputeTotal: number
+  requestedCount: number
+  requestedBytes: number
+  oldUrlHitCount: number
+  oldUrlMissCount: number
+  enablePdfCaching: boolean
+  prefetchingEnabled: boolean
+  prefetchLargeEnabled: boolean
+  cachedUrlLookupEnabled: boolean
+  chunkVerifySizeDiffers?: number
+  chunkVerifyMismatch?: number
+  chunkVerifySuccess?: number
+  headerVerifyFailure?: number
+}
--- a/services/web/types/compile.ts
+++ b/services/web/types/compile.ts
@@ -1,9 +1,16 @@
-export type PDFRange = {
-  objectId: Uint8Array
+export type Chunk = {
+  start: number
  end: number
+}
+
+export type PrefetchedChunk<ChunkType extends Chunk> = ChunkType & {
+  buffer: Uint8Array
+}
+
+export type PDFRange<ObjectIdType = Uint8Array | string> = Chunk & {
+  objectId: ObjectIdType
  hash: string
  size: number
-  start: number
  totalUsage: number
 }

@@ -18,17 +25,36 @@ type OutputFileBase = {
  main?: boolean
 }

-export type PDFFile = OutputFileBase & {
+type PDFFileBase = OutputFileBase & {
  clsiCacheShard: string
  contentId: string
-  createdAt: Date
  editorId: string
-  pdfDownloadURL: string
-  pdfURL: string
-  prefetched: any[]
-  preprocessed: boolean
-  ranges: PDFRange[]
+  pdfDownloadUrl: string
+  pdfUrl: string
  size: number
+  startXRefTable?: number
+}
+
+export type PDFFile = PDFFileBase & {
+  createdAt?: string
+  ranges: PDFRange<string>[]
+  prefetched?: PrefetchedChunk<PDFRange<Uint8Array>>[]
+}
+
+export type ProcessedPDFFile = PDFFileBase & {
+  preprocessed: true
+  createdAt: Date
+  prefetched: PrefetchedChunk<PDFRange<Uint8Array>>[]
+  ranges: PDFRange<Uint8Array>[]
+}
+
+// This type is a little bit of a hack to work around the fact that we mutate
+// the PDFFile object directly when processed into a ProcessedPDFFile
+export type PartiallyProcessedPDFFile = PDFFileBase & {
+  preprocessed?: boolean
+  createdAt?: Date | string
+  prefetched?: PrefetchedChunk<PDFRange<Uint8Array>>[]
+  ranges: PDFRange<Uint8Array>[] | PDFRange<string>[]
 }

 export type CompileOutputFile = OutputFileBase | PDFFile