Files
overleaf-cep/services/clsi/app/js/OutputFileArchiveManager.js
Jakob Ackermann 81b7121408 [clsi] initial implementation of compile from history (#31883)
* [clsi] initial implementation of compile from history

* [clsi] copy changes

* [saas-e2e] extend test case with nested folder

* [saas-e2e] add test case for tracked changes

* [web] fix accumulating changes from multiple chunks

* [web] optimize size check for compile request payload

* [clsi] deduplicate globalBlobs

* [clsi] add validation for request body details

* [clsi] add metrics for compile from history

* [clsi] download binary files concurrently

* [clsi] skip download of empty file blob

* [clsi] break down e2e compile time metric by compileFromHistory

GitOrigin-RevId: 0dadef93e89d8a172c35cb130a1042d9d1bec42a
2026-03-06 09:12:07 +00:00

115 lines
3.0 KiB
JavaScript

import archiver from 'archiver'
import OutputCacheManager from './OutputCacheManager.js'
import OutputFileFinder from './OutputFileFinder.js'
import Settings from '@overleaf/settings'
import { open } from 'node:fs/promises'
import { NotFoundError } from './Errors.js'
import logger from '@overleaf/logger'
// Names of output files that are left out of the output files archive
// (in addition to the names excluded explicitly in _getAllOutputFiles).
// NOTE: Updating this list requires a corresponding change in
// * services/web/frontend/js/features/pdf-preview/util/file-list.ts
const ignoreFiles = ['output.fls', 'output.fdb_latexmk']
/**
 * Build the path of the output directory for a project.
 *
 * When a user id is given the directory name is `<projectId>-<userId>`;
 * when it is null or undefined the directory name is the project id alone.
 *
 * @param {string} projectId
 * @param {string|null|undefined} userId
 * @returns {string} path below `Settings.path.outputDir`, with a trailing slash
 */
function getContentDir(projectId, userId) {
  const dirName = userId != null ? `${projectId}-${userId}` : projectId
  return `${Settings.path.outputDir}/${dirName}/`
}
export default {
  /**
   * Create a zip archive holding the output files of one build.
   *
   * Every file returned by `_getAllOutputFiles` is opened and appended to the
   * archive under its relative path. Files that cannot be opened are logged,
   * skipped, and listed (one path per line) in an extra `missing_files.txt`
   * entry. `error` and `warning` events emitted by the archive are only
   * logged here. `finalize()` is invoked without being awaited; a rejection
   * from it is logged.
   *
   * @param {string} projectId
   * @param {string|null|undefined} userId
   * @param {string} build - build identifier passed to OutputCacheManager.path
   * @returns {Promise<object>} the archiver instance
   * @throws {NotFoundError} propagated from `_getAllOutputFiles`
   */
  async archiveFilesForBuild(projectId, userId, build) {
    logger.debug({ projectId, userId, build }, 'Will create zip file')

    const contentDir = getContentDir(projectId, userId)
    const outputFiles = await this._getAllOutputFiles(
      contentDir,
      projectId,
      userId,
      build
    )

    const zip = archiver('zip')
    const loggedEvents = [
      ['error', 'error emitted when creating output files archive'],
      ['warning', 'warning emitted when creating output files archive'],
    ]
    for (const [eventName, message] of loggedEvents) {
      zip.on(eventName, err => {
        logger.warn({ err, projectId, userId, build }, message)
      })
    }

    const unreadablePaths = []
    for (const { path } of outputFiles) {
      let handle
      let opened = true
      try {
        handle = await open(
          `${contentDir}${OutputCacheManager.path(build, path)}`
        )
      } catch (error) {
        opened = false
        logger.warn(
          { path, error, projectId, userId, build },
          'error opening file to add to output files archive'
        )
        unreadablePaths.push(path)
      }
      if (opened) {
        // Only successfully opened files are streamed into the archive.
        zip.append(handle.createReadStream(), { name: path })
      }
    }

    if (unreadablePaths.length > 0) {
      const listing = unreadablePaths.join('\n')
      zip.append(listing, { name: 'missing_files.txt' })
    }

    // Deliberately not awaited: the archive is handed back to the caller
    // right away and a finalize failure is only logged.
    zip.finalize().catch(error => {
      logger.error(
        { error, projectId, userId, build },
        'error finalizing output files archive'
      )
    })

    return zip
  },

  /**
   * List the output files of a build that belong in the archive.
   *
   * Looks up the files below the build's directory inside `contentDir` and
   * drops the pdf, the clsi-cache tar-ball, the history snapshot blob and
   * the files ignored by the frontend (`ignoreFiles`).
   *
   * @param {string} contentDir - output directory path with trailing slash
   * @param {string} projectId - not used by this method
   * @param {string|null|undefined} userId - not used by this method
   * @param {string} build - build identifier passed to OutputCacheManager.path
   * @returns {Promise<Array<{path: string}>>} the remaining output files
   * @throws {NotFoundError} when the lookup fails with ENOENT, ENOTDIR or
   *   EACCES; any other error is rethrown unchanged
   */
  async _getAllOutputFiles(contentDir, projectId, userId, build) {
    try {
      const buildDir = `${contentDir}${OutputCacheManager.path(build, '.')}`
      const { outputFiles } = await OutputFileFinder.promises.findOutputFiles(
        [],
        buildDir
      )
      // Ignore the pdf, clsi-cache tar-ball, history snapshot blob and also ignore the files ignored by the frontend.
      const excludedPaths = new Set([
        'output.pdf',
        'output.tar.gz',
        'history-resync.json.gz',
        ...ignoreFiles,
      ])
      return outputFiles.filter(({ path }) => !excludedPaths.has(path))
    } catch (error) {
      const notFoundCodes = ['ENOENT', 'ENOTDIR', 'EACCES']
      if (notFoundCodes.includes(error.code)) {
        throw new NotFoundError('Output files not found')
      }
      throw error
    }
  },
}