From 92f62f91c177146a08c5d577ca644bfbd8014aa4 Mon Sep 17 00:00:00 2001 From: Andrew Rumble Date: Wed, 1 May 2024 14:52:12 +0100 Subject: [PATCH] Merge pull request #18148 from overleaf/ar-add-output-zip-endpoint-to-clsi [clsi] Add endpoints to get zip of output files GitOrigin-RevId: a1a935e8170ab5a8d40baa6d96f8e42fe22c2e8c --- package-lock.json | 87 +++++-- services/clsi/app.js | 15 ++ services/clsi/app/js/OutputController.js | 37 +++ .../clsi/app/js/OutputFileArchiveManager.js | 90 +++++++ services/clsi/package.json | 1 + .../test/unit/js/OutputControllerTests.js | 110 ++++++++ .../unit/js/OutputFileArchiveManagerTests.js | 234 ++++++++++++++++++ 7 files changed, 556 insertions(+), 18 deletions(-) create mode 100644 services/clsi/app/js/OutputController.js create mode 100644 services/clsi/app/js/OutputFileArchiveManager.js create mode 100644 services/clsi/test/unit/js/OutputControllerTests.js create mode 100644 services/clsi/test/unit/js/OutputFileArchiveManagerTests.js diff --git a/package-lock.json b/package-lock.json index e3e17a3297..9f64ebb80e 100644 --- a/package-lock.json +++ b/package-lock.json @@ -14340,15 +14340,15 @@ ] }, "node_modules/archiver": { - "version": "5.3.0", - "resolved": "https://registry.npmjs.org/archiver/-/archiver-5.3.0.tgz", - "integrity": "sha512-iUw+oDwK0fgNpvveEsdQ0Ase6IIKztBJU2U0E9MzszMfmVVUyv1QJhS2ITW9ZCqx8dktAxVAjWWkKehuZE8OPg==", + "version": "5.3.2", + "resolved": "https://registry.npmjs.org/archiver/-/archiver-5.3.2.tgz", + "integrity": "sha512-+25nxyyznAXF7Nef3y0EbBeqmGZgeN/BxHX29Rs39djAfaFalmQ89SE6CWyDCHzGL0yt/ycBtNOmGTW0FyGWNw==", "dependencies": { "archiver-utils": "^2.1.0", - "async": "^3.2.0", + "async": "^3.2.4", "buffer-crc32": "^0.2.1", "readable-stream": "^3.6.0", - "readdir-glob": "^1.0.0", + "readdir-glob": "^1.1.2", "tar-stream": "^2.2.0", "zip-stream": "^4.1.0" }, @@ -14403,6 +14403,11 @@ "safe-buffer": "~5.1.0" } }, + "node_modules/archiver/node_modules/async": { + "version": "3.2.5", + "resolved": "https://registry.npmjs.org/async/-/async-3.2.5.tgz", + "integrity": "sha512-baNZyqaaLhyLVKm/DlvdW051MSgO6b8eVfIezl9E5PqWxFgzLm/wQntEW4zOytVburDEr0JlALEpdOFwvErLsg==" + }, "node_modules/are-we-there-yet": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/are-we-there-yet/-/are-we-there-yet-2.0.0.tgz", @@ -34479,11 +34484,30 @@ } }, "node_modules/readdir-glob": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/readdir-glob/-/readdir-glob-1.1.1.tgz", - "integrity": "sha512-91/k1EzZwDx6HbERR+zucygRFfiPl2zkIYZtv3Jjr6Mn7SkKcVct8aVO+sSRiGMc6fLf72du3d92/uY63YPdEA==", + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/readdir-glob/-/readdir-glob-1.1.3.tgz", + "integrity": "sha512-v05I2k7xN8zXvPD9N+z/uhXPaj0sUFCe2rcWZIpBsqxfP7xXFQ0tipAd/wjj1YxWyWtUS5IDJpOG82JKt2EAVA==", "dependencies": { - "minimatch": "^3.0.4" + "minimatch": "^5.1.0" + } + }, + "node_modules/readdir-glob/node_modules/brace-expansion": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz", + "integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==", + "dependencies": { + "balanced-match": "^1.0.0" + } + }, + "node_modules/readdir-glob/node_modules/minimatch": { + "version": "5.1.6", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-5.1.6.tgz", + "integrity": "sha512-lKwV/1brpG6mBUFHtb7NUmtABCb2WZZmm2wNiOA5hAb8VdCS4B3dtMWyvcoViccwAW/COERjXLt0zP1zXUN26g==", + "dependencies": { + "brace-expansion": "^2.0.1" + }, + "engines": { + "node": ">=10" } }, "node_modules/readdirp": { @@ -41120,6 +41144,7 @@ "@overleaf/o-error": "*", "@overleaf/promise-utils": "*", "@overleaf/settings": "*", + "archiver": "5.3.2", "async": "3.2.2", "body-parser": "^1.19.0", "bunyan": "^1.8.15", @@ -51163,6 +51188,7 @@ "@overleaf/o-error": "*", "@overleaf/promise-utils": "*", "@overleaf/settings": "*", + "archiver": "5.3.2", "async": "3.2.2", "body-parser": "^1.19.0", "bunyan": "^1.8.15", @@ -58349,17 +58375,24 @@ "dev": true }, "archiver": { - "version": "5.3.0", - "resolved": "https://registry.npmjs.org/archiver/-/archiver-5.3.0.tgz", - "integrity": "sha512-iUw+oDwK0fgNpvveEsdQ0Ase6IIKztBJU2U0E9MzszMfmVVUyv1QJhS2ITW9ZCqx8dktAxVAjWWkKehuZE8OPg==", + "version": "5.3.2", + "resolved": "https://registry.npmjs.org/archiver/-/archiver-5.3.2.tgz", + "integrity": "sha512-+25nxyyznAXF7Nef3y0EbBeqmGZgeN/BxHX29Rs39djAfaFalmQ89SE6CWyDCHzGL0yt/ycBtNOmGTW0FyGWNw==", "requires": { "archiver-utils": "^2.1.0", - "async": "^3.2.0", + "async": "^3.2.4", "buffer-crc32": "^0.2.1", "readable-stream": "^3.6.0", - "readdir-glob": "^1.0.0", + "readdir-glob": "^1.1.2", "tar-stream": "^2.2.0", "zip-stream": "^4.1.0" + }, + "dependencies": { + "async": { + "version": "3.2.5", + "resolved": "https://registry.npmjs.org/async/-/async-3.2.5.tgz", + "integrity": "sha512-baNZyqaaLhyLVKm/DlvdW051MSgO6b8eVfIezl9E5PqWxFgzLm/wQntEW4zOytVburDEr0JlALEpdOFwvErLsg==" + } } }, "archiver-utils": { @@ -74479,11 +74512,29 @@ } }, "readdir-glob": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/readdir-glob/-/readdir-glob-1.1.1.tgz", - "integrity": "sha512-91/k1EzZwDx6HbERR+zucygRFfiPl2zkIYZtv3Jjr6Mn7SkKcVct8aVO+sSRiGMc6fLf72du3d92/uY63YPdEA==", + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/readdir-glob/-/readdir-glob-1.1.3.tgz", + "integrity": "sha512-v05I2k7xN8zXvPD9N+z/uhXPaj0sUFCe2rcWZIpBsqxfP7xXFQ0tipAd/wjj1YxWyWtUS5IDJpOG82JKt2EAVA==", "requires": { - "minimatch": "^3.0.4" + "minimatch": "^5.1.0" + }, + "dependencies": { + "brace-expansion": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz", + "integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==", + "requires": { + "balanced-match": "^1.0.0" + } + }, + "minimatch": { + "version": "5.1.6", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-5.1.6.tgz", + "integrity": "sha512-lKwV/1brpG6mBUFHtb7NUmtABCb2WZZmm2wNiOA5hAb8VdCS4B3dtMWyvcoViccwAW/COERjXLt0zP1zXUN26g==", + "requires": { + "brace-expansion": "^2.0.1" + } + } } }, "readdirp": { diff --git a/services/clsi/app.js b/services/clsi/app.js index e1c6f5e04f..bcf89aaaf9 100644 --- a/services/clsi/app.js +++ b/services/clsi/app.js @@ -14,6 +14,7 @@ const Metrics = require('@overleaf/metrics') const smokeTest = require('./test/smoke/js/SmokeTests') const ContentTypeMapper = require('./app/js/ContentTypeMapper') const Errors = require('./app/js/Errors') +const { createOutputZip } = require('./app/js/OutputController') const Path = require('path') @@ -170,6 +171,20 @@ const staticOutputServer = ForbidSymlinks( } ) +// This needs to be before GET /project/:project_id/build/:build_id/output/* +app.get( + '/project/:project_id/build/:build_id/output/output.zip', + bodyParser.json(), + createOutputZip +) + +// This needs to be before GET /project/:project_id/user/:user_id/build/:build_id/output/* +app.get( + '/project/:project_id/user/:user_id/build/:build_id/output/output.zip', + bodyParser.json(), + createOutputZip +) + app.get( '/project/:project_id/user/:user_id/build/:build_id/output/*', function (req, res, next) { diff --git a/services/clsi/app/js/OutputController.js b/services/clsi/app/js/OutputController.js new file mode 100644 index 0000000000..1155ee31cb --- /dev/null +++ b/services/clsi/app/js/OutputController.js @@ -0,0 +1,37 @@ +const logger = require('@overleaf/logger') +const OutputFileArchiveManager = require('./OutputFileArchiveManager') +const { expressify } = require('@overleaf/promise-utils') + +function cleanFiles(files) { + if (!Array.isArray(files)) { + return [] + } + return files.filter(file => /^output\./g.test(file)) +} + +async function createOutputZip(req, res) { + const { + project_id: projectId, + user_id: userId, + build_id: buildId, + } = req.params + const files = cleanFiles(req.query.files) + logger.debug({ projectId, userId, buildId, files }, 'Will create zip file') + + const archive = await OutputFileArchiveManager.archiveFilesForBuild( + projectId, + userId, + buildId, + files + ) + + archive.on('error', err => { + logger.warn({ err }, 'error emitted when creating output files archive') + }) + + res.attachment('output.zip') + res.setHeader('X-Content-Type-Options', 'nosniff') + archive.pipe(res) +} + +module.exports = { createOutputZip: expressify(createOutputZip) } diff --git a/services/clsi/app/js/OutputFileArchiveManager.js b/services/clsi/app/js/OutputFileArchiveManager.js new file mode 100644 index 0000000000..23ab7c2133 --- /dev/null +++ b/services/clsi/app/js/OutputFileArchiveManager.js @@ -0,0 +1,90 @@ +let OutputFileArchiveManager +const archiver = require('archiver') +const OutputCacheManager = require('./OutputCacheManager') +const OutputFileFinder = require('./OutputFileFinder') +const Settings = require('@overleaf/settings') +const { open } = require('node:fs/promises') +const path = require('node:path') +const { NotFoundError } = require('./Errors') + +function getContentDir(projectId, userId) { + let subDir + if (userId != null) { + subDir = `${projectId}-${userId}` + } else { + subDir = projectId + } + return `${Settings.path.outputDir}/${subDir}/` +} + +module.exports = OutputFileArchiveManager = { + async archiveFilesForBuild(projectId, userId, build, files = []) { + const contentDir = getContentDir(projectId, userId) + + const validFiles = await (files.length > 0 + ? this._getRequestedOutputFiles(projectId, userId, build, files) + : this._getAllOutputFiles(projectId, userId, build)) + + const archive = archiver('zip') + + const missingFiles = files.filter( + file => !validFiles.includes(path.basename(file)) + ) + + for (const file of validFiles) { + try { + const fileHandle = await open( + `${contentDir}${OutputCacheManager.path(build, file)}` + ) + const fileStream = fileHandle.createReadStream() + archive.append(fileStream, { name: file }) + } catch (error) { + missingFiles.push(file) + } + } + + if (missingFiles.length > 0) { + archive.append(missingFiles.join('\n'), { + name: 'missing_files.txt', + }) + } + + await archive.finalize() + + return archive + }, + + async _getAllOutputFiles(projectId, userId, build) { + const contentDir = getContentDir(projectId, userId) + + try { + const { outputFiles } = await OutputFileFinder.promises.findOutputFiles( + [], + `${contentDir}${OutputCacheManager.path(build, '.')}` + ) + + return outputFiles.map(({ path }) => path) + } catch (error) { + if ( + error.code === 'ENOENT' || + error.code === 'ENOTDIR' || + error.code === 'EACCES' + ) { + throw new NotFoundError('Output files not found') + } + throw error + } + }, + + async _getRequestedOutputFiles(projectId, userId, build, files) { + const outputFiles = new Set( + await OutputFileArchiveManager._getAllOutputFiles( + projectId, + userId, + build + ) + ) + + return files.filter(file => outputFiles.has(file)) + }, +} diff --git a/services/clsi/package.json b/services/clsi/package.json index 4002bcdedf..df92efa1e1 100644 --- a/services/clsi/package.json +++ b/services/clsi/package.json @@ -23,6 +23,7 @@ "@overleaf/o-error": "*", "@overleaf/promise-utils": "*", "@overleaf/settings": "*", + "archiver": "5.3.2", "async": "3.2.2", "body-parser": "^1.19.0", "bunyan": "^1.8.15", diff --git a/services/clsi/test/unit/js/OutputControllerTests.js b/services/clsi/test/unit/js/OutputControllerTests.js new file mode 100644 index 0000000000..3b9ce38773 --- /dev/null +++ b/services/clsi/test/unit/js/OutputControllerTests.js @@ -0,0 +1,110 @@ +const SandboxedModule = require('sandboxed-module') +const sinon = require('sinon') +const MODULE_PATH = require('path').join( + __dirname, + '../../../app/js/OutputController' +) + +describe('OutputController', function () { + describe('createOutputZip', function () { + beforeEach(function () { + this.archive = { + on: sinon.stub(), + pipe: sinon.stub(), + } + + this.archiveFilesForBuild = sinon.stub().resolves(this.archive) + + this.OutputController = SandboxedModule.require(MODULE_PATH, { + requires: { + './OutputFileArchiveManager': { + archiveFilesForBuild: this.archiveFilesForBuild, + }, + }, + }) + }) + + describe('when OutputFileArchiveManager creates an archive', function () { + beforeEach(function (done) { + this.res = { + attachment: sinon.stub(), + setHeader: sinon.stub(), + } + this.req = { + params: { + project_id: 'project-id-123', + user_id: 'user-id-123', + build_id: 'build-id-123', + }, + query: { + files: ['output.tex', 'not-output.tex'], + }, + } + this.archive.pipe.callsFake(() => done()) + this.OutputController.createOutputZip(this.req, this.res) + }) + + it('does not pass files that do not start with "output" to OutputFileArchiveManager', function () { + sinon.assert.calledWith( + this.archiveFilesForBuild, + 'project-id-123', + 'user-id-123', + 'build-id-123', + ['output.tex'] + ) + }) + + it('pipes the archive to the response', function () { + sinon.assert.calledWith(this.archive.pipe, this.res) + }) + + it('calls the express convenience method to set attachment headers', function () { + sinon.assert.calledWith(this.res.attachment, 'output.zip') + }) + + it('sets the X-Content-Type-Options header to nosniff', function () { + sinon.assert.calledWith( + this.res.setHeader, + 'X-Content-Type-Options', + 'nosniff' + ) + }) + }) + + describe('when OutputFileArchiveManager throws an error', function () { + let error + + beforeEach(function (done) { + error = new Error('error message') + + this.archiveFilesForBuild.rejects(error) + + this.res = { + status: sinon.stub().returnsThis(), + send: sinon.stub(), + } + this.req = { + params: { + project_id: 'project-id-123', + user_id: 'user-id-123', + build_id: 'build-id-123', + }, + query: { + files: ['output.tex'], + }, + } + this.OutputController.createOutputZip( + this.req, + this.res, + (this.next = sinon.stub().callsFake(() => { + done() + })) + ) + }) + + it('calls next with the error', function () { + sinon.assert.calledWith(this.next, error) + }) + }) + }) +}) diff --git a/services/clsi/test/unit/js/OutputFileArchiveManagerTests.js b/services/clsi/test/unit/js/OutputFileArchiveManagerTests.js new file mode 100644 index 0000000000..c93231e704 --- /dev/null +++ b/services/clsi/test/unit/js/OutputFileArchiveManagerTests.js @@ -0,0 +1,234 @@ +const SandboxedModule = require('sandboxed-module') +const sinon = require('sinon') +const { assert, expect } = require('chai') + +const MODULE_PATH = require('path').join( + __dirname, + '../../../app/js/OutputFileArchiveManager' +) + +describe('OutputFileArchiveManager', function () { + const userId = 'user-id-123' + const projectId = 'project-id-123' + const buildId = 'build-id-123' + + afterEach(function () { + sinon.restore() + }) + + beforeEach(function () { + this.OutputFileFinder = { + promises: { + findOutputFiles: sinon.stub().resolves({ outputFiles: [] }), + }, + } + + this.OutputCacheManger = { + path: sinon.stub().callsFake((build, path) => { + return `${build}/${path}` + }), + } + + this.archive = { + append: sinon.stub(), + finalize: sinon.stub(), + } + + this.archiver = sinon.stub().returns(this.archive) + + this.outputDir = '/output/dir' + + this.fs = { + open: sinon.stub().callsFake(file => ({ + createReadStream: sinon.stub().returns(`handle: ${file}`), + })), + } + + this.OutputFileArchiveManager = SandboxedModule.require(MODULE_PATH, { + requires: { + './OutputFileFinder': this.OutputFileFinder, + './OutputCacheManager': this.OutputCacheManger, + archiver: this.archiver, + 'node:fs/promises': this.fs, + 'node:path': { + basename: sinon.stub().callsFake(path => path.split('/').pop()), + }, + '@overleaf/settings': { + path: { + outputDir: this.outputDir, + }, + }, + }, + }) + }) + + describe('when called with no files', function () { + beforeEach(async function () { + this.OutputFileFinder.promises.findOutputFiles.resolves({ + outputFiles: [ + { path: 'file_1' }, + { path: 'file_2' }, + { path: 'file_3' }, + { path: 'file_4' }, + ], + }) + await this.OutputFileArchiveManager.archiveFilesForBuild( + projectId, + userId, + buildId + ) + }) + + it('creates a zip archive', function () { + sinon.assert.calledWith(this.archiver, 'zip') + }) + + it('adds all the output files to the archive', function () { + expect(this.archive.append.callCount).to.equal(4) + sinon.assert.calledWith( + this.archive.append, + `handle: ${this.outputDir}/${projectId}-${userId}/${buildId}/file_1`, + sinon.match({ name: 'file_1' }) + ) + sinon.assert.calledWith( + this.archive.append, + `handle: ${this.outputDir}/${projectId}-${userId}/${buildId}/file_2`, + sinon.match({ name: 'file_2' }) + ) + sinon.assert.calledWith( + this.archive.append, + `handle: ${this.outputDir}/${projectId}-${userId}/${buildId}/file_3`, + sinon.match({ name: 'file_3' }) + ) + sinon.assert.calledWith( + this.archive.append, + `handle: ${this.outputDir}/${projectId}-${userId}/${buildId}/file_4`, + sinon.match({ name: 'file_4' }) + ) + }) + + it('finalizes the archive after all files are appended', function () { + sinon.assert.called(this.archive.finalize) + expect(this.archive.finalize.calledBefore(this.archive.append)).to.be + .false + }) + }) + + describe('when called with a list of files that all are in the output directory', function () { + beforeEach(async function () { + this.OutputFileFinder.promises.findOutputFiles.resolves({ + outputFiles: [ + { path: 'file_1' }, + { path: 'file_2' }, + { path: 'file_3' }, + { path: 'file_4' }, + ], + }) + await this.OutputFileArchiveManager.archiveFilesForBuild( + projectId, + userId, + buildId, + ['file_1', 'file_4'] + ) + }) + + it('creates a zip archive', function () { + sinon.assert.calledWith(this.archiver, 'zip') + }) + + it('adds only output files from the list of files to the archive', function () { + expect(this.archive.append.callCount).to.equal(2) + sinon.assert.calledWith( + this.archive.append, + `handle: ${this.outputDir}/${projectId}-${userId}/${buildId}/file_1`, + sinon.match({ + name: 'file_1', + }) + ) + sinon.assert.calledWith( + this.archive.append, + `handle: ${this.outputDir}/${projectId}-${userId}/${buildId}/file_4`, + sinon.match({ + name: 'file_4', + }) + ) + }) + + it('finalizes the archive after all files are appended', function () { + sinon.assert.called(this.archive.finalize) + expect(this.archive.finalize.calledBefore(this.archive.append)).to.be + .false + }) + }) + + describe('when called with a list of files and one of the files is missing from the output directory', function () { + beforeEach(async function () { + this.OutputFileFinder.promises.findOutputFiles.resolves({ + outputFiles: [ + { path: 'file_1' }, + { path: 'file_2' }, + { path: 'file_3' }, + ], + }) + await this.OutputFileArchiveManager.archiveFilesForBuild( + projectId, + userId, + buildId, + ['file_1', 'file_4'] + ) + }) + + it('creates a zip archive', function () { + sinon.assert.calledWith(this.archiver, 'zip') + }) + + it('adds the files that were found to the archive', function () { + sinon.assert.calledWith( + this.archive.append, + `handle: ${this.outputDir}/${projectId}-${userId}/${buildId}/file_1`, + sinon.match({ name: 'file_1' }) + ) + }) + + it('adds a file listing any missing files', function () { + sinon.assert.calledWith( + this.archive.append, + 'file_4', + sinon.match({ + name: 'missing_files.txt', + }) + ) + }) + + it('finalizes the archive after all files are appended', function () { + sinon.assert.called(this.archive.finalize) + expect(this.archive.finalize.calledBefore(this.archive.append)).to.be + .false + }) + }) + + describe('when the output directory cannot be accessed', function () { + beforeEach(async function () { + this.OutputFileFinder.promises.findOutputFiles.rejects({ + code: 'ENOENT', + }) + }) + + it('rejects with a NotFoundError', async function () { + try { + await this.OutputFileArchiveManager.archiveFilesForBuild( + projectId, + userId, + buildId + ) + assert.fail('should have thrown a NotFoundError') + } catch (err) { + expect(err).to.haveOwnProperty('name', 'NotFoundError') + } + }) + + it('does not create an archive', function () { + expect(this.archiver.called).to.be.false + }) + }) +})