From ce6f9b8e8c69732a61588c2d7fa8ca66f929561e Mon Sep 17 00:00:00 2001 From: Mathias Jakobsen Date: Mon, 18 May 2026 13:40:47 +0100 Subject: [PATCH] Merge pull request #33705 from overleaf/mj-clsi-cwd-for-conversions [clsi] Add cwd argument to CommandRunner and use to simplify conversions GitOrigin-RevId: 5333e3262a99e602ab5470ae1e23facb5b28a170 --- services/clsi/app/js/CompileManager.js | 6 +- services/clsi/app/js/ConversionManager.js | 90 +++++++++++++------ services/clsi/app/js/LatexRunner.js | 1 + services/clsi/app/js/LocalCommandRunner.js | 10 ++- .../test/unit/js/ConversionManager.test.js | 24 +++-- .../clsi/test/unit/js/DockerRunner.test.js | 86 ++++++++++++++++++ 6 files changed, 178 insertions(+), 39 deletions(-) diff --git a/services/clsi/app/js/CompileManager.js b/services/clsi/app/js/CompileManager.js index 541913d7c9..8ea9665195 100644 --- a/services/clsi/app/js/CompileManager.js +++ b/services/clsi/app/js/CompileManager.js @@ -626,7 +626,8 @@ async function _runSynctex(projectId, userId, command, opts) { imageName || defaultImageName, timeout, {}, - compileGroup + compileGroup, + null ) return { stdout, @@ -674,7 +675,8 @@ async function wordcount(projectId, userId, filename, image) { image, timeout, {}, - compileGroup + compileGroup, + null ) const results = _parseWordcountFromOutput(stdout) logger.debug( diff --git a/services/clsi/app/js/ConversionManager.js b/services/clsi/app/js/ConversionManager.js index dcaae2bd1d..32825aecc9 100644 --- a/services/clsi/app/js/ConversionManager.js +++ b/services/clsi/app/js/ConversionManager.js @@ -68,7 +68,8 @@ async function convertToLaTeX( Settings.pandocImage, Settings.conversionTimeoutSeconds * 1000, {}, - 'conversions' + 'conversions', + null ) if (exitCodePandoc !== 0) { throw new OError('Non-zero exit code from pandoc', { @@ -95,7 +96,8 @@ async function convertToLaTeX( Settings.pandocImage, Settings.conversionTimeoutSeconds * 1000, {}, - 'conversions' + 'conversions', + null ) if (exitCodeZip !== 0) { throw new OError('Non-zero exit code from pandoc', { @@ -129,21 +131,18 @@ const LATEX_EXPORT_CONFIGS = { 'docx', '--citeproc', '--number-sections', - '--resource-path=.', ], }, markdown: { fileExtension: 'md', compressOutput: true, - getPandocArgs: ({ outputPath, subdirName }) => [ + getPandocArgs: ({ outputPath }) => [ '--output', outputPath, '--from', 'latex', '--to', 'markdown', - '--resource-path=.', - `--extract-media=${subdirName}`, ], }, } @@ -181,36 +180,74 @@ async function convertLaTeXToDocumentInDir( const timeoutMs = Settings.conversionTimeoutSeconds * 1000 const outputId = crypto.randomUUID() - let pandocOutputPath, finalOutputName - if (config.compressOutput) { - await fs.mkdir(Path.join(compileDir, outputId), { recursive: true }) - pandocOutputPath = Path.join(outputId, `main.${config.fileExtension}`) - finalOutputName = outputId + '.zip' - } else { - pandocOutputPath = outputId + '.' + config.fileExtension - finalOutputName = pandocOutputPath - } - logger.debug( { compileDir, rootDocPath, type }, 'running pandoc latex-to-document in compile dir' ) + if (!config.compressOutput) { + const outputName = `${outputId}.${config.fileExtension}` + const { exitCode, stdout, stderr } = await CommandRunner.promises.run( + conversionId, + [ + 'pandoc', + rootDocPath, + ...config.getPandocArgs({ outputPath: outputName }), + '--resource-path=.', + ], + compileDir, + Settings.pandocImage, + timeoutMs, + {}, + 'conversions', + null + ) + + if (exitCode !== 0) { + throw new OError('pandoc latex-to-document conversion failed', { + type, + exitCode, + stdout, + stderr, + }) + } + + logger.debug( + { stdout, stderr, exitCode }, + 'pandoc latex-to-document conversion completed' + ) + + return Path.join(compileDir, outputName) + } + + // For compressed outputs we stage everything inside a uuid subdir so + // the archive root ends up flat: + // - pandoc runs with cwd=, --extract-media=. drops images flat + // alongside main., and --resource-path=.. lets it find originals + // in the parent compile dir. + // - zip runs with the same cwd, so `zip -r ../.zip .` produces an + // archive whose root is main. + the media files (no uuid leak, + // no collision with anything already in compileDir). + await fs.mkdir(Path.join(compileDir, outputId), { recursive: true }) + + const outputName = `main.${config.fileExtension}` + const finalOutputName = `${outputId}.zip` + const { exitCode, stdout, stderr } = await CommandRunner.promises.run( conversionId, [ 'pandoc', - rootDocPath, - ...config.getPandocArgs({ - outputPath: pandocOutputPath, - subdirName: outputId, - }), + Path.join('..', rootDocPath), + ...config.getPandocArgs({ outputPath: outputName }), + '--resource-path=..', + '--extract-media=.', ], compileDir, Settings.pandocImage, timeoutMs, {}, - 'conversions' + 'conversions', + outputId ) if (exitCode !== 0) { @@ -227,22 +264,19 @@ async function convertLaTeXToDocumentInDir( 'pandoc latex-to-document conversion completed' ) - if (!config.compressOutput) { - return Path.join(compileDir, finalOutputName) - } - const { exitCode: zipExitCode, stdout: zipStdout, stderr: zipStderr, } = await CommandRunner.promises.run( conversionId, - ['sh', '-c', `cd ${outputId} && zip -r ../${finalOutputName} .`], + ['zip', '-r', Path.join('..', finalOutputName), '.'], compileDir, Settings.pandocImage, timeoutMs, {}, - 'conversions' + 'conversions', + outputId ) if (zipExitCode !== 0) { diff --git a/services/clsi/app/js/LatexRunner.js b/services/clsi/app/js/LatexRunner.js index f0607ec670..2fd3afa279 100644 --- a/services/clsi/app/js/LatexRunner.js +++ b/services/clsi/app/js/LatexRunner.js @@ -73,6 +73,7 @@ function runLatex(projectId, options, callback) { timeout, environment, compileGroup, + null, function (error, output) { delete ProcessTable[id] if (error) { diff --git a/services/clsi/app/js/LocalCommandRunner.js b/services/clsi/app/js/LocalCommandRunner.js index c0cbbbe67b..a53f6756ab 100644 --- a/services/clsi/app/js/LocalCommandRunner.js +++ b/services/clsi/app/js/LocalCommandRunner.js @@ -13,6 +13,7 @@ */ import { spawn } from 'node:child_process' import { promisify } from 'node:util' +import Path from 'node:path' import _ from 'lodash' import logger from '@overleaf/logger' let CommandRunner @@ -28,14 +29,19 @@ export default CommandRunner = { timeout, environment, compileGroup, + cwd, callback ) { let key, value callback = _.once(callback) + const spawnCwd = cwd ? Path.join(directory, cwd) : directory command = Array.from(command).map(arg => arg.toString().replace('$COMPILE_DIR', directory) ) - logger.debug({ projectId, command, directory }, 'running command') + logger.debug( + { projectId, command, directory, cwd: spawnCwd }, + 'running command' + ) logger.warn('timeouts and sandboxing are not enabled with CommandRunner') // merge environment settings @@ -51,7 +57,7 @@ export default CommandRunner = { // run command as detached process so it has its own process group (which can be killed if needed) const proc = spawn(command[0], command.slice(1), { - cwd: directory, + cwd: spawnCwd, env, stdio: ['pipe', 'pipe', 'ignore'], detached: true, diff --git a/services/clsi/test/unit/js/ConversionManager.test.js b/services/clsi/test/unit/js/ConversionManager.test.js index bd01c6cae3..b65cde0f18 100644 --- a/services/clsi/test/unit/js/ConversionManager.test.js +++ b/services/clsi/test/unit/js/ConversionManager.test.js @@ -66,15 +66,15 @@ const LATEX_TO_DOCUMENT_CASES = [ compressOutput: true, pandocArgs: outputId => [ 'pandoc', - 'main.tex', + Path.join('..', 'main.tex'), '--output', - Path.join(outputId, 'main.md'), + 'main.md', '--from', 'latex', '--to', 'markdown', - '--resource-path=.', - `--extract-media=${outputId}`, + '--resource-path=..', + '--extract-media=.', ], }, ] @@ -409,13 +409,23 @@ describe('ConversionManager', function () { ) }) + it('should run the conversion in the uuid-named subdirectory', function (ctx) { + expect(ctx.CommandRunner.promises.run.firstCall.args[7]).toBe( + 'output-uuid' + ) + }) + it('should run pandoc then zip the subdirectory and return the zip path', function (ctx) { expect(ctx.CommandRunner.promises.run.callCount).toBe(2) expect(ctx.CommandRunner.promises.run.secondCall.args[1]).toEqual([ - 'sh', - '-c', - 'cd output-uuid && zip -r ../output-uuid.zip .', + 'zip', + '-r', + Path.join('..', 'output-uuid.zip'), + '.', ]) + expect(ctx.CommandRunner.promises.run.secondCall.args[7]).toBe( + 'output-uuid' + ) expect(ctx.result).toBe(Path.join(ctx.compileDir, 'output-uuid.zip')) }) }) diff --git a/services/clsi/test/unit/js/DockerRunner.test.js b/services/clsi/test/unit/js/DockerRunner.test.js index 60e13db919..608838d975 100644 --- a/services/clsi/test/unit/js/DockerRunner.test.js +++ b/services/clsi/test/unit/js/DockerRunner.test.js @@ -132,6 +132,7 @@ describe('DockerRunner', () => { ctx.timeout, ctx.env, ctx.compileGroup, + null, (err, output) => { ctx.callback(err, output) return resolve() @@ -177,6 +178,7 @@ describe('DockerRunner', () => { ctx.timeout, ctx.env, ctx.compileGroup, + null, ctx.callback ) }) @@ -208,6 +210,7 @@ describe('DockerRunner', () => { ctx.timeout, ctx.env, 'synctex-output', + null, ctx.callback ) }) @@ -239,6 +242,7 @@ describe('DockerRunner', () => { ctx.timeout, ctx.env, 'synctex', + null, ctx.callback ) }) @@ -270,6 +274,7 @@ describe('DockerRunner', () => { ctx.timeout, ctx.env, 'wordcount', + null, ctx.callback ) }) @@ -287,6 +292,39 @@ describe('DockerRunner', () => { }) }) + describe('with a cwd', () => { + beforeEach(ctx => { + ctx.DockerRunner._runAndWaitForContainer = sinon + .stub() + .callsArgWith(3, null, (ctx.output = { stdout: 'mock-output' })) + ctx.DockerRunner.run( + ctx.project_id, + ctx.command, + ctx.directory, + ctx.image, + ctx.timeout, + ctx.env, + ctx.compileGroup, + 'subdir', + ctx.callback + ) + }) + + it('should pass the cwd through to _getContainerOptions', ctx => { + ctx.DockerRunner._getContainerOptions + .calledWith( + ctx.command_with_dir, + ctx.image, + ctx.volumes, + ctx.timeout, + ctx.env, + ctx.compileGroup, + 'subdir' + ) + .should.equal(true) + }) + }) + describe('when the run throws an error', () => { beforeEach(ctx => { let firstTime = true @@ -319,6 +357,7 @@ describe('DockerRunner', () => { ctx.timeout, ctx.env, ctx.compileGroup, + null, ctx.callback ) }) @@ -351,6 +390,7 @@ describe('DockerRunner', () => { ctx.timeout, ctx.env, ctx.compileGroup, + null, ctx.callback ) }) @@ -381,6 +421,7 @@ describe('DockerRunner', () => { ctx.timeout, ctx.env, ctx.compileGroup, + null, ctx.callback ) }) @@ -412,6 +453,7 @@ describe('DockerRunner', () => { ctx.timeout, ctx.env, ctx.compileGroup, + null, ctx.callback ) }) @@ -431,6 +473,7 @@ describe('DockerRunner', () => { ctx.timeout, ctx.env, ctx.compileGroup, + null, ctx.callback ) }) @@ -486,6 +529,7 @@ describe('DockerRunner', () => { ctx.timeout, ctx.env, ctx.compileGroup, + null, ctx.callback ) }) @@ -506,6 +550,48 @@ describe('DockerRunner', () => { ctx.callback.calledWith(null, ctx.output).should.equal(true) }) }) + + describe('WorkingDir with cwd', () => { + beforeEach(ctx => { + ctx.DockerRunner._runAndWaitForContainer = sinon + .stub() + .callsArgWith(3, null, (ctx.output = { stdout: 'mock-output' })) + }) + + it('should default WorkingDir to /compile when cwd is null', ctx => { + ctx.DockerRunner.run( + ctx.project_id, + ctx.command, + ctx.directory, + ctx.image, + ctx.timeout, + ctx.env, + ctx.compileGroup, + null, + ctx.callback + ) + const options = + ctx.DockerRunner._runAndWaitForContainer.lastCall.args[0] + expect(options.WorkingDir).to.equal('/compile') + }) + + it('should join cwd onto /compile when provided', ctx => { + ctx.DockerRunner.run( + ctx.project_id, + ctx.command, + ctx.directory, + ctx.image, + ctx.timeout, + ctx.env, + ctx.compileGroup, + 'subdir/nested', + ctx.callback + ) + const options = + ctx.DockerRunner._runAndWaitForContainer.lastCall.args[0] + expect(options.WorkingDir).to.equal('/compile/subdir/nested') + }) + }) }) describe('_runAndWaitForContainer', () => {