Merge pull request #33705 from overleaf/mj-clsi-cwd-for-conversions

[clsi] Add cwd argument to CommandRunner and use to simplify conversions

GitOrigin-RevId: 5333e3262a99e602ab5470ae1e23facb5b28a170
This commit is contained in:
Mathias Jakobsen
2026-05-18 13:40:47 +01:00
committed by Copybot
parent c0111fec29
commit ce6f9b8e8c
6 changed files with 178 additions and 39 deletions

View File

@@ -626,7 +626,8 @@ async function _runSynctex(projectId, userId, command, opts) {
imageName || defaultImageName, imageName || defaultImageName,
timeout, timeout,
{}, {},
compileGroup compileGroup,
null
) )
return { return {
stdout, stdout,
@@ -674,7 +675,8 @@ async function wordcount(projectId, userId, filename, image) {
image, image,
timeout, timeout,
{}, {},
compileGroup compileGroup,
null
) )
const results = _parseWordcountFromOutput(stdout) const results = _parseWordcountFromOutput(stdout)
logger.debug( logger.debug(

View File

@@ -68,7 +68,8 @@ async function convertToLaTeX(
Settings.pandocImage, Settings.pandocImage,
Settings.conversionTimeoutSeconds * 1000, Settings.conversionTimeoutSeconds * 1000,
{}, {},
'conversions' 'conversions',
null
) )
if (exitCodePandoc !== 0) { if (exitCodePandoc !== 0) {
throw new OError('Non-zero exit code from pandoc', { throw new OError('Non-zero exit code from pandoc', {
@@ -95,7 +96,8 @@ async function convertToLaTeX(
Settings.pandocImage, Settings.pandocImage,
Settings.conversionTimeoutSeconds * 1000, Settings.conversionTimeoutSeconds * 1000,
{}, {},
'conversions' 'conversions',
null
) )
if (exitCodeZip !== 0) { if (exitCodeZip !== 0) {
throw new OError('Non-zero exit code from pandoc', { throw new OError('Non-zero exit code from pandoc', {
@@ -129,21 +131,18 @@ const LATEX_EXPORT_CONFIGS = {
'docx', 'docx',
'--citeproc', '--citeproc',
'--number-sections', '--number-sections',
'--resource-path=.',
], ],
}, },
markdown: { markdown: {
fileExtension: 'md', fileExtension: 'md',
compressOutput: true, compressOutput: true,
getPandocArgs: ({ outputPath, subdirName }) => [ getPandocArgs: ({ outputPath }) => [
'--output', '--output',
outputPath, outputPath,
'--from', '--from',
'latex', 'latex',
'--to', '--to',
'markdown', 'markdown',
'--resource-path=.',
`--extract-media=${subdirName}`,
], ],
}, },
} }
@@ -181,36 +180,74 @@ async function convertLaTeXToDocumentInDir(
const timeoutMs = Settings.conversionTimeoutSeconds * 1000 const timeoutMs = Settings.conversionTimeoutSeconds * 1000
const outputId = crypto.randomUUID() const outputId = crypto.randomUUID()
let pandocOutputPath, finalOutputName
if (config.compressOutput) {
await fs.mkdir(Path.join(compileDir, outputId), { recursive: true })
pandocOutputPath = Path.join(outputId, `main.${config.fileExtension}`)
finalOutputName = outputId + '.zip'
} else {
pandocOutputPath = outputId + '.' + config.fileExtension
finalOutputName = pandocOutputPath
}
logger.debug( logger.debug(
{ compileDir, rootDocPath, type }, { compileDir, rootDocPath, type },
'running pandoc latex-to-document in compile dir' 'running pandoc latex-to-document in compile dir'
) )
if (!config.compressOutput) {
const outputName = `${outputId}.${config.fileExtension}`
const { exitCode, stdout, stderr } = await CommandRunner.promises.run(
conversionId,
[
'pandoc',
rootDocPath,
...config.getPandocArgs({ outputPath: outputName }),
'--resource-path=.',
],
compileDir,
Settings.pandocImage,
timeoutMs,
{},
'conversions',
null
)
if (exitCode !== 0) {
throw new OError('pandoc latex-to-document conversion failed', {
type,
exitCode,
stdout,
stderr,
})
}
logger.debug(
{ stdout, stderr, exitCode },
'pandoc latex-to-document conversion completed'
)
return Path.join(compileDir, outputName)
}
// For compressed outputs we stage everything inside a uuid subdir so
// the archive root ends up flat:
// - pandoc runs with cwd=<outputId>, --extract-media=. drops images flat
// alongside main.<ext>, and --resource-path=.. lets it find originals
// in the parent compile dir.
// - zip runs with the same cwd, so `zip -r ../<id>.zip .` produces an
// archive whose root is main.<ext> + the media files (no uuid leak,
// no collision with anything already in compileDir).
await fs.mkdir(Path.join(compileDir, outputId), { recursive: true })
const outputName = `main.${config.fileExtension}`
const finalOutputName = `${outputId}.zip`
const { exitCode, stdout, stderr } = await CommandRunner.promises.run( const { exitCode, stdout, stderr } = await CommandRunner.promises.run(
conversionId, conversionId,
[ [
'pandoc', 'pandoc',
rootDocPath, Path.join('..', rootDocPath),
...config.getPandocArgs({ ...config.getPandocArgs({ outputPath: outputName }),
outputPath: pandocOutputPath, '--resource-path=..',
subdirName: outputId, '--extract-media=.',
}),
], ],
compileDir, compileDir,
Settings.pandocImage, Settings.pandocImage,
timeoutMs, timeoutMs,
{}, {},
'conversions' 'conversions',
outputId
) )
if (exitCode !== 0) { if (exitCode !== 0) {
@@ -227,22 +264,19 @@ async function convertLaTeXToDocumentInDir(
'pandoc latex-to-document conversion completed' 'pandoc latex-to-document conversion completed'
) )
if (!config.compressOutput) {
return Path.join(compileDir, finalOutputName)
}
const { const {
exitCode: zipExitCode, exitCode: zipExitCode,
stdout: zipStdout, stdout: zipStdout,
stderr: zipStderr, stderr: zipStderr,
} = await CommandRunner.promises.run( } = await CommandRunner.promises.run(
conversionId, conversionId,
['sh', '-c', `cd ${outputId} && zip -r ../${finalOutputName} .`], ['zip', '-r', Path.join('..', finalOutputName), '.'],
compileDir, compileDir,
Settings.pandocImage, Settings.pandocImage,
timeoutMs, timeoutMs,
{}, {},
'conversions' 'conversions',
outputId
) )
if (zipExitCode !== 0) { if (zipExitCode !== 0) {

View File

@@ -73,6 +73,7 @@ function runLatex(projectId, options, callback) {
timeout, timeout,
environment, environment,
compileGroup, compileGroup,
null,
function (error, output) { function (error, output) {
delete ProcessTable[id] delete ProcessTable[id]
if (error) { if (error) {

View File

@@ -13,6 +13,7 @@
*/ */
import { spawn } from 'node:child_process' import { spawn } from 'node:child_process'
import { promisify } from 'node:util' import { promisify } from 'node:util'
import Path from 'node:path'
import _ from 'lodash' import _ from 'lodash'
import logger from '@overleaf/logger' import logger from '@overleaf/logger'
let CommandRunner let CommandRunner
@@ -28,14 +29,19 @@ export default CommandRunner = {
timeout, timeout,
environment, environment,
compileGroup, compileGroup,
cwd,
callback callback
) { ) {
let key, value let key, value
callback = _.once(callback) callback = _.once(callback)
const spawnCwd = cwd ? Path.join(directory, cwd) : directory
command = Array.from(command).map(arg => command = Array.from(command).map(arg =>
arg.toString().replace('$COMPILE_DIR', directory) arg.toString().replace('$COMPILE_DIR', directory)
) )
logger.debug({ projectId, command, directory }, 'running command') logger.debug(
{ projectId, command, directory, cwd: spawnCwd },
'running command'
)
logger.warn('timeouts and sandboxing are not enabled with CommandRunner') logger.warn('timeouts and sandboxing are not enabled with CommandRunner')
// merge environment settings // merge environment settings
@@ -51,7 +57,7 @@ export default CommandRunner = {
// run command as detached process so it has its own process group (which can be killed if needed) // run command as detached process so it has its own process group (which can be killed if needed)
const proc = spawn(command[0], command.slice(1), { const proc = spawn(command[0], command.slice(1), {
cwd: directory, cwd: spawnCwd,
env, env,
stdio: ['pipe', 'pipe', 'ignore'], stdio: ['pipe', 'pipe', 'ignore'],
detached: true, detached: true,

View File

@@ -66,15 +66,15 @@ const LATEX_TO_DOCUMENT_CASES = [
compressOutput: true, compressOutput: true,
pandocArgs: outputId => [ pandocArgs: outputId => [
'pandoc', 'pandoc',
'main.tex', Path.join('..', 'main.tex'),
'--output', '--output',
Path.join(outputId, 'main.md'), 'main.md',
'--from', '--from',
'latex', 'latex',
'--to', '--to',
'markdown', 'markdown',
'--resource-path=.', '--resource-path=..',
`--extract-media=${outputId}`, '--extract-media=.',
], ],
}, },
] ]
@@ -409,13 +409,23 @@ describe('ConversionManager', function () {
) )
}) })
it('should run the conversion in the uuid-named subdirectory', function (ctx) {
expect(ctx.CommandRunner.promises.run.firstCall.args[7]).toBe(
'output-uuid'
)
})
it('should run pandoc then zip the subdirectory and return the zip path', function (ctx) { it('should run pandoc then zip the subdirectory and return the zip path', function (ctx) {
expect(ctx.CommandRunner.promises.run.callCount).toBe(2) expect(ctx.CommandRunner.promises.run.callCount).toBe(2)
expect(ctx.CommandRunner.promises.run.secondCall.args[1]).toEqual([ expect(ctx.CommandRunner.promises.run.secondCall.args[1]).toEqual([
'sh', 'zip',
'-c', '-r',
'cd output-uuid && zip -r ../output-uuid.zip .', Path.join('..', 'output-uuid.zip'),
'.',
]) ])
expect(ctx.CommandRunner.promises.run.secondCall.args[7]).toBe(
'output-uuid'
)
expect(ctx.result).toBe(Path.join(ctx.compileDir, 'output-uuid.zip')) expect(ctx.result).toBe(Path.join(ctx.compileDir, 'output-uuid.zip'))
}) })
}) })

View File

@@ -132,6 +132,7 @@ describe('DockerRunner', () => {
ctx.timeout, ctx.timeout,
ctx.env, ctx.env,
ctx.compileGroup, ctx.compileGroup,
null,
(err, output) => { (err, output) => {
ctx.callback(err, output) ctx.callback(err, output)
return resolve() return resolve()
@@ -177,6 +178,7 @@ describe('DockerRunner', () => {
ctx.timeout, ctx.timeout,
ctx.env, ctx.env,
ctx.compileGroup, ctx.compileGroup,
null,
ctx.callback ctx.callback
) )
}) })
@@ -208,6 +210,7 @@ describe('DockerRunner', () => {
ctx.timeout, ctx.timeout,
ctx.env, ctx.env,
'synctex-output', 'synctex-output',
null,
ctx.callback ctx.callback
) )
}) })
@@ -239,6 +242,7 @@ describe('DockerRunner', () => {
ctx.timeout, ctx.timeout,
ctx.env, ctx.env,
'synctex', 'synctex',
null,
ctx.callback ctx.callback
) )
}) })
@@ -270,6 +274,7 @@ describe('DockerRunner', () => {
ctx.timeout, ctx.timeout,
ctx.env, ctx.env,
'wordcount', 'wordcount',
null,
ctx.callback ctx.callback
) )
}) })
@@ -287,6 +292,39 @@ describe('DockerRunner', () => {
}) })
}) })
describe('with a cwd', () => {
beforeEach(ctx => {
ctx.DockerRunner._runAndWaitForContainer = sinon
.stub()
.callsArgWith(3, null, (ctx.output = { stdout: 'mock-output' }))
ctx.DockerRunner.run(
ctx.project_id,
ctx.command,
ctx.directory,
ctx.image,
ctx.timeout,
ctx.env,
ctx.compileGroup,
'subdir',
ctx.callback
)
})
it('should pass the cwd through to _getContainerOptions', ctx => {
ctx.DockerRunner._getContainerOptions
.calledWith(
ctx.command_with_dir,
ctx.image,
ctx.volumes,
ctx.timeout,
ctx.env,
ctx.compileGroup,
'subdir'
)
.should.equal(true)
})
})
describe('when the run throws an error', () => { describe('when the run throws an error', () => {
beforeEach(ctx => { beforeEach(ctx => {
let firstTime = true let firstTime = true
@@ -319,6 +357,7 @@ describe('DockerRunner', () => {
ctx.timeout, ctx.timeout,
ctx.env, ctx.env,
ctx.compileGroup, ctx.compileGroup,
null,
ctx.callback ctx.callback
) )
}) })
@@ -351,6 +390,7 @@ describe('DockerRunner', () => {
ctx.timeout, ctx.timeout,
ctx.env, ctx.env,
ctx.compileGroup, ctx.compileGroup,
null,
ctx.callback ctx.callback
) )
}) })
@@ -381,6 +421,7 @@ describe('DockerRunner', () => {
ctx.timeout, ctx.timeout,
ctx.env, ctx.env,
ctx.compileGroup, ctx.compileGroup,
null,
ctx.callback ctx.callback
) )
}) })
@@ -412,6 +453,7 @@ describe('DockerRunner', () => {
ctx.timeout, ctx.timeout,
ctx.env, ctx.env,
ctx.compileGroup, ctx.compileGroup,
null,
ctx.callback ctx.callback
) )
}) })
@@ -431,6 +473,7 @@ describe('DockerRunner', () => {
ctx.timeout, ctx.timeout,
ctx.env, ctx.env,
ctx.compileGroup, ctx.compileGroup,
null,
ctx.callback ctx.callback
) )
}) })
@@ -486,6 +529,7 @@ describe('DockerRunner', () => {
ctx.timeout, ctx.timeout,
ctx.env, ctx.env,
ctx.compileGroup, ctx.compileGroup,
null,
ctx.callback ctx.callback
) )
}) })
@@ -506,6 +550,48 @@ describe('DockerRunner', () => {
ctx.callback.calledWith(null, ctx.output).should.equal(true) ctx.callback.calledWith(null, ctx.output).should.equal(true)
}) })
}) })
describe('WorkingDir with cwd', () => {
beforeEach(ctx => {
ctx.DockerRunner._runAndWaitForContainer = sinon
.stub()
.callsArgWith(3, null, (ctx.output = { stdout: 'mock-output' }))
})
it('should default WorkingDir to /compile when cwd is null', ctx => {
ctx.DockerRunner.run(
ctx.project_id,
ctx.command,
ctx.directory,
ctx.image,
ctx.timeout,
ctx.env,
ctx.compileGroup,
null,
ctx.callback
)
const options =
ctx.DockerRunner._runAndWaitForContainer.lastCall.args[0]
expect(options.WorkingDir).to.equal('/compile')
})
it('should join cwd onto /compile when provided', ctx => {
ctx.DockerRunner.run(
ctx.project_id,
ctx.command,
ctx.directory,
ctx.image,
ctx.timeout,
ctx.env,
ctx.compileGroup,
'subdir/nested',
ctx.callback
)
const options =
ctx.DockerRunner._runAndWaitForContainer.lastCall.args[0]
expect(options.WorkingDir).to.equal('/compile/subdir/nested')
})
})
}) })
describe('_runAndWaitForContainer', () => { describe('_runAndWaitForContainer', () => {