Files
overleaf-cep/services/clsi/app/js/ConversionManager.js
Mathias Jakobsen eddcc5a42e Merge pull request #32857 from overleaf/ds-pandoc-import-md
[WEB + CLSI] Import markdown files using pandoc

GitOrigin-RevId: adad7831ddb13a8fcb8063871166bde13cbbf1b6
2026-05-08 08:09:02 +00:00

193 lines
4.5 KiB
JavaScript

import logger from '@overleaf/logger'
import Settings from '@overleaf/settings'
import fs from 'node:fs/promises'
import Path from 'node:path'
import CommandRunner from './CommandRunner.js'
import LockManager from './LockManager.js'
import OError from '@overleaf/o-error'
/**
 * Per-type settings for converting an uploaded document to LaTeX, keyed by
 * the conversion type passed to `convertToLaTeX`.
 *
 * - `inputFilename`: fixed name the uploaded file is copied to inside the
 *   conversion directory; it is also the input path handed to pandoc.
 * - `pandocArgs`: extra arguments appended after the shared pandoc arguments
 *   (`--output main.tex --to latex --standalone`).
 */
const CONVERSION_CONFIGS = {
  // Word documents: media is extracted relative to the directory pandoc runs
  // in, and citations are read from the document and rendered via citeproc.
  docx: {
    inputFilename: 'input.docx',
    pandocArgs: [
      '--extract-media=.',
      '--from',
      'docx+citations',
      '--citeproc',
    ],
  },
  // Markdown: only the input format needs to be stated.
  markdown: {
    inputFilename: 'input.md',
    pandocArgs: [
      '--from',
      'markdown',
    ],
  },
}
/**
 * Convert an uploaded document to LaTeX while holding the lock for its
 * conversion directory.
 *
 * The conversion directory is `<Settings.path.compilesDir>/<conversionId>`.
 * The lock is taken synchronously before the conversion starts and is always
 * released afterwards, whether the conversion resolves or rejects.
 *
 * @param {string} conversionId - identifier of this conversion; also the
 *   name of its directory under the compiles dir
 * @param {string} inputPath - path of the document to convert
 * @param {string} conversionType - conversion type forwarded to `convertToLaTeX`
 * @returns {Promise<*>} whatever `convertToLaTeX` resolves with
 */
async function convertToLaTeXWithLock(conversionId, inputPath, conversionType) {
  const { compilesDir } = Settings.path
  const targetDir = Path.join(compilesDir, conversionId)

  // Acquired outside the try block: if acquiring throws there is nothing to release.
  const dirLock = LockManager.acquire(targetDir)
  try {
    const conversionResult = await convertToLaTeX(
      conversionId,
      targetDir,
      inputPath,
      conversionType
    )
    return conversionResult
  } finally {
    dirLock.release()
  }
}
/**
 * Convert an uploaded document to LaTeX with pandoc and bundle the result.
 *
 * Steps:
 *  1. copy `inputPath` into `conversionDir` under the fixed input name
 *     configured for `conversionType`;
 *  2. run pandoc in that directory, writing `main.tex`;
 *  3. delete the copied input so that only conversion output remains;
 *  4. zip everything left in the directory into `<random uuid>.zip`.
 *
 * On any failure the conversion directory is removed (best effort).
 *
 * @param {string} conversionId - identifier handed to CommandRunner
 * @param {string} conversionDir - working directory; created if missing
 * @param {string} inputPath - path of the document to convert
 * @param {string} conversionType - an own key of CONVERSION_CONFIGS
 * @returns {Promise<string>} path of the zip archive inside `conversionDir`
 * @throws {OError} 'unsupported conversion type' when `conversionType` has no
 *   config; 'pandoc conversion failed' (wrapping the underlying cause) when
 *   the pandoc or zip command fails
 * @throws {Error} the original filesystem error when the input cannot be
 *   staged into `conversionDir`
 */
async function convertToLaTeX(
  conversionId,
  conversionDir,
  inputPath,
  conversionType
) {
  // Own-property check: a plain lookup would also accept inherited keys such
  // as 'constructor' or 'toString' and fail later with an unrelated TypeError.
  if (!Object.hasOwn(CONVERSION_CONFIGS, conversionType)) {
    throw new OError('unsupported conversion type', { conversionType })
  }
  const config = CONVERSION_CONFIGS[conversionType]
  const newSourcePath = Path.join(conversionDir, config.inputFilename)

  try {
    await fs.mkdir(conversionDir, { recursive: true })
    await fs.copyFile(inputPath, newSourcePath)
  } catch (error) {
    // Staging failed before any command ran: do not leave the directory
    // behind, and rethrow the filesystem error unchanged.
    await fs.rm(conversionDir, { force: true, recursive: true }).catch(() => {})
    throw error
  }

  const outputName = crypto.randomUUID() + '.zip'
  try {
    const {
      stdout: stdoutPandoc,
      stderr: stderrPandoc,
      exitCode: exitCodePandoc,
    } = await CommandRunner.promises.run(
      conversionId,
      [
        'pandoc',
        config.inputFilename,
        '--output',
        'main.tex',
        '--to',
        'latex',
        '--standalone',
        ...config.pandocArgs,
      ],
      conversionDir,
      Settings.pandocImage,
      Settings.conversionTimeoutSeconds * 1000,
      {},
      'conversions'
    )
    if (exitCodePandoc !== 0) {
      throw new OError('Non-zero exit code from pandoc', {
        exitCode: exitCodePandoc,
        stderr: stderrPandoc,
      })
    }
    logger.debug(
      { stdout: stdoutPandoc, stderr: stderrPandoc, exitCode: exitCodePandoc },
      'conversion command completed'
    )

    // Clean up the source document to leave only the conversion result.
    // Best effort: a failed unlink must not fail the conversion.
    await fs.unlink(newSourcePath).catch(() => {})

    const {
      stdout: stdoutZip,
      stderr: stderrZip,
      exitCode: exitCodeZip,
    } = await CommandRunner.promises.run(
      conversionId,
      ['zip', '-r', outputName, '.'],
      conversionDir,
      Settings.pandocImage,
      Settings.conversionTimeoutSeconds * 1000,
      {},
      'conversions'
    )
    if (exitCodeZip !== 0) {
      // Name the tool that actually failed so the cause is not misattributed.
      throw new OError('Non-zero exit code from zip', {
        exitCode: exitCodeZip,
        stderr: stderrZip,
      })
    }
    logger.debug(
      { stdout: stdoutZip, stderr: stderrZip, exitCode: exitCodeZip },
      'conversion output compressed'
    )
  } catch (error) {
    // Clean up the conversion directory on error to avoid leaving failed conversions around
    await fs.rm(conversionDir, { force: true, recursive: true }).catch(() => {})
    throw new OError('pandoc conversion failed').withCause(error)
  }
  return Path.join(conversionDir, outputName)
}
/**
 * Run a LaTeX-to-document conversion inside `compileDir` while holding the
 * lock for that directory.
 *
 * The lock is taken synchronously before the conversion starts and is always
 * released afterwards, whether the conversion resolves or rejects. All
 * arguments are forwarded unchanged to `convertLaTeXToDocumentInDir`.
 *
 * @param {string} conversionId - identifier of this conversion
 * @param {string} compileDir - directory to lock and convert in
 * @param {string} [rootDocPath] - LaTeX document to convert; forwarded as-is,
 *   so `undefined` lets the callee apply its own default
 * @param {string} type - pandoc output format
 * @param {string} extension - file extension for the generated document
 * @returns {Promise<*>} whatever `convertLaTeXToDocumentInDir` resolves with
 */
async function convertLaTeXToDocumentInDirWithLock(
  conversionId,
  compileDir,
  rootDocPath,
  type,
  extension
) {
  // Acquired outside the try block: if acquiring throws there is nothing to release.
  const dirLock = LockManager.acquire(compileDir)
  try {
    const documentPath = await convertLaTeXToDocumentInDir(
      conversionId,
      compileDir,
      rootDocPath,
      type,
      extension
    )
    return documentPath
  } finally {
    dirLock.release()
  }
}
/**
 * Convert a LaTeX document that already lives in `compileDir` into another
 * format by running pandoc there.
 *
 * The output file is written next to the sources under a random UUID name
 * with the given extension.
 *
 * @param {string} conversionId - identifier handed to CommandRunner
 * @param {string} compileDir - directory containing the LaTeX sources
 * @param {string} [rootDocPath='main.tex'] - document given to pandoc as input
 * @param {string} type - pandoc output format (value of `--to`)
 * @param {string} extension - extension of the generated file, without the dot
 * @returns {Promise<string>} path of the generated document inside `compileDir`
 * @throws {OError} 'pandoc latex-to-document conversion failed' when pandoc
 *   exits with a non-zero code; the error info carries type, exit code,
 *   stdout and stderr
 */
async function convertLaTeXToDocumentInDir(
  conversionId,
  compileDir,
  rootDocPath = 'main.tex',
  type,
  extension
) {
  const outputName = `${crypto.randomUUID()}.${extension}`
  const timeoutMs = Settings.conversionTimeoutSeconds * 1000

  logger.debug(
    { compileDir, rootDocPath, type },
    'running pandoc latex-to-document in compile dir'
  )

  const pandocCommand = [
    'pandoc',
    rootDocPath,
    '--output',
    outputName,
    '--from',
    'latex',
    '--to',
    type,
    // resolve resources referenced by the document relative to the compile dir
    '--resource-path=.',
  ]
  const outcome = await CommandRunner.promises.run(
    conversionId,
    pandocCommand,
    compileDir,
    Settings.pandocImage,
    timeoutMs,
    {},
    'conversions'
  )

  if (outcome.exitCode === 0) {
    return Path.join(compileDir, outputName)
  }

  throw new OError('pandoc latex-to-document conversion failed', {
    type,
    exitCode: outcome.exitCode,
    stdout: outcome.stdout,
    stderr: outcome.stderr,
  })
}
// Public API of this module. Only the lock-guarded entry points are exposed,
// grouped under `promises`; the unlocked helpers stay module-private.
const promises = {
  convertToLaTeXWithLock,
  convertLaTeXToDocumentInDirWithLock,
}

export default { promises }