/*
 * Mirror of https://github.com/yu-i-i/overleaf-cep.git
 * Synced 2026-05-23 09:09:36 +02:00
 * Commit: [WEB + CLSI] Import markdown files using pandoc
 * GitOrigin-RevId: adad7831ddb13a8fcb8063871166bde13cbbf1b6
 * JavaScript, 193 lines, 4.5 KiB
 */
import logger from '@overleaf/logger'
|
|
import Settings from '@overleaf/settings'
|
|
import fs from 'node:fs/promises'
|
|
import Path from 'node:path'
|
|
import CommandRunner from './CommandRunner.js'
|
|
import LockManager from './LockManager.js'
|
|
import OError from '@overleaf/o-error'
|
|
|
|
/**
 * Per-format settings for conversions to LaTeX, keyed by conversion type.
 *
 * - `inputFilename`: name the input file is given inside the conversion
 *   directory before pandoc is run on it.
 * - `pandocArgs`: format-specific pandoc arguments, appended after the
 *   arguments common to every conversion.
 *
 * The table has a null prototype so that only the keys listed here resolve:
 * a lookup such as `CONVERSION_CONFIGS['constructor']` yields `undefined`
 * rather than a member inherited from `Object.prototype`, which would
 * otherwise pass a truthiness check on the looked-up config.
 */
const CONVERSION_CONFIGS = {
  __proto__: null,
  docx: {
    inputFilename: 'input.docx',
    pandocArgs: ['--extract-media=.', '--from', 'docx+citations', '--citeproc'],
  },
  markdown: {
    inputFilename: 'input.md',
    pandocArgs: ['--from', 'markdown'],
  },
}
|
|
|
|
/**
 * Convert an input document to LaTeX while holding the lock for its
 * conversion directory.
 *
 * The conversion directory is `<Settings.path.compilesDir>/<conversionId>`.
 * The lock is released once the conversion has settled, whether it
 * succeeded or threw.
 *
 * @param {string} conversionId - names the conversion directory and is
 *   forwarded to convertToLaTeX
 * @param {string} inputPath - path of the file to convert
 * @param {string} conversionType - conversion type forwarded to
 *   convertToLaTeX
 * @returns {Promise<string>} the value convertToLaTeX resolves with
 */
async function convertToLaTeXWithLock(conversionId, inputPath, conversionType) {
  const targetDir = Path.join(Settings.path.compilesDir, conversionId)
  const dirLock = LockManager.acquire(targetDir)
  try {
    const archivePath = await convertToLaTeX(
      conversionId,
      targetDir,
      inputPath,
      conversionType
    )
    return archivePath
  } finally {
    dirLock.release()
  }
}
|
|
|
|
/**
 * Convert the file at `inputPath` to LaTeX with pandoc and zip the result.
 *
 * Steps:
 *  1. create `conversionDir` and copy the input into it under the input
 *     filename configured for `conversionType`;
 *  2. run pandoc in that directory to write a standalone `main.tex`;
 *  3. delete the copied input so only conversion output remains;
 *  4. zip the directory contents into a randomly named archive.
 *
 * If pandoc or zip fails, `conversionDir` is removed and the failure is
 * rethrown wrapped in an OError. Errors from creating the directory or
 * copying the input propagate unwrapped and do not trigger that cleanup.
 *
 * @param {string} conversionId - forwarded to CommandRunner for both commands
 * @param {string} conversionDir - working directory for the conversion
 * @param {string} inputPath - path of the file to convert
 * @param {string} conversionType - a key of CONVERSION_CONFIGS
 * @returns {Promise<string>} path of the zip archive inside `conversionDir`
 * @throws {OError} 'unsupported conversion type' when `conversionType` is
 *   not a key of CONVERSION_CONFIGS; 'pandoc conversion failed' (with the
 *   underlying error as cause) when pandoc or zip fails
 */
async function convertToLaTeX(
  conversionId,
  conversionDir,
  inputPath,
  conversionType
) {
  // Own-property lookup: inherited names such as 'constructor' must be
  // rejected as unsupported instead of being treated as a config.
  const config = Object.hasOwn(CONVERSION_CONFIGS, conversionType)
    ? CONVERSION_CONFIGS[conversionType]
    : undefined
  if (!config) {
    throw new OError('unsupported conversion type', { conversionType })
  }

  await fs.mkdir(conversionDir, { recursive: true })
  const newSourcePath = Path.join(conversionDir, config.inputFilename)
  await fs.copyFile(inputPath, newSourcePath)
  const outputName = `${crypto.randomUUID()}.zip`

  // Both commands run through CommandRunner with identical settings.
  // NOTE(review): `{}` and 'conversions' are presumably the environment and
  // the compile group — confirm against CommandRunner.
  const runInConversionDir = command =>
    CommandRunner.promises.run(
      conversionId,
      command,
      conversionDir,
      Settings.pandocImage,
      Settings.conversionTimeoutSeconds * 1000,
      {},
      'conversions'
    )

  try {
    const pandoc = await runInConversionDir([
      'pandoc',
      config.inputFilename,
      '--output',
      'main.tex',
      '--to',
      'latex',
      '--standalone',
      ...config.pandocArgs,
    ])
    if (pandoc.exitCode !== 0) {
      throw new OError('Non-zero exit code from pandoc', {
        exitCode: pandoc.exitCode,
        stderr: pandoc.stderr,
      })
    }
    logger.debug(
      {
        stdout: pandoc.stdout,
        stderr: pandoc.stderr,
        exitCode: pandoc.exitCode,
      },
      'conversion command completed'
    )

    // Clean up the source document to leave only the conversion result.
    // Best effort: a failed unlink must not fail the conversion.
    await fs.unlink(newSourcePath).catch(() => {})

    const zip = await runInConversionDir(['zip', '-r', outputName, '.'])
    if (zip.exitCode !== 0) {
      // Name the command that actually failed (this is the zip step).
      throw new OError('Non-zero exit code from zip', {
        exitCode: zip.exitCode,
        stderr: zip.stderr,
      })
    }
    logger.debug(
      { stdout: zip.stdout, stderr: zip.stderr, exitCode: zip.exitCode },
      'conversion output compressed'
    )
  } catch (error) {
    // Clean up the conversion directory on error to avoid leaving failed
    // conversions around; cleanup failures are ignored so the original
    // error is the one reported.
    await fs.rm(conversionDir, { force: true, recursive: true }).catch(() => {})
    throw new OError('pandoc conversion failed').withCause(error)
  }

  return Path.join(conversionDir, outputName)
}
|
|
|
|
/**
 * Run convertLaTeXToDocumentInDir while holding the lock for `compileDir`.
 *
 * The lock is released once the conversion has settled, whether it
 * succeeded or threw. All arguments are forwarded unchanged.
 *
 * @param {string} conversionId - forwarded to convertLaTeXToDocumentInDir
 * @param {string} compileDir - directory to lock and convert in
 * @param {string} [rootDocPath] - LaTeX file to convert; when undefined the
 *   callee's default ('main.tex') applies
 * @param {string} type - pandoc output format
 * @param {string} extension - file extension for the generated document
 * @returns {Promise<string>} the value convertLaTeXToDocumentInDir resolves
 *   with
 */
async function convertLaTeXToDocumentInDirWithLock(
  conversionId,
  compileDir,
  rootDocPath,
  type,
  extension
) {
  const dirLock = LockManager.acquire(compileDir)
  try {
    const documentPath = await convertLaTeXToDocumentInDir(
      conversionId,
      compileDir,
      rootDocPath,
      type,
      extension
    )
    return documentPath
  } finally {
    dirLock.release()
  }
}
|
|
|
|
/**
 * Convert a LaTeX document inside `compileDir` to another format with
 * pandoc.
 *
 * The output is written into `compileDir` under a random UUID file name
 * with the given extension.
 *
 * @param {string} conversionId - forwarded to CommandRunner
 * @param {string} compileDir - directory pandoc runs in; also where the
 *   output file is written
 * @param {string} [rootDocPath='main.tex'] - LaTeX file handed to pandoc
 * @param {string} type - value passed to pandoc's `--to` option
 * @param {string} extension - extension appended to the output file name
 * @returns {Promise<string>} path of the generated document inside
 *   `compileDir`
 * @throws {OError} 'pandoc latex-to-document conversion failed' when pandoc
 *   exits with a non-zero code
 */
async function convertLaTeXToDocumentInDir(
  conversionId,
  compileDir,
  rootDocPath = 'main.tex',
  type,
  extension
) {
  const outputName = `${crypto.randomUUID()}.${extension}`
  const timeoutMs = Settings.conversionTimeoutSeconds * 1000

  logger.debug(
    { compileDir, rootDocPath, type },
    'running pandoc latex-to-document in compile dir'
  )

  const pandocCommand = [
    'pandoc',
    rootDocPath,
    '--output',
    outputName,
    '--from',
    'latex',
    '--to',
    type,
    '--resource-path=.',
  ]

  const result = await CommandRunner.promises.run(
    conversionId,
    pandocCommand,
    compileDir,
    Settings.pandocImage,
    timeoutMs,
    {},
    'conversions'
  )

  if (result.exitCode === 0) {
    return Path.join(compileDir, outputName)
  }

  throw new OError('pandoc latex-to-document conversion failed', {
    type,
    exitCode: result.exitCode,
    stdout: result.stdout,
    stderr: result.stderr,
  })
}
|
|
|
|
// Public API: only the lock-holding entry points are exposed, grouped under
// `promises`.
const promises = {
  convertToLaTeXWithLock,
  convertLaTeXToDocumentInDirWithLock,
}

export default { promises }
|