Files
overleaf-cep/services/clsi/app/js/ConversionManager.js
Davinder Singh be5a7b56c8 [WEB + CLSI] Download as docx file feature (#32851)
* use CLSI logic for fetching the project contents and skip the .zip export

* Use unique conversion directory for project-to-docx export to avoid corrupting the shared compile
  directory when a compile runs concurrently

* Remove X-Accel-Buffering header — not needed as CLSI does not run behind nginx

* moving log before sending the data

* Return CLSI stream directly instead of buffering to disk on web

  Previously convertProjectToDocx wrote the CLSI response to a temp file
  on disk, then the controller read it back to stream to the client.
  Now the stream is returned directly and piped to the response,
  avoiding unnecessary disk I/O on the web server.

* Use href redirect for docx export instead of fetching blob into memory

* make functions and files more generic so they can be used in the future for other document exports as well

* adding export-docx split test

* adding unit tests

* adding cypress E2E test

* format:fix

* renaming the route to download from convert

* adding new icon for export docx button

* format:fix

* remove unused showExportDocumentErrorToast export and add a guard against an invalid Content-Length header from CLSI

* format:fix

* refactor(clsi): move promisify(parse) into RequestParser

* refactor: generic conversion endpoint with type as route
  param

* refactor: use type→extension map for validated conversion types

* refactor(clsi): remove --standalone flag and fix rejection test

* fixing the href in cypress test

* renaming function

* adding type to Metrics.inc

* fix: rename exportProjectDocument, add WithLock wrapper and metrics type label

* format:fix

* fix: hide docx export from anonymous users and add WithLock wrapper

* format fix

* remove redundant Content-Length validation from DocumentConversionManager

* format:fix

* removing trailing icon

GitOrigin-RevId: e9764fefac2c4b625d23be9e942ea4a8b283c70d
2026-04-24 08:06:10 +00:00

171 lines
4.1 KiB
JavaScript

import logger from '@overleaf/logger'
import Settings from '@overleaf/settings'
import fs from 'node:fs/promises'
import Path from 'node:path'
import CommandRunner from './CommandRunner.js'
import LockManager from './LockManager.js'
import OError from '@overleaf/o-error'
/**
 * Run the docx-to-LaTeX conversion while holding the lock for its
 * conversion directory.
 *
 * The directory is `<Settings.path.compilesDir>/<conversionId>`. The lock
 * taken on it is released once the conversion settles, whether it resolved
 * or rejected.
 *
 * @param {string} conversionId - names the conversion directory and is
 *   forwarded to the conversion
 * @param {string} inputPath - path of the .docx file to convert
 * @returns {Promise<string>} the value convertDocxToLaTeX resolves with
 *   (the path of the generated zip archive)
 */
async function convertDocxToLaTeXWithLock(conversionId, inputPath) {
  const targetDir = Path.join(Settings.path.compilesDir, conversionId)
  const dirLock = LockManager.acquire(targetDir)

  let archivePath
  try {
    archivePath = await convertDocxToLaTeX(conversionId, targetDir, inputPath)
  } finally {
    // Always hand the directory back, even when the conversion failed.
    dirLock.release()
  }
  return archivePath
}
/**
 * Convert a .docx file into a zipped LaTeX project inside `conversionDir`.
 *
 * Steps:
 *  1. create `conversionDir` and copy the input into it as `input.docx`
 *  2. run pandoc to produce `main.tex` (media extracted alongside it)
 *  3. delete `input.docx` so only the conversion result remains
 *  4. zip the directory contents into `<random uuid>.zip`
 *
 * If step 2, 3 or 4 throws, the whole conversion directory is removed and
 * the error is rethrown wrapped in an OError('pandoc conversion failed').
 *
 * @param {string} conversionId - identifier forwarded to CommandRunner
 * @param {string} conversionDir - working directory for the conversion
 * @param {string} inputPath - path of the .docx file to convert
 * @returns {Promise<string>} path of the zip archive inside `conversionDir`
 * @throws {OError} when pandoc or zip fails; the underlying error is the cause
 */
async function convertDocxToLaTeX(conversionId, conversionDir, inputPath) {
  await fs.mkdir(conversionDir, { recursive: true })
  const newSourcePath = Path.join(conversionDir, 'input.docx')
  await fs.copyFile(inputPath, newSourcePath)
  const outputName = crypto.randomUUID() + '.zip'

  try {
    const pandocResult = await runDocxConversionStep(
      conversionId,
      'pandoc',
      [
        'pandoc',
        'input.docx',
        '--output',
        'main.tex',
        '--extract-media=.',
        '--from',
        'docx+citations',
        '--to',
        'latex',
        '--citeproc',
        '--standalone',
      ],
      conversionDir
    )
    logger.debug(pandocResult, 'conversion command completed')

    // Clean up the source document to leave only the conversion result.
    // Best effort: a failed unlink must not fail the conversion.
    await fs.unlink(newSourcePath).catch(() => {})

    const zipResult = await runDocxConversionStep(
      conversionId,
      'zip',
      ['zip', '-r', outputName, '.'],
      conversionDir
    )
    logger.debug(zipResult, 'conversion output compressed')
  } catch (error) {
    // Clean up the conversion directory on error to avoid leaving failed
    // conversions around. Best effort: the original error is what matters.
    await fs.rm(conversionDir, { force: true, recursive: true }).catch(() => {})
    throw new OError('pandoc conversion failed').withCause(error)
  }

  return Path.join(conversionDir, outputName)
}

/**
 * Run one command of the docx conversion through CommandRunner and reject
 * when it exits with a non-zero code, naming the tool that failed.
 *
 * @param {string} conversionId - identifier forwarded to CommandRunner
 * @param {string} toolName - tool name used in the error message
 * @param {string[]} command - command and arguments to run
 * @param {string} conversionDir - directory the command runs in
 * @returns {Promise<{stdout: *, stderr: *, exitCode: number}>} command output
 * @throws {OError} 'Non-zero exit code from <toolName>' with exitCode/stderr
 */
async function runDocxConversionStep(
  conversionId,
  toolName,
  command,
  conversionDir
) {
  const { stdout, stderr, exitCode } = await CommandRunner.promises.run(
    conversionId,
    command,
    conversionDir,
    Settings.pandocImage,
    Settings.conversionTimeoutSeconds * 1000,
    {},
    'conversions'
  )
  if (exitCode !== 0) {
    throw new OError(`Non-zero exit code from ${toolName}`, {
      exitCode,
      stderr,
    })
  }
  return { stdout, stderr, exitCode }
}
/**
 * Run the LaTeX-to-document conversion while holding the lock for the
 * compile directory.
 *
 * The lock on `compileDir` is released once the conversion settles, whether
 * it resolved or rejected. All arguments are forwarded unchanged to
 * convertLaTeXToDocumentInDir.
 *
 * @param {string} conversionId - identifier forwarded to the conversion
 * @param {string} compileDir - directory to lock and convert in
 * @param {string} [rootDocPath] - root document passed to the conversion
 * @param {string} type - target format passed to the conversion
 * @param {string} extension - file extension for the generated document
 * @returns {Promise<string>} the value convertLaTeXToDocumentInDir resolves
 *   with (the path of the generated document)
 */
async function convertLaTeXToDocumentInDirWithLock(
  conversionId,
  compileDir,
  rootDocPath,
  type,
  extension
) {
  const dirLock = LockManager.acquire(compileDir)

  let documentPath
  try {
    documentPath = await convertLaTeXToDocumentInDir(
      conversionId,
      compileDir,
      rootDocPath,
      type,
      extension
    )
  } finally {
    // Always hand the directory back, even when the conversion failed.
    dirLock.release()
  }
  return documentPath
}
/**
 * Convert a LaTeX document to another format by running pandoc inside the
 * given compile directory.
 *
 * The output is written to a file named `<random uuid>.<extension>` in
 * `compileDir`.
 *
 * @param {string} conversionId - identifier forwarded to CommandRunner
 * @param {string} compileDir - directory pandoc runs in and writes to
 * @param {string} [rootDocPath='main.tex'] - input document passed to pandoc
 * @param {string} type - value passed to pandoc's `--to` option
 * @param {string} extension - file extension of the generated document
 * @returns {Promise<string>} path of the generated document in `compileDir`
 * @throws {OError} when pandoc exits with a non-zero code
 */
async function convertLaTeXToDocumentInDir(
  conversionId,
  compileDir,
  rootDocPath = 'main.tex',
  type,
  extension
) {
  const generatedFileName = `${crypto.randomUUID()}.${extension}`
  const timeoutMs = Settings.conversionTimeoutSeconds * 1000
  const pandocCommand = [
    'pandoc',
    rootDocPath,
    '--output',
    generatedFileName,
    '--from',
    'latex',
    '--to',
    type,
    '--resource-path=.',
  ]

  logger.debug(
    { compileDir, rootDocPath, type },
    'running pandoc latex-to-document in compile dir'
  )

  const result = await CommandRunner.promises.run(
    conversionId,
    pandocCommand,
    compileDir,
    Settings.pandocImage,
    timeoutMs,
    {},
    'conversions'
  )

  if (result.exitCode === 0) {
    return Path.join(compileDir, generatedFileName)
  }

  // Surface everything pandoc reported so the failure can be diagnosed.
  const { exitCode, stdout, stderr } = result
  throw new OError('pandoc latex-to-document conversion failed', {
    type,
    exitCode,
    stdout,
    stderr,
  })
}
// Public API: only the lock-holding wrappers are exposed, under `promises`.
const promises = {
  convertDocxToLaTeXWithLock,
  convertLaTeXToDocumentInDirWithLock,
}

export default { promises }