mirror of
https://github.com/yu-i-i/overleaf-cep.git
synced 2026-05-23 09:09:36 +02:00
Merge pull request #33089 from overleaf/ds-export-md-files-pandoc
[WEB + CLSI] Download as markdown
GitOrigin-RevId: 181eddf2513e9c5edacbab37e93f9cac2191ee1a
This commit is contained in:
committed by
Copybot
parent
eddcc5a42e
commit
5dc67db403
@@ -9,7 +9,10 @@ import { pipeline } from 'node:stream/promises'
|
||||
import Settings from '@overleaf/settings'
|
||||
import Path from 'node:path'
|
||||
|
||||
const SUPPORTED_CONVERSION_TYPES = new Map([['docx', 'docx']])
|
||||
/**
 * Download settings for each project-export type accepted by
 * convertProjectToDocument, keyed by the value of the `type` query parameter.
 *
 * `extension` is the file extension used in the attachment name sent to the
 * client (`output.<extension>`). Markdown exports are served with a `zip`
 * extension.
 *
 * The table and its entries are frozen: it is shared module state that is
 * looked up with a request-supplied key, so it must not be modifiable at
 * runtime.
 */
const CONVERSION_CONFIGS = Object.freeze({
  docx: Object.freeze({ extension: 'docx' }),
  markdown: Object.freeze({ extension: 'zip' }),
})
|
||||
|
||||
async function convertDocumentToLaTeX(req, res) {
|
||||
const { path } = req.file
|
||||
@@ -57,10 +60,10 @@ async function convertProjectToDocument(req, res) {
|
||||
}
|
||||
|
||||
const type = req.query.type
|
||||
const extension = SUPPORTED_CONVERSION_TYPES.get(type)
|
||||
if (!extension) {
|
||||
if (!Object.hasOwn(CONVERSION_CONFIGS, type)) {
|
||||
return res.sendStatus(400)
|
||||
}
|
||||
const config = CONVERSION_CONFIGS[type]
|
||||
|
||||
const request = await RequestParser.promises.parse(req.body)
|
||||
request.project_id = req.params.project_id
|
||||
@@ -88,13 +91,12 @@ async function convertProjectToDocument(req, res) {
|
||||
conversionId,
|
||||
conversionDir,
|
||||
request.rootResourcePath,
|
||||
type,
|
||||
extension
|
||||
type
|
||||
)
|
||||
|
||||
const documentStat = await fs.stat(documentPath)
|
||||
res.setHeader('Content-Length', documentStat.size)
|
||||
res.attachment(`output.${extension}`)
|
||||
res.attachment(`output.${config.extension}`)
|
||||
res.setHeader('X-Content-Type-Options', 'nosniff')
|
||||
const readStream = fsSync.createReadStream(documentPath)
|
||||
await pipeline(readStream, res)
|
||||
|
||||
@@ -116,12 +116,41 @@ async function convertToLaTeX(
|
||||
return Path.join(conversionDir, outputName)
|
||||
}
|
||||
|
||||
/**
 * Builds the pandoc arguments that every LaTeX export format shares.
 *
 * @param {string} outputPath - path pandoc writes its output to
 * @param {string} format - pandoc writer name passed to `--to`
 * @returns {string[]} arguments in the order `--output`, `--from latex`,
 *   `--to <format>`, `--resource-path=.`
 */
function buildLatexExportPandocArgs(outputPath, format) {
  return [
    '--output',
    outputPath,
    '--from',
    'latex',
    '--to',
    format,
    '--resource-path=.',
  ]
}

/**
 * Per-type settings for exporting a LaTeX project with pandoc, keyed by
 * conversion type.
 *
 * - `fileExtension`: extension of the file pandoc itself produces.
 * - `compressOutput`: when true, the caller has pandoc write into a dedicated
 *   subdirectory and zips that subdirectory afterwards.
 * - `getPandocArgs({ outputPath, subdirName })`: returns the arguments that
 *   follow `pandoc <rootDocPath>` on the command line. `subdirName` is only
 *   read by formats that extract media.
 *
 * The table and its entries are frozen so the shared configuration cannot be
 * modified at runtime.
 */
const LATEX_EXPORT_CONFIGS = Object.freeze({
  docx: Object.freeze({
    fileExtension: 'docx',
    compressOutput: false,
    getPandocArgs: ({ outputPath }) =>
      buildLatexExportPandocArgs(outputPath, 'docx'),
  }),
  markdown: Object.freeze({
    fileExtension: 'md',
    compressOutput: true,
    getPandocArgs: ({ outputPath, subdirName }) => [
      ...buildLatexExportPandocArgs(outputPath, 'markdown'),
      // Media referenced by the document is extracted into the same
      // subdirectory as the markdown file.
      `--extract-media=${subdirName}`,
    ],
  }),
})
|
||||
|
||||
async function convertLaTeXToDocumentInDirWithLock(
|
||||
conversionId,
|
||||
compileDir,
|
||||
rootDocPath,
|
||||
type,
|
||||
extension
|
||||
type
|
||||
) {
|
||||
const lock = LockManager.acquire(compileDir)
|
||||
try {
|
||||
@@ -129,8 +158,7 @@ async function convertLaTeXToDocumentInDirWithLock(
|
||||
conversionId,
|
||||
compileDir,
|
||||
rootDocPath,
|
||||
type,
|
||||
extension
|
||||
type
|
||||
)
|
||||
} finally {
|
||||
lock.release()
|
||||
@@ -141,11 +169,25 @@ async function convertLaTeXToDocumentInDir(
|
||||
conversionId,
|
||||
compileDir,
|
||||
rootDocPath = 'main.tex',
|
||||
type,
|
||||
extension
|
||||
type
|
||||
) {
|
||||
const outputName = crypto.randomUUID() + '.' + extension
|
||||
if (!Object.hasOwn(LATEX_EXPORT_CONFIGS, type)) {
|
||||
throw new OError('unsupported conversion type', { type })
|
||||
}
|
||||
const config = LATEX_EXPORT_CONFIGS[type]
|
||||
|
||||
const timeoutMs = Settings.conversionTimeoutSeconds * 1000
|
||||
const outputId = crypto.randomUUID()
|
||||
|
||||
let pandocOutputPath, finalOutputName
|
||||
if (config.compressOutput) {
|
||||
await fs.mkdir(Path.join(compileDir, outputId), { recursive: true })
|
||||
pandocOutputPath = Path.join(outputId, `main.${config.fileExtension}`)
|
||||
finalOutputName = outputId + '.zip'
|
||||
} else {
|
||||
pandocOutputPath = outputId + '.' + config.fileExtension
|
||||
finalOutputName = pandocOutputPath
|
||||
}
|
||||
|
||||
logger.debug(
|
||||
{ compileDir, rootDocPath, type },
|
||||
@@ -157,13 +199,10 @@ async function convertLaTeXToDocumentInDir(
|
||||
[
|
||||
'pandoc',
|
||||
rootDocPath,
|
||||
'--output',
|
||||
outputName,
|
||||
'--from',
|
||||
'latex',
|
||||
'--to',
|
||||
type,
|
||||
'--resource-path=.',
|
||||
...config.getPandocArgs({
|
||||
outputPath: pandocOutputPath,
|
||||
subdirName: outputId,
|
||||
}),
|
||||
],
|
||||
compileDir,
|
||||
Settings.pandocImage,
|
||||
@@ -181,7 +220,43 @@ async function convertLaTeXToDocumentInDir(
|
||||
})
|
||||
}
|
||||
|
||||
return Path.join(compileDir, outputName)
|
||||
logger.debug(
|
||||
{ stdout, stderr, exitCode },
|
||||
'pandoc latex-to-document conversion completed'
|
||||
)
|
||||
|
||||
if (!config.compressOutput) {
|
||||
return Path.join(compileDir, finalOutputName)
|
||||
}
|
||||
|
||||
const {
|
||||
exitCode: zipExitCode,
|
||||
stdout: zipStdout,
|
||||
stderr: zipStderr,
|
||||
} = await CommandRunner.promises.run(
|
||||
conversionId,
|
||||
['sh', '-c', `cd ${outputId} && zip -r ../${finalOutputName} .`],
|
||||
compileDir,
|
||||
Settings.pandocImage,
|
||||
timeoutMs,
|
||||
{},
|
||||
'conversions'
|
||||
)
|
||||
|
||||
if (zipExitCode !== 0) {
|
||||
throw new OError('zip compression of export failed', {
|
||||
exitCode: zipExitCode,
|
||||
stdout: zipStdout,
|
||||
stderr: zipStderr,
|
||||
})
|
||||
}
|
||||
|
||||
logger.debug(
|
||||
{ stdout: zipStdout, stderr: zipStderr, exitCode: zipExitCode },
|
||||
'export compressed'
|
||||
)
|
||||
|
||||
return Path.join(compileDir, finalOutputName)
|
||||
}
|
||||
|
||||
export default {
|
||||
|
||||
@@ -339,7 +339,7 @@ describe('ConversionController', function () {
|
||||
)
|
||||
})
|
||||
|
||||
it('should call convertLaTeXToDocumentInDirWithLock with docx type and extension', function (ctx) {
|
||||
it('should call convertLaTeXToDocumentInDirWithLock with docx type', function (ctx) {
|
||||
sinon.assert.calledWith(
|
||||
ctx.ConversionManager.promises.convertLaTeXToDocumentInDirWithLock,
|
||||
sinon.match(
|
||||
@@ -347,7 +347,6 @@ describe('ConversionController', function () {
|
||||
),
|
||||
sinon.match(uuidDirPattern),
|
||||
'main.tex',
|
||||
'docx',
|
||||
'docx'
|
||||
)
|
||||
})
|
||||
@@ -385,6 +384,37 @@ describe('ConversionController', function () {
|
||||
})
|
||||
})
|
||||
|
||||
describe('with conversionType=markdown', function () {
  // Expected shape of the first two arguments handed to the manager: a
  // UUID-formatted id and a /compiles/<uuid> directory.
  const conversionIdPattern =
    /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/
  const conversionDirPattern =
    /^\/compiles\/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/

  beforeEach(async function (ctx) {
    ctx.fs.stat.resolves(ctx.documentStat)
    ctx.req.query = { type: 'markdown', projectName: 'My_Project' }

    const next = sinon.stub()
    await ctx.ConversionController.convertProjectToDocument(
      ctx.req,
      ctx.res,
      next
    )
  })

  it('should call convertLaTeXToDocumentInDirWithLock with type=markdown', function (ctx) {
    const { convertLaTeXToDocumentInDirWithLock } =
      ctx.ConversionManager.promises

    sinon.assert.calledWith(
      convertLaTeXToDocumentInDirWithLock,
      sinon.match(conversionIdPattern),
      sinon.match(conversionDirPattern),
      'main.tex',
      'markdown'
    )
  })

  // The download is named with a .zip extension for markdown exports.
  it('should set the attachment filename with .zip extension', function (ctx) {
    sinon.assert.calledWith(ctx.res.attachment, 'output.zip')
  })
})
|
||||
|
||||
describe('when conversion fails', function () {
|
||||
beforeEach(async function (ctx) {
|
||||
ctx.next = sinon.stub()
|
||||
|
||||
@@ -362,8 +362,7 @@ describe('ConversionManager', function () {
|
||||
ctx.conversionId,
|
||||
ctx.compileDir,
|
||||
ctx.rootDocPath,
|
||||
ctx.type,
|
||||
ctx.extension
|
||||
ctx.type
|
||||
)
|
||||
})
|
||||
|
||||
@@ -424,7 +423,6 @@ describe('ConversionManager', function () {
|
||||
ctx.conversionId,
|
||||
ctx.compileDir,
|
||||
'main.tex',
|
||||
'docx',
|
||||
'docx'
|
||||
)
|
||||
).to.be.rejectedWith('pandoc latex-to-document conversion failed')
|
||||
@@ -433,4 +431,131 @@ describe('ConversionManager', function () {
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
// Markdown export: pandoc writes main.md into a dedicated subdirectory, which
// is then zipped by a second command. These tests pin both commands, the
// returned path, and lock release on each failure path.
describe('convertLaTeXToDocumentInDirWithLock (type=markdown)', function () {
  describe('successfully', function () {
    beforeEach(async function (ctx) {
      ctx.compileDir = '/compiles/test-compile-dir'
      ctx.rootDocPath = 'main.tex'

      ctx.result =
        await ctx.ConversionManager.promises.convertLaTeXToDocumentInDirWithLock(
          ctx.conversionId,
          ctx.compileDir,
          ctx.rootDocPath,
          'markdown'
        )
    })

    it('should acquire a lock on the compile dir', function (ctx) {
      sinon.assert.calledWith(ctx.LockManager.acquire, ctx.compileDir)
    })

    it('should release the lock', function (ctx) {
      sinon.assert.called(ctx.lock.release)
    })

    // NOTE(review): 'output-uuid' is presumably the value returned by the
    // suite's crypto.randomUUID stub; confirm against the shared setup.
    it('should create a UUID-named subdirectory', function (ctx) {
      sinon.assert.calledWith(
        ctx.fs.mkdir,
        Path.join(ctx.compileDir, 'output-uuid'),
        { recursive: true }
      )
    })

    it('should run pandoc then zip (two commands total)', function (ctx) {
      expect(ctx.CommandRunner.promises.run.callCount).toBe(2)
    })

    it('should run pandoc outputting main.md into the UUID-named subdir', function (ctx) {
      expect(ctx.CommandRunner.promises.run.firstCall.args).toEqual([
        ctx.conversionId,
        [
          'pandoc',
          ctx.rootDocPath,
          '--output',
          Path.join('output-uuid', 'main.md'),
          '--from',
          'latex',
          '--to',
          'markdown',
          '--resource-path=.',
          '--extract-media=output-uuid',
        ],
        ctx.compileDir,
        ctx.Settings.pandocImage,
        // NOTE(review): assumes the stubbed conversionTimeoutSeconds is 60.
        60_000,
        {},
        'conversions',
      ])
    })

    // The zipped directory is the UUID-named one created above, not a
    // directory named after the project.
    it('should zip the UUID-named subdirectory', function (ctx) {
      expect(ctx.CommandRunner.promises.run.secondCall.args).toEqual([
        ctx.conversionId,
        ['sh', '-c', 'cd output-uuid && zip -r ../output-uuid.zip .'],
        ctx.compileDir,
        ctx.Settings.pandocImage,
        60_000,
        {},
        'conversions',
      ])
    })

    it('should return the path to the zip file', function (ctx) {
      expect(ctx.result).toBe(Path.join(ctx.compileDir, 'output-uuid.zip'))
    })

    it('should convert conversion timeout to milliseconds', function (ctx) {
      expect(ctx.CommandRunner.promises.run.firstCall.args[4]).toBe(60_000)
      expect(ctx.CommandRunner.promises.run.secondCall.args[4]).toBe(60_000)
    })
  })

  describe('when pandoc fails (non-zero exit code)', function () {
    it('should reject with an error and release the lock', async function (ctx) {
      ctx.compileDir = '/compiles/test-compile-dir'

      // Every command run reports failure, so the first (pandoc) call fails.
      ctx.CommandRunner.promises.run.resolves({
        stdout: 'mock-stdout',
        stderr: 'mock-stderr',
        exitCode: 1,
      })

      await expect(
        ctx.ConversionManager.promises.convertLaTeXToDocumentInDirWithLock(
          ctx.conversionId,
          ctx.compileDir,
          'main.tex',
          'markdown'
        )
      ).to.be.rejectedWith('pandoc latex-to-document conversion failed')

      sinon.assert.called(ctx.lock.release)
    })
  })

  describe('when zip fails (non-zero exit code)', function () {
    it('should reject with an error and release the lock', async function (ctx) {
      ctx.compileDir = '/compiles/test-compile-dir'

      // The first call (pandoc) succeeds; the second call (zip) fails.
      ctx.CommandRunner.promises.run
        .onFirstCall()
        .resolves({ stdout: '', stderr: '', exitCode: 0 })
        .onSecondCall()
        .resolves({ stdout: '', stderr: 'zip error', exitCode: 1 })

      await expect(
        ctx.ConversionManager.promises.convertLaTeXToDocumentInDirWithLock(
          ctx.conversionId,
          ctx.compileDir,
          'main.tex',
          'markdown'
        )
      ).to.be.rejectedWith('zip compression of export failed')

      sinon.assert.called(ctx.lock.release)
    })
  })
})
|
||||
})
|
||||
|
||||
Reference in New Issue
Block a user