Merge pull request #33089 from overleaf/ds-export-md-files-pandoc

[WEB + CLSI] Download as markdown

GitOrigin-RevId: 181eddf2513e9c5edacbab37e93f9cac2191ee1a
This commit is contained in:
Mathias Jakobsen
2026-05-07 12:19:13 +01:00
committed by Copybot
parent eddcc5a42e
commit 5dc67db403
13 changed files with 377 additions and 30 deletions

View File

@@ -9,7 +9,10 @@ import { pipeline } from 'node:stream/promises'
import Settings from '@overleaf/settings'
import Path from 'node:path'
const SUPPORTED_CONVERSION_TYPES = new Map([['docx', 'docx']])
// Supported values of the `type` query parameter, each mapped to the file
// extension used for the downloaded attachment.
const CONVERSION_CONFIGS = {
  docx: {
    extension: 'docx',
  },
  // Markdown exports are served with a .zip extension.
  markdown: {
    extension: 'zip',
  },
}
async function convertDocumentToLaTeX(req, res) {
const { path } = req.file
@@ -57,10 +60,10 @@ async function convertProjectToDocument(req, res) {
}
const type = req.query.type
const extension = SUPPORTED_CONVERSION_TYPES.get(type)
if (!extension) {
if (!Object.hasOwn(CONVERSION_CONFIGS, type)) {
return res.sendStatus(400)
}
const config = CONVERSION_CONFIGS[type]
const request = await RequestParser.promises.parse(req.body)
request.project_id = req.params.project_id
@@ -88,13 +91,12 @@ async function convertProjectToDocument(req, res) {
conversionId,
conversionDir,
request.rootResourcePath,
type,
extension
type
)
const documentStat = await fs.stat(documentPath)
res.setHeader('Content-Length', documentStat.size)
res.attachment(`output.${extension}`)
res.attachment(`output.${config.extension}`)
res.setHeader('X-Content-Type-Options', 'nosniff')
const readStream = fsSync.createReadStream(documentPath)
await pipeline(readStream, res)

View File

@@ -116,12 +116,41 @@ async function convertToLaTeX(
return Path.join(conversionDir, outputName)
}
/**
 * Builds the pandoc arguments common to every LaTeX export: the output
 * location, the LaTeX input format, the requested output format and the
 * resource path.
 *
 * @param {string} outputPath - path pandoc should write to
 * @param {string} targetFormat - value passed to pandoc's `--to`
 * @returns {string[]}
 */
function buildLatexExportArgs(outputPath, targetFormat) {
  return [
    '--output',
    outputPath,
    '--from',
    'latex',
    '--to',
    targetFormat,
    '--resource-path=.',
  ]
}

/**
 * Per-type export settings, keyed by conversion type:
 *  - fileExtension: extension of the file pandoc writes
 *  - compressOutput: whether the pandoc output is zipped before being returned
 *  - getPandocArgs: returns the pandoc arguments that follow the input path
 */
const LATEX_EXPORT_CONFIGS = {
  docx: {
    fileExtension: 'docx',
    compressOutput: false,
    getPandocArgs({ outputPath }) {
      return buildLatexExportArgs(outputPath, 'docx')
    },
  },
  markdown: {
    fileExtension: 'md',
    compressOutput: true,
    getPandocArgs({ outputPath, subdirName }) {
      // Media is extracted into the same subdirectory as the markdown file.
      const extractMediaFlag = `--extract-media=${subdirName}`
      return [...buildLatexExportArgs(outputPath, 'markdown'), extractMediaFlag]
    },
  },
}
async function convertLaTeXToDocumentInDirWithLock(
conversionId,
compileDir,
rootDocPath,
type,
extension
type
) {
const lock = LockManager.acquire(compileDir)
try {
@@ -129,8 +158,7 @@ async function convertLaTeXToDocumentInDirWithLock(
conversionId,
compileDir,
rootDocPath,
type,
extension
type
)
} finally {
lock.release()
@@ -141,11 +169,25 @@ async function convertLaTeXToDocumentInDir(
conversionId,
compileDir,
rootDocPath = 'main.tex',
type,
extension
type
) {
const outputName = crypto.randomUUID() + '.' + extension
if (!Object.hasOwn(LATEX_EXPORT_CONFIGS, type)) {
throw new OError('unsupported conversion type', { type })
}
const config = LATEX_EXPORT_CONFIGS[type]
const timeoutMs = Settings.conversionTimeoutSeconds * 1000
const outputId = crypto.randomUUID()
let pandocOutputPath, finalOutputName
if (config.compressOutput) {
await fs.mkdir(Path.join(compileDir, outputId), { recursive: true })
pandocOutputPath = Path.join(outputId, `main.${config.fileExtension}`)
finalOutputName = outputId + '.zip'
} else {
pandocOutputPath = outputId + '.' + config.fileExtension
finalOutputName = pandocOutputPath
}
logger.debug(
{ compileDir, rootDocPath, type },
@@ -157,13 +199,10 @@ async function convertLaTeXToDocumentInDir(
[
'pandoc',
rootDocPath,
'--output',
outputName,
'--from',
'latex',
'--to',
type,
'--resource-path=.',
...config.getPandocArgs({
outputPath: pandocOutputPath,
subdirName: outputId,
}),
],
compileDir,
Settings.pandocImage,
@@ -181,7 +220,43 @@ async function convertLaTeXToDocumentInDir(
})
}
return Path.join(compileDir, outputName)
logger.debug(
{ stdout, stderr, exitCode },
'pandoc latex-to-document conversion completed'
)
if (!config.compressOutput) {
return Path.join(compileDir, finalOutputName)
}
const {
exitCode: zipExitCode,
stdout: zipStdout,
stderr: zipStderr,
} = await CommandRunner.promises.run(
conversionId,
['sh', '-c', `cd ${outputId} && zip -r ../${finalOutputName} .`],
compileDir,
Settings.pandocImage,
timeoutMs,
{},
'conversions'
)
if (zipExitCode !== 0) {
throw new OError('zip compression of export failed', {
exitCode: zipExitCode,
stdout: zipStdout,
stderr: zipStderr,
})
}
logger.debug(
{ stdout: zipStdout, stderr: zipStderr, exitCode: zipExitCode },
'export compressed'
)
return Path.join(compileDir, finalOutputName)
}
export default {

View File

@@ -339,7 +339,7 @@ describe('ConversionController', function () {
)
})
it('should call convertLaTeXToDocumentInDirWithLock with docx type and extension', function (ctx) {
it('should call convertLaTeXToDocumentInDirWithLock with docx type', function (ctx) {
sinon.assert.calledWith(
ctx.ConversionManager.promises.convertLaTeXToDocumentInDirWithLock,
sinon.match(
@@ -347,7 +347,6 @@ describe('ConversionController', function () {
),
sinon.match(uuidDirPattern),
'main.tex',
'docx',
'docx'
)
})
@@ -385,6 +384,37 @@ describe('ConversionController', function () {
})
})
describe('with conversionType=markdown', function () {
  // The conversion id is a bare UUID; the conversion dir is that kind of UUID
  // under /compiles.
  const conversionIdPattern =
    /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/
  const conversionDirPattern =
    /^\/compiles\/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/

  beforeEach(async function (ctx) {
    ctx.req.query = { type: 'markdown', projectName: 'My_Project' }
    ctx.fs.stat.resolves(ctx.documentStat)

    const next = sinon.stub()
    await ctx.ConversionController.convertProjectToDocument(
      ctx.req,
      ctx.res,
      next
    )
  })

  it('should call convertLaTeXToDocumentInDirWithLock with type=markdown', function (ctx) {
    const { convertLaTeXToDocumentInDirWithLock } =
      ctx.ConversionManager.promises
    sinon.assert.calledWith(
      convertLaTeXToDocumentInDirWithLock,
      sinon.match(conversionIdPattern),
      sinon.match(conversionDirPattern),
      'main.tex',
      'markdown'
    )
  })

  it('should set the attachment filename with .zip extension', function (ctx) {
    // The markdown type is served as output.zip, not output.md.
    sinon.assert.calledWith(ctx.res.attachment, 'output.zip')
  })
})
describe('when conversion fails', function () {
beforeEach(async function (ctx) {
ctx.next = sinon.stub()

View File

@@ -362,8 +362,7 @@ describe('ConversionManager', function () {
ctx.conversionId,
ctx.compileDir,
ctx.rootDocPath,
ctx.type,
ctx.extension
ctx.type
)
})
@@ -424,7 +423,6 @@ describe('ConversionManager', function () {
ctx.conversionId,
ctx.compileDir,
'main.tex',
'docx',
'docx'
)
).to.be.rejectedWith('pandoc latex-to-document conversion failed')
@@ -433,4 +431,131 @@ describe('ConversionManager', function () {
})
})
})
// Covers the markdown export path: pandoc writes main.md (plus extracted
// media) into a dedicated subdirectory, which is then zipped by a second
// command. NOTE(review): 'output-uuid' is presumably the value the stubbed
// crypto.randomUUID returns in this suite - confirm against the shared setup.
describe('convertLaTeXToDocumentInDirWithLock (type=markdown)', function () {
  describe('successfully', function () {
    beforeEach(async function (ctx) {
      ctx.compileDir = '/compiles/test-compile-dir'
      ctx.rootDocPath = 'main.tex'

      ctx.result =
        await ctx.ConversionManager.promises.convertLaTeXToDocumentInDirWithLock(
          ctx.conversionId,
          ctx.compileDir,
          ctx.rootDocPath,
          'markdown'
        )
    })

    it('should acquire a lock on the compile dir', function (ctx) {
      sinon.assert.calledWith(ctx.LockManager.acquire, ctx.compileDir)
    })

    it('should release the lock', function (ctx) {
      sinon.assert.called(ctx.lock.release)
    })

    it('should create a UUID-named subdirectory', function (ctx) {
      sinon.assert.calledWith(
        ctx.fs.mkdir,
        Path.join(ctx.compileDir, 'output-uuid'),
        { recursive: true }
      )
    })

    it('should run pandoc then zip (two commands total)', function (ctx) {
      expect(ctx.CommandRunner.promises.run.callCount).toBe(2)
    })

    it('should run pandoc outputting main.md into the UUID-named subdir', function (ctx) {
      expect(ctx.CommandRunner.promises.run.firstCall.args).toEqual([
        ctx.conversionId,
        [
          'pandoc',
          ctx.rootDocPath,
          '--output',
          Path.join('output-uuid', 'main.md'),
          '--from',
          'latex',
          '--to',
          'markdown',
          '--resource-path=.',
          '--extract-media=output-uuid',
        ],
        ctx.compileDir,
        ctx.Settings.pandocImage,
        60_000,
        {},
        'conversions',
      ])
    })

    // The zipped directory is the UUID-named one created above; the title
    // previously called it "project-named", which did not match the command.
    it('should zip the UUID-named subdirectory', function (ctx) {
      expect(ctx.CommandRunner.promises.run.secondCall.args).toEqual([
        ctx.conversionId,
        ['sh', '-c', 'cd output-uuid && zip -r ../output-uuid.zip .'],
        ctx.compileDir,
        ctx.Settings.pandocImage,
        60_000,
        {},
        'conversions',
      ])
    })

    it('should return the path to the zip file', function (ctx) {
      expect(ctx.result).toBe(Path.join(ctx.compileDir, 'output-uuid.zip'))
    })

    it('should convert conversion timeout to milliseconds', function (ctx) {
      // Both the pandoc and the zip command receive the same timeout.
      expect(ctx.CommandRunner.promises.run.firstCall.args[4]).toBe(60_000)
      expect(ctx.CommandRunner.promises.run.secondCall.args[4]).toBe(60_000)
    })
  })

  describe('when pandoc fails (non-zero exit code)', function () {
    it('should reject with an error and release the lock', async function (ctx) {
      ctx.compileDir = '/compiles/test-compile-dir'
      ctx.CommandRunner.promises.run.resolves({
        stdout: 'mock-stdout',
        stderr: 'mock-stderr',
        exitCode: 1,
      })

      await expect(
        ctx.ConversionManager.promises.convertLaTeXToDocumentInDirWithLock(
          ctx.conversionId,
          ctx.compileDir,
          'main.tex',
          'markdown'
        )
      ).to.be.rejectedWith('pandoc latex-to-document conversion failed')

      sinon.assert.called(ctx.lock.release)
    })
  })

  describe('when zip fails (non-zero exit code)', function () {
    it('should reject with an error and release the lock', async function (ctx) {
      ctx.compileDir = '/compiles/test-compile-dir'
      // First call (pandoc) succeeds, second call (zip) fails.
      ctx.CommandRunner.promises.run
        .onFirstCall()
        .resolves({ stdout: '', stderr: '', exitCode: 0 })
        .onSecondCall()
        .resolves({ stdout: '', stderr: 'zip error', exitCode: 1 })

      await expect(
        ctx.ConversionManager.promises.convertLaTeXToDocumentInDirWithLock(
          ctx.conversionId,
          ctx.compileDir,
          'main.tex',
          'markdown'
        )
      ).to.be.rejectedWith('zip compression of export failed')

      sinon.assert.called(ctx.lock.release)
    })
  })
})
})

View File

@@ -9,7 +9,10 @@ import DocumentConversionManager from '../Uploads/DocumentConversionManager.mjs'
import { expressify } from '@overleaf/promise-utils'
import { pipeline } from 'node:stream/promises'
const SUPPORTED_CONVERSION_TYPES = new Map([['docx', 'docx']])
// Conversion type -> second value of each entry (presumably the extension of
// the downloaded file; the lookup is outside this view - confirm).
const SUPPORTED_CONVERSION_TYPES = new Map(
  Object.entries({
    docx: 'docx',
    markdown: 'zip',
  })
)
// Keep in sync with the logic for PDF files in CompileController
function getSafeProjectName(project) {
@@ -30,14 +33,14 @@ async function exportProjectConversion(req, res) {
name: true,
})
const safeFileName = getSafeProjectName(project)
const { stream, contentLength } =
await DocumentConversionManager.promises.convertProjectToDocument(
projectId,
userId,
type
)
const safeFileName = getSafeProjectName(project)
res.setHeader('Content-Length', contentLength)
res.attachment(`${safeFileName}.${extension}`)
res.setHeader('X-Content-Type-Options', 'nosniff')

View File

@@ -483,6 +483,7 @@ const _ProjectController = {
'overleaf-code',
'export-docx',
'sharing-updates',
'export-markdown',
].filter(Boolean)
const getUserValues = async userId =>

View File

@@ -631,6 +631,7 @@
"expires_in_days": "",
"expires_on": "",
"export_as_docx": "",
"export_as_markdown": "",
"export_csv": "",
"export_document_error": "",
"export_project_to_github": "",

View File

@@ -142,3 +142,43 @@ export const ExportProjectDocx = () => {
</OLDropdownMenuItem>
)
}
export const ExportProjectMarkdown = () => {
const { t } = useTranslation()
const { projectId } = useProjectContext()
const exportMarkdownEnabled = useFeatureFlag('export-markdown')
const enablePandocConversions =
getMeta('ol-ExposedSettings')?.enablePandocConversions
const anonymous = getMeta('ol-anonymous')
const showExportMarkdown =
exportMarkdownEnabled && enablePandocConversions && !anonymous
useCommandProvider(
() =>
showExportMarkdown
? [
{
id: 'export-as-markdown',
href: `/project/${projectId}/download/conversion/markdown`,
label: t('export_as_markdown'),
},
]
: [],
[t, showExportMarkdown, projectId]
)
if (!showExportMarkdown) {
return null
}
return (
<OLDropdownMenuItem
href={`/project/${projectId}/download/conversion/markdown`}
target="_blank"
rel="noreferrer"
>
{t('export_as_markdown')}
</OLDropdownMenuItem>
)
}

View File

@@ -95,6 +95,7 @@ export const ToolbarMenuBar = () => {
'download-as-source-zip',
'download-pdf',
'export-as-docx',
'export-as-markdown',
],
},
],

View File

@@ -14,6 +14,7 @@ import {
DownloadProjectPDF,
DownloadProjectZip,
ExportProjectDocx,
ExportProjectMarkdown,
} from './download-project'
import { useCallback, useState } from 'react'
import OLDropdownMenuItem from '@/shared/components/ol/ol-dropdown-menu-item'
@@ -81,6 +82,7 @@ export const ToolbarProjectTitle = () => {
<DownloadProjectPDF />
<DownloadProjectZip />
<ExportProjectDocx />
<ExportProjectMarkdown />
<DropdownDivider />
<DuplicateProject />
<OLDropdownMenuItem

View File

@@ -836,6 +836,7 @@
"expiry": "Expiry Date",
"explore_all_plans": "Explore all plans",
"export_as_docx": "Export as Word document (.docx)",
"export_as_markdown": "Export as Markdown (.md)",
"export_csv": "Export CSV",
"export_document_error": "Export failed. Please try again.",
"export_project_to_github": "Export Project to GitHub",

View File

@@ -340,5 +340,71 @@ describe('ProjectDownloadsController', function () {
sinon.assert.calledWith(ctx.pipeline, ctx.exportStream, ctx.res)
})
})
// Exercises exportProjectConversion for the markdown type: the conversion is
// requested with type 'markdown', the download is named <project>.zip, and the
// export is audited, counted in metrics and streamed to the response.
describe('with type=markdown', function () {
  beforeEach(async function (ctx) {
    ctx.projectId = 'test-project-id'
    ctx.userId = 'test-user-id'
    ctx.projectName = 'My Test Project'
    ctx.exportStream = { pipe: sinon.stub() }
    ctx.contentLength = 9876

    ctx.req.params = { Project_id: ctx.projectId, type: 'markdown' }
    ctx.req.session = { user: { _id: ctx.userId } }
    ctx.req.ip = '192.168.1.1'
    ctx.res.attachment = sinon.stub().returns(ctx.res)

    ctx.SessionManager.getLoggedInUserId.returns(ctx.userId)
    ctx.ProjectGetter.promises.getProject.resolves({
      name: ctx.projectName,
    })
    ctx.DocumentConversionManager.promises.convertProjectToDocument.resolves(
      {
        stream: ctx.exportStream,
        contentLength: ctx.contentLength,
      }
    )

    await ctx.ProjectDownloadsController.exportProjectConversion(
      ctx.req,
      ctx.res,
      ctx.next
    )
  })

  it('should call convertProjectToDocument with the markdown type', function (ctx) {
    sinon.assert.calledWith(
      ctx.DocumentConversionManager.promises.convertProjectToDocument,
      ctx.projectId,
      ctx.userId,
      'markdown'
    )
  })

  it('should set the attachment filename with .zip extension', function (ctx) {
    // Spaces in the project name appear as underscores in the filename.
    sinon.assert.calledWith(ctx.res.attachment, 'My_Test_Project.zip')
  })

  it('should add an audit log entry for markdown export', function (ctx) {
    sinon.assert.calledWith(
      ctx.ProjectAuditLogHandler.addEntryInBackground,
      ctx.projectId,
      'project-exported-markdown',
      ctx.userId,
      ctx.req.ip
    )
  })

  it('should record the action via Metrics with markdown type', function (ctx) {
    // sinon.assert matches the other assertions in this block and reports the
    // actual calls on failure instead of "expected false to equal true".
    sinon.assert.calledWith(ctx.Metrics.inc, 'document-exports', 1, {
      type: 'markdown',
    })
  })

  it('should stream the document to the response', function (ctx) {
    sinon.assert.calledWith(ctx.pipeline, ctx.exportStream, ctx.res)
  })
})
})
})

View File

@@ -311,7 +311,7 @@ describe('DocumentConversionManager', function () {
})
})
describe('successfully', function () {
describe('successfully converts the document', function () {
beforeEach(async function (ctx) {
ctx.result =
await ctx.DocumentConversionManager.promises.convertProjectToDocument(