From 5dc67db403efa8c01d568c5cae01cb91045b475c Mon Sep 17 00:00:00 2001 From: Mathias Jakobsen Date: Thu, 7 May 2026 12:19:13 +0100 Subject: [PATCH] Merge pull request #33089 from overleaf/ds-export-md-files-pandoc [WEB + CLSI] Download as markdown GitOrigin-RevId: 181eddf2513e9c5edacbab37e93f9cac2191ee1a --- services/clsi/app/js/ConversionController.js | 14 +- services/clsi/app/js/ConversionManager.js | 105 ++++++++++++-- .../test/unit/js/ConversionController.test.js | 34 ++++- .../test/unit/js/ConversionManager.test.js | 131 +++++++++++++++++- .../Downloads/ProjectDownloadsController.mjs | 9 +- .../Features/Project/ProjectController.mjs | 1 + .../web/frontend/extracted-translations.json | 1 + .../components/toolbar/download-project.tsx | 40 ++++++ .../ide-react/components/toolbar/menu-bar.tsx | 1 + .../components/toolbar/project-title.tsx | 2 + services/web/locales/en.json | 1 + .../ProjectDownloadsController.test.mjs | 66 +++++++++ .../DocumentConversionManager.test.mjs | 2 +- 13 files changed, 377 insertions(+), 30 deletions(-) diff --git a/services/clsi/app/js/ConversionController.js b/services/clsi/app/js/ConversionController.js index eccdd39ad9..7214143fe5 100644 --- a/services/clsi/app/js/ConversionController.js +++ b/services/clsi/app/js/ConversionController.js @@ -9,7 +9,10 @@ import { pipeline } from 'node:stream/promises' import Settings from '@overleaf/settings' import Path from 'node:path' -const SUPPORTED_CONVERSION_TYPES = new Map([['docx', 'docx']]) +const CONVERSION_CONFIGS = { + docx: { extension: 'docx' }, + markdown: { extension: 'zip' }, +} async function convertDocumentToLaTeX(req, res) { const { path } = req.file @@ -57,10 +60,10 @@ async function convertProjectToDocument(req, res) { } const type = req.query.type - const extension = SUPPORTED_CONVERSION_TYPES.get(type) - if (!extension) { + if (!Object.hasOwn(CONVERSION_CONFIGS, type)) { return res.sendStatus(400) } + const config = CONVERSION_CONFIGS[type] const request = await RequestParser.promises.parse(req.body) request.project_id = req.params.project_id @@ -88,13 +91,12 @@ async function convertProjectToDocument(req, res) { conversionId, conversionDir, request.rootResourcePath, - type, - extension + type ) const documentStat = await fs.stat(documentPath) res.setHeader('Content-Length', documentStat.size) - res.attachment(`output.${extension}`) + res.attachment(`output.${config.extension}`) res.setHeader('X-Content-Type-Options', 'nosniff') const readStream = fsSync.createReadStream(documentPath) await pipeline(readStream, res) diff --git a/services/clsi/app/js/ConversionManager.js b/services/clsi/app/js/ConversionManager.js index f8a0a3a049..4edd9ff333 100644 --- a/services/clsi/app/js/ConversionManager.js +++ b/services/clsi/app/js/ConversionManager.js @@ -116,12 +116,41 @@ async function convertToLaTeX( return Path.join(conversionDir, outputName) } +const LATEX_EXPORT_CONFIGS = { + docx: { + fileExtension: 'docx', + compressOutput: false, + getPandocArgs: ({ outputPath }) => [ + '--output', + outputPath, + '--from', + 'latex', + '--to', + 'docx', + '--resource-path=.', + ], + }, + markdown: { + fileExtension: 'md', + compressOutput: true, + getPandocArgs: ({ outputPath, subdirName }) => [ + '--output', + outputPath, + '--from', + 'latex', + '--to', + 'markdown', + '--resource-path=.', + `--extract-media=${subdirName}`, + ], + }, +} + async function convertLaTeXToDocumentInDirWithLock( conversionId, compileDir, rootDocPath, - type, - extension + type ) { const lock = LockManager.acquire(compileDir) try { @@ -129,8 +158,7 @@ async function convertLaTeXToDocumentInDirWithLock( conversionId, compileDir, rootDocPath, - type, - extension + type ) } finally { lock.release() @@ -141,11 +169,25 @@ async function convertLaTeXToDocumentInDir( conversionId, compileDir, rootDocPath = 'main.tex', - type, - extension + type ) { - const outputName = crypto.randomUUID() + '.' + extension + if (!Object.hasOwn(LATEX_EXPORT_CONFIGS, type)) { + throw new OError('unsupported conversion type', { type }) + } + const config = LATEX_EXPORT_CONFIGS[type] + const timeoutMs = Settings.conversionTimeoutSeconds * 1000 + const outputId = crypto.randomUUID() + + let pandocOutputPath, finalOutputName + if (config.compressOutput) { + await fs.mkdir(Path.join(compileDir, outputId), { recursive: true }) + pandocOutputPath = Path.join(outputId, `main.${config.fileExtension}`) + finalOutputName = outputId + '.zip' + } else { + pandocOutputPath = outputId + '.' + config.fileExtension + finalOutputName = pandocOutputPath + } logger.debug( { compileDir, rootDocPath, type }, @@ -157,13 +199,10 @@ async function convertLaTeXToDocumentInDir( [ 'pandoc', rootDocPath, - '--output', - outputName, - '--from', - 'latex', - '--to', - type, - '--resource-path=.', + ...config.getPandocArgs({ + outputPath: pandocOutputPath, + subdirName: outputId, + }), ], compileDir, Settings.pandocImage, @@ -181,7 +220,43 @@ async function convertLaTeXToDocumentInDir( }) } - return Path.join(compileDir, outputName) + logger.debug( + { stdout, stderr, exitCode }, + 'pandoc latex-to-document conversion completed' + ) + + if (!config.compressOutput) { + return Path.join(compileDir, finalOutputName) + } + + const { + exitCode: zipExitCode, + stdout: zipStdout, + stderr: zipStderr, + } = await CommandRunner.promises.run( + conversionId, + ['sh', '-c', `cd ${outputId} && zip -r ../${finalOutputName} .`], + compileDir, + Settings.pandocImage, + timeoutMs, + {}, + 'conversions' + ) + + if (zipExitCode !== 0) { + throw new OError('zip compression of export failed', { + exitCode: zipExitCode, + stdout: zipStdout, + stderr: zipStderr, + }) + } + + logger.debug( + { stdout: zipStdout, stderr: zipStderr, exitCode: zipExitCode }, + 'export compressed' + ) + + return Path.join(compileDir, finalOutputName) } export default { diff --git a/services/clsi/test/unit/js/ConversionController.test.js b/services/clsi/test/unit/js/ConversionController.test.js index 849a877646..a785d2139f 100644 --- a/services/clsi/test/unit/js/ConversionController.test.js +++ b/services/clsi/test/unit/js/ConversionController.test.js @@ -339,7 +339,7 @@ describe('ConversionController', function () { ) }) - it('should call convertLaTeXToDocumentInDirWithLock with docx type and extension', function (ctx) { + it('should call convertLaTeXToDocumentInDirWithLock with docx type', function (ctx) { sinon.assert.calledWith( ctx.ConversionManager.promises.convertLaTeXToDocumentInDirWithLock, sinon.match( @@ -347,7 +347,6 @@ describe('ConversionController', function () { ), sinon.match(uuidDirPattern), 'main.tex', - 'docx', 'docx' ) }) @@ -385,6 +384,37 @@ describe('ConversionController', function () { }) }) + describe('with conversionType=markdown', function () { + beforeEach(async function (ctx) { + ctx.req.query = { type: 'markdown', projectName: 'My_Project' } + ctx.fs.stat.resolves(ctx.documentStat) + + await ctx.ConversionController.convertProjectToDocument( + ctx.req, + ctx.res, + sinon.stub() + ) + }) + + it('should call convertLaTeXToDocumentInDirWithLock with type=markdown', function (ctx) { + sinon.assert.calledWith( + ctx.ConversionManager.promises.convertLaTeXToDocumentInDirWithLock, + sinon.match( + /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/ + ), + sinon.match( + /^\/compiles\/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/ + ), + 'main.tex', + 'markdown' + ) + }) + + it('should set the attachment filename with .zip extension', function (ctx) { + sinon.assert.calledWith(ctx.res.attachment, 'output.zip') + }) + }) + describe('when conversion fails', function () { beforeEach(async function (ctx) { ctx.next = sinon.stub() diff --git a/services/clsi/test/unit/js/ConversionManager.test.js b/services/clsi/test/unit/js/ConversionManager.test.js index 49ff520ee8..4f3d7f9f0e 100644 --- a/services/clsi/test/unit/js/ConversionManager.test.js +++ b/services/clsi/test/unit/js/ConversionManager.test.js @@ -362,8 +362,7 @@ describe('ConversionManager', function () { ctx.conversionId, ctx.compileDir, ctx.rootDocPath, - ctx.type, - ctx.extension + ctx.type ) }) @@ -424,7 +423,6 @@ describe('ConversionManager', function () { ctx.conversionId, ctx.compileDir, 'main.tex', - 'docx', 'docx' ) ).to.be.rejectedWith('pandoc latex-to-document conversion failed') @@ -433,4 +431,131 @@ describe('ConversionManager', function () { }) }) }) + + describe('convertLaTeXToDocumentInDirWithLock (type=markdown)', function () { + describe('successfully', function () { + beforeEach(async function (ctx) { + ctx.compileDir = '/compiles/test-compile-dir' + ctx.rootDocPath = 'main.tex' + + ctx.result = + await ctx.ConversionManager.promises.convertLaTeXToDocumentInDirWithLock( + ctx.conversionId, + ctx.compileDir, + ctx.rootDocPath, + 'markdown' + ) + }) + + it('should acquire a lock on the compile dir', function (ctx) { + sinon.assert.calledWith(ctx.LockManager.acquire, ctx.compileDir) + }) + + it('should release the lock', function (ctx) { + sinon.assert.called(ctx.lock.release) + }) + + it('should create a UUID-named subdirectory', function (ctx) { + sinon.assert.calledWith( + ctx.fs.mkdir, + Path.join(ctx.compileDir, 'output-uuid'), + { recursive: true } + ) + }) + + it('should run pandoc then zip (two commands total)', function (ctx) { + expect(ctx.CommandRunner.promises.run.callCount).toBe(2) + }) + + it('should run pandoc outputting main.md into the UUID-named subdir', function (ctx) { + expect(ctx.CommandRunner.promises.run.firstCall.args).toEqual([ + ctx.conversionId, + [ + 'pandoc', + ctx.rootDocPath, + '--output', + Path.join('output-uuid', 'main.md'), + '--from', + 'latex', + '--to', + 'markdown', + '--resource-path=.', + '--extract-media=output-uuid', + ], + ctx.compileDir, + ctx.Settings.pandocImage, + 60_000, + {}, + 'conversions', + ]) + }) + + it('should zip the project-named subdirectory', function (ctx) { + expect(ctx.CommandRunner.promises.run.secondCall.args).toEqual([ + ctx.conversionId, + ['sh', '-c', 'cd output-uuid && zip -r ../output-uuid.zip .'], + ctx.compileDir, + ctx.Settings.pandocImage, + 60_000, + {}, + 'conversions', + ]) + }) + + it('should return the path to the zip file', function (ctx) { + expect(ctx.result).toBe(Path.join(ctx.compileDir, 'output-uuid.zip')) + }) + + it('should convert conversion timeout to milliseconds', function (ctx) { + expect(ctx.CommandRunner.promises.run.firstCall.args[4]).toBe(60_000) + expect(ctx.CommandRunner.promises.run.secondCall.args[4]).toBe(60_000) + }) + }) + + describe('when pandoc fails (non-zero exit code)', function () { + it('should reject with an error and release the lock', async function (ctx) { + ctx.compileDir = '/compiles/test-compile-dir' + + ctx.CommandRunner.promises.run.resolves({ + stdout: 'mock-stdout', + stderr: 'mock-stderr', + exitCode: 1, + }) + + await expect( + ctx.ConversionManager.promises.convertLaTeXToDocumentInDirWithLock( + ctx.conversionId, + ctx.compileDir, + 'main.tex', + 'markdown' + ) + ).to.be.rejectedWith('pandoc latex-to-document conversion failed') + + sinon.assert.called(ctx.lock.release) + }) + }) + + describe('when zip fails (non-zero exit code)', function () { + it('should reject with an error and release the lock', async function (ctx) { + ctx.compileDir = '/compiles/test-compile-dir' + + ctx.CommandRunner.promises.run + .onFirstCall() + .resolves({ stdout: '', stderr: '', exitCode: 0 }) + .onSecondCall() + .resolves({ stdout: '', stderr: 'zip error', exitCode: 1 }) + + await expect( + ctx.ConversionManager.promises.convertLaTeXToDocumentInDirWithLock( + ctx.conversionId, + ctx.compileDir, + 'main.tex', + 'markdown' + ) + ).to.be.rejectedWith('zip compression of export failed') + + sinon.assert.called(ctx.lock.release) + }) + }) + }) }) diff --git a/services/web/app/src/Features/Downloads/ProjectDownloadsController.mjs b/services/web/app/src/Features/Downloads/ProjectDownloadsController.mjs index 520b5a712a..b39bb558ee 100644 --- a/services/web/app/src/Features/Downloads/ProjectDownloadsController.mjs +++ b/services/web/app/src/Features/Downloads/ProjectDownloadsController.mjs @@ -9,7 +9,10 @@ import DocumentConversionManager from '../Uploads/DocumentConversionManager.mjs' import { expressify } from '@overleaf/promise-utils' import { pipeline } from 'node:stream/promises' -const SUPPORTED_CONVERSION_TYPES = new Map([['docx', 'docx']]) +const SUPPORTED_CONVERSION_TYPES = new Map([ + ['docx', 'docx'], + ['markdown', 'zip'], +]) // Keep in sync with the logic for PDF files in CompileController function getSafeProjectName(project) { @@ -30,14 +33,14 @@ async function exportProjectConversion(req, res) { name: true, }) + const safeFileName = getSafeProjectName(project) + const { stream, contentLength } = await DocumentConversionManager.promises.convertProjectToDocument( projectId, userId, type ) - - const safeFileName = getSafeProjectName(project) res.setHeader('Content-Length', contentLength) res.attachment(`${safeFileName}.${extension}`) res.setHeader('X-Content-Type-Options', 'nosniff') diff --git a/services/web/app/src/Features/Project/ProjectController.mjs b/services/web/app/src/Features/Project/ProjectController.mjs index de991e6297..85d3befde1 100644 --- a/services/web/app/src/Features/Project/ProjectController.mjs +++ b/services/web/app/src/Features/Project/ProjectController.mjs @@ -483,6 +483,7 @@ const _ProjectController = { 'overleaf-code', 'export-docx', 'sharing-updates', + 'export-markdown', ].filter(Boolean) const getUserValues = async userId => diff --git a/services/web/frontend/extracted-translations.json b/services/web/frontend/extracted-translations.json index 535c5c8728..aa204c9d15 100644 --- a/services/web/frontend/extracted-translations.json +++ b/services/web/frontend/extracted-translations.json @@ -631,6 +631,7 @@ "expires_in_days": "", "expires_on": "", "export_as_docx": "", + "export_as_markdown": "", "export_csv": "", "export_document_error": "", "export_project_to_github": "", diff --git a/services/web/frontend/js/features/ide-react/components/toolbar/download-project.tsx b/services/web/frontend/js/features/ide-react/components/toolbar/download-project.tsx index 390480430a..db5ffd0713 100644 --- a/services/web/frontend/js/features/ide-react/components/toolbar/download-project.tsx +++ b/services/web/frontend/js/features/ide-react/components/toolbar/download-project.tsx @@ -142,3 +142,43 @@ export const ExportProjectDocx = () => { ) } + +export const ExportProjectMarkdown = () => { + const { t } = useTranslation() + const { projectId } = useProjectContext() + const exportMarkdownEnabled = useFeatureFlag('export-markdown') + const enablePandocConversions = + getMeta('ol-ExposedSettings')?.enablePandocConversions + const anonymous = getMeta('ol-anonymous') + + const showExportMarkdown = + exportMarkdownEnabled && enablePandocConversions && !anonymous + + useCommandProvider( + () => + showExportMarkdown + ? [ + { + id: 'export-as-markdown', + href: `/project/${projectId}/download/conversion/markdown`, + label: t('export_as_markdown'), + }, + ] + : [], + [t, showExportMarkdown, projectId] + ) + + if (!showExportMarkdown) { + return null + } + + return ( + + {t('export_as_markdown')} + + ) +} diff --git a/services/web/frontend/js/features/ide-react/components/toolbar/menu-bar.tsx b/services/web/frontend/js/features/ide-react/components/toolbar/menu-bar.tsx index a0a16427e8..ce3e7a498e 100644 --- a/services/web/frontend/js/features/ide-react/components/toolbar/menu-bar.tsx +++ b/services/web/frontend/js/features/ide-react/components/toolbar/menu-bar.tsx @@ -95,6 +95,7 @@ export const ToolbarMenuBar = () => { 'download-as-source-zip', 'download-pdf', 'export-as-docx', + 'export-as-markdown', ], }, ], diff --git a/services/web/frontend/js/features/ide-react/components/toolbar/project-title.tsx b/services/web/frontend/js/features/ide-react/components/toolbar/project-title.tsx index 50fa058041..45bd5974c0 100644 --- a/services/web/frontend/js/features/ide-react/components/toolbar/project-title.tsx +++ b/services/web/frontend/js/features/ide-react/components/toolbar/project-title.tsx @@ -14,6 +14,7 @@ import { DownloadProjectPDF, DownloadProjectZip, ExportProjectDocx, + ExportProjectMarkdown, } from './download-project' import { useCallback, useState } from 'react' import OLDropdownMenuItem from '@/shared/components/ol/ol-dropdown-menu-item' @@ -81,6 +82,7 @@ export const ToolbarProjectTitle = () => { +