From 9c97876268998ca4c246c1ad29f9c0bde866654e Mon Sep 17 00:00:00 2001 From: Mathias Jakobsen Date: Fri, 27 Mar 2026 08:47:07 +0000 Subject: [PATCH] [web]+clsi] Allow docx import via pandoc (#32004) Co-authored-by: Jakob Ackermann GitOrigin-RevId: 246b3290ec04867f71545b1a7c5d95d0f68379ff --- package-lock.json | 6 +- services/clsi/.gitignore | 1 + services/clsi/Makefile | 2 + services/clsi/app.js | 9 + services/clsi/app/js/ConversionController.js | 46 ++++ services/clsi/app/js/ConversionManager.js | 101 +++++++ services/clsi/app/js/FileUploadMiddleware.js | 31 +++ services/clsi/app/js/LocalCommandRunner.js | 2 +- services/clsi/buildscript.txt | 2 +- services/clsi/config/settings.defaults.cjs | 9 + services/clsi/docker-compose.ci.yml | 3 +- services/clsi/docker-compose.yml | 3 +- services/clsi/entrypoint.sh | 1 + services/clsi/package.json | 5 +- .../fixtures/conversion-source.docx | Bin 0 -> 8297 bytes .../test/acceptance/js/ConversionTests.js | 83 ++++++ .../clsi/test/acceptance/js/helpers/Client.js | 18 +- .../test/acceptance/js/helpers/ClsiApp.js | 5 + .../test/unit/js/ConversionController.test.js | 158 +++++++++++ .../test/unit/js/ConversionManager.test.js | 253 +++++++++++++++++ .../clsi/test/unit/js/DockerRunner.test.js | 22 +- .../src/Features/Compile/CompileManager.mjs | 15 +- .../Project/ProjectListController.mjs | 14 +- .../Uploads/DocumentConversionManager.mjs | 84 ++++++ .../Uploads/ProjectUploadController.mjs | 52 +++- .../src/Features/Uploads/UploadsRouter.mjs | 8 + .../web/frontend/extracted-translations.json | 1 + .../components/new-project-button.tsx | 16 ++ .../new-project-button/import-docx-modal.tsx | 127 +++++++++ .../new-project-button-modal.tsx | 8 + ...me-message-create-new-project-dropdown.tsx | 15 + services/web/locales/en.json | 1 + services/web/package.json | 1 + .../components/welcome-message.test.tsx | 11 +- .../DocumentConversionManager.test.mjs | 257 ++++++++++++++++++ .../Uploads/ProjectUploadController.test.mjs | 128 ++++++++- 36 files changed, 1469 insertions(+), 29 deletions(-) create mode 100644 services/clsi/app/js/ConversionController.js create mode 100644 services/clsi/app/js/ConversionManager.js create mode 100644 services/clsi/app/js/FileUploadMiddleware.js create mode 100644 services/clsi/test/acceptance/fixtures/conversion-source.docx create mode 100644 services/clsi/test/acceptance/js/ConversionTests.js create mode 100644 services/clsi/test/unit/js/ConversionController.test.js create mode 100644 services/clsi/test/unit/js/ConversionManager.test.js create mode 100644 services/web/app/src/Features/Uploads/DocumentConversionManager.mjs create mode 100644 services/web/frontend/js/features/project-list/components/new-project-button/import-docx-modal.tsx create mode 100644 services/web/test/unit/src/Uploads/DocumentConversionManager.test.mjs diff --git a/package-lock.json b/package-lock.json index 226a476472..8026488726 100644 --- a/package-lock.json +++ b/package-lock.json @@ -50568,6 +50568,7 @@ "dockerode": "^4.0.9", "express": "4.22.1", "lodash": "^4.17.21", + "multer": "2.1.1", "overleaf-editor-core": "*", "p-limit": "^3.1.0", "request": "2.88.2", @@ -50579,6 +50580,7 @@ "@istanbuljs/esm-loader-hook": "^0.3.0", "chai": "^4.3.6", "chai-as-promised": "^7.1.1", + "form-data": "^4.0.5", "mocha": "^11.1.0", "mocha-junit-reporter": "^2.2.1", "mocha-multi-reporters": "^1.5.1", @@ -50589,7 +50591,8 @@ "sinon-chai": "^3.7.0", "timekeeper": "2.2.0", "typescript": "^5.0.4", - "vitest": "^4.0.0" + "vitest": "^4.0.0", + "yauzl": "^2.10.0" } }, "services/clsi-cache": { @@ -53183,6 +53186,7 @@ "express-session": "^1.17.1", "file-type": "^21.3.4", "focus-trap-react": "^11.0.4", + "form-data": "^4.0.5", "globby": "^5.0.0", "helmet": "^6.0.1", "https-proxy-agent": "^7.0.6", diff --git a/services/clsi/.gitignore b/services/clsi/.gitignore index a85e6b757a..fd94a86eea 100644 --- a/services/clsi/.gitignore +++ b/services/clsi/.gitignore @@ -1,3 +1,4 @@ compiles output cache +uploads \ No newline at end of file diff --git a/services/clsi/Makefile b/services/clsi/Makefile index f873e2fa27..7924db4e20 100644 --- a/services/clsi/Makefile +++ b/services/clsi/Makefile @@ -154,6 +154,8 @@ test_acceptance_clean: $(DOCKER_COMPOSE_TEST_ACCEPTANCE) down -v -t 0 test_acceptance_pre_run: + docker pull us-east1-docker.pkg.dev/overleaf-ops/ol-docker/pandoc:3.9 + docker pull us-east1-docker.pkg.dev/overleaf-ops/ol-docker/pandoc-staging:3.9 ifneq (,$(wildcard test/acceptance/js/scripts/pre-run)) $(DOCKER_COMPOSE_TEST_ACCEPTANCE) run --rm test_acceptance test/acceptance/js/scripts/pre-run endif diff --git a/services/clsi/app.js b/services/clsi/app.js index 0a8423b726..40210426d3 100644 --- a/services/clsi/app.js +++ b/services/clsi/app.js @@ -20,6 +20,8 @@ import bodyParser from 'body-parser' import net from 'node:net' import os from 'node:os' import OError from '@overleaf/o-error' +import ConversionController from './app/js/ConversionController.js' +import FileUploadMiddleware from './app/js/FileUploadMiddleware.js' logger.initialize('clsi') logger.logger.serializers.clsiRequest = LoggerSerializers.clsiRequest @@ -122,6 +124,13 @@ app.get( OutputController.createOutputZip ) +// Conversion endpoints +app.post( + '/convert/docx-to-latex', + FileUploadMiddleware.multerMiddleware, + ConversionController.convertDocxToLaTeX +) + if (process.env.NODE_ENV === 'development' && global.__coverage__) { app.get('/coverage', (req, res) => { const coverage = {} diff --git a/services/clsi/app/js/ConversionController.js b/services/clsi/app/js/ConversionController.js new file mode 100644 index 0000000000..afdce93d0d --- /dev/null +++ b/services/clsi/app/js/ConversionController.js @@ -0,0 +1,46 @@ +import logger from '@overleaf/logger' +import { expressify } from '@overleaf/promise-utils' +import fs from 'node:fs/promises' +import fsSync from 'node:fs' +import ConversionManager from './ConversionManager.js' +import { pipeline } from 'node:stream/promises' +import Settings from '@overleaf/settings' +import Path from 'node:path' + +async function convertDocxToLaTeX(req, res) { + const { path } = req.file + if (!Settings.enablePandocConversions) { + await fs.unlink(path).catch(() => {}) + return res.sendStatus(404) + } + logger.debug({ path }, 'received file for conversion') + const conversionId = crypto.randomUUID() + let zipPath + try { + zipPath = await ConversionManager.promises.convertDocxToLaTeXWithLock( + conversionId, + path + ) + } finally { + await fs.unlink(path).catch(() => {}) + } + + try { + const zipStat = await fs.stat(zipPath) + + res.setHeader('Content-Length', zipStat.size) + res.attachment('conversion.zip') + res.setHeader('X-Content-Type-Options', 'nosniff') + + const readStream = fsSync.createReadStream(zipPath) + await pipeline(readStream, res) + } finally { + await fs + .rm(Path.dirname(zipPath), { recursive: true, force: true }) + .catch(() => {}) + } +} + +export default { + convertDocxToLaTeX: expressify(convertDocxToLaTeX), +} diff --git a/services/clsi/app/js/ConversionManager.js b/services/clsi/app/js/ConversionManager.js new file mode 100644 index 0000000000..e68583d465 --- /dev/null +++ b/services/clsi/app/js/ConversionManager.js @@ -0,0 +1,101 @@ +import logger from '@overleaf/logger' +import Settings from '@overleaf/settings' +import fs from 'node:fs/promises' +import Path from 'node:path' +import CommandRunner from './CommandRunner.js' +import LockManager from './LockManager.js' +import OError from '@overleaf/o-error' + +async function convertDocxToLaTeXWithLock(conversionId, inputPath) { + const conversionDir = Path.join(Settings.path.compilesDir, conversionId) + const lock = LockManager.acquire(conversionDir) + try { + return await convertDocxToLaTeX(conversionId, conversionDir, inputPath) + } finally { + lock.release() + } +} + +async function convertDocxToLaTeX(conversionId, conversionDir, inputPath) { + await fs.mkdir(conversionDir, { recursive: true }) + const newSourcePath = Path.join(conversionDir, 'input.docx') + await fs.copyFile(inputPath, newSourcePath) + const outputName = crypto.randomUUID() + '.zip' + + try { + const { + stdout: stdoutPandoc, + stderr: stderrPandoc, + exitCode: exitCodePandoc, + } = await CommandRunner.promises.run( + conversionId, + [ + 'pandoc', + 'input.docx', + '--output', + 'main.tex', + '--extract-media=.', + '--from', + 'docx+citations', + '--to', + 'latex', + '--citeproc', + '--standalone', + ], + conversionDir, + Settings.pandocImage, + Settings.conversionTimeoutSeconds * 1000, + {}, + 'conversions' + ) + if (exitCodePandoc !== 0) { + throw new OError('Non-zero exit code from pandoc', { + exitCode: exitCodePandoc, + stderr: stderrPandoc, + }) + } + logger.debug( + { stdout: stdoutPandoc, stderr: stderrPandoc, exitCode: exitCodePandoc }, + 'conversion command completed' + ) + + // Clean up the source document to leave only the conversion result + await fs.unlink(newSourcePath).catch(() => {}) + + const { + stdout: stdoutZip, + stderr: stderrZip, + exitCode: exitCodeZip, + } = await CommandRunner.promises.run( + conversionId, + ['zip', '-r', outputName, '.'], + conversionDir, + Settings.pandocImage, + Settings.conversionTimeoutSeconds * 1000, + {}, + 'conversions' + ) + if (exitCodeZip !== 0) { + throw new OError('Non-zero exit code from pandoc', { + exitCode: exitCodeZip, + stderr: stderrZip, + }) + } + logger.debug( + { stdout: stdoutZip, stderr: stderrZip, exitCode: exitCodeZip }, + 'conversion output compressed' + ) + } catch (error) { + // Clean up the conversion directory on error to avoid leaving failed conversions around + await fs.rm(conversionDir, { force: true, recursive: true }).catch(() => {}) + throw new OError('pandoc conversion failed').withCause(error) + } + + return Path.join(conversionDir, outputName) +} + +export default { + promises: { + convertDocxToLaTeXWithLock, + }, +} diff --git a/services/clsi/app/js/FileUploadMiddleware.js b/services/clsi/app/js/FileUploadMiddleware.js new file mode 100644 index 0000000000..21e84fd7bc --- /dev/null +++ b/services/clsi/app/js/FileUploadMiddleware.js @@ -0,0 +1,31 @@ +import multer from 'multer' +import Settings from '@overleaf/settings' +import logger from '@overleaf/logger' + +const upload = multer({ + dest: Settings.path.uploadFolder, + limits: { + fileSize: Settings.maxUploadSize, + parts: 2, + }, +}) + +function multerMiddleware(req, res, next) { + return upload.single('qqfile')(req, res, function (err) { + if (err instanceof multer.MulterError && err.code === 'LIMIT_FILE_SIZE') { + return res.status(422).json({ success: false, error: 'file_too_large' }) + } + if (err) return next(err) + if (!req.file?.path) { + logger.info({ req }, 'missing req.file.path on upload') + return res + .status(400) + .json({ success: false, error: 'invalid_upload_request' }) + } + next() + }) +} + +export default { + multerMiddleware, +} diff --git a/services/clsi/app/js/LocalCommandRunner.js b/services/clsi/app/js/LocalCommandRunner.js index ea9b85526b..c0cbbbe67b 100644 --- a/services/clsi/app/js/LocalCommandRunner.js +++ b/services/clsi/app/js/LocalCommandRunner.js @@ -82,7 +82,7 @@ export default CommandRunner = { err.code = code return callback(err) } else { - return callback(null, { stdout }) + return callback(null, { stdout, exitCode: code }) } }) diff --git a/services/clsi/buildscript.txt b/services/clsi/buildscript.txt index f69c82e793..1fc8abc9de 100644 --- a/services/clsi/buildscript.txt +++ b/services/clsi/buildscript.txt @@ -1,7 +1,7 @@ clsi --data-dirs=cache,compiles,output --dependencies= ---env-add=DOWNLOAD_HOST=http://clsi-nginx:8080,ALLOWED_COMPILE_GROUPS="clsi-perf simple-latex-file",ENABLE_PDF_CACHING="true",PDF_CACHING_ENABLE_WORKER_POOL="true",ALLOWED_IMAGES="quay.io/sharelatex/texlive-full:2017.1 quay.io/sharelatex/texlive-full:2025.1",TEXLIVE_IMAGE=quay.io/sharelatex/texlive-full:2025.1,TEX_LIVE_IMAGE_NAME_OVERRIDE=us-east1-docker.pkg.dev/overleaf-ops/ol-docker,TEXLIVE_IMAGE_USER="tex",SANDBOXED_COMPILES="true",SANDBOXED_COMPILES_HOST_DIR_COMPILES=$PWD/compiles,SANDBOXED_COMPILES_HOST_DIR_OUTPUT=$PWD/output +--env-add=DOWNLOAD_HOST=http://clsi-nginx:8080,ALLOWED_COMPILE_GROUPS="clsi-perf simple-latex-file",ENABLE_PDF_CACHING="true",PDF_CACHING_ENABLE_WORKER_POOL="true",ALLOWED_IMAGES="quay.io/sharelatex/texlive-full:2017.1 quay.io/sharelatex/texlive-full:2025.1 quay.io/sharelatex/pandoc:3.9",TEXLIVE_IMAGE=quay.io/sharelatex/texlive-full:2025.1,TEX_LIVE_IMAGE_NAME_OVERRIDE=us-east1-docker.pkg.dev/overleaf-ops/ol-docker,TEXLIVE_IMAGE_USER="tex",SANDBOXED_COMPILES="true",SANDBOXED_COMPILES_HOST_DIR_COMPILES=$PWD/compiles,SANDBOXED_COMPILES_HOST_DIR_OUTPUT=$PWD/output,ENABLE_PANDOC_CONVERSIONS=true --env-pass-through= --esmock-loader=False --node-version=24.13.0 diff --git a/services/clsi/config/settings.defaults.cjs b/services/clsi/config/settings.defaults.cjs index bf0e7df63c..074398874d 100644 --- a/services/clsi/config/settings.defaults.cjs +++ b/services/clsi/config/settings.defaults.cjs @@ -20,11 +20,19 @@ module.exports = { process.env.CLSI_OUTPUT_PATH || Path.resolve(__dirname, '../output'), clsiCacheDir: process.env.CLSI_CACHE_PATH || Path.resolve(__dirname, '../cache'), + uploadFolder: + process.env.CLSI_UPLOAD_PATH || Path.resolve(__dirname, '../uploads'), synctexBaseDir(projectId) { return Path.join(this.compilesDir, projectId) }, }, + conversionTimeoutSeconds: + parseInt(process.env.CLSI_CONVERSION_TIMEOUT_SECONDS, 10) || 60, + pandocImage: process.env.PANDOC_IMAGE || 'quay.io/sharelatex/pandoc:3.9', + enablePandocConversions: process.env.ENABLE_PANDOC_CONVERSIONS === 'true', + maxUploadSize: 50 * 1024 * 1024, + internal: { clsi: { port: 3013, @@ -152,6 +160,7 @@ if ((process.env.DOCKER_RUNNER || process.env.SANDBOXED_COMPILES) === 'true') { wordcount: { 'HostConfig.AutoRemove': true }, synctex: { 'HostConfig.AutoRemove': true }, 'synctex-output': { 'HostConfig.AutoRemove': true }, + conversions: { 'HostConfig.AutoRemove': true }, } module.exports.clsi.docker.compileGroupConfig = Object.assign( defaultCompileGroupConfig, diff --git a/services/clsi/docker-compose.ci.yml b/services/clsi/docker-compose.ci.yml index a1e188b4e5..a7022c702a 100644 --- a/services/clsi/docker-compose.ci.yml +++ b/services/clsi/docker-compose.ci.yml @@ -31,13 +31,14 @@ services: ALLOWED_COMPILE_GROUPS: "clsi-perf simple-latex-file" ENABLE_PDF_CACHING: "true" PDF_CACHING_ENABLE_WORKER_POOL: "true" - ALLOWED_IMAGES: "quay.io/sharelatex/texlive-full:2017.1 quay.io/sharelatex/texlive-full:2025.1" + ALLOWED_IMAGES: "quay.io/sharelatex/texlive-full:2017.1 quay.io/sharelatex/texlive-full:2025.1 quay.io/sharelatex/pandoc:3.9" TEXLIVE_IMAGE: quay.io/sharelatex/texlive-full:2025.1 TEX_LIVE_IMAGE_NAME_OVERRIDE: us-east1-docker.pkg.dev/overleaf-ops/ol-docker TEXLIVE_IMAGE_USER: "tex" SANDBOXED_COMPILES: "true" SANDBOXED_COMPILES_HOST_DIR_COMPILES: $PWD/compiles SANDBOXED_COMPILES_HOST_DIR_OUTPUT: $PWD/output + ENABLE_PANDOC_CONVERSIONS: true volumes: - ./reports:/overleaf/services/clsi/reports - ./compiles:/overleaf/services/clsi/compiles diff --git a/services/clsi/docker-compose.yml b/services/clsi/docker-compose.yml index 0ab691e535..9ba11247dc 100644 --- a/services/clsi/docker-compose.yml +++ b/services/clsi/docker-compose.yml @@ -45,13 +45,14 @@ services: ALLOWED_COMPILE_GROUPS: "clsi-perf simple-latex-file" ENABLE_PDF_CACHING: "true" PDF_CACHING_ENABLE_WORKER_POOL: "true" - ALLOWED_IMAGES: "quay.io/sharelatex/texlive-full:2017.1 quay.io/sharelatex/texlive-full:2025.1" + ALLOWED_IMAGES: "quay.io/sharelatex/texlive-full:2017.1 quay.io/sharelatex/texlive-full:2025.1 quay.io/sharelatex/pandoc:3.9" TEXLIVE_IMAGE: quay.io/sharelatex/texlive-full:2025.1 TEX_LIVE_IMAGE_NAME_OVERRIDE: us-east1-docker.pkg.dev/overleaf-ops/ol-docker TEXLIVE_IMAGE_USER: "tex" SANDBOXED_COMPILES: "true" SANDBOXED_COMPILES_HOST_DIR_COMPILES: $PWD/compiles SANDBOXED_COMPILES_HOST_DIR_OUTPUT: $PWD/output + ENABLE_PANDOC_CONVERSIONS: true depends_on: clsi-nginx: condition: service_started diff --git a/services/clsi/entrypoint.sh b/services/clsi/entrypoint.sh index b45899ab17..b106d11716 100755 --- a/services/clsi/entrypoint.sh +++ b/services/clsi/entrypoint.sh @@ -9,5 +9,6 @@ usermod -aG dockeronhost node mkdir -p /overleaf/services/clsi/cache && chown node:node /overleaf/services/clsi/cache mkdir -p /overleaf/services/clsi/compiles && chown node:node /overleaf/services/clsi/compiles mkdir -p /overleaf/services/clsi/output && chown node:node /overleaf/services/clsi/output +mkdir -p /overleaf/services/clsi/uploads && chown node:node /overleaf/services/clsi/uploads exec runuser -u node -- "$@" diff --git a/services/clsi/package.json b/services/clsi/package.json index d749cf925e..c73525ff6e 100644 --- a/services/clsi/package.json +++ b/services/clsi/package.json @@ -30,6 +30,7 @@ "dockerode": "^4.0.9", "express": "4.22.1", "lodash": "^4.17.21", + "multer": "2.1.1", "overleaf-editor-core": "*", "p-limit": "^3.1.0", "request": "2.88.2", @@ -41,6 +42,7 @@ "@istanbuljs/esm-loader-hook": "^0.3.0", "chai": "^4.3.6", "chai-as-promised": "^7.1.1", + "form-data": "^4.0.5", "mocha": "^11.1.0", "mocha-junit-reporter": "^2.2.1", "mocha-multi-reporters": "^1.5.1", @@ -51,6 +53,7 @@ "sinon-chai": "^3.7.0", "timekeeper": "2.2.0", "typescript": "^5.0.4", - "vitest": "^4.0.0" + "vitest": "^4.0.0", + "yauzl": "^2.10.0" } } diff --git a/services/clsi/test/acceptance/fixtures/conversion-source.docx b/services/clsi/test/acceptance/fixtures/conversion-source.docx new file mode 100644 index 0000000000000000000000000000000000000000..c94fa6fa5404358da22761444805179e288efb67 GIT binary patch literal 8297 zcmaKR1yEeu()Hl(5;V93cXxMpw+ZeVJU~K%1P|`+?i!rnI!JI2?hZfX$<598zuQxF zYNl$|K6Tb!r@QxRC0PhaOaLq_EWpdjMGx>>AUuCJbOPEqGcvqfOQRQMAcPS=9Nl4x z9yZTuphl%0gu}LC6r!NMt!ftK6*QA5wLc;bVTcv~Hb_pj?H%OOTrF7m>g&GAScRpI z5`kO|Ll|Bf^3@*x>_zcml%iS@xgrBl-CbmcDK6R$tvYd5)y`y1iAoexG50Hr38`~V zl;c7jh+2?8BmG@b>ol=fGnlJ~>^=Sw7ve}o_|}{=^()dbXA;>DV`8hW(N?5Aj%B#y zWUr81?=tJog0l+2{No$z{5G3(SJs?Yjpi80lx1YaG@qo|G`+i8Br306*A$b~gRAX# zmcf){VPF$Vd*BzJpIHnX08sj$te`#r!_3}P$;sZqnbFkV3CQ4KYZI;_uh_+e*m_1= z^kboBF(Jc>7j8^JPGp`EwLa2AKSMsd`|kEC=U^Ub7VHhoU2D8KuL2LhT5dgrDX^3U z9g5^c=tM_K-OQ{u<;K0cx^L}Hu zFe(V&2)5H1T3WNr+(0ryaS(T4ohj{lwLhW@$focEH)2`!cF)&zwx@Og|1zaP%YReC z%7u5opU8huHZP>q>zE+6U8g1*%t}L>CDgh8ycY7r!R57PMO*gIpa;L4yNyeyoTZFX z8zZXZ_IOF#aT436x`hbEmDzAMat4Xd6fO=#k2euFoG%;XI$jQE0tNsq{No0p{=PxR z4h}E-lNvQ_*U5w$bmSA#za~Ra5~H+ghoYm2dQx+x1r zjxshUe4L&dspx~jM@-wmk)>M$iP*MGw{{S`K?%>f5h+kxLy1eAiG50y6EG0oz}5RC zVqMH|g^>JnKa$D=s&&r{^4qSV_8fJy%oXw0>}yCrm$FG2i65V|b{SFZL1?j?Z)A^D zIkYXExef$~LiD4kvb&L&kCA95Qh+h}frmfethJ3-R0xHFzBUcow zt=`j6_ZLbxXBgo~F$h#@K!`?K#&(;uVP?bx)h_r-1f9Fh;GvvPzc9J4Tl<6CdwqY; zt4EDv3-MaxDueEUNAfQg@@6AHozR|~oSWP+C41L+`t)X8(8>!xFB3f-m&rRz;|W{i zgNfLxG=9R@&;O{VW>iguGd|JZd5HE#MH@40>=4Tx&l62^rU)}r!P1aMr%R8dwgbDV zNlbV%r}{%)%mB$mtSN%Gd{6A|xCY9cA>7hntzzZ8Oe!rs96K~PiAM!3Q!z|}*(qc& z;Ydbinh7_kVUg)we7|%80opKs|NCBnv7M}KWdLKM&8ohp);L7P&nd{7_6LjVe*W`!~GPhv;TDDyoZbBB`?j5&F zddc(a7&K;6RCURf#sU_P5gP_ja0Ty-S%p<|8z|$ZlqG02wv{qcqNvS#mk>+SKhE@J zUd+`aVuVcQvbFV5+{m1bbz9HPhTcMKrnv=i#~)A?j1h=}Nv~0>wPW$;mz3<2ruC+c z_~R^kwz?kG$i%S?O%I+4U#ZDfe)_x-Ua)SkrK*f$6_F2fwlQpjY+(Q8auMJHb2*4z z(uo(3S8RpAlNElaQ(oWOWiM)xCum}pLk zpViZl#F1`>(}QjxyA8U<65yV6ODleARsquOUPY4b>ZG*Nf4>E_A!lr`kOTm?Gt^WP z4q;lp>N+s}bBKGezYtoUGi1`qbc2kpY&QhPw-*gnAyvn>z(Ie4C>*?+qDMJ= ziT(GztB(E_X0o$d?8?j=#O^)nhb;ov0VxwTF{RWRkma+7S*i(&7+|O~L%1ryKS@|x zZhxu_jI)86Yk&NK-TUqep1I%h-H6jX4DBvyEqBC^b&j@H$TaH1J-s)su<+`Rw%T5wEp%9de8@_tBNy0H z(qtP9gG$slFE|q{jpt(O4-x9*X)TCe4}bA|FI!?fl^cnXM976pXm{dMZ&Q-Hii~jU z(2T>QQKVcF>4Z6$Q+v2}K*g)78QuV=h5&ZVO<_|YS_V&7ThmGewUy`G86ZnFM0n^x zc(mQvHqhzn3=8>FP;>lRa_mI#CAS^UlYC5t1OVjG|Ebuc{3o|LyS%djI=^JLVKsfl z1tv_NJW1}dl~1*F8-K|3FtEIUvd_C%U(1IJeWB zT@*)N<45sjRO$_c9UNU~2UICh#K(j!E*e~Z(KaFMM-PflsKnKAbGzL&@9uSYjv&8(|Liw{D#g(bV=<@Z zFu2H=Pnr9Ri!FJlYFDRG!M!cyOYBETl8+|y@~I5gnemE`%$vDMTRC5pO-6F2=at|` zbo0yJHc8yb4JBeXGwvKBgz`^fF^VAW)+JlA$OXRt45sfjk;z8a>afuYs~}^+XNnV*-+ z%{^y5FJtDF6-O+EKp8cMq@Y}RM2N6=b$?Un1nTxfal#dSgz0-jWG!<9WGcfZy9zm1j}jtF>o$05>{^(lSXm&C)`EY*T)t{43e(6`-M_x;3eA|;Z4(MZ)x5Nr)3IKo*H3a|J zZCdNQ ze8A9k(}8I&{hYPyV8bR$ME0goL3d84rPuO>-F8%csvCz52TzM&;#&%Gd33B{bN5M? zn31y_g0gaj;ZlQ@9__cr5WIq{@`uU$;plD#XYO^OO+x2&$?vJV0vV)a5IcWEp1pB9S#k5Z4cH5=5kc-!inNaPo zyoF(%cAG2Q$y^ldIw?noALzjr+d;3TVk_)zisdP49G!lB3y5jW$$H8Wy-mzfkgJhW zl-?UQvyLW?ziZfCGDr~(O*7diM4VF0^v6(Z?RGmo9?bEvc>_>6?+vI$Qf4aR9prm=&;JBu9>&@ZgJ7*`98`$ZR4&17PL#IrA( z5{E6wx|p=4&<(~9(C{@DDbU+zF6P4qTuf+;d`=#j$VvaJGzT=nQ?A@pM?8e$>ddy+ z4yV@eNe2A<>RjGP`}bf%%3H4zL_%r;6~yZRvCProjuMf{iUesCG+s1tFvPYHr~(EC zI7a}$YH;H#f5$jPH-Qk>ictw7*?zGQAQ$u2Mx6dBc_P{rXUS)l$ie zp+R}>RIyiiq>l^%8d=CqDn?`HZO3@cIJlq0sz0a%Y^-eO*3k%j^+ALFoA{@z)SaHM z({ZBjS=iPe{S*qV@98}^^@&M7DCh);>G~cP-4pEf%lm{B zp-adgLVuY%{P-wkqDMD(Rn~~cgQ3#g(9`Tg*TvByf=mi*#fe4!p}S@3-Ll13B6CKt z>(}IISt@p>t|y%bdcDg0&30+pv?W3l10@T?hw*KQQh_hVy*{DS$V3qY9Dzx|5iEN; zv@BnRGq2A{o-R4cV6oh!VZ0AOu=DWy^hUE4LW0nStvSaQLm=kCBIWw8ku&xt-8wale#)iXZLio$6- z;-2gl8#El45;YIiJta0vOqB*FP7t{vGMPg`3&a@_yg8wL5c(GVnZ{|IjgN3L|2#+O z0H25j>(gv}V)b0sJUDhPLGCiB$U9<)o>8}P7v!fNM|JAngVUj7`E$ z!mD+`B}FvSx4A7>wD4=UZ@?MXq4ftIopR2FA?4J<=IGA!TmaLt_czd)FxX>6lcY+H z=^B?aoSA0ZzbsR<9j+~CjeA~JftQeo$_T-=JXV&7Z+@1mBUPK8LXL~$wfXev$-Up+WZuSQEw|buN@=Bn^Mj!X*P)}Spct=RR$Ca<;emk} znQcE1)+7Bo1bnV?p!2}#aoBHDzfogg)+E6R`mFSgcBd4s#^~|c^_c zRF9LSF_6yIFl{K+SbvVg&exkPk_XDx$Y>+IsU0dqv;6y;wJE^#fsnl4j)Mb@@^O~R z{DZd{!Ik!azFymflaC5jTT?^%;}Dl5W|)THY0 zzBX!=B?L9J^;@T99^OhQ_V@^3z z0avFftjLMEk;NPo>(N4hHGxW*=;+JpGL4^n>TRCD#&dQ?K+MH109kMAif7uCuoJw* zd;C;12vb9*XE4gJZdcGnKTg-D)Ayh|pg@t{1jpt*`FJGzio{M={VlZ7W5^NP1$B7F zsR5Mchig4E5^j9q2(Pq@Dl^${#8wCU$)O&J_>O)rQsy@X(nWx9*bBu=gaW zqL`tq8klbTzKz#GmfhY+LzzfDEW=DKJe2Y%*VW(dv+Q+~t?OpyoPmRb_sM9QTqz#5 zDRN#4ws!=F*qCvgzgpseZiH_-@o67pPaej4&j@b2E!5R>UhBD4CuvoswiUSihFUKx z@|*lhupjbVhn{El8UE)Q9h$cc^qoULn!@Kf;z8(`acXx9%k)YelFOL1)-dlLykS$G$)a6klOimb$haWggfOd)47>#+mRW4Z`Wzqi4tYl5j4f~S5x z5_;T`sP`6In%5CI-IlBn)H$CGa?4cD+h0y~)#_j&vCn;Q)IavZ&quz0IaY7&?OfE1 zO>BO7xTVpf&mJLS?~zvswY!w?JexWuo8`Ra8q+L9!pelyXVsqfVfBUxI!VjwDdgm%5_OGHvCC5)mp6kn;f-L2-PfgFW|lU%$_*nnewM z9#k8k?5$G^aScTF*w@V|nHJ#sbc5$cb5mg(2(f_&k9gf9F8*RxaD{*UeSf1N^Spo@#8o%yfxG%)hp^T7l0;}M_uCa-)jWSG@~ z-6w?naF}Bi_wcE1<+ZY~{J!iY&y}s~yMtvvw>x`5>{l{#juu9$8cIqPKN=nER`#wR z8$OUCd_ipBh zM3#H6fM{4%y%i^dL8zG4FTsZy6cwFI>^V{g?hHYA!4>Y*Jt@ppv=LwmQ9iv8;E4X% z(EA=dWUIkh8iO8uP#~*h+PG2w16XUGPSfFr1Qkf}5EfH@;9+nhRX(LLUPTh|sFLRS-uajpL+COvlzef%KJ$|@Y0BwPczpl(L9>ijty#2xp zXLncuhsA3Ra1+D|%ybBD)D@z2i@1q((^6F=g$Jic`TYuO<(XYfMmyf0rTD$YL&92{ zo-)bbNtlWkV&E;xQ40?bs_pP)F1Oh&Gg4O~yTDgXC5(~jc((i22jtQ!k({i~GmvC{ zaGs849&E+mm)Fj`^wPG(NgPT9&$Ps__oZqZV)^E%dP?n{~R^dw`AaoD3@fE7=s56 z1-OUR446P_q~k*Z0d|dVla^{`@SXTu0f6nc*wwjl@HOcl74!uLUp7!jk1_`9&)30F z9K-k@uD>rMUw_E=x~v_Br>Ro+&NETHqdAR<)M}_^bcpEFo3i)1p#ASMiAdE=9^V0 zoX`=@GZ!{GG#zGq@1P?^Gno0d*8>OX;#CG~GCQ0)J?xG%c4`*MR`?>{Q+Z{)D>9{Y zWuoj_S#iLc!^G9EDUd?qQ|Es|J~%^h(8>NR6E{HTi7?ME4*cG3>23d@qHZZtI`RfA z@op5%of6R*ShCzgw`m&YZVJ)MvRF$Vjxhi*InrpNLYQx&QPxx8?#?GEL#xZux&-*(aj{Q zJX3h46in6`7v(j*x$vEuCOM@-i9wHml1udfELI^k_rk6xInYiTLueS|%K|OjT-e=p zKRabb{1jZa`C)!2d1=DCJ#f2F7*FqGNe}5%UN&$dtP}~cy(0))Z5pth$|>s1NN9TD zwEo8mgR0c)`s0m{dSE0g4`b|$;%u6E^G8am+B?^>3Uz}j_{!=toLmwL74X0h@!wR zJEE{kQ{C=47hprbY!?_fCg87;?C)dAmyzt>-r%!~en|88o%n6Q5Z#B*u?1^>&E{S*AV zx%1+C{T0K{1pWg54;Spum45FhUb69D0gm+VmHxDb{#@es()iMN{1tQ9|6by6ZOEVa z-%G{ciB!h<4->zY(?8+ACxX8}!F$|)!~d) { + yauzl.open(outputStream.path, { lazyEntries: true }, (err, zipfile) => { + if (err) { + return reject(err) + } + zipfile.on('error', reject) + zipfile.on('end', resolve) + zipfile.readEntry() + zipfile.on('entry', entry => { + if (entry.fileName === 'main.tex') { + zipfile.openReadStream(entry, (err, readStream) => { + if (err) { + return reject(err) + } + let data = '' + readStream.on('data', chunk => { + data += chunk.toString() + }) + readStream.on('end', () => { + try { + expect(data).to.include('\\begin{document}') + expect(data).to.include( + '\\[x = \\frac{- b \\pm \\sqrt{b^{2} - 4ac}}{2a}\\]' + ) + zipfile.readEntry() + } catch (err) { + reject(err) + } + }) + }) + } else if (entry.fileName === 'media/') { + // Skip the media directory entry + zipfile.readEntry() + } else if (entry.fileName.startsWith('media/')) { + expect(entry.fileName).to.equal('media/image1.png') + zipfile.readEntry() + } else { + reject(new Error('Unexpected file in zip: ' + entry.fileName)) + } + }) + }) + }) + }) + + it('should fail when file is not a docx', async function () { + const sourcePath = Path.join( + import.meta.dirname, + '../fixtures/minimal.pdf' + ) + await expect(Client.convertDocx(sourcePath)).to.eventually.be.rejected + }) + }) +}) diff --git a/services/clsi/test/acceptance/js/helpers/Client.js b/services/clsi/test/acceptance/js/helpers/Client.js index c397954a21..b7038380c5 100644 --- a/services/clsi/test/acceptance/js/helpers/Client.js +++ b/services/clsi/test/acceptance/js/helpers/Client.js @@ -1,8 +1,14 @@ import express from 'express' -import { fetchJson, fetchNothing, fetchString } from '@overleaf/fetch-utils' +import { + fetchJson, + fetchNothing, + fetchStream, + fetchString, +} from '@overleaf/fetch-utils' import fs from 'node:fs' import fsPromises from 'node:fs/promises' import Settings from '@overleaf/settings' +import FormData from 'form-data' const host = Settings.apis.clsi.url @@ -24,6 +30,15 @@ function compile(projectId, data) { }) } +async function convertDocx(path) { + const formData = new FormData() + formData.append('qqfile', fs.createReadStream(path)) + return await fetchStream(`${host}/convert/docx-to-latex`, { + method: 'POST', + body: formData, + }) +} + async function stopCompile(projectId) { return await fetchNothing(`${host}/project/${projectId}/compile/stop`, { method: 'POST', @@ -187,6 +202,7 @@ function smokeTest() { export default { randomId, compile, + convertDocx, stopCompile, clearCache, getOutputFile, diff --git a/services/clsi/test/acceptance/js/helpers/ClsiApp.js b/services/clsi/test/acceptance/js/helpers/ClsiApp.js index 1f0725c0c6..332d59fc52 100644 --- a/services/clsi/test/acceptance/js/helpers/ClsiApp.js +++ b/services/clsi/test/acceptance/js/helpers/ClsiApp.js @@ -1,5 +1,6 @@ import app from '../../../../app.js' import Settings from '@overleaf/settings' +import testLogRecorder from '@overleaf/logger/test-log-recorder.js' function startApp() { return new Promise((resolve, reject) => { @@ -26,6 +27,10 @@ async function ensureRunning() { await appStartedPromise } +if (process.env.CI === 'true') { + beforeEach('record error logs in junit', testLogRecorder) +} + export default { ensureRunning, } diff --git a/services/clsi/test/unit/js/ConversionController.test.js b/services/clsi/test/unit/js/ConversionController.test.js new file mode 100644 index 0000000000..e9c5e77c9a --- /dev/null +++ b/services/clsi/test/unit/js/ConversionController.test.js @@ -0,0 +1,158 @@ +import sinon from 'sinon' +import { vi, describe, it, beforeEach, expect } from 'vitest' +import Path from 'node:path' +import { PassThrough } from 'node:stream' + +const MODULE_PATH = Path.join( + import.meta.dirname, + '../../../app/js/ConversionController' +) + +describe('ConversionController', function () { + beforeEach(async function (ctx) { + ctx.conversionDir = '/path/to/conversion/result' + ctx.zipPath = '/path/to/conversion/result/output.zip' + ctx.zipStat = { size: 1234 } + ctx.Settings = { + enablePandocConversions: true, + } + ctx.ConversionManager = { + promises: { + convertDocxToLaTeXWithLock: sinon.stub().resolves(ctx.zipPath), + }, + } + + ctx.fs = { + stat: sinon.stub().resolves(ctx.zipStat), + unlink: sinon.stub().resolves(), + rm: sinon.stub().resolves(), + } + + ctx.readStream = new PassThrough() + ctx.fsSync = { + createReadStream: sinon.stub().returns(ctx.readStream), + } + ctx.pipeline = sinon.stub().resolves() + + vi.doMock('node:fs/promises', () => ({ + default: ctx.fs, + })) + + vi.doMock('node:fs', () => ({ + default: ctx.fsSync, + })) + + vi.doMock('node:stream/promises', () => ({ + pipeline: ctx.pipeline, + })) + + vi.doMock('@overleaf/settings', () => ({ + default: ctx.Settings, + })) + + vi.doMock('../../../app/js/ConversionManager', () => ({ + default: ctx.ConversionManager, + })) + + ctx.res = new PassThrough() + ctx.res.attachment = sinon.stub() + ctx.res.setHeader = sinon.stub() + + ctx.ConversionController = (await import(MODULE_PATH)).default + }) + + describe('convertDocxToLaTeX', function () { + describe('when conversions are disabled', function () { + beforeEach(async function (ctx) { + ctx.Settings.enablePandocConversions = false + ctx.req = { + file: { path: '/path/to/uploaded/file.docx' }, + } + ctx.res.sendStatus = sinon.stub() + + await ctx.ConversionController.convertDocxToLaTeX(ctx.req, ctx.res) + }) + + it('should remove the uploaded file', function (ctx) { + sinon.assert.calledWith(ctx.fs.unlink, ctx.req.file.path) + }) + + it('should return 404', function (ctx) { + sinon.assert.calledWith(ctx.res.sendStatus, 404) + }) + + it('should not call the conversion manager', function (ctx) { + sinon.assert.notCalled( + ctx.ConversionManager.promises.convertDocxToLaTeXWithLock + ) + }) + }) + + describe('successfully', function () { + beforeEach(async function (ctx) { + ctx.req = { + file: { path: '/path/to/uploaded/file.docx' }, + } + + await ctx.ConversionController.convertDocxToLaTeX(ctx.req, ctx.res) + }) + + it('should call the conversion manager with the uploaded file path', function (ctx) { + sinon.assert.calledWith( + ctx.ConversionManager.promises.convertDocxToLaTeXWithLock, + sinon.match( + /^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/ + ), + ctx.req.file.path + ) + }) + + it('should look up the generated zip file size', function (ctx) { + sinon.assert.calledWith(ctx.fs.stat, ctx.zipPath) + }) + + it('should set the response headers for a zip file download', function (ctx) { + sinon.assert.calledWith( + ctx.res.setHeader, + 'Content-Length', + ctx.zipStat.size + ) + sinon.assert.calledWith(ctx.res.attachment, 'conversion.zip') + sinon.assert.calledWith( + ctx.res.setHeader, + 'X-Content-Type-Options', + 'nosniff' + ) + }) + + it('should stream the generated zip file to the response', function (ctx) { + sinon.assert.calledWith(ctx.fsSync.createReadStream, ctx.zipPath) + sinon.assert.calledWith(ctx.pipeline, ctx.readStream, ctx.res) + }) + + it('should clean up the generated zip file', function (ctx) { + sinon.assert.calledWith(ctx.fs.rm, ctx.conversionDir) + }) + }) + + describe('unsuccessfully', function () { + describe('on streaming error', function () { + it('should propagate the error and still clean up', async function (ctx) { + ctx.pipeline.rejects(new Error('mock stream error')) + + const res = new PassThrough() + res.attachment = sinon.stub() + res.setHeader = sinon.stub() + + const req = { file: { path: '/path/to/uploaded/file.docx' } } + + await expect( + ctx.ConversionController.convertDocxToLaTeX(req, res) + ).to.be.rejectedWith('mock stream error') + + sinon.assert.calledWith(ctx.fs.rm, ctx.conversionDir) + }) + }) + }) + }) +}) diff --git a/services/clsi/test/unit/js/ConversionManager.test.js b/services/clsi/test/unit/js/ConversionManager.test.js new file mode 100644 index 0000000000..b8288eba5e --- /dev/null +++ b/services/clsi/test/unit/js/ConversionManager.test.js @@ -0,0 +1,253 @@ +import Path from 'node:path' +import sinon from 'sinon' +import { vi, describe, beforeEach, afterEach, it, expect } from 'vitest' +const MODULE_PATH = Path.join( + import.meta.dirname, + '../../../app/js/ConversionManager' +) + +describe('ConversionManager', function () { + beforeEach(async function (ctx) { + ctx.CommandRunner = { + promises: { + run: sinon.stub().resolves({ stdout: '', stderr: '', exitCode: 0 }), + }, + } + + ctx.lock = { + release: sinon.stub(), + } + + ctx.LockManager = { + acquire: sinon.stub().returns(ctx.lock), + } + + ctx.Settings = { + pandocImage: 'mock-pandoc-image', + conversionTimeoutSeconds: 60, + path: { compilesDir: '/compiles' }, + } + + ctx.fs = { + mkdir: sinon.stub().resolves(), + copyFile: sinon.stub().resolves(), + rm: sinon.stub().resolves(), + unlink: sinon.stub().resolves(), + } + + ctx.conversionId = 'test-conversion-id' + ctx.inputPath = '/path/to/input.docx' + ctx.conversionDir = '/compiles/test-conversion-id' + ctx.outputPath = '/compiles/test-conversion-id/output-uuid.zip' + + ctx.uuidStub = sinon + .stub(globalThis.crypto, 'randomUUID') + .returns('output-uuid') + + vi.doMock('../../../app/js/LockManager', () => ({ + default: ctx.LockManager, + })) + + vi.doMock('@overleaf/settings', () => ({ + default: ctx.Settings, + })) + + vi.doMock('../../../app/js/CommandRunner', () => ({ + default: ctx.CommandRunner, + })) + + vi.doMock('node:fs/promises', () => ({ default: ctx.fs })) + + ctx.ConversionManager = (await import(MODULE_PATH)).default + }) + + afterEach(function (ctx) { + ctx.uuidStub.restore() + }) + + describe('convertDocxToLaTeXWithLock', function () { + describe('general behavior', function () { + beforeEach(async function (ctx) { + ctx.result = + await ctx.ConversionManager.promises.convertDocxToLaTeXWithLock( + ctx.conversionId, + ctx.inputPath + ) + }) + + it('should acquire a lock', async function (ctx) { + sinon.assert.calledWith(ctx.LockManager.acquire, ctx.conversionDir) + }) + + it('should copy the input file to the conversion directory', async function (ctx) { + sinon.assert.calledWith(ctx.fs.mkdir, ctx.conversionDir, { + recursive: true, + }) + sinon.assert.calledWith( + ctx.fs.copyFile, + ctx.inputPath, + Path.join(ctx.conversionDir, 'input.docx') + ) + }) + + it('should convert conversion timeout to milliseconds', async function (ctx) { + expect(ctx.CommandRunner.promises.run.firstCall.args[4]).toBe(60_000) + expect(ctx.CommandRunner.promises.run.secondCall.args[4]).toBe(60_000) + }) + + it('should run pandoc followed by zip in the conversion directory', function (ctx) { + expect(ctx.CommandRunner.promises.run.callCount).toBe(2) + expect(ctx.CommandRunner.promises.run.firstCall.args).toEqual([ + ctx.conversionId, + [ + 'pandoc', + 'input.docx', + '--output', + 'main.tex', + '--extract-media=.', + '--from', + 'docx+citations', + '--to', + 'latex', + '--citeproc', + '--standalone', + ], + ctx.conversionDir, + ctx.Settings.pandocImage, + 60_000, + {}, + 'conversions', + ]) + expect(ctx.CommandRunner.promises.run.secondCall.args).toEqual([ + ctx.conversionId, + ['zip', '-r', 'output-uuid.zip', '.'], + ctx.conversionDir, + ctx.Settings.pandocImage, + 60_000, + {}, + 'conversions', + ]) + }) + }) + + describe('successful conversion', function () { + beforeEach(async function (ctx) { + ctx.CommandRunner.promises.run.resolves({ + stdout: 'mock-stdout', + stderr: 'mock-stderr', + exitCode: 0, + }) + + ctx.result = + await ctx.ConversionManager.promises.convertDocxToLaTeXWithLock( + ctx.conversionId, + ctx.inputPath + ) + }) + + it('should remove the source document after conversion', async function (ctx) { + sinon.assert.calledWith( + ctx.fs.unlink, + Path.join(ctx.conversionDir, 'input.docx') + ) + }) + + it('should return the conversion directory', function (ctx) { + expect(ctx.result).toBe(ctx.outputPath) + }) + + it('should release the lock', function (ctx) { + sinon.assert.called(ctx.lock.release) + }) + }) + + describe('unsuccessful conversion (exitcode)', function () { + beforeEach(async function (ctx) { + ctx.CommandRunner.promises.run.resolves({ + stdout: 'mock-stdout', + stderr: 'mock-stderr', + exitCode: 63, + }) + + await expect( + ctx.ConversionManager.promises.convertDocxToLaTeXWithLock( + ctx.conversionId, + ctx.inputPath + ) + ).to.be.rejectedWith('pandoc conversion failed') + }) + + it('should remove the entire conversion directory', async function (ctx) { + sinon.assert.calledWith(ctx.fs.rm, ctx.conversionDir, { + force: true, + recursive: true, + }) + }) + + it('should release the lock', function (ctx) { + sinon.assert.called(ctx.lock.release) + }) + }) + + describe('unsuccessful compression (exitcode)', function () { + beforeEach(async function (ctx) { + ctx.CommandRunner.promises.run + .onFirstCall() + .resolves({ + stdout: 'mock-pandoc-stdout', + stderr: 'mock-pandoc-stderr', + exitCode: 0, + }) + .onSecondCall() + .resolves({ + stdout: 'mock-zip-stdout', + stderr: 'mock-zip-stderr', + exitCode: 12, + }) + + await expect( + ctx.ConversionManager.promises.convertDocxToLaTeXWithLock( + ctx.conversionId, + ctx.inputPath + ) + ).to.be.rejectedWith('pandoc conversion failed') + }) + + it('should remove the entire conversion directory', async function (ctx) { + sinon.assert.calledWith(ctx.fs.rm, ctx.conversionDir, { + force: true, + recursive: true, + }) + }) + + it('should release the lock', function (ctx) { + sinon.assert.called(ctx.lock.release) + }) + }) + + describe('unsuccessful conversion (throws)', function () { + beforeEach(async function (ctx) { + ctx.CommandRunner.promises.run.rejects( + new Error('mock conversion error') + ) + await expect( + ctx.ConversionManager.promises.convertDocxToLaTeXWithLock( + ctx.conversionId, + ctx.inputPath + ) + ).to.be.rejectedWith('pandoc conversion failed') + }) + + it('should remove the entire conversion directory', async function (ctx) { + sinon.assert.calledWith(ctx.fs.rm, ctx.conversionDir, { + force: true, + recursive: true, + }) + }) + + it('should release the lock', function (ctx) { + sinon.assert.called(ctx.lock.release) + }) + }) + }) +}) diff --git a/services/clsi/test/unit/js/DockerRunner.test.js b/services/clsi/test/unit/js/DockerRunner.test.js index b591cf93a3..60e13db919 100644 --- a/services/clsi/test/unit/js/DockerRunner.test.js +++ b/services/clsi/test/unit/js/DockerRunner.test.js @@ -123,7 +123,7 @@ describe('DockerRunner', () => { await new Promise((resolve, reject) => { ctx.DockerRunner._runAndWaitForContainer = sinon .stub() - .callsArgWith(3, null, (ctx.output = 'mock-output')) + .callsArgWith(3, null, (ctx.output = { stdout: 'mock-output' })) return ctx.DockerRunner.run( ctx.project_id, ctx.command, @@ -168,7 +168,7 @@ describe('DockerRunner', () => { ctx.directory = '/var/lib/overleaf/data/compiles/xyz' ctx.DockerRunner._runAndWaitForContainer = sinon .stub() - .callsArgWith(3, null, (ctx.output = 'mock-output')) + .callsArgWith(3, null, (ctx.output = { stdout: 'mock-output' })) return ctx.DockerRunner.run( ctx.project_id, ctx.command, @@ -199,7 +199,7 @@ describe('DockerRunner', () => { ctx.directory = '/var/lib/overleaf/data/output/xyz/generated-files/id' ctx.DockerRunner._runAndWaitForContainer = sinon .stub() - .callsArgWith(3, null, (ctx.output = 'mock-output')) + .callsArgWith(3, null, (ctx.output = { stdout: 'mock-output' })) ctx.DockerRunner.run( ctx.project_id, ctx.command, @@ -230,7 +230,7 @@ describe('DockerRunner', () => { ctx.directory = '/var/lib/overleaf/data/compile/xyz' ctx.DockerRunner._runAndWaitForContainer = sinon .stub() - .callsArgWith(3, null, (ctx.output = 'mock-output')) + .callsArgWith(3, null, (ctx.output = { stdout: 'mock-output' })) ctx.DockerRunner.run( ctx.project_id, ctx.command, @@ -261,7 +261,7 @@ describe('DockerRunner', () => { ctx.directory = '/var/lib/overleaf/data/compile/xyz' ctx.DockerRunner._runAndWaitForContainer = sinon .stub() - .callsArgWith(3, null, (ctx.output = 'mock-output')) + .callsArgWith(3, null, (ctx.output = { stdout: 'mock-output' })) ctx.DockerRunner.run( ctx.project_id, ctx.command, @@ -290,7 +290,7 @@ describe('DockerRunner', () => { describe('when the run throws an error', () => { beforeEach(ctx => { let firstTime = true - ctx.output = 'mock-output' + ctx.output = { stdout: 'mock-output' } ctx.DockerRunner._runAndWaitForContainer = ( options, volumes, @@ -342,7 +342,7 @@ describe('DockerRunner', () => { beforeEach(ctx => { ctx.DockerRunner._runAndWaitForContainer = sinon .stub() - .callsArgWith(3, null, (ctx.output = 'mock-output')) + .callsArgWith(3, null, (ctx.output = { stdout: 'mock-output' })) ctx.DockerRunner.run( ctx.project_id, ctx.command, @@ -372,7 +372,7 @@ describe('DockerRunner', () => { ctx.Settings.texliveImageNameOveride = 'overrideimage.com/something' ctx.DockerRunner._runAndWaitForContainer = sinon .stub() - .callsArgWith(3, null, (ctx.output = 'mock-output')) + .callsArgWith(3, null, (ctx.output = { stdout: 'mock-output' })) ctx.DockerRunner.run( ctx.project_id, ctx.command, @@ -399,7 +399,7 @@ describe('DockerRunner', () => { ] ctx.DockerRunner._runAndWaitForContainer = sinon .stub() - .callsArgWith(3, null, (ctx.output = 'mock-output')) + .callsArgWith(3, null, (ctx.output = { stdout: 'mock-output' })) }) describe('with a valid image', () => { @@ -477,7 +477,7 @@ describe('DockerRunner', () => { } ctx.DockerRunner._runAndWaitForContainer = sinon .stub() - .callsArgWith(3, null, (ctx.output = 'mock-output')) + .callsArgWith(3, null, (ctx.output = { stdout: 'mock-output' })) ctx.DockerRunner.run( ctx.project_id, ctx.command, @@ -520,7 +520,7 @@ describe('DockerRunner', () => { attachStreamHandler, callback ) => { - attachStreamHandler(null, (ctx.output = 'mock-output')) + attachStreamHandler(null, (ctx.output = { stdout: 'mock-output' })) callback(null, (ctx.containerId = 'container-id')) } sinon.spy(ctx.DockerRunner, 'startContainer') diff --git a/services/web/app/src/Features/Compile/CompileManager.mjs b/services/web/app/src/Features/Compile/CompileManager.mjs index 4d5017b04a..b10f05945c 100644 --- a/services/web/app/src/Features/Compile/CompileManager.mjs +++ b/services/web/app/src/Features/Compile/CompileManager.mjs @@ -116,7 +116,15 @@ async function _getProjectCompileLimits(project) { if (!project) { throw new Error('project not found') } - const owner = await UserGetter.promises.getUser(project.owner_ref, { + const limits = await _getUserCompileLimits(project.owner_ref) + if (project.fromV1TemplateId === Settings.overrideCompileTimeForTemplate) { + limits.timeout = Math.max(limits.timeout, 20) + } + return limits +} + +async function _getUserCompileLimits(userId) { + const owner = await UserGetter.promises.getUser(userId, { _id: 1, alphaProgram: 1, analyticsId: 1, @@ -141,9 +149,7 @@ async function _getProjectCompileLimits(project) { compileBackendClass: compileGroup === 'standard' ? 'c3d' : 'c4d', ownerAnalyticsId: analyticsId, } - if (project.fromV1TemplateId === Settings.overrideCompileTimeForTemplate) { - limits.timeout = Math.max(limits.timeout, 20) - } + return limits } @@ -208,6 +214,7 @@ export default CompileManager = { stopCompile, wordCount, syncTeX, + _getUserCompileLimits, }, compile: callbackifyMultiResult(instrumentedCompile, [ 'status', diff --git a/services/web/app/src/Features/Project/ProjectListController.mjs b/services/web/app/src/Features/Project/ProjectListController.mjs index 0080f2fd69..750d85eed9 100644 --- a/services/web/app/src/Features/Project/ProjectListController.mjs +++ b/services/web/app/src/Features/Project/ProjectListController.mjs @@ -527,10 +527,16 @@ async function projectListPage(req, res, next) { const hasAiAssist = Features.hasFeature('saas') && (await _userHasAIAssist(user)) - await SplitTestHandler.promises.getAssignment( - req, - res, - 'themed-project-dashboard' + const splitTests = [ + // Split tests that will be made available to the frontend + 'themed-project-dashboard', + 'import-docx', + ].filter(Boolean) + + await Promise.all( + splitTests.map(splitTestName => + SplitTestHandler.promises.getAssignment(req, res, splitTestName) + ) ) const userSettings = await UserSettingsHelper.buildUserSettings( diff --git a/services/web/app/src/Features/Uploads/DocumentConversionManager.mjs b/services/web/app/src/Features/Uploads/DocumentConversionManager.mjs new file mode 100644 index 0000000000..97021f4429 --- /dev/null +++ b/services/web/app/src/Features/Uploads/DocumentConversionManager.mjs @@ -0,0 +1,84 @@ +import Settings from '@overleaf/settings' +import CompileManager from '../Compile/CompileManager.mjs' +import fs from 'node:fs' +import fsPromises from 'node:fs/promises' +import logger from '@overleaf/logger' +import Path from 'node:path' +import { fetchStreamWithResponse } from '@overleaf/fetch-utils' +import { pipeline } from 'node:stream/promises' +import OError from '@overleaf/o-error' +import FormData from 'form-data' +import { FileTooLargeError } from '../Errors/Errors.js' + +async function convertDocxToLaTeXZipArchive(path, userId) { + const clsiUrl = new URL(Settings.apis.clsi.url) + const limits = await CompileManager.promises._getUserCompileLimits(userId) + + clsiUrl.pathname = '/convert/docx-to-latex' + clsiUrl.searchParams.set('compileBackendClass', limits.compileBackendClass) + clsiUrl.searchParams.set('compileGroup', limits.compileGroup) + + const formData = new FormData() + formData.append('qqfile', fs.createReadStream(path)) + + logger.debug( + { clsiUrl: clsiUrl.toString() }, + 'sending docx to CLSI for conversion' + ) + + const outputFileName = crypto.randomUUID() + '.zip' + const outputPath = Path.join(Settings.path.dumpFolder, outputFileName) + let outputStream + const abortController = new AbortController() + + try { + const { stream, response } = await fetchStreamWithResponse(clsiUrl, { + method: 'POST', + body: formData, + signal: abortController.signal, + }) + + const contentLengthHeader = response.headers.get('Content-Length') + if (contentLengthHeader == null) { + logger.warn( + 'CLSI did not provide Content-Length header for converted document' + ) + throw new OError('CLSI response missing Content-Length header') + } + const contentLength = parseInt(contentLengthHeader, 10) + if (contentLength > Settings.maxUploadSize) { + abortController.abort() + stream.destroy() + throw new FileTooLargeError({ + message: 'converted document archive too large', + info: { + size: contentLength, + }, + }) + } + + outputStream = fs.createWriteStream(outputPath) + + await pipeline(stream, outputStream) + logger.debug({ outputPath }, 'received converted file from CLSI') + } catch (error) { + logger.error({ err: error }, 'error during document conversion') + outputStream?.destroy() + // Make sure to clean up the output file if conversion didn't work + await fsPromises.unlink(outputPath).catch(() => {}) + + if (error instanceof FileTooLargeError) { + throw error + } + + throw new OError('document conversion failed').withCause(error) + } + + return outputPath +} + +export default { + promises: { + convertDocxToLaTeXZipArchive, + }, +} diff --git a/services/web/app/src/Features/Uploads/ProjectUploadController.mjs b/services/web/app/src/Features/Uploads/ProjectUploadController.mjs index e360aaf768..50032f6601 100644 --- a/services/web/app/src/Features/Uploads/ProjectUploadController.mjs +++ b/services/web/app/src/Features/Uploads/ProjectUploadController.mjs @@ -1,6 +1,7 @@ import logger from '@overleaf/logger' import metrics from '@overleaf/metrics' import fs from 'node:fs' +import fsPromises from 'node:fs/promises' import Path from 'node:path' import FileSystemImportManager from './FileSystemImportManager.mjs' import ProjectUploadManager from './ProjectUploadManager.mjs' @@ -12,7 +13,8 @@ import { InvalidZipFileError } from './ArchiveErrors.mjs' import multer from 'multer' import lodash from 'lodash' import { expressify } from '@overleaf/promise-utils' -import { DuplicateNameError } from '../Errors/Errors.js' +import { DuplicateNameError, FileTooLargeError } from '../Errors/Errors.js' +import DocumentConversionManager from './DocumentConversionManager.mjs' const defaultsDeep = lodash.defaultsDeep @@ -166,6 +168,53 @@ async function uploadFile(req, res, next) { ) } +/** + * @param {any} req + * @param {any} res + * @param {any} next + */ +async function importDocx(req, res, next) { + const userId = SessionManager.getLoggedInUserId(req.session) + logger.debug({ path: req.file?.path, userId }, 'importing docx file') + const { path } = req.file + const name = Path.basename(req.body.name, '.docx') + try { + const archivePath = + await DocumentConversionManager.promises.convertDocxToLaTeXZipArchive( + path, + userId + ) + try { + const project = + await ProjectUploadManager.promises.createProjectFromZipArchive( + userId, + name, + archivePath + ) + res.json({ success: true, project_id: project._id }) + } finally { + await fsPromises.unlink(archivePath).catch(() => {}) + } + } catch (error) { + logger.error({ error }, 'error importing docx file') + if ( + error instanceof FileTooLargeError || + error?.name === 'FileTooLargeError' + ) { + return res.status(422).json({ + success: false, + error: 'file_too_large', + }) + } + res.status(500).json({ + success: false, + error: req.i18n.translate('upload_failed'), + }) + } finally { + await fsPromises.unlink(path).catch(() => {}) + } +} + /** * @param {any} req * @param {any} res @@ -202,4 +251,5 @@ export default { uploadProject, uploadFile: expressify(uploadFile), multerMiddleware, + importDocx: expressify(importDocx), } diff --git a/services/web/app/src/Features/Uploads/UploadsRouter.mjs b/services/web/app/src/Features/Uploads/UploadsRouter.mjs index d00c851dad..4727f434f1 100644 --- a/services/web/app/src/Features/Uploads/UploadsRouter.mjs +++ b/services/web/app/src/Features/Uploads/UploadsRouter.mjs @@ -26,6 +26,14 @@ export default { ProjectUploadController.uploadProject ) + webRouter.post( + '/project/new/import-docx', + AuthenticationController.requireLogin(), + RateLimiterMiddleware.rateLimit(rateLimiters.projectUpload), + ProjectUploadController.multerMiddleware, + ProjectUploadController.importDocx + ) + const fileUploadEndpoint = '/Project/:Project_id/upload' const fileUploadRateLimit = RateLimiterMiddleware.rateLimit( rateLimiters.fileUpload, diff --git a/services/web/frontend/extracted-translations.json b/services/web/frontend/extracted-translations.json index 0a488d6dfd..2c7294c466 100644 --- a/services/web/frontend/extracted-translations.json +++ b/services/web/frontend/extracted-translations.json @@ -885,6 +885,7 @@ "import_from_github": "", "import_idp_metadata": "", "import_to_sharelatex": "", + "import_word_document": "", "imported_from_another_project_at_date": "", "imported_from_external_provider_at_date": "", "imported_from_mendeley_at_date": "", diff --git a/services/web/frontend/js/features/project-list/components/new-project-button.tsx b/services/web/frontend/js/features/project-list/components/new-project-button.tsx index 874daf175b..636649017b 100644 --- a/services/web/frontend/js/features/project-list/components/new-project-button.tsx +++ b/services/web/frontend/js/features/project-list/components/new-project-button.tsx @@ -19,6 +19,7 @@ import { } from '@/shared/components/dropdown/dropdown-menu' import { useSendProjectListMB } from '@/features/project-list/components/project-list-events' import type { PortalTemplate } from '../../../../../types/portal-template' +import { useFeatureFlag } from '@/shared/context/split-test-context' type SendTrackingEvent = { dropdownMenu: string @@ -57,6 +58,7 @@ function NewProjectButton({ const portalTemplates = getMeta('ol-portalTemplates') || [] const { show: enableAddAffiliationWidget } = useAddAffiliation() const sendProjectListMB = useSendProjectListMB() + const docxImportEnabled = useFeatureFlag('import-docx') const sendTrackingEvent = useCallback( ({ dropdownMenu, @@ -208,6 +210,20 @@ function NewProjectButton({ {t('upload_project')} + {docxImportEnabled && ( +
  • + + handleModalMenuClick(e, { + modalVariant: 'import_docx', + dropdownMenuEvent: 'import-docx', + }) + } + > + {t('import_word_document')} + +
  • + )}
  • {ImportProjectFromGithubMenu && ( void + openProject: (projectId: string) => void +} + +function ImportDocxModal({ onHide, openProject }: ImportDocxModalProps) { + const { t } = useTranslation() + const { maxUploadSize, projectUploadTimeout } = getMeta('ol-ExposedSettings') + const [ableToUpload, setAbleToUpload] = useState(false) + + const [uppy] = useState(() => { + return new Uppy({ + allowMultipleUploadBatches: false, + restrictions: { + maxNumberOfFiles: 1, + maxFileSize: maxUploadSize, + allowedFileTypes: ['.docx'], + }, + }) + .use(XHRUpload, { + endpoint: '/project/new/import-docx', + headers: { + 'X-CSRF-TOKEN': getMeta('ol-csrfToken'), + }, + limit: 1, + fieldName: 'qqfile', // "qqfile" is needed for our express multer middleware + timeout: projectUploadTimeout, + }) + .on('file-added', () => { + // this function can be invoked multiple times depending on maxNumberOfFiles + // in this case, since have maxNumberOfFiles = 1, this function will be invoked + // once if the correct file were added + // if user dragged more files than the maxNumberOfFiles allow, + // the rest of the files will appear on the 'restriction-failed' event callback + setAbleToUpload(true) + }) + .on('upload-error', () => { + // refresh state so they can try uploading a new zip + setAbleToUpload(false) + }) + .on('upload-success', async (file, response) => { + const { project_id: projectId }: ImportResponse = response.body + + if (projectId) { + openProject(projectId) + } + }) + .on('restriction-failed', () => { + // 'restriction-failed event will be invoked when one of the "restrictions" above + // is not complied: + // 1. maxNumberOfFiles: if the uploaded files is more than 1, the rest of the files will appear here + // for example, user drop 5 files to the uploader, this function will be invoked 4 times and the `file-added` event + // will be invoked once + // 2. maxFileSize: if the uploaded file has size > maxFileSize, it will appear here + // 3. allowedFileTypes: if the type is not .zip, it will also appear here + + // reset state so they can try uploading a different file, etc + setAbleToUpload(false) + }) + }) + + useEffect(() => { + if (ableToUpload) { + uppy.upload() + } + }, [ableToUpload, uppy]) + + return ( + + + {t('import_word_document')} + + + + + + + {t('cancel')} + + + + ) +} + +export default ImportDocxModal diff --git a/services/web/frontend/js/features/project-list/components/new-project-button/new-project-button-modal.tsx b/services/web/frontend/js/features/project-list/components/new-project-button/new-project-button-modal.tsx index 0d290beeba..35d46d6646 100644 --- a/services/web/frontend/js/features/project-list/components/new-project-button/new-project-button-modal.tsx +++ b/services/web/frontend/js/features/project-list/components/new-project-button/new-project-button-modal.tsx @@ -7,12 +7,14 @@ import { FullSizeLoadingSpinner } from '@/shared/components/loading-spinner' import { useLocation } from '@/shared/hooks/use-location' const UploadProjectModal = lazy(() => import('./upload-project-modal')) +const ImportDocxModal = lazy(() => import('./import-docx-modal')) export type NewProjectButtonModalVariant = | 'blank_project' | 'example_project' | 'upload_project' | 'import_from_github' + | 'import_docx' type NewProjectButtonModalProps = { modal: Nullable @@ -47,6 +49,12 @@ function NewProjectButtonModal({ modal, onHide }: NewProjectButtonModalProps) { ) + case 'import_docx': + return ( + }> + + + ) case 'import_from_github': return default: diff --git a/services/web/frontend/js/features/project-list/components/welcome-message-new/welcome-message-create-new-project-dropdown.tsx b/services/web/frontend/js/features/project-list/components/welcome-message-new/welcome-message-create-new-project-dropdown.tsx index 2fb4a86306..6059671f86 100644 --- a/services/web/frontend/js/features/project-list/components/welcome-message-new/welcome-message-create-new-project-dropdown.tsx +++ b/services/web/frontend/js/features/project-list/components/welcome-message-new/welcome-message-create-new-project-dropdown.tsx @@ -12,6 +12,7 @@ import { DropdownToggle, } from '@/shared/components/dropdown/dropdown-menu' import createNewProjectImage from '../../images/create-a-new-project.svg' +import { useFeatureFlag } from '@/shared/context/split-test-context' const CustomDropdownToggle = forwardRef< HTMLButtonElement, @@ -59,6 +60,7 @@ function WelcomeMessageCreateNewProjectDropdown({ }: WelcomeMessageCreateNewProjectDropdownProps) { const { t } = useTranslation() const portalTemplates = getMeta('ol-portalTemplates') || [] + const docxImportEnabled = useFeatureFlag('import-docx') const { isOverleaf } = getMeta('ol-ExposedSettings') @@ -134,6 +136,19 @@ function WelcomeMessageCreateNewProjectDropdown({ {t('upload_project')}
  • + {docxImportEnabled && ( +
  • + + handleDropdownItemClick(e, 'import_docx', 'import-docx') + } + tabIndex={-1} + > + {t('import_word_document')} + +
  • + )} {isOverleaf && (
  • Another project/__sourceEntityPathHTML__, at __formattedDate__ __relativeDate__", "imported_from_external_provider_at_date": "Imported from <0>__shortenedUrlHTML__ at __formattedDate__ __relativeDate__", diff --git a/services/web/package.json b/services/web/package.json index 7905388d34..0e33b0f8cf 100644 --- a/services/web/package.json +++ b/services/web/package.json @@ -135,6 +135,7 @@ "express-session": "^1.17.1", "file-type": "^21.3.4", "focus-trap-react": "^11.0.4", + "form-data": "^4.0.5", "globby": "^5.0.0", "helmet": "^6.0.1", "https-proxy-agent": "^7.0.6", diff --git a/services/web/test/frontend/features/project-list/components/welcome-message.test.tsx b/services/web/test/frontend/features/project-list/components/welcome-message.test.tsx index 56c3693695..9d190502b2 100644 --- a/services/web/test/frontend/features/project-list/components/welcome-message.test.tsx +++ b/services/web/test/frontend/features/project-list/components/welcome-message.test.tsx @@ -1,7 +1,16 @@ import { fireEvent, render, screen } from '@testing-library/react' -import WelcomeMessage from '../../../../../frontend/js/features/project-list/components/welcome-message' +import WelcomeMessageComponent from '../../../../../frontend/js/features/project-list/components/welcome-message' import { expect } from 'chai' import getMeta from '@/utils/meta' +import { SplitTestProvider } from '@/shared/context/split-test-context' + +const WelcomeMessage = () => { + return ( + + + + ) +} describe('', function () { beforeEach(function () { diff --git a/services/web/test/unit/src/Uploads/DocumentConversionManager.test.mjs b/services/web/test/unit/src/Uploads/DocumentConversionManager.test.mjs new file mode 100644 index 0000000000..8c607f82d7 --- /dev/null +++ b/services/web/test/unit/src/Uploads/DocumentConversionManager.test.mjs @@ -0,0 +1,257 @@ +import { describe, expect, vi, beforeEach } from 'vitest' +import sinon from 'sinon' +import FormData from 'form-data' +import { FileTooLargeError } from '../../../../app/src/Features/Errors/Errors.js' + +const MODULE_PATH = + '../../../../app/src/Features/Uploads/DocumentConversionManager.mjs' + +describe('DocumentConversionManager', function () { + beforeEach(async function (ctx) { + ctx.fs = { + createReadStream: sinon.stub().returns('mocked-read-stream'), + createWriteStream: sinon.stub().returns('mocked-write-stream'), + } + + ctx.fsPromises = { + unlink: sinon.stub().resolves(), + } + + ctx.fetchUtils = { + fetchStreamWithResponse: sinon.stub().resolves(), + } + + ctx.nodeStream = { + pipeline: sinon.stub().resolves(), + } + + ctx.CompileManager = { + promises: { + _getUserCompileLimits: sinon.stub().resolves({ + compileBackendClass: 'test-backend-class', + compileGroup: 'test-compile-group', + }), + }, + } + + ctx.Settings = { + maxUploadSize: 100, + path: { + dumpFolder: '/path/to/dump/folder', + }, + apis: { + clsi: { + url: 'http://mock-clsi-url', + }, + }, + } + + vi.doMock('node:fs', () => ({ + default: ctx.fs, + })) + + vi.doMock('node:fs/promises', () => ({ + default: ctx.fsPromises, + })) + + vi.doMock('@overleaf/fetch-utils', () => ({ + fetchStreamWithResponse: ctx.fetchUtils.fetchStreamWithResponse, + })) + + vi.doMock('node:stream/promises', () => ({ + pipeline: ctx.nodeStream.pipeline, + })) + + vi.doMock('@overleaf/settings', () => ({ + default: ctx.Settings, + })) + + vi.doMock( + '../../../../app/src/Features/Compile/CompileManager.mjs', + () => ({ + default: ctx.CompileManager, + }) + ) + + ctx.DocumentConversionManager = (await import(MODULE_PATH)).default + }) + + describe('convertDocxToLaTeXZipArchive', function () { + describe('successfully', function () { + beforeEach(async function (ctx) { + ctx.path = '/path/to/input.docx' + ctx.userId = 'test-user-id' + ctx.outputPath = '/path/to/output.zip' + ctx.response = { + headers: { + get: sinon.stub().returns(null), + }, + } + ctx.response.headers.get.withArgs('Content-Length').returns('50') + + ctx.fetchUtils.fetchStreamWithResponse.resolves({ + stream: 'mocked-fetch-stream', + response: ctx.response, + }) + + ctx.result = + await ctx.DocumentConversionManager.promises.convertDocxToLaTeXZipArchive( + ctx.path, + ctx.userId + ) + }) + + it('should call fetchStreamWithResponse with the correct URL and form data', function (ctx) { + const expectedUrl = new URL(ctx.Settings.apis.clsi.url) + expectedUrl.pathname = '/convert/docx-to-latex' + expectedUrl.searchParams.set( + 'compileBackendClass', + 'test-backend-class' + ) + expectedUrl.searchParams.set('compileGroup', 'test-compile-group') + + sinon.assert.calledWith( + ctx.fetchUtils.fetchStreamWithResponse, + sinon.match(url => url.toString() === expectedUrl.toString()), + { + method: 'POST', + body: sinon.match.instanceOf(FormData), + signal: sinon.match.instanceOf(AbortSignal), + } + ) + }) + + it('should pipe result into the output file', function (ctx) { + sinon.assert.calledWith( + ctx.nodeStream.pipeline, + 'mocked-fetch-stream', + 'mocked-write-stream' + ) + }) + + it('should return a path to the output file', function (ctx) { + expect(ctx.result).to.match( + /\/path\/to\/dump\/folder\/[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}\.zip/ + ) + }) + }) + + describe('when an error occurs during conversion', function () { + beforeEach(async function (ctx) { + ctx.path = '/path/to/input.docx' + ctx.userId = 'test-user-id' + + ctx.fetchUtils.fetchStreamWithResponse.rejects( + new Error('Conversion failed') + ) + + await expect( + ctx.DocumentConversionManager.promises.convertDocxToLaTeXZipArchive( + ctx.path, + ctx.userId + ) + ).to.be.rejectedWith('document conversion failed') + }) + + it('should attempt to clean up the output file', function (ctx) { + sinon.assert.calledWith( + ctx.fsPromises.unlink, + sinon.match( + /\/path\/to\/dump\/folder\/[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}\.zip/ + ) + ) + }) + }) + + describe('when the converted archive is too large', function () { + beforeEach(async function (ctx) { + ctx.path = '/path/to/input.docx' + ctx.userId = 'test-user-id' + ctx.stream = { + destroy: sinon.stub(), + } + ctx.response = { + headers: { + get: sinon.stub(), + }, + } + ctx.response.headers.get.withArgs('Content-Length').returns('150') + + ctx.fetchUtils.fetchStreamWithResponse.resolves({ + stream: ctx.stream, + response: ctx.response, + }) + + await expect( + ctx.DocumentConversionManager.promises.convertDocxToLaTeXZipArchive( + ctx.path, + ctx.userId + ) + ).to.be.rejectedWith(sinon.match.instanceOf(FileTooLargeError)) + }) + + it('should abort the request', function (ctx) { + expect( + ctx.fetchUtils.fetchStreamWithResponse.firstCall.args[1].signal + .aborted + ).to.equal(true) + }) + + it('should destroy the response stream', function (ctx) { + sinon.assert.calledOnce(ctx.stream.destroy) + }) + + it('should not write the oversized archive to disk', function (ctx) { + sinon.assert.notCalled(ctx.fs.createWriteStream) + sinon.assert.notCalled(ctx.nodeStream.pipeline) + }) + + it('should attempt to clean up the output path', function (ctx) { + sinon.assert.calledWith( + ctx.fsPromises.unlink, + sinon.match( + /\/path\/to\/dump\/folder\/[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}\.zip/ + ) + ) + }) + }) + + describe('when the Content-Length header is missing', function () { + beforeEach(async function (ctx) { + ctx.path = '/path/to/input.docx' + ctx.userId = 'test-user-id' + ctx.response = { + headers: { + get: sinon.stub().returns(null), + }, + } + + ctx.fetchUtils.fetchStreamWithResponse.resolves({ + stream: 'mocked-fetch-stream', + response: ctx.response, + }) + + await expect( + ctx.DocumentConversionManager.promises.convertDocxToLaTeXZipArchive( + ctx.path, + ctx.userId + ) + ).to.be.rejectedWith('document conversion failed') + }) + + it('should not write the archive to disk', function (ctx) { + sinon.assert.notCalled(ctx.fs.createWriteStream) + sinon.assert.notCalled(ctx.nodeStream.pipeline) + }) + + it('should attempt to clean up the output path', function (ctx) { + sinon.assert.calledWith( + ctx.fsPromises.unlink, + sinon.match( + /\/path\/to\/dump\/folder\/[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}\.zip/ + ) + ) + }) + }) + }) +}) diff --git a/services/web/test/unit/src/Uploads/ProjectUploadController.test.mjs b/services/web/test/unit/src/Uploads/ProjectUploadController.test.mjs index 53a6d05c37..2e83784cd1 100644 --- a/services/web/test/unit/src/Uploads/ProjectUploadController.test.mjs +++ b/services/web/test/unit/src/Uploads/ProjectUploadController.test.mjs @@ -10,6 +10,7 @@ import sinon from 'sinon' import MockRequest from '../helpers/MockRequest.mjs' import MockResponse from '../helpers/MockResponse.mjs' import ArchiveErrors from '../../../../app/src/Features/Uploads/ArchiveErrors.mjs' +import { FileTooLargeError } from '../../../../app/src/Features/Errors/Errors.js' const modulePath = '../../../../app/src/Features/Uploads/ProjectUploadController.mjs' @@ -40,6 +41,11 @@ describe('ProjectUploadController', function () { ctx.EditorController = { promises: {}, } + ctx.DocumentConversionManager = { + promises: { + convertDocxToLaTeXZipArchive: sinon.stub(), + }, + } vi.doMock('multer', () => ({ default: sinon.stub(), @@ -52,7 +58,7 @@ describe('ProjectUploadController', function () { vi.doMock( '../../../../app/src/Features/Uploads/ProjectUploadManager', () => ({ - default: (ctx.ProjectUploadManager = {}), + default: (ctx.ProjectUploadManager = { promises: {} }), }) ) @@ -87,10 +93,21 @@ describe('ProjectUploadController', function () { default: ctx.EditorController, })) + vi.doMock( + '../../../../app/src/Features/Uploads/DocumentConversionManager.mjs', + () => ({ + default: ctx.DocumentConversionManager, + }) + ) + vi.doMock('fs', () => ({ default: (ctx.fs = {}), })) + vi.doMock('node:fs/promises', () => ({ + default: (ctx.fsPromises = {}), + })) + ctx.ProjectUploadController = (await import(modulePath)).default }) @@ -415,4 +432,113 @@ describe('ProjectUploadController', function () { }) }) }) + + describe('importDocx', function () { + beforeEach(async function (ctx) { + ctx.req.file = { + path: '/path/to/uploaded/file.docx', + } + ctx.req.body = { + name: 'file.docx', + } + ctx.archivePath = '/path/to/archive.zip' + ctx.fsPromises.unlink = sinon.stub().resolves() + }) + + describe('successfully', async function () { + beforeEach(async function (ctx) { + ctx.DocumentConversionManager.promises.convertDocxToLaTeXZipArchive = + sinon.stub().resolves(ctx.archivePath) + ctx.ProjectUploadManager.promises.createProjectFromZipArchive = sinon + .stub() + .resolves({ + _id: 'new-project-id', + }) + + await new Promise(resolve => { + ctx.res.json = data => { + expect(data.success).to.be.true + expect(data.project_id).to.equal('new-project-id') + resolve() + } + ctx.ProjectUploadController.importDocx(ctx.req, ctx.res) + }) + }) + + it('should call the DocumentConversionManager to convert the file', function (ctx) { + expect( + ctx.DocumentConversionManager.promises.convertDocxToLaTeXZipArchive + ).to.have.been.calledWith(ctx.req.file.path, ctx.user_id) + }) + + it('should use the resulting archive to create a new project', function (ctx) { + expect( + ctx.ProjectUploadManager.promises.createProjectFromZipArchive + ).to.have.been.calledWith(ctx.user_id, 'file', ctx.archivePath) + }) + + it('should unlink the archive after creating the project', function (ctx) { + expect(ctx.fsPromises.unlink).to.have.been.calledWith(ctx.archivePath) + }) + + it('should unlink the uploaded file', function (ctx) { + expect(ctx.fsPromises.unlink).to.have.been.calledWith(ctx.req.file.path) + }) + }) + + describe('unsuccessfully', async function () { + beforeEach(async function (ctx) { + ctx.DocumentConversionManager.promises.convertDocxToLaTeXZipArchive = + sinon.stub().rejects(new Error('Conversion failed')) + + await new Promise(resolve => { + ctx.res.json = data => { + expect(data.success).to.be.false + resolve() + } + ctx.ProjectUploadController.importDocx(ctx.req, ctx.res) + }) + }) + + it('should call the DocumentConversionManager to convert the file', function (ctx) { + expect( + ctx.DocumentConversionManager.promises.convertDocxToLaTeXZipArchive + ).to.have.been.calledWith(ctx.req.file.path, ctx.user_id) + }) + + it('should unlink the uploaded file', function (ctx) { + expect(ctx.fsPromises.unlink).to.have.been.calledWith(ctx.req.file.path) + }) + + it('should return http 500', function (ctx) { + expect(ctx.res.statusCode).to.equal(500) + }) + }) + + describe('when the converted archive is too large', async function () { + beforeEach(async function (ctx) { + ctx.DocumentConversionManager.promises.convertDocxToLaTeXZipArchive = + sinon.stub().rejects(new FileTooLargeError('file too large')) + + await new Promise(resolve => { + ctx.res.json = data => { + expect(data).to.deep.equal({ + success: false, + error: 'file_too_large', + }) + resolve() + } + ctx.ProjectUploadController.importDocx(ctx.req, ctx.res) + }) + }) + + it('should return http 422', function (ctx) { + expect(ctx.res.statusCode).to.equal(422) + }) + + it('should unlink the uploaded file', function (ctx) { + expect(ctx.fsPromises.unlink).to.have.been.calledWith(ctx.req.file.path) + }) + }) + }) })