[web+clsi] Allow docx import via pandoc (#32004)

Co-authored-by: Jakob Ackermann <jakob.ackermann@overleaf.com>
GitOrigin-RevId: 246b3290ec04867f71545b1a7c5d95d0f68379ff
This commit is contained in:
Mathias Jakobsen
2026-03-27 08:47:07 +00:00
committed by Copybot
parent 5ded3f5f90
commit 9c97876268
36 changed files with 1469 additions and 29 deletions

6
package-lock.json generated
View File

@@ -50568,6 +50568,7 @@
"dockerode": "^4.0.9",
"express": "4.22.1",
"lodash": "^4.17.21",
"multer": "2.1.1",
"overleaf-editor-core": "*",
"p-limit": "^3.1.0",
"request": "2.88.2",
@@ -50579,6 +50580,7 @@
"@istanbuljs/esm-loader-hook": "^0.3.0",
"chai": "^4.3.6",
"chai-as-promised": "^7.1.1",
"form-data": "^4.0.5",
"mocha": "^11.1.0",
"mocha-junit-reporter": "^2.2.1",
"mocha-multi-reporters": "^1.5.1",
@@ -50589,7 +50591,8 @@
"sinon-chai": "^3.7.0",
"timekeeper": "2.2.0",
"typescript": "^5.0.4",
"vitest": "^4.0.0"
"vitest": "^4.0.0",
"yauzl": "^2.10.0"
}
},
"services/clsi-cache": {
@@ -53183,6 +53186,7 @@
"express-session": "^1.17.1",
"file-type": "^21.3.4",
"focus-trap-react": "^11.0.4",
"form-data": "^4.0.5",
"globby": "^5.0.0",
"helmet": "^6.0.1",
"https-proxy-agent": "^7.0.6",

View File

@@ -1,3 +1,4 @@
compiles
output
cache
uploads

View File

@@ -154,6 +154,8 @@ test_acceptance_clean:
$(DOCKER_COMPOSE_TEST_ACCEPTANCE) down -v -t 0
test_acceptance_pre_run:
docker pull us-east1-docker.pkg.dev/overleaf-ops/ol-docker/pandoc:3.9
docker pull us-east1-docker.pkg.dev/overleaf-ops/ol-docker/pandoc-staging:3.9
ifneq (,$(wildcard test/acceptance/js/scripts/pre-run))
$(DOCKER_COMPOSE_TEST_ACCEPTANCE) run --rm test_acceptance test/acceptance/js/scripts/pre-run
endif

View File

@@ -20,6 +20,8 @@ import bodyParser from 'body-parser'
import net from 'node:net'
import os from 'node:os'
import OError from '@overleaf/o-error'
import ConversionController from './app/js/ConversionController.js'
import FileUploadMiddleware from './app/js/FileUploadMiddleware.js'
logger.initialize('clsi')
logger.logger.serializers.clsiRequest = LoggerSerializers.clsiRequest
@@ -122,6 +124,13 @@ app.get(
OutputController.createOutputZip
)
// Conversion endpoints
app.post(
'/convert/docx-to-latex',
FileUploadMiddleware.multerMiddleware,
ConversionController.convertDocxToLaTeX
)
if (process.env.NODE_ENV === 'development' && global.__coverage__) {
app.get('/coverage', (req, res) => {
const coverage = {}

View File

@@ -0,0 +1,46 @@
import logger from '@overleaf/logger'
import { expressify } from '@overleaf/promise-utils'
import fs from 'node:fs/promises'
import fsSync from 'node:fs'
import ConversionManager from './ConversionManager.js'
import { pipeline } from 'node:stream/promises'
import Settings from '@overleaf/settings'
import Path from 'node:path'
/**
 * Request handler that converts an uploaded .docx file to LaTeX and streams
 * the resulting zip archive back as `conversion.zip`.
 *
 * Responds with 404 when `Settings.enablePandocConversions` is falsy. The
 * uploaded file is removed on every path, and the directory containing the
 * generated archive is removed once streaming has finished or failed.
 * Removal failures are deliberately ignored (best-effort cleanup).
 *
 * @param {object} req - request; `req.file.path` must point at the upload
 * @param {object} res - response the archive is piped into
 */
async function convertDocxToLaTeX(req, res) {
  const uploadPath = req.file.path
  const discardUpload = () => fs.unlink(uploadPath).catch(() => {})

  if (!Settings.enablePandocConversions) {
    await discardUpload()
    return res.sendStatus(404)
  }

  logger.debug({ path: uploadPath }, 'received file for conversion')

  const conversionId = crypto.randomUUID()
  let archivePath
  try {
    archivePath = await ConversionManager.promises.convertDocxToLaTeXWithLock(
      conversionId,
      uploadPath
    )
  } finally {
    // The upload is no longer needed whether or not the conversion worked.
    await discardUpload()
  }

  const removeConversionDir = () =>
    fs
      .rm(Path.dirname(archivePath), { recursive: true, force: true })
      .catch(() => {})

  try {
    const { size } = await fs.stat(archivePath)
    res.setHeader('Content-Length', size)
    res.attachment('conversion.zip')
    res.setHeader('X-Content-Type-Options', 'nosniff')
    await pipeline(fsSync.createReadStream(archivePath), res)
  } finally {
    await removeConversionDir()
  }
}
export default {
convertDocxToLaTeX: expressify(convertDocxToLaTeX),
}

View File

@@ -0,0 +1,101 @@
import logger from '@overleaf/logger'
import Settings from '@overleaf/settings'
import fs from 'node:fs/promises'
import Path from 'node:path'
import CommandRunner from './CommandRunner.js'
import LockManager from './LockManager.js'
import OError from '@overleaf/o-error'
/**
 * Runs a docx-to-LaTeX conversion while holding the lock for its working
 * directory, `<Settings.path.compilesDir>/<conversionId>`.
 *
 * The lock is released whether the conversion succeeds or throws.
 *
 * @param {string} conversionId - identifier used as the directory name
 * @param {string} inputPath - path of the uploaded .docx file
 * @returns {Promise<string>} whatever `convertDocxToLaTeX` resolves with
 */
async function convertDocxToLaTeXWithLock(conversionId, inputPath) {
  const workDir = Path.join(Settings.path.compilesDir, conversionId)
  const dirLock = LockManager.acquire(workDir)
  try {
    const archivePath = await convertDocxToLaTeX(
      conversionId,
      workDir,
      inputPath
    )
    return archivePath
  } finally {
    dirLock.release()
  }
}
/**
 * Runs one command for a conversion through CommandRunner, using the pandoc
 * image and the configured conversion timeout (seconds converted to ms), and
 * fails when the command reports a non-zero exit code.
 *
 * @param {string} conversionId
 * @param {string} conversionDir - directory the command runs in
 * @param {string} toolName - tool named in the error message on failure
 * @param {string[]} command
 * @param {string} successMessage - debug log message on success
 * @throws {OError} when the exit code is not 0
 */
async function runConversionCommand(
  conversionId,
  conversionDir,
  toolName,
  command,
  successMessage
) {
  const { stdout, stderr, exitCode } = await CommandRunner.promises.run(
    conversionId,
    command,
    conversionDir,
    Settings.pandocImage,
    Settings.conversionTimeoutSeconds * 1000,
    {},
    'conversions'
  )
  if (exitCode !== 0) {
    throw new OError(`Non-zero exit code from ${toolName}`, {
      exitCode,
      stderr,
    })
  }
  logger.debug({ stdout, stderr, exitCode }, successMessage)
}

/**
 * Converts a .docx file to a zipped LaTeX project inside `conversionDir`.
 *
 * Steps: copy the input to `input.docx`, run pandoc to produce `main.tex`
 * (extracting media next to it), delete the copied source, then zip the
 * directory into a randomly named archive.
 *
 * On any failure the whole conversion directory is removed (best effort) and
 * the error is rethrown wrapped as 'pandoc conversion failed'.
 *
 * @param {string} conversionId
 * @param {string} conversionDir
 * @param {string} inputPath - path of the uploaded .docx file
 * @returns {Promise<string>} path of the generated zip archive
 * @throws {OError} 'pandoc conversion failed', with the original error as cause
 */
async function convertDocxToLaTeX(conversionId, conversionDir, inputPath) {
  const newSourcePath = Path.join(conversionDir, 'input.docx')
  const outputName = `${crypto.randomUUID()}.zip`
  try {
    // Setup runs inside the try so that a failed copy does not leave a
    // half-populated conversion directory behind.
    await fs.mkdir(conversionDir, { recursive: true })
    await fs.copyFile(inputPath, newSourcePath)

    await runConversionCommand(
      conversionId,
      conversionDir,
      'pandoc',
      [
        'pandoc',
        'input.docx',
        '--output',
        'main.tex',
        '--extract-media=.',
        '--from',
        'docx+citations',
        '--to',
        'latex',
        '--citeproc',
        '--standalone',
      ],
      'conversion command completed'
    )

    // Clean up the source document to leave only the conversion result
    await fs.unlink(newSourcePath).catch(() => {})

    await runConversionCommand(
      conversionId,
      conversionDir,
      'zip',
      ['zip', '-r', outputName, '.'],
      'conversion output compressed'
    )
  } catch (error) {
    // Clean up the conversion directory on error to avoid leaving failed conversions around
    await fs.rm(conversionDir, { force: true, recursive: true }).catch(() => {})
    throw new OError('pandoc conversion failed').withCause(error)
  }
  return Path.join(conversionDir, outputName)
}
export default {
promises: {
convertDocxToLaTeXWithLock,
},
}

View File

@@ -0,0 +1,31 @@
import multer from 'multer'
import Settings from '@overleaf/settings'
import logger from '@overleaf/logger'
// Multer instance used for conversion uploads. Files are stored under
// Settings.path.uploadFolder and capped at Settings.maxUploadSize bytes.
const upload = multer({
dest: Settings.path.uploadFolder,
limits: {
fileSize: Settings.maxUploadSize,
// NOTE(review): multer's `parts` limit counts fields and files together;
// presumably this allows the file plus one form field — confirm against
// the uploading client.
parts: 2,
},
})
/**
 * Middleware that accepts a single multipart file in the `qqfile` field.
 *
 * Outcomes:
 * - multer reports LIMIT_FILE_SIZE: 422 with `error: 'file_too_large'`
 * - any other upload error: forwarded to `next(err)`
 * - no `req.file.path` after a clean upload: 400 with
 *   `error: 'invalid_upload_request'`
 * - otherwise: `next()` is called
 *
 * @param {object} req
 * @param {object} res
 * @param {Function} next
 */
function multerMiddleware(req, res, next) {
  const receiveSingleFile = upload.single('qqfile')

  return receiveSingleFile(req, res, err => {
    if (err) {
      const exceededSizeLimit =
        err instanceof multer.MulterError && err.code === 'LIMIT_FILE_SIZE'
      if (!exceededSizeLimit) {
        return next(err)
      }
      return res.status(422).json({ success: false, error: 'file_too_large' })
    }

    const uploadedPath = req.file?.path
    if (uploadedPath) {
      next()
      return
    }

    logger.info({ req }, 'missing req.file.path on upload')
    return res
      .status(400)
      .json({ success: false, error: 'invalid_upload_request' })
  })
}
export default {
multerMiddleware,
}

View File

@@ -82,7 +82,7 @@ export default CommandRunner = {
err.code = code
return callback(err)
} else {
return callback(null, { stdout })
return callback(null, { stdout, exitCode: code })
}
})

View File

@@ -1,7 +1,7 @@
clsi
--data-dirs=cache,compiles,output
--dependencies=
--env-add=DOWNLOAD_HOST=http://clsi-nginx:8080,ALLOWED_COMPILE_GROUPS="clsi-perf simple-latex-file",ENABLE_PDF_CACHING="true",PDF_CACHING_ENABLE_WORKER_POOL="true",ALLOWED_IMAGES="quay.io/sharelatex/texlive-full:2017.1 quay.io/sharelatex/texlive-full:2025.1",TEXLIVE_IMAGE=quay.io/sharelatex/texlive-full:2025.1,TEX_LIVE_IMAGE_NAME_OVERRIDE=us-east1-docker.pkg.dev/overleaf-ops/ol-docker,TEXLIVE_IMAGE_USER="tex",SANDBOXED_COMPILES="true",SANDBOXED_COMPILES_HOST_DIR_COMPILES=$PWD/compiles,SANDBOXED_COMPILES_HOST_DIR_OUTPUT=$PWD/output
--env-add=DOWNLOAD_HOST=http://clsi-nginx:8080,ALLOWED_COMPILE_GROUPS="clsi-perf simple-latex-file",ENABLE_PDF_CACHING="true",PDF_CACHING_ENABLE_WORKER_POOL="true",ALLOWED_IMAGES="quay.io/sharelatex/texlive-full:2017.1 quay.io/sharelatex/texlive-full:2025.1 quay.io/sharelatex/pandoc:3.9",TEXLIVE_IMAGE=quay.io/sharelatex/texlive-full:2025.1,TEX_LIVE_IMAGE_NAME_OVERRIDE=us-east1-docker.pkg.dev/overleaf-ops/ol-docker,TEXLIVE_IMAGE_USER="tex",SANDBOXED_COMPILES="true",SANDBOXED_COMPILES_HOST_DIR_COMPILES=$PWD/compiles,SANDBOXED_COMPILES_HOST_DIR_OUTPUT=$PWD/output,ENABLE_PANDOC_CONVERSIONS=true
--env-pass-through=
--esmock-loader=False
--node-version=24.13.0

View File

@@ -20,11 +20,19 @@ module.exports = {
process.env.CLSI_OUTPUT_PATH || Path.resolve(__dirname, '../output'),
clsiCacheDir:
process.env.CLSI_CACHE_PATH || Path.resolve(__dirname, '../cache'),
uploadFolder:
process.env.CLSI_UPLOAD_PATH || Path.resolve(__dirname, '../uploads'),
synctexBaseDir(projectId) {
return Path.join(this.compilesDir, projectId)
},
},
conversionTimeoutSeconds:
parseInt(process.env.CLSI_CONVERSION_TIMEOUT_SECONDS, 10) || 60,
pandocImage: process.env.PANDOC_IMAGE || 'quay.io/sharelatex/pandoc:3.9',
enablePandocConversions: process.env.ENABLE_PANDOC_CONVERSIONS === 'true',
maxUploadSize: 50 * 1024 * 1024,
internal: {
clsi: {
port: 3013,
@@ -152,6 +160,7 @@ if ((process.env.DOCKER_RUNNER || process.env.SANDBOXED_COMPILES) === 'true') {
wordcount: { 'HostConfig.AutoRemove': true },
synctex: { 'HostConfig.AutoRemove': true },
'synctex-output': { 'HostConfig.AutoRemove': true },
conversions: { 'HostConfig.AutoRemove': true },
}
module.exports.clsi.docker.compileGroupConfig = Object.assign(
defaultCompileGroupConfig,

View File

@@ -31,13 +31,14 @@ services:
ALLOWED_COMPILE_GROUPS: "clsi-perf simple-latex-file"
ENABLE_PDF_CACHING: "true"
PDF_CACHING_ENABLE_WORKER_POOL: "true"
ALLOWED_IMAGES: "quay.io/sharelatex/texlive-full:2017.1 quay.io/sharelatex/texlive-full:2025.1"
ALLOWED_IMAGES: "quay.io/sharelatex/texlive-full:2017.1 quay.io/sharelatex/texlive-full:2025.1 quay.io/sharelatex/pandoc:3.9"
TEXLIVE_IMAGE: quay.io/sharelatex/texlive-full:2025.1
TEX_LIVE_IMAGE_NAME_OVERRIDE: us-east1-docker.pkg.dev/overleaf-ops/ol-docker
TEXLIVE_IMAGE_USER: "tex"
SANDBOXED_COMPILES: "true"
SANDBOXED_COMPILES_HOST_DIR_COMPILES: $PWD/compiles
SANDBOXED_COMPILES_HOST_DIR_OUTPUT: $PWD/output
ENABLE_PANDOC_CONVERSIONS: true
volumes:
- ./reports:/overleaf/services/clsi/reports
- ./compiles:/overleaf/services/clsi/compiles

View File

@@ -45,13 +45,14 @@ services:
ALLOWED_COMPILE_GROUPS: "clsi-perf simple-latex-file"
ENABLE_PDF_CACHING: "true"
PDF_CACHING_ENABLE_WORKER_POOL: "true"
ALLOWED_IMAGES: "quay.io/sharelatex/texlive-full:2017.1 quay.io/sharelatex/texlive-full:2025.1"
ALLOWED_IMAGES: "quay.io/sharelatex/texlive-full:2017.1 quay.io/sharelatex/texlive-full:2025.1 quay.io/sharelatex/pandoc:3.9"
TEXLIVE_IMAGE: quay.io/sharelatex/texlive-full:2025.1
TEX_LIVE_IMAGE_NAME_OVERRIDE: us-east1-docker.pkg.dev/overleaf-ops/ol-docker
TEXLIVE_IMAGE_USER: "tex"
SANDBOXED_COMPILES: "true"
SANDBOXED_COMPILES_HOST_DIR_COMPILES: $PWD/compiles
SANDBOXED_COMPILES_HOST_DIR_OUTPUT: $PWD/output
ENABLE_PANDOC_CONVERSIONS: true
depends_on:
clsi-nginx:
condition: service_started

View File

@@ -9,5 +9,6 @@ usermod -aG dockeronhost node
mkdir -p /overleaf/services/clsi/cache && chown node:node /overleaf/services/clsi/cache
mkdir -p /overleaf/services/clsi/compiles && chown node:node /overleaf/services/clsi/compiles
mkdir -p /overleaf/services/clsi/output && chown node:node /overleaf/services/clsi/output
mkdir -p /overleaf/services/clsi/uploads && chown node:node /overleaf/services/clsi/uploads
exec runuser -u node -- "$@"

View File

@@ -30,6 +30,7 @@
"dockerode": "^4.0.9",
"express": "4.22.1",
"lodash": "^4.17.21",
"multer": "2.1.1",
"overleaf-editor-core": "*",
"p-limit": "^3.1.0",
"request": "2.88.2",
@@ -41,6 +42,7 @@
"@istanbuljs/esm-loader-hook": "^0.3.0",
"chai": "^4.3.6",
"chai-as-promised": "^7.1.1",
"form-data": "^4.0.5",
"mocha": "^11.1.0",
"mocha-junit-reporter": "^2.2.1",
"mocha-multi-reporters": "^1.5.1",
@@ -51,6 +53,7 @@
"sinon-chai": "^3.7.0",
"timekeeper": "2.2.0",
"typescript": "^5.0.4",
"vitest": "^4.0.0"
"vitest": "^4.0.0",
"yauzl": "^2.10.0"
}
}

View File

@@ -0,0 +1,83 @@
import Client from './helpers/Client.js'
import ClsiApp from './helpers/ClsiApp.js'
import Path from 'node:path'
import fs from 'node:fs'
import { pipeline } from 'node:stream/promises'
import yauzl from 'yauzl'
import { expect } from 'chai'
// Acceptance tests for the CLSI docx-to-LaTeX conversion endpoint.
describe('Conversions', function () {
  describe('docx conversion', function () {
    before(async function () {
      await ClsiApp.ensureRunning()
      // NOTE(review): this.project_id and this.request are never assigned in
      // this suite, so this compile is issued with undefined arguments and its
      // outcome is ignored. It looks like a leftover from another test file —
      // confirm before removing.
      try {
        this.body = await Client.compile(this.project_id, this.request)
      } catch (error) {
        this.error = error
      }
    })

    it('should convert docx file to LaTeX', async function () {
      const sourcePath = Path.join(
        import.meta.dirname,
        '../fixtures/conversion-source.docx'
      )
      const zipPath =
        '/tmp/clsi_acceptance_tests_' + crypto.randomUUID() + '.zip'

      try {
        const stream = await Client.convertDocx(sourcePath)
        await pipeline(stream, fs.createWriteStream(zipPath))

        // Walk the archive entry by entry. Every assertion failure must
        // reject this promise: an exception thrown inside an event listener
        // would otherwise surface as an uncaught exception, not a test failure.
        const sawMainTex = await new Promise((resolve, reject) => {
          yauzl.open(zipPath, { lazyEntries: true }, (err, zipfile) => {
            if (err) {
              return reject(err)
            }
            let mainTexChecked = false
            zipfile.on('error', reject)
            zipfile.on('end', () => resolve(mainTexChecked))
            zipfile.on('entry', entry => {
              if (entry.fileName === 'main.tex') {
                zipfile.openReadStream(entry, (err, readStream) => {
                  if (err) {
                    return reject(err)
                  }
                  let data = ''
                  readStream.on('error', reject)
                  readStream.on('data', chunk => {
                    data += chunk.toString()
                  })
                  readStream.on('end', () => {
                    try {
                      expect(data).to.include('\\begin{document}')
                      expect(data).to.include(
                        '\\[x = \\frac{- b \\pm \\sqrt{b^{2} - 4ac}}{2a}\\]'
                      )
                      mainTexChecked = true
                      zipfile.readEntry()
                    } catch (err) {
                      reject(err)
                    }
                  })
                })
              } else if (entry.fileName === 'media/') {
                // Skip the media directory entry
                zipfile.readEntry()
              } else if (entry.fileName.startsWith('media/')) {
                try {
                  expect(entry.fileName).to.equal('media/image1.png')
                } catch (err) {
                  return reject(err)
                }
                zipfile.readEntry()
              } else {
                reject(new Error('Unexpected file in zip: ' + entry.fileName))
              }
            })
            zipfile.readEntry()
          })
        })

        // Without this check an archive lacking main.tex would pass.
        expect(sawMainTex, 'archive should contain main.tex').to.equal(true)
      } finally {
        // Do not leave the downloaded archive behind in /tmp.
        await fs.promises.rm(zipPath, { force: true })
      }
    })

    it('should fail when file is not a docx', async function () {
      const sourcePath = Path.join(
        import.meta.dirname,
        '../fixtures/minimal.pdf'
      )
      await expect(Client.convertDocx(sourcePath)).to.eventually.be.rejected
    })
  })
})

View File

@@ -1,8 +1,14 @@
import express from 'express'
import { fetchJson, fetchNothing, fetchString } from '@overleaf/fetch-utils'
import {
fetchJson,
fetchNothing,
fetchStream,
fetchString,
} from '@overleaf/fetch-utils'
import fs from 'node:fs'
import fsPromises from 'node:fs/promises'
import Settings from '@overleaf/settings'
import FormData from 'form-data'
const host = Settings.apis.clsi.url
@@ -24,6 +30,15 @@ function compile(projectId, data) {
})
}
async function convertDocx(path) {
const formData = new FormData()
formData.append('qqfile', fs.createReadStream(path))
return await fetchStream(`${host}/convert/docx-to-latex`, {
method: 'POST',
body: formData,
})
}
async function stopCompile(projectId) {
return await fetchNothing(`${host}/project/${projectId}/compile/stop`, {
method: 'POST',
@@ -187,6 +202,7 @@ function smokeTest() {
export default {
randomId,
compile,
convertDocx,
stopCompile,
clearCache,
getOutputFile,

View File

@@ -1,5 +1,6 @@
import app from '../../../../app.js'
import Settings from '@overleaf/settings'
import testLogRecorder from '@overleaf/logger/test-log-recorder.js'
function startApp() {
return new Promise((resolve, reject) => {
@@ -26,6 +27,10 @@ async function ensureRunning() {
await appStartedPromise
}
if (process.env.CI === 'true') {
beforeEach('record error logs in junit', testLogRecorder)
}
export default {
ensureRunning,
}

View File

@@ -0,0 +1,158 @@
import sinon from 'sinon'
import { vi, describe, it, beforeEach, expect } from 'vitest'
import Path from 'node:path'
import { PassThrough } from 'node:stream'
const MODULE_PATH = Path.join(
import.meta.dirname,
'../../../app/js/ConversionController'
)
describe('ConversionController', function () {
beforeEach(async function (ctx) {
ctx.conversionDir = '/path/to/conversion/result'
ctx.zipPath = '/path/to/conversion/result/output.zip'
ctx.zipStat = { size: 1234 }
ctx.Settings = {
enablePandocConversions: true,
}
ctx.ConversionManager = {
promises: {
convertDocxToLaTeXWithLock: sinon.stub().resolves(ctx.zipPath),
},
}
ctx.fs = {
stat: sinon.stub().resolves(ctx.zipStat),
unlink: sinon.stub().resolves(),
rm: sinon.stub().resolves(),
}
ctx.readStream = new PassThrough()
ctx.fsSync = {
createReadStream: sinon.stub().returns(ctx.readStream),
}
ctx.pipeline = sinon.stub().resolves()
vi.doMock('node:fs/promises', () => ({
default: ctx.fs,
}))
vi.doMock('node:fs', () => ({
default: ctx.fsSync,
}))
vi.doMock('node:stream/promises', () => ({
pipeline: ctx.pipeline,
}))
vi.doMock('@overleaf/settings', () => ({
default: ctx.Settings,
}))
vi.doMock('../../../app/js/ConversionManager', () => ({
default: ctx.ConversionManager,
}))
ctx.res = new PassThrough()
ctx.res.attachment = sinon.stub()
ctx.res.setHeader = sinon.stub()
ctx.ConversionController = (await import(MODULE_PATH)).default
})
describe('convertDocxToLaTeX', function () {
describe('when conversions are disabled', function () {
beforeEach(async function (ctx) {
ctx.Settings.enablePandocConversions = false
ctx.req = {
file: { path: '/path/to/uploaded/file.docx' },
}
ctx.res.sendStatus = sinon.stub()
await ctx.ConversionController.convertDocxToLaTeX(ctx.req, ctx.res)
})
it('should remove the uploaded file', function (ctx) {
sinon.assert.calledWith(ctx.fs.unlink, ctx.req.file.path)
})
it('should return 404', function (ctx) {
sinon.assert.calledWith(ctx.res.sendStatus, 404)
})
it('should not call the conversion manager', function (ctx) {
sinon.assert.notCalled(
ctx.ConversionManager.promises.convertDocxToLaTeXWithLock
)
})
})
describe('successfully', function () {
beforeEach(async function (ctx) {
ctx.req = {
file: { path: '/path/to/uploaded/file.docx' },
}
await ctx.ConversionController.convertDocxToLaTeX(ctx.req, ctx.res)
})
it('should call the conversion manager with the uploaded file path', function (ctx) {
sinon.assert.calledWith(
ctx.ConversionManager.promises.convertDocxToLaTeXWithLock,
sinon.match(
/^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/
),
ctx.req.file.path
)
})
it('should look up the generated zip file size', function (ctx) {
sinon.assert.calledWith(ctx.fs.stat, ctx.zipPath)
})
it('should set the response headers for a zip file download', function (ctx) {
sinon.assert.calledWith(
ctx.res.setHeader,
'Content-Length',
ctx.zipStat.size
)
sinon.assert.calledWith(ctx.res.attachment, 'conversion.zip')
sinon.assert.calledWith(
ctx.res.setHeader,
'X-Content-Type-Options',
'nosniff'
)
})
it('should stream the generated zip file to the response', function (ctx) {
sinon.assert.calledWith(ctx.fsSync.createReadStream, ctx.zipPath)
sinon.assert.calledWith(ctx.pipeline, ctx.readStream, ctx.res)
})
it('should clean up the generated zip file', function (ctx) {
sinon.assert.calledWith(ctx.fs.rm, ctx.conversionDir)
})
})
describe('unsuccessfully', function () {
describe('on streaming error', function () {
it('should propagate the error and still clean up', async function (ctx) {
ctx.pipeline.rejects(new Error('mock stream error'))
const res = new PassThrough()
res.attachment = sinon.stub()
res.setHeader = sinon.stub()
const req = { file: { path: '/path/to/uploaded/file.docx' } }
await expect(
ctx.ConversionController.convertDocxToLaTeX(req, res)
).to.be.rejectedWith('mock stream error')
sinon.assert.calledWith(ctx.fs.rm, ctx.conversionDir)
})
})
})
})
})

View File

@@ -0,0 +1,253 @@
import Path from 'node:path'
import sinon from 'sinon'
import { vi, describe, beforeEach, afterEach, it, expect } from 'vitest'
const MODULE_PATH = Path.join(
import.meta.dirname,
'../../../app/js/ConversionManager'
)
describe('ConversionManager', function () {
beforeEach(async function (ctx) {
ctx.CommandRunner = {
promises: {
run: sinon.stub().resolves({ stdout: '', stderr: '', exitCode: 0 }),
},
}
ctx.lock = {
release: sinon.stub(),
}
ctx.LockManager = {
acquire: sinon.stub().returns(ctx.lock),
}
ctx.Settings = {
pandocImage: 'mock-pandoc-image',
conversionTimeoutSeconds: 60,
path: { compilesDir: '/compiles' },
}
ctx.fs = {
mkdir: sinon.stub().resolves(),
copyFile: sinon.stub().resolves(),
rm: sinon.stub().resolves(),
unlink: sinon.stub().resolves(),
}
ctx.conversionId = 'test-conversion-id'
ctx.inputPath = '/path/to/input.docx'
ctx.conversionDir = '/compiles/test-conversion-id'
ctx.outputPath = '/compiles/test-conversion-id/output-uuid.zip'
ctx.uuidStub = sinon
.stub(globalThis.crypto, 'randomUUID')
.returns('output-uuid')
vi.doMock('../../../app/js/LockManager', () => ({
default: ctx.LockManager,
}))
vi.doMock('@overleaf/settings', () => ({
default: ctx.Settings,
}))
vi.doMock('../../../app/js/CommandRunner', () => ({
default: ctx.CommandRunner,
}))
vi.doMock('node:fs/promises', () => ({ default: ctx.fs }))
ctx.ConversionManager = (await import(MODULE_PATH)).default
})
afterEach(function (ctx) {
ctx.uuidStub.restore()
})
describe('convertDocxToLaTeXWithLock', function () {
describe('general behavior', function () {
beforeEach(async function (ctx) {
ctx.result =
await ctx.ConversionManager.promises.convertDocxToLaTeXWithLock(
ctx.conversionId,
ctx.inputPath
)
})
it('should acquire a lock', async function (ctx) {
sinon.assert.calledWith(ctx.LockManager.acquire, ctx.conversionDir)
})
it('should copy the input file to the conversion directory', async function (ctx) {
sinon.assert.calledWith(ctx.fs.mkdir, ctx.conversionDir, {
recursive: true,
})
sinon.assert.calledWith(
ctx.fs.copyFile,
ctx.inputPath,
Path.join(ctx.conversionDir, 'input.docx')
)
})
it('should convert conversion timeout to milliseconds', async function (ctx) {
expect(ctx.CommandRunner.promises.run.firstCall.args[4]).toBe(60_000)
expect(ctx.CommandRunner.promises.run.secondCall.args[4]).toBe(60_000)
})
it('should run pandoc followed by zip in the conversion directory', function (ctx) {
expect(ctx.CommandRunner.promises.run.callCount).toBe(2)
expect(ctx.CommandRunner.promises.run.firstCall.args).toEqual([
ctx.conversionId,
[
'pandoc',
'input.docx',
'--output',
'main.tex',
'--extract-media=.',
'--from',
'docx+citations',
'--to',
'latex',
'--citeproc',
'--standalone',
],
ctx.conversionDir,
ctx.Settings.pandocImage,
60_000,
{},
'conversions',
])
expect(ctx.CommandRunner.promises.run.secondCall.args).toEqual([
ctx.conversionId,
['zip', '-r', 'output-uuid.zip', '.'],
ctx.conversionDir,
ctx.Settings.pandocImage,
60_000,
{},
'conversions',
])
})
})
describe('successful conversion', function () {
beforeEach(async function (ctx) {
ctx.CommandRunner.promises.run.resolves({
stdout: 'mock-stdout',
stderr: 'mock-stderr',
exitCode: 0,
})
ctx.result =
await ctx.ConversionManager.promises.convertDocxToLaTeXWithLock(
ctx.conversionId,
ctx.inputPath
)
})
it('should remove the source document after conversion', async function (ctx) {
sinon.assert.calledWith(
ctx.fs.unlink,
Path.join(ctx.conversionDir, 'input.docx')
)
})
it('should return the conversion directory', function (ctx) {
expect(ctx.result).toBe(ctx.outputPath)
})
it('should release the lock', function (ctx) {
sinon.assert.called(ctx.lock.release)
})
})
describe('unsuccessful conversion (exitcode)', function () {
beforeEach(async function (ctx) {
ctx.CommandRunner.promises.run.resolves({
stdout: 'mock-stdout',
stderr: 'mock-stderr',
exitCode: 63,
})
await expect(
ctx.ConversionManager.promises.convertDocxToLaTeXWithLock(
ctx.conversionId,
ctx.inputPath
)
).to.be.rejectedWith('pandoc conversion failed')
})
it('should remove the entire conversion directory', async function (ctx) {
sinon.assert.calledWith(ctx.fs.rm, ctx.conversionDir, {
force: true,
recursive: true,
})
})
it('should release the lock', function (ctx) {
sinon.assert.called(ctx.lock.release)
})
})
describe('unsuccessful compression (exitcode)', function () {
beforeEach(async function (ctx) {
ctx.CommandRunner.promises.run
.onFirstCall()
.resolves({
stdout: 'mock-pandoc-stdout',
stderr: 'mock-pandoc-stderr',
exitCode: 0,
})
.onSecondCall()
.resolves({
stdout: 'mock-zip-stdout',
stderr: 'mock-zip-stderr',
exitCode: 12,
})
await expect(
ctx.ConversionManager.promises.convertDocxToLaTeXWithLock(
ctx.conversionId,
ctx.inputPath
)
).to.be.rejectedWith('pandoc conversion failed')
})
it('should remove the entire conversion directory', async function (ctx) {
sinon.assert.calledWith(ctx.fs.rm, ctx.conversionDir, {
force: true,
recursive: true,
})
})
it('should release the lock', function (ctx) {
sinon.assert.called(ctx.lock.release)
})
})
describe('unsuccessful conversion (throws)', function () {
beforeEach(async function (ctx) {
ctx.CommandRunner.promises.run.rejects(
new Error('mock conversion error')
)
await expect(
ctx.ConversionManager.promises.convertDocxToLaTeXWithLock(
ctx.conversionId,
ctx.inputPath
)
).to.be.rejectedWith('pandoc conversion failed')
})
it('should remove the entire conversion directory', async function (ctx) {
sinon.assert.calledWith(ctx.fs.rm, ctx.conversionDir, {
force: true,
recursive: true,
})
})
it('should release the lock', function (ctx) {
sinon.assert.called(ctx.lock.release)
})
})
})
})

View File

@@ -123,7 +123,7 @@ describe('DockerRunner', () => {
await new Promise((resolve, reject) => {
ctx.DockerRunner._runAndWaitForContainer = sinon
.stub()
.callsArgWith(3, null, (ctx.output = 'mock-output'))
.callsArgWith(3, null, (ctx.output = { stdout: 'mock-output' }))
return ctx.DockerRunner.run(
ctx.project_id,
ctx.command,
@@ -168,7 +168,7 @@ describe('DockerRunner', () => {
ctx.directory = '/var/lib/overleaf/data/compiles/xyz'
ctx.DockerRunner._runAndWaitForContainer = sinon
.stub()
.callsArgWith(3, null, (ctx.output = 'mock-output'))
.callsArgWith(3, null, (ctx.output = { stdout: 'mock-output' }))
return ctx.DockerRunner.run(
ctx.project_id,
ctx.command,
@@ -199,7 +199,7 @@ describe('DockerRunner', () => {
ctx.directory = '/var/lib/overleaf/data/output/xyz/generated-files/id'
ctx.DockerRunner._runAndWaitForContainer = sinon
.stub()
.callsArgWith(3, null, (ctx.output = 'mock-output'))
.callsArgWith(3, null, (ctx.output = { stdout: 'mock-output' }))
ctx.DockerRunner.run(
ctx.project_id,
ctx.command,
@@ -230,7 +230,7 @@ describe('DockerRunner', () => {
ctx.directory = '/var/lib/overleaf/data/compile/xyz'
ctx.DockerRunner._runAndWaitForContainer = sinon
.stub()
.callsArgWith(3, null, (ctx.output = 'mock-output'))
.callsArgWith(3, null, (ctx.output = { stdout: 'mock-output' }))
ctx.DockerRunner.run(
ctx.project_id,
ctx.command,
@@ -261,7 +261,7 @@ describe('DockerRunner', () => {
ctx.directory = '/var/lib/overleaf/data/compile/xyz'
ctx.DockerRunner._runAndWaitForContainer = sinon
.stub()
.callsArgWith(3, null, (ctx.output = 'mock-output'))
.callsArgWith(3, null, (ctx.output = { stdout: 'mock-output' }))
ctx.DockerRunner.run(
ctx.project_id,
ctx.command,
@@ -290,7 +290,7 @@ describe('DockerRunner', () => {
describe('when the run throws an error', () => {
beforeEach(ctx => {
let firstTime = true
ctx.output = 'mock-output'
ctx.output = { stdout: 'mock-output' }
ctx.DockerRunner._runAndWaitForContainer = (
options,
volumes,
@@ -342,7 +342,7 @@ describe('DockerRunner', () => {
beforeEach(ctx => {
ctx.DockerRunner._runAndWaitForContainer = sinon
.stub()
.callsArgWith(3, null, (ctx.output = 'mock-output'))
.callsArgWith(3, null, (ctx.output = { stdout: 'mock-output' }))
ctx.DockerRunner.run(
ctx.project_id,
ctx.command,
@@ -372,7 +372,7 @@ describe('DockerRunner', () => {
ctx.Settings.texliveImageNameOveride = 'overrideimage.com/something'
ctx.DockerRunner._runAndWaitForContainer = sinon
.stub()
.callsArgWith(3, null, (ctx.output = 'mock-output'))
.callsArgWith(3, null, (ctx.output = { stdout: 'mock-output' }))
ctx.DockerRunner.run(
ctx.project_id,
ctx.command,
@@ -399,7 +399,7 @@ describe('DockerRunner', () => {
]
ctx.DockerRunner._runAndWaitForContainer = sinon
.stub()
.callsArgWith(3, null, (ctx.output = 'mock-output'))
.callsArgWith(3, null, (ctx.output = { stdout: 'mock-output' }))
})
describe('with a valid image', () => {
@@ -477,7 +477,7 @@ describe('DockerRunner', () => {
}
ctx.DockerRunner._runAndWaitForContainer = sinon
.stub()
.callsArgWith(3, null, (ctx.output = 'mock-output'))
.callsArgWith(3, null, (ctx.output = { stdout: 'mock-output' }))
ctx.DockerRunner.run(
ctx.project_id,
ctx.command,
@@ -520,7 +520,7 @@ describe('DockerRunner', () => {
attachStreamHandler,
callback
) => {
attachStreamHandler(null, (ctx.output = 'mock-output'))
attachStreamHandler(null, (ctx.output = { stdout: 'mock-output' }))
callback(null, (ctx.containerId = 'container-id'))
}
sinon.spy(ctx.DockerRunner, 'startContainer')

View File

@@ -116,7 +116,15 @@ async function _getProjectCompileLimits(project) {
if (!project) {
throw new Error('project not found')
}
const owner = await UserGetter.promises.getUser(project.owner_ref, {
const limits = await _getUserCompileLimits(project.owner_ref)
if (project.fromV1TemplateId === Settings.overrideCompileTimeForTemplate) {
limits.timeout = Math.max(limits.timeout, 20)
}
return limits
}
async function _getUserCompileLimits(userId) {
const owner = await UserGetter.promises.getUser(userId, {
_id: 1,
alphaProgram: 1,
analyticsId: 1,
@@ -141,9 +149,7 @@ async function _getProjectCompileLimits(project) {
compileBackendClass: compileGroup === 'standard' ? 'c3d' : 'c4d',
ownerAnalyticsId: analyticsId,
}
if (project.fromV1TemplateId === Settings.overrideCompileTimeForTemplate) {
limits.timeout = Math.max(limits.timeout, 20)
}
return limits
}
@@ -208,6 +214,7 @@ export default CompileManager = {
stopCompile,
wordCount,
syncTeX,
_getUserCompileLimits,
},
compile: callbackifyMultiResult(instrumentedCompile, [
'status',

View File

@@ -527,10 +527,16 @@ async function projectListPage(req, res, next) {
const hasAiAssist =
Features.hasFeature('saas') && (await _userHasAIAssist(user))
await SplitTestHandler.promises.getAssignment(
req,
res,
'themed-project-dashboard'
const splitTests = [
// Split tests that will be made available to the frontend
'themed-project-dashboard',
'import-docx',
].filter(Boolean)
await Promise.all(
splitTests.map(splitTestName =>
SplitTestHandler.promises.getAssignment(req, res, splitTestName)
)
)
const userSettings = await UserSettingsHelper.buildUserSettings(

View File

@@ -0,0 +1,84 @@
import Settings from '@overleaf/settings'
import CompileManager from '../Compile/CompileManager.mjs'
import fs from 'node:fs'
import fsPromises from 'node:fs/promises'
import logger from '@overleaf/logger'
import Path from 'node:path'
import { fetchStreamWithResponse } from '@overleaf/fetch-utils'
import { pipeline } from 'node:stream/promises'
import OError from '@overleaf/o-error'
import FormData from 'form-data'
import { FileTooLargeError } from '../Errors/Errors.js'
/**
 * Sends a .docx file to CLSI for conversion and stores the returned zip
 * archive in `Settings.path.dumpFolder`.
 *
 * The CLSI request carries the user's compile backend class and compile
 * group as query parameters. The response must announce its size through a
 * valid Content-Length header no larger than `Settings.maxUploadSize`;
 * otherwise the response is aborted without being written to disk.
 *
 * @param {string} path - path of the uploaded .docx file
 * @param {string} userId - user whose compile limits select the CLSI backend
 * @returns {Promise<string>} path of the downloaded zip archive
 * @throws {FileTooLargeError} when the announced archive size exceeds the limit
 * @throws {OError} 'document conversion failed' for every other failure
 */
async function convertDocxToLaTeXZipArchive(path, userId) {
  const clsiUrl = new URL(Settings.apis.clsi.url)
  const limits = await CompileManager.promises._getUserCompileLimits(userId)
  clsiUrl.pathname = '/convert/docx-to-latex'
  clsiUrl.searchParams.set('compileBackendClass', limits.compileBackendClass)
  clsiUrl.searchParams.set('compileGroup', limits.compileGroup)

  const formData = new FormData()
  formData.append('qqfile', fs.createReadStream(path))

  logger.debug(
    { clsiUrl: clsiUrl.toString() },
    'sending docx to CLSI for conversion'
  )

  const outputFileName = crypto.randomUUID() + '.zip'
  const outputPath = Path.join(Settings.path.dumpFolder, outputFileName)
  let outputStream

  const abortController = new AbortController()
  try {
    const { stream, response } = await fetchStreamWithResponse(clsiUrl, {
      method: 'POST',
      body: formData,
      signal: abortController.signal,
    })

    // Release the CLSI response when it is rejected before being consumed,
    // so the unread body does not keep the request open.
    const discardResponse = () => {
      abortController.abort()
      stream.destroy()
    }

    const contentLengthHeader = response.headers.get('Content-Length')
    if (contentLengthHeader == null) {
      logger.warn(
        'CLSI did not provide Content-Length header for converted document'
      )
      discardResponse()
      throw new OError('CLSI response missing Content-Length header')
    }

    const contentLength = Number.parseInt(contentLengthHeader, 10)
    // A non-numeric header parses to NaN, and `NaN > limit` is false, which
    // would silently skip the size check below.
    if (!Number.isSafeInteger(contentLength) || contentLength < 0) {
      discardResponse()
      throw new OError('CLSI response has invalid Content-Length header', {
        contentLengthHeader,
      })
    }

    if (contentLength > Settings.maxUploadSize) {
      discardResponse()
      throw new FileTooLargeError({
        message: 'converted document archive too large',
        info: {
          size: contentLength,
        },
      })
    }

    outputStream = fs.createWriteStream(outputPath)

    await pipeline(stream, outputStream)
    logger.debug({ outputPath }, 'received converted file from CLSI')
  } catch (error) {
    logger.error({ err: error }, 'error during document conversion')
    outputStream?.destroy()
    // Make sure to clean up the output file if conversion didn't work
    await fsPromises.unlink(outputPath).catch(() => {})
    if (error instanceof FileTooLargeError) {
      throw error
    }
    throw new OError('document conversion failed').withCause(error)
  }

  return outputPath
}
// Module API: the async conversion function is exposed under `promises`.
export default {
promises: {
convertDocxToLaTeXZipArchive,
},
}

View File

@@ -1,6 +1,7 @@
import logger from '@overleaf/logger'
import metrics from '@overleaf/metrics'
import fs from 'node:fs'
import fsPromises from 'node:fs/promises'
import Path from 'node:path'
import FileSystemImportManager from './FileSystemImportManager.mjs'
import ProjectUploadManager from './ProjectUploadManager.mjs'
@@ -12,7 +13,8 @@ import { InvalidZipFileError } from './ArchiveErrors.mjs'
import multer from 'multer'
import lodash from 'lodash'
import { expressify } from '@overleaf/promise-utils'
import { DuplicateNameError } from '../Errors/Errors.js'
import { DuplicateNameError, FileTooLargeError } from '../Errors/Errors.js'
import DocumentConversionManager from './DocumentConversionManager.mjs'
const defaultsDeep = lodash.defaultsDeep
@@ -166,6 +168,53 @@ async function uploadFile(req, res, next) {
)
}
/**
 * Create a new project from an uploaded .docx file.
 *
 * The upload is converted to a LaTeX zip archive, a project is created from
 * that archive, and the JSON response carries the new project id. Both the
 * uploaded file and the intermediate archive are removed before returning,
 * whatever the outcome.
 *
 * Responses:
 *  - 200 `{ success: true, project_id }` on success
 *  - 422 when the request has no uploaded file or no usable `name` field,
 *    or when the converted archive is too large (`error: 'file_too_large'`)
 *  - 500 for any other failure
 *
 * @param {any} req
 * @param {any} res
 * @param {any} next
 */
async function importDocx(req, res, next) {
  const userId = SessionManager.getLoggedInUserId(req.session)
  const path = req.file?.path
  logger.debug({ path, userId }, 'importing docx file')

  try {
    // Validate inside the try block: a throw here must still reach the
    // `finally` below, otherwise the uploaded file would be left on disk.
    const uploadedName = req.body?.name
    if (
      path == null ||
      typeof uploadedName !== 'string' ||
      uploadedName.length === 0
    ) {
      return res.status(422).json({
        success: false,
        error: req.i18n.translate('upload_failed'),
      })
    }
    const name = Path.basename(uploadedName, '.docx')

    const archivePath =
      await DocumentConversionManager.promises.convertDocxToLaTeXZipArchive(
        path,
        userId
      )
    try {
      const project =
        await ProjectUploadManager.promises.createProjectFromZipArchive(
          userId,
          name,
          archivePath
        )
      res.json({ success: true, project_id: project._id })
    } finally {
      // The archive is only an intermediate artefact
      await fsPromises.unlink(archivePath).catch(() => {})
    }
  } catch (error) {
    logger.error({ err: error }, 'error importing docx file')
    if (
      error instanceof FileTooLargeError ||
      // Fallback for errors created from a different copy of the Errors
      // module, where `instanceof` does not match
      error?.name === 'FileTooLargeError'
    ) {
      return res.status(422).json({
        success: false,
        error: 'file_too_large',
      })
    }
    res.status(500).json({
      success: false,
      error: req.i18n.translate('upload_failed'),
    })
  } finally {
    if (path != null) {
      // Best-effort removal of the uploaded file
      await fsPromises.unlink(path).catch(() => {})
    }
  }
}
/**
* @param {any} req
* @param {any} res
@@ -202,4 +251,5 @@ export default {
uploadProject,
uploadFile: expressify(uploadFile),
multerMiddleware,
importDocx: expressify(importDocx),
}

View File

@@ -26,6 +26,14 @@ export default {
ProjectUploadController.uploadProject
)
webRouter.post(
'/project/new/import-docx',
AuthenticationController.requireLogin(),
RateLimiterMiddleware.rateLimit(rateLimiters.projectUpload),
ProjectUploadController.multerMiddleware,
ProjectUploadController.importDocx
)
const fileUploadEndpoint = '/Project/:Project_id/upload'
const fileUploadRateLimit = RateLimiterMiddleware.rateLimit(
rateLimiters.fileUpload,

View File

@@ -885,6 +885,7 @@
"import_from_github": "",
"import_idp_metadata": "",
"import_to_sharelatex": "",
"import_word_document": "",
"imported_from_another_project_at_date": "",
"imported_from_external_provider_at_date": "",
"imported_from_mendeley_at_date": "",

View File

@@ -19,6 +19,7 @@ import {
} from '@/shared/components/dropdown/dropdown-menu'
import { useSendProjectListMB } from '@/features/project-list/components/project-list-events'
import type { PortalTemplate } from '../../../../../types/portal-template'
import { useFeatureFlag } from '@/shared/context/split-test-context'
type SendTrackingEvent = {
dropdownMenu: string
@@ -57,6 +58,7 @@ function NewProjectButton({
const portalTemplates = getMeta('ol-portalTemplates') || []
const { show: enableAddAffiliationWidget } = useAddAffiliation()
const sendProjectListMB = useSendProjectListMB()
const docxImportEnabled = useFeatureFlag('import-docx')
const sendTrackingEvent = useCallback(
({
dropdownMenu,
@@ -208,6 +210,20 @@ function NewProjectButton({
{t('upload_project')}
</DropdownItem>
</li>
{docxImportEnabled && (
<li role="none">
<DropdownItem
onClick={e =>
handleModalMenuClick(e, {
modalVariant: 'import_docx',
dropdownMenuEvent: 'import-docx',
})
}
>
{t('import_word_document')}
</DropdownItem>
</li>
)}
<li role="none">
{ImportProjectFromGithubMenu && (
<ImportProjectFromGithubMenu

View File

@@ -0,0 +1,127 @@
import { useEffect, useState } from 'react'
import {
OLModal,
OLModalBody,
OLModalFooter,
OLModalHeader,
OLModalTitle,
} from '@/shared/components/ol/ol-modal'
import OLButton from '@/shared/components/ol/ol-button'
import { useTranslation } from 'react-i18next'
import Uppy from '@uppy/core'
import { Dashboard } from '@uppy/react'
import XHRUpload from '@uppy/xhr-upload'
import getMeta from '../../../../utils/meta'
import '@uppy/core/dist/style.css'
import '@uppy/dashboard/dist/style.css'
// Body of a successful response from the import endpoint, as read in the
// 'upload-success' handler.
type ImportResponse = {
project_id: string
}
// Props of ImportDocxModal: `onHide` closes the modal, `openProject` is
// called with the id of the newly created project.
type ImportDocxModalProps = {
onHide: () => void
openProject: (projectId: string) => void
}
/**
 * Modal that lets the user pick or drop a single .docx file, uploads it to
 * `/project/new/import-docx` as soon as it is accepted, and opens the project
 * returned by the server.
 */
function ImportDocxModal({ onHide, openProject }: ImportDocxModalProps) {
  const { t } = useTranslation()
  const { maxUploadSize, projectUploadTimeout } = getMeta('ol-ExposedSettings')
  const [ableToUpload, setAbleToUpload] = useState(false)

  // The uploader is created once, on first render, and kept for the lifetime
  // of the component.
  const [uppy] = useState(() => {
    const uploader = new Uppy({
      allowMultipleUploadBatches: false,
      restrictions: {
        maxNumberOfFiles: 1,
        maxFileSize: maxUploadSize,
        allowedFileTypes: ['.docx'],
      },
    })

    uploader.use(XHRUpload, {
      endpoint: '/project/new/import-docx',
      headers: {
        'X-CSRF-TOKEN': getMeta('ol-csrfToken'),
      },
      limit: 1,
      // the express multer middleware reads the upload from "qqfile"
      fieldName: 'qqfile',
      timeout: projectUploadTimeout,
    })

    // With maxNumberOfFiles = 1 this fires once, for the one accepted file.
    // Any surplus files dropped at the same time are reported through
    // 'restriction-failed' instead.
    uploader.on('file-added', () => {
      setAbleToUpload(true)
    })

    // Reset the flag so a later file can trigger a new upload
    uploader.on('upload-error', () => {
      setAbleToUpload(false)
    })

    uploader.on('upload-success', async (file, response) => {
      const { project_id: projectId }: ImportResponse = response.body

      if (projectId) {
        openProject(projectId)
      }
    })

    // Fired for every file that breaks one of the restrictions above:
    //  1. maxNumberOfFiles: each file beyond the first one
    //  2. maxFileSize: a file larger than maxUploadSize
    //  3. allowedFileTypes: a file that is not a .docx
    // Reset the flag so the user can try again with a different file.
    uploader.on('restriction-failed', () => {
      setAbleToUpload(false)
    })

    return uploader
  })

  // Start uploading automatically once a file has been accepted
  useEffect(() => {
    if (!ableToUpload) {
      return
    }
    uppy.upload()
  }, [ableToUpload, uppy])

  const dashboardLocale = {
    strings: {
      browseFiles: 'Select a .docx file',
      dropPasteFiles: '%{browseFiles} or \n\n drag a .docx file',
    },
  }

  return (
    <OLModal
      show
      animation
      onHide={onHide}
      id="upload-project-modal"
      backdrop="static"
    >
      <OLModalHeader>
        <OLModalTitle as="h3">{t('import_word_document')}</OLModalTitle>
      </OLModalHeader>

      <OLModalBody>
        <Dashboard
          uppy={uppy}
          proudlyDisplayPoweredByUppy={false}
          showLinkToFileUploadResult={false}
          hideUploadButton
          showSelectedFiles={false}
          height={300}
          locale={dashboardLocale}
          className="project-list-upload-project-modal-uppy-dashboard"
        />
      </OLModalBody>

      <OLModalFooter>
        <OLButton variant="secondary" onClick={onHide}>
          {t('cancel')}
        </OLButton>
      </OLModalFooter>
    </OLModal>
  )
}
export default ImportDocxModal

View File

@@ -7,12 +7,14 @@ import { FullSizeLoadingSpinner } from '@/shared/components/loading-spinner'
import { useLocation } from '@/shared/hooks/use-location'
const UploadProjectModal = lazy(() => import('./upload-project-modal'))
const ImportDocxModal = lazy(() => import('./import-docx-modal'))
export type NewProjectButtonModalVariant =
| 'blank_project'
| 'example_project'
| 'upload_project'
| 'import_from_github'
| 'import_docx'
type NewProjectButtonModalProps = {
modal: Nullable<NewProjectButtonModalVariant>
@@ -47,6 +49,12 @@ function NewProjectButtonModal({ modal, onHide }: NewProjectButtonModalProps) {
<UploadProjectModal onHide={onHide} openProject={openProject} />
</Suspense>
)
case 'import_docx':
return (
<Suspense fallback={<FullSizeLoadingSpinner delay={500} />}>
<ImportDocxModal onHide={onHide} openProject={openProject} />
</Suspense>
)
case 'import_from_github':
return <ImportProjectFromGithubModalWrapper onHide={onHide} />
default:

View File

@@ -12,6 +12,7 @@ import {
DropdownToggle,
} from '@/shared/components/dropdown/dropdown-menu'
import createNewProjectImage from '../../images/create-a-new-project.svg'
import { useFeatureFlag } from '@/shared/context/split-test-context'
const CustomDropdownToggle = forwardRef<
HTMLButtonElement,
@@ -59,6 +60,7 @@ function WelcomeMessageCreateNewProjectDropdown({
}: WelcomeMessageCreateNewProjectDropdownProps) {
const { t } = useTranslation()
const portalTemplates = getMeta('ol-portalTemplates') || []
const docxImportEnabled = useFeatureFlag('import-docx')
const { isOverleaf } = getMeta('ol-ExposedSettings')
@@ -134,6 +136,19 @@ function WelcomeMessageCreateNewProjectDropdown({
{t('upload_project')}
</DropdownItem>
</li>
{docxImportEnabled && (
<li role="none">
<DropdownItem
as="button"
onClick={e =>
handleDropdownItemClick(e, 'import_docx', 'import-docx')
}
tabIndex={-1}
>
{t('import_word_document')}
</DropdownItem>
</li>
)}
{isOverleaf && (
<li role="none">
<DropdownItem

View File

@@ -1111,6 +1111,7 @@
"import_from_github": "Import from GitHub",
"import_idp_metadata": "Import IdP metadata",
"import_to_sharelatex": "Import to __appName__",
"import_word_document": "Import Word document",
"important_message": "Important message",
"imported_from_another_project_at_date": "Imported from <0>Another project</0>/__sourceEntityPathHTML__, at __formattedDate__ __relativeDate__",
"imported_from_external_provider_at_date": "Imported from <0>__shortenedUrlHTML__</0> at __formattedDate__ __relativeDate__",

View File

@@ -135,6 +135,7 @@
"express-session": "^1.17.1",
"file-type": "^21.3.4",
"focus-trap-react": "^11.0.4",
"form-data": "^4.0.5",
"globby": "^5.0.0",
"helmet": "^6.0.1",
"https-proxy-agent": "^7.0.6",

View File

@@ -1,7 +1,16 @@
import { fireEvent, render, screen } from '@testing-library/react'
import WelcomeMessage from '../../../../../frontend/js/features/project-list/components/welcome-message'
import WelcomeMessageComponent from '../../../../../frontend/js/features/project-list/components/welcome-message'
import { expect } from 'chai'
import getMeta from '@/utils/meta'
import { SplitTestProvider } from '@/shared/context/split-test-context'
// Test wrapper: renders the real component inside a SplitTestProvider
// (presumably needed because the dropdown now calls useFeatureFlag).
const WelcomeMessage = () => (
  <SplitTestProvider>
    <WelcomeMessageComponent />
  </SplitTestProvider>
)
describe('<WelcomeMessage />', function () {
beforeEach(function () {

View File

@@ -0,0 +1,257 @@
import { describe, expect, vi, beforeEach } from 'vitest'
import sinon from 'sinon'
import FormData from 'form-data'
import { FileTooLargeError } from '../../../../app/src/Features/Errors/Errors.js'
const MODULE_PATH =
'../../../../app/src/Features/Uploads/DocumentConversionManager.mjs'
describe('DocumentConversionManager', function () {
// Build fresh stubs for every dependency of DocumentConversionManager,
// register them as module mocks, then import the module under test.
beforeEach(async function (ctx) {
// Streams are stubbed as plain strings; the tests only check that these
// values are handed on to pipeline()
ctx.fs = {
createReadStream: sinon.stub().returns('mocked-read-stream'),
createWriteStream: sinon.stub().returns('mocked-write-stream'),
}
ctx.fsPromises = {
unlink: sinon.stub().resolves(),
}
// Each describe block below configures what this stub resolves/rejects with
ctx.fetchUtils = {
fetchStreamWithResponse: sinon.stub().resolves(),
}
ctx.nodeStream = {
pipeline: sinon.stub().resolves(),
}
ctx.CompileManager = {
promises: {
_getUserCompileLimits: sinon.stub().resolves({
compileBackendClass: 'test-backend-class',
compileGroup: 'test-compile-group',
}),
},
}
// maxUploadSize is the limit the Content-Length values in the tests are
// compared against
ctx.Settings = {
maxUploadSize: 100,
path: {
dumpFolder: '/path/to/dump/folder',
},
apis: {
clsi: {
url: 'http://mock-clsi-url',
},
},
}
vi.doMock('node:fs', () => ({
default: ctx.fs,
}))
vi.doMock('node:fs/promises', () => ({
default: ctx.fsPromises,
}))
vi.doMock('@overleaf/fetch-utils', () => ({
fetchStreamWithResponse: ctx.fetchUtils.fetchStreamWithResponse,
}))
vi.doMock('node:stream/promises', () => ({
pipeline: ctx.nodeStream.pipeline,
}))
vi.doMock('@overleaf/settings', () => ({
default: ctx.Settings,
}))
vi.doMock(
'../../../../app/src/Features/Compile/CompileManager.mjs',
() => ({
default: ctx.CompileManager,
})
)
// Import only after all mocks are registered so the module picks them up
ctx.DocumentConversionManager = (await import(MODULE_PATH)).default
})
describe('convertDocxToLaTeXZipArchive', function () {
// Happy path: CLSI answers with a Content-Length (50) below the stubbed
// maxUploadSize (100).
describe('successfully', function () {
beforeEach(async function (ctx) {
ctx.path = '/path/to/input.docx'
ctx.userId = 'test-user-id'
ctx.outputPath = '/path/to/output.zip'
ctx.response = {
headers: {
get: sinon.stub().returns(null),
},
}
ctx.response.headers.get.withArgs('Content-Length').returns('50')
ctx.fetchUtils.fetchStreamWithResponse.resolves({
stream: 'mocked-fetch-stream',
response: ctx.response,
})
ctx.result =
await ctx.DocumentConversionManager.promises.convertDocxToLaTeXZipArchive(
ctx.path,
ctx.userId
)
})
it('should call fetchStreamWithResponse with the correct URL and form data', function (ctx) {
// Rebuild the URL the manager is expected to produce from the stubbed
// settings and compile limits
const expectedUrl = new URL(ctx.Settings.apis.clsi.url)
expectedUrl.pathname = '/convert/docx-to-latex'
expectedUrl.searchParams.set(
'compileBackendClass',
'test-backend-class'
)
expectedUrl.searchParams.set('compileGroup', 'test-compile-group')
sinon.assert.calledWith(
ctx.fetchUtils.fetchStreamWithResponse,
sinon.match(url => url.toString() === expectedUrl.toString()),
{
method: 'POST',
body: sinon.match.instanceOf(FormData),
signal: sinon.match.instanceOf(AbortSignal),
}
)
})
it('should pipe result into the output file', function (ctx) {
sinon.assert.calledWith(
ctx.nodeStream.pipeline,
'mocked-fetch-stream',
'mocked-write-stream'
)
})
it('should return a path to the output file', function (ctx) {
// Output name is a v4 UUID with a .zip extension inside the dump folder
expect(ctx.result).to.match(
/\/path\/to\/dump\/folder\/[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}\.zip/
)
})
})
// The request to CLSI itself rejects; the manager is expected to reject
// with its own 'document conversion failed' message.
describe('when an error occurs during conversion', function () {
beforeEach(async function (ctx) {
ctx.path = '/path/to/input.docx'
ctx.userId = 'test-user-id'
ctx.fetchUtils.fetchStreamWithResponse.rejects(
new Error('Conversion failed')
)
await expect(
ctx.DocumentConversionManager.promises.convertDocxToLaTeXZipArchive(
ctx.path,
ctx.userId
)
).to.be.rejectedWith('document conversion failed')
})
it('should attempt to clean up the output file', function (ctx) {
sinon.assert.calledWith(
ctx.fsPromises.unlink,
sinon.match(
/\/path\/to\/dump\/folder\/[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}\.zip/
)
)
})
})
// CLSI announces a Content-Length (150) above the stubbed maxUploadSize (100).
describe('when the converted archive is too large', function () {
beforeEach(async function (ctx) {
ctx.path = '/path/to/input.docx'
ctx.userId = 'test-user-id'
// Only destroy() is needed on the response stream in this scenario
ctx.stream = {
destroy: sinon.stub(),
}
ctx.response = {
headers: {
get: sinon.stub(),
},
}
ctx.response.headers.get.withArgs('Content-Length').returns('150')
ctx.fetchUtils.fetchStreamWithResponse.resolves({
stream: ctx.stream,
response: ctx.response,
})
await expect(
ctx.DocumentConversionManager.promises.convertDocxToLaTeXZipArchive(
ctx.path,
ctx.userId
)
).to.be.rejectedWith(sinon.match.instanceOf(FileTooLargeError))
})
it('should abort the request', function (ctx) {
// The signal passed to fetchStreamWithResponse must have been aborted
expect(
ctx.fetchUtils.fetchStreamWithResponse.firstCall.args[1].signal
.aborted
).to.equal(true)
})
it('should destroy the response stream', function (ctx) {
sinon.assert.calledOnce(ctx.stream.destroy)
})
it('should not write the oversized archive to disk', function (ctx) {
sinon.assert.notCalled(ctx.fs.createWriteStream)
sinon.assert.notCalled(ctx.nodeStream.pipeline)
})
it('should attempt to clean up the output path', function (ctx) {
sinon.assert.calledWith(
ctx.fsPromises.unlink,
sinon.match(
/\/path\/to\/dump\/folder\/[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}\.zip/
)
)
})
})
// headers.get() returns null for every header, so the manager cannot check
// the archive size and is expected to reject the response.
describe('when the Content-Length header is missing', function () {
beforeEach(async function (ctx) {
ctx.path = '/path/to/input.docx'
ctx.userId = 'test-user-id'
ctx.response = {
headers: {
get: sinon.stub().returns(null),
},
}
ctx.fetchUtils.fetchStreamWithResponse.resolves({
stream: 'mocked-fetch-stream',
response: ctx.response,
})
await expect(
ctx.DocumentConversionManager.promises.convertDocxToLaTeXZipArchive(
ctx.path,
ctx.userId
)
).to.be.rejectedWith('document conversion failed')
})
it('should not write the archive to disk', function (ctx) {
sinon.assert.notCalled(ctx.fs.createWriteStream)
sinon.assert.notCalled(ctx.nodeStream.pipeline)
})
it('should attempt to clean up the output path', function (ctx) {
sinon.assert.calledWith(
ctx.fsPromises.unlink,
sinon.match(
/\/path\/to\/dump\/folder\/[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}\.zip/
)
)
})
})
})
})

View File

@@ -10,6 +10,7 @@ import sinon from 'sinon'
import MockRequest from '../helpers/MockRequest.mjs'
import MockResponse from '../helpers/MockResponse.mjs'
import ArchiveErrors from '../../../../app/src/Features/Uploads/ArchiveErrors.mjs'
import { FileTooLargeError } from '../../../../app/src/Features/Errors/Errors.js'
const modulePath =
'../../../../app/src/Features/Uploads/ProjectUploadController.mjs'
@@ -40,6 +41,11 @@ describe('ProjectUploadController', function () {
ctx.EditorController = {
promises: {},
}
ctx.DocumentConversionManager = {
promises: {
convertDocxToLaTeXZipArchive: sinon.stub(),
},
}
vi.doMock('multer', () => ({
default: sinon.stub(),
@@ -52,7 +58,7 @@ describe('ProjectUploadController', function () {
vi.doMock(
'../../../../app/src/Features/Uploads/ProjectUploadManager',
() => ({
default: (ctx.ProjectUploadManager = {}),
default: (ctx.ProjectUploadManager = { promises: {} }),
})
)
@@ -87,10 +93,21 @@ describe('ProjectUploadController', function () {
default: ctx.EditorController,
}))
vi.doMock(
'../../../../app/src/Features/Uploads/DocumentConversionManager.mjs',
() => ({
default: ctx.DocumentConversionManager,
})
)
vi.doMock('fs', () => ({
default: (ctx.fs = {}),
}))
vi.doMock('node:fs/promises', () => ({
default: (ctx.fsPromises = {}),
}))
ctx.ProjectUploadController = (await import(modulePath)).default
})
@@ -415,4 +432,113 @@ describe('ProjectUploadController', function () {
})
})
})
describe('importDocx', function () {
// Shared request fixture for all importDocx scenarios: an uploaded file,
// its original name, and a stubbed unlink for the cleanup assertions.
beforeEach(async function (ctx) {
ctx.req.file = {
path: '/path/to/uploaded/file.docx',
}
ctx.req.body = {
name: 'file.docx',
}
ctx.archivePath = '/path/to/archive.zip'
ctx.fsPromises.unlink = sinon.stub().resolves()
})
// Conversion and project creation both succeed.
describe('successfully', async function () {
beforeEach(async function (ctx) {
ctx.DocumentConversionManager.promises.convertDocxToLaTeXZipArchive =
sinon.stub().resolves(ctx.archivePath)
ctx.ProjectUploadManager.promises.createProjectFromZipArchive = sinon
.stub()
.resolves({
_id: 'new-project-id',
})
// res.json is replaced so the promise settles once the controller responds
await new Promise(resolve => {
ctx.res.json = data => {
expect(data.success).to.be.true
expect(data.project_id).to.equal('new-project-id')
resolve()
}
ctx.ProjectUploadController.importDocx(ctx.req, ctx.res)
})
})
it('should call the DocumentConversionManager to convert the file', function (ctx) {
expect(
ctx.DocumentConversionManager.promises.convertDocxToLaTeXZipArchive
).to.have.been.calledWith(ctx.req.file.path, ctx.user_id)
})
it('should use the resulting archive to create a new project', function (ctx) {
// 'file' is the uploaded name 'file.docx' without its .docx extension
expect(
ctx.ProjectUploadManager.promises.createProjectFromZipArchive
).to.have.been.calledWith(ctx.user_id, 'file', ctx.archivePath)
})
it('should unlink the archive after creating the project', function (ctx) {
expect(ctx.fsPromises.unlink).to.have.been.calledWith(ctx.archivePath)
})
it('should unlink the uploaded file', function (ctx) {
expect(ctx.fsPromises.unlink).to.have.been.calledWith(ctx.req.file.path)
})
})
// The conversion rejects with a generic error; the controller should answer
// with a failure body and HTTP 500.
describe('unsuccessfully', async function () {
beforeEach(async function (ctx) {
ctx.DocumentConversionManager.promises.convertDocxToLaTeXZipArchive =
sinon.stub().rejects(new Error('Conversion failed'))
// res.json is replaced so the promise settles once the controller responds
await new Promise(resolve => {
ctx.res.json = data => {
expect(data.success).to.be.false
resolve()
}
ctx.ProjectUploadController.importDocx(ctx.req, ctx.res)
})
})
it('should call the DocumentConversionManager to convert the file', function (ctx) {
expect(
ctx.DocumentConversionManager.promises.convertDocxToLaTeXZipArchive
).to.have.been.calledWith(ctx.req.file.path, ctx.user_id)
})
it('should unlink the uploaded file', function (ctx) {
expect(ctx.fsPromises.unlink).to.have.been.calledWith(ctx.req.file.path)
})
it('should return http 500', function (ctx) {
expect(ctx.res.statusCode).to.equal(500)
})
})
// The conversion rejects with FileTooLargeError; the controller should map
// it to HTTP 422 with the 'file_too_large' error code.
describe('when the converted archive is too large', async function () {
beforeEach(async function (ctx) {
ctx.DocumentConversionManager.promises.convertDocxToLaTeXZipArchive =
sinon.stub().rejects(new FileTooLargeError('file too large'))
// res.json is replaced so the promise settles once the controller responds
await new Promise(resolve => {
ctx.res.json = data => {
expect(data).to.deep.equal({
success: false,
error: 'file_too_large',
})
resolve()
}
ctx.ProjectUploadController.importDocx(ctx.req, ctx.res)
})
})
it('should return http 422', function (ctx) {
expect(ctx.res.statusCode).to.equal(422)
})
it('should unlink the uploaded file', function (ctx) {
expect(ctx.fsPromises.unlink).to.have.been.calledWith(ctx.req.file.path)
})
})
})
})