mirror of
https://github.com/yu-i-i/overleaf-cep.git
synced 2026-05-23 09:09:36 +02:00
Merge pull request #33277 from overleaf/mj-pandoc-clsi-two-step-download
[clsi] Use clsi-nginx for downloading pandoc exports GitOrigin-RevId: b6013fae6f53c7af714634d700ceed491d724653
This commit is contained in:
committed by
Copybot
parent
ae31ad218c
commit
32da6548c8
@@ -1,8 +1,11 @@
|
||||
import crypto from 'node:crypto'
|
||||
import logger from '@overleaf/logger'
|
||||
import { expressify } from '@overleaf/promise-utils'
|
||||
import fs from 'node:fs/promises'
|
||||
import fsSync from 'node:fs'
|
||||
import ConversionManager from './ConversionManager.js'
|
||||
import ConversionOutputCleaner from './ConversionOutputCleaner.js'
|
||||
import OutputCacheManager from './OutputCacheManager.js'
|
||||
import ResourceWriter from './ResourceWriter.js'
|
||||
import RequestParser from './RequestParser.js'
|
||||
import { pipeline } from 'node:stream/promises'
|
||||
@@ -70,6 +73,8 @@ async function convertProjectToDocument(req, res) {
|
||||
request.user_id = req.params.user_id
|
||||
request.metricsOpts = {}
|
||||
|
||||
const responseFormat = req.query.responseFormat === 'json' ? 'json' : 'stream'
|
||||
|
||||
const conversionId = crypto.randomUUID()
|
||||
const conversionDir = Path.join(Settings.path.compilesDir, conversionId)
|
||||
|
||||
@@ -94,12 +99,31 @@ async function convertProjectToDocument(req, res) {
|
||||
type
|
||||
)
|
||||
|
||||
const documentStat = await fs.stat(documentPath)
|
||||
res.setHeader('Content-Length', documentStat.size)
|
||||
res.attachment(`output.${config.extension}`)
|
||||
res.setHeader('X-Content-Type-Options', 'nosniff')
|
||||
const readStream = fsSync.createReadStream(documentPath)
|
||||
await pipeline(readStream, res)
|
||||
const outputName = `output.${config.extension}`
|
||||
if (responseFormat === 'json') {
|
||||
// TODO: drop the streaming branch once web is migrated to the two-step flow
|
||||
const buildId = await OutputCacheManager.promises.generateBuildId()
|
||||
const buildDir = Path.join(
|
||||
Settings.path.outputDir,
|
||||
conversionId,
|
||||
OutputCacheManager.CACHE_SUBDIR,
|
||||
buildId
|
||||
)
|
||||
try {
|
||||
await fs.mkdir(buildDir, { recursive: true })
|
||||
await fs.copyFile(documentPath, Path.join(buildDir, outputName))
|
||||
res.json({ conversionId, buildId, file: outputName })
|
||||
} finally {
|
||||
ConversionOutputCleaner.scheduleCleanup(conversionId)
|
||||
}
|
||||
} else {
|
||||
const documentStat = await fs.stat(documentPath)
|
||||
res.setHeader('Content-Length', documentStat.size)
|
||||
res.attachment(outputName)
|
||||
res.setHeader('X-Content-Type-Options', 'nosniff')
|
||||
const readStream = fsSync.createReadStream(documentPath)
|
||||
await pipeline(readStream, res)
|
||||
}
|
||||
} finally {
|
||||
await fs.rm(conversionDir, { recursive: true, force: true }).catch(() => {})
|
||||
}
|
||||
|
||||
23
services/clsi/app/js/ConversionOutputCleaner.js
Normal file
23
services/clsi/app/js/ConversionOutputCleaner.js
Normal file
@@ -0,0 +1,23 @@
|
||||
import fs from 'node:fs/promises'
|
||||
import Path from 'node:path'
|
||||
import logger from '@overleaf/logger'
|
||||
import Settings from '@overleaf/settings'
|
||||
|
||||
const TTL_MS = 60 * 1000
|
||||
|
||||
function scheduleCleanup(conversionId, ttlMs = TTL_MS) {
|
||||
const conversionOutputDir = Path.join(Settings.path.outputDir, conversionId)
|
||||
setTimeout(() => {
|
||||
fs.rm(conversionOutputDir, { recursive: true, force: true }).catch(err => {
|
||||
logger.warn(
|
||||
{ err, conversionId, conversionOutputDir },
|
||||
'failed to clean up conversion output directory'
|
||||
)
|
||||
})
|
||||
}, ttlMs)
|
||||
}
|
||||
|
||||
export default {
|
||||
TTL_MS,
|
||||
scheduleCleanup,
|
||||
}
|
||||
@@ -684,4 +684,5 @@ OutputCacheManager.promises = {
|
||||
OutputCacheManager.saveOutputFilesInBuildDir
|
||||
),
|
||||
queueDirOperation,
|
||||
generateBuildId: promisify(OutputCacheManager.generateBuildId),
|
||||
}
|
||||
|
||||
@@ -3,8 +3,11 @@ import ClsiApp from './helpers/ClsiApp.js'
|
||||
import Path from 'node:path'
|
||||
import fs from 'node:fs'
|
||||
import { pipeline } from 'node:stream/promises'
|
||||
import { buffer } from 'node:stream/consumers'
|
||||
import yauzl from 'yauzl'
|
||||
import { expect } from 'chai'
|
||||
import { fetchStreamWithResponse } from '@overleaf/fetch-utils'
|
||||
import Settings from '@overleaf/settings'
|
||||
|
||||
describe('Conversions', function () {
|
||||
describe('docx conversion', function () {
|
||||
@@ -81,4 +84,58 @@ describe('Conversions', function () {
|
||||
.rejected
|
||||
})
|
||||
})
|
||||
|
||||
describe('project-to-document conversion (responseFormat=json)', function () {
|
||||
before(async function () {
|
||||
await ClsiApp.ensureRunning()
|
||||
})
|
||||
|
||||
it('returns ids and serves the docx output via nginx', async function () {
|
||||
const projectId = Client.randomId()
|
||||
const userId = '0123456789abcdef01234567'
|
||||
const request = {
|
||||
rootResourcePath: 'main.tex',
|
||||
resources: [
|
||||
{
|
||||
path: 'main.tex',
|
||||
content: `\
|
||||
\\documentclass{article}
|
||||
\\begin{document}
|
||||
Hello world
|
||||
\\end{document}\
|
||||
`,
|
||||
},
|
||||
],
|
||||
}
|
||||
|
||||
const { conversionId, buildId, file } =
|
||||
await Client.convertProjectToDocument(
|
||||
projectId,
|
||||
userId,
|
||||
'docx',
|
||||
request,
|
||||
'json'
|
||||
)
|
||||
|
||||
expect(conversionId).to.match(
|
||||
/^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/
|
||||
)
|
||||
expect(buildId).to.match(/^[0-9a-f]+-[0-9a-f]+$/)
|
||||
|
||||
const downloadUrl = new URL(Settings.apis.clsi.downloadHost)
|
||||
downloadUrl.pathname = `/project/${conversionId}/build/${buildId}/output/${file}`
|
||||
const { stream, response } = await fetchStreamWithResponse(
|
||||
downloadUrl.href
|
||||
)
|
||||
expect(response.status).to.equal(200)
|
||||
|
||||
const body = await buffer(stream)
|
||||
expect(body.length).to.be.greaterThan(0)
|
||||
// .docx is a zip archive — verify the PK\x03\x04 magic bytes
|
||||
expect(body[0]).to.equal(0x50)
|
||||
expect(body[1]).to.equal(0x4b)
|
||||
expect(body[2]).to.equal(0x03)
|
||||
expect(body[3]).to.equal(0x04)
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
@@ -39,6 +39,27 @@ async function convertDocument(path, type) {
|
||||
})
|
||||
}
|
||||
|
||||
async function convertProjectToDocument(
|
||||
projectId,
|
||||
userId,
|
||||
type,
|
||||
request,
|
||||
responseFormat
|
||||
) {
|
||||
const url = new URL(
|
||||
`${host}/project/${projectId}/user/${userId}/download/project-to-document`
|
||||
)
|
||||
url.searchParams.set('type', type)
|
||||
if (responseFormat) {
|
||||
url.searchParams.set('responseFormat', responseFormat)
|
||||
}
|
||||
const opts = { method: 'POST', json: { compile: request } }
|
||||
if (responseFormat === 'json') {
|
||||
return await fetchJson(url.href, opts)
|
||||
}
|
||||
return await fetchStream(url.href, opts)
|
||||
}
|
||||
|
||||
async function stopCompile(projectId) {
|
||||
return await fetchNothing(`${host}/project/${projectId}/compile/stop`, {
|
||||
method: 'POST',
|
||||
@@ -202,6 +223,7 @@ function smokeTest() {
|
||||
export default {
|
||||
randomId,
|
||||
compile,
|
||||
convertProjectToDocument,
|
||||
convertDocument,
|
||||
stopCompile,
|
||||
clearCache,
|
||||
|
||||
@@ -17,7 +17,16 @@ describe('ConversionController', function () {
|
||||
ctx.documentStat = { size: 5678 }
|
||||
ctx.Settings = {
|
||||
enablePandocConversions: true,
|
||||
path: { compilesDir: '/compiles' },
|
||||
path: { compilesDir: '/compiles', outputDir: '/output' },
|
||||
}
|
||||
ctx.OutputCacheManager = {
|
||||
CACHE_SUBDIR: 'generated-files',
|
||||
promises: {
|
||||
generateBuildId: sinon.stub().resolves('00000000001-0000000000000001'),
|
||||
},
|
||||
}
|
||||
ctx.ConversionOutputCleaner = {
|
||||
scheduleCleanup: sinon.stub(),
|
||||
}
|
||||
ctx.parsedRequest = { rootResourcePath: 'main.tex' }
|
||||
ctx.ConversionManager = {
|
||||
@@ -43,6 +52,8 @@ describe('ConversionController', function () {
|
||||
stat: sinon.stub().resolves(ctx.zipStat),
|
||||
unlink: sinon.stub().resolves(),
|
||||
rm: sinon.stub().resolves(),
|
||||
mkdir: sinon.stub().resolves(),
|
||||
copyFile: sinon.stub().resolves(),
|
||||
}
|
||||
|
||||
ctx.readStream = new PassThrough()
|
||||
@@ -79,9 +90,18 @@ describe('ConversionController', function () {
|
||||
default: ctx.RequestParser,
|
||||
}))
|
||||
|
||||
vi.doMock('../../../app/js/OutputCacheManager', () => ({
|
||||
default: ctx.OutputCacheManager,
|
||||
}))
|
||||
|
||||
vi.doMock('../../../app/js/ConversionOutputCleaner', () => ({
|
||||
default: ctx.ConversionOutputCleaner,
|
||||
}))
|
||||
|
||||
ctx.res = new PassThrough()
|
||||
ctx.res.attachment = sinon.stub()
|
||||
ctx.res.setHeader = sinon.stub()
|
||||
ctx.res.json = sinon.stub()
|
||||
|
||||
ctx.ConversionController = (await import(MODULE_PATH)).default
|
||||
})
|
||||
@@ -322,7 +342,7 @@ describe('ConversionController', function () {
|
||||
const uuidDirPattern =
|
||||
/^\/compiles\/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/
|
||||
|
||||
describe('successfully', function () {
|
||||
describe('successfully (default streaming response)', function () {
|
||||
beforeEach(async function (ctx) {
|
||||
await ctx.ConversionController.convertProjectToDocument(
|
||||
ctx.req,
|
||||
@@ -376,6 +396,11 @@ describe('ConversionController', function () {
|
||||
sinon.assert.calledWith(ctx.pipeline, ctx.readStream, ctx.res)
|
||||
})
|
||||
|
||||
it('should not move the document or schedule cleanup', function (ctx) {
|
||||
sinon.assert.notCalled(ctx.fs.copyFile)
|
||||
sinon.assert.notCalled(ctx.ConversionOutputCleaner.scheduleCleanup)
|
||||
})
|
||||
|
||||
it('should clean up the conversion directory', function (ctx) {
|
||||
sinon.assert.calledWith(ctx.fs.rm, sinon.match(uuidDirPattern), {
|
||||
recursive: true,
|
||||
@@ -384,6 +409,71 @@ describe('ConversionController', function () {
|
||||
})
|
||||
})
|
||||
|
||||
describe('successfully (responseFormat=json)', function () {
|
||||
beforeEach(async function (ctx) {
|
||||
ctx.req.query.responseFormat = 'json'
|
||||
await ctx.ConversionController.convertProjectToDocument(
|
||||
ctx.req,
|
||||
ctx.res,
|
||||
sinon.stub()
|
||||
)
|
||||
})
|
||||
|
||||
it('should move the document into the conversion output build dir', function (ctx) {
|
||||
const outputBuildDirPattern =
|
||||
/^\/output\/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\/generated-files\/[0-9a-f]+-[0-9a-f]+$/
|
||||
sinon.assert.calledWith(
|
||||
ctx.fs.mkdir,
|
||||
sinon.match(outputBuildDirPattern),
|
||||
{ recursive: true }
|
||||
)
|
||||
sinon.assert.calledWith(
|
||||
ctx.fs.copyFile,
|
||||
ctx.documentPath,
|
||||
sinon.match(filePath => {
|
||||
return (
|
||||
filePath.startsWith('/output/') &&
|
||||
filePath.endsWith('/output.docx')
|
||||
)
|
||||
})
|
||||
)
|
||||
})
|
||||
|
||||
it('should schedule cleanup of the conversion output dir', function (ctx) {
|
||||
sinon.assert.calledWith(
|
||||
ctx.ConversionOutputCleaner.scheduleCleanup,
|
||||
sinon.match(
|
||||
/^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/
|
||||
)
|
||||
)
|
||||
})
|
||||
|
||||
it('should respond with the conversion id, build id, and file name', function (ctx) {
|
||||
sinon.assert.calledWith(
|
||||
ctx.res.json,
|
||||
sinon.match({
|
||||
conversionId: sinon.match(
|
||||
/^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/
|
||||
),
|
||||
buildId: sinon.match(/^[0-9a-f]+-[0-9a-f]+$/),
|
||||
file: 'output.docx',
|
||||
})
|
||||
)
|
||||
})
|
||||
|
||||
it('should not stream the document', function (ctx) {
|
||||
sinon.assert.notCalled(ctx.fsSync.createReadStream)
|
||||
sinon.assert.notCalled(ctx.pipeline)
|
||||
})
|
||||
|
||||
it('should clean up the working conversion directory', function (ctx) {
|
||||
sinon.assert.calledWith(ctx.fs.rm, sinon.match(uuidDirPattern), {
|
||||
recursive: true,
|
||||
force: true,
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
describe('with conversionType=markdown', function () {
|
||||
beforeEach(async function (ctx) {
|
||||
ctx.req.query = { type: 'markdown', projectName: 'My_Project' }
|
||||
|
||||
48
services/clsi/test/unit/js/ConversionOutputCleaner.test.js
Normal file
48
services/clsi/test/unit/js/ConversionOutputCleaner.test.js
Normal file
@@ -0,0 +1,48 @@
|
||||
import sinon from 'sinon'
|
||||
import { vi, describe, it, beforeEach, afterEach } from 'vitest'
|
||||
import Path from 'node:path'
|
||||
|
||||
const MODULE_PATH = Path.join(
|
||||
import.meta.dirname,
|
||||
'../../../app/js/ConversionOutputCleaner'
|
||||
)
|
||||
|
||||
describe('ConversionOutputCleaner', function () {
|
||||
beforeEach(async function (ctx) {
|
||||
ctx.clock = sinon.useFakeTimers()
|
||||
ctx.Settings = {
|
||||
path: { outputDir: '/output' },
|
||||
}
|
||||
ctx.fs = {
|
||||
rm: sinon.stub().resolves(),
|
||||
}
|
||||
ctx.logger = {
|
||||
warn: sinon.stub(),
|
||||
}
|
||||
|
||||
vi.doMock('node:fs/promises', () => ({ default: ctx.fs }))
|
||||
vi.doMock('@overleaf/settings', () => ({ default: ctx.Settings }))
|
||||
vi.doMock('@overleaf/logger', () => ({ default: ctx.logger }))
|
||||
|
||||
ctx.ConversionOutputCleaner = (await import(MODULE_PATH)).default
|
||||
})
|
||||
|
||||
afterEach(function (ctx) {
|
||||
ctx.clock.restore()
|
||||
})
|
||||
|
||||
it('does not remove the directory before the TTL elapses', function (ctx) {
|
||||
ctx.ConversionOutputCleaner.scheduleCleanup('test-conversion-id')
|
||||
ctx.clock.tick(ctx.ConversionOutputCleaner.TTL_MS - 1)
|
||||
sinon.assert.notCalled(ctx.fs.rm)
|
||||
})
|
||||
|
||||
it('removes the conversion output directory once the TTL elapses', function (ctx) {
|
||||
ctx.ConversionOutputCleaner.scheduleCleanup('test-conversion-id')
|
||||
ctx.clock.tick(ctx.ConversionOutputCleaner.TTL_MS)
|
||||
sinon.assert.calledWith(ctx.fs.rm, '/output/test-conversion-id', {
|
||||
recursive: true,
|
||||
force: true,
|
||||
})
|
||||
})
|
||||
})
|
||||
Reference in New Issue
Block a user