From 19e6fc4fe3391c1920013f1119b9a05e846f27a2 Mon Sep 17 00:00:00 2001 From: Brian Gough Date: Wed, 17 Sep 2025 12:04:30 +0100 Subject: [PATCH] Merge pull request #28506 from overleaf/bg-size-limits add size check when cloning project (logging only) GitOrigin-RevId: 1f56ed80a2d05b28c44fab8532d751ad8e758943 --- .../history-v1/api/controllers/projects.js | 26 ++ services/history-v1/api/swagger/projects.js | 48 +++ .../test/acceptance/js/api/projects.test.js | 367 +++++++++++++----- .../src/Features/Errors/ErrorController.mjs | 6 + .../src/Features/History/HistoryManager.js | 9 + .../Features/Project/ProjectDuplicator.mjs | 10 + .../acceptance/src/mocks/MockV1HistoryApi.mjs | 50 +++ .../src/Project/ProjectDuplicator.test.mjs | 11 + 8 files changed, 428 insertions(+), 99 deletions(-) diff --git a/services/history-v1/api/controllers/projects.js b/services/history-v1/api/controllers/projects.js index e24ee81ca1..d5a6fe3afc 100644 --- a/services/history-v1/api/controllers/projects.js +++ b/services/history-v1/api/controllers/projects.js @@ -13,6 +13,7 @@ const logger = require('@overleaf/logger') const { Chunk, ChunkResponse, Blob } = require('overleaf-editor-core') const { BlobStore, + BatchBlobStore, blobHash, chunkStore, redisBuffer, @@ -377,6 +378,30 @@ function sumUpByteLength(blobs) { return blobs.reduce((sum, blob) => sum + blob.getByteLength(), 0) } +async function getBlobStats(req, res) { + const projectId = req.swagger.params.project_id.value + const blobHashes = req.swagger.params.body.value.blobHashes || [] + for (const hash of blobHashes) { + assert.blobHash(hash, 'bad hash') + } + const blobStore = new BlobStore(projectId) + const batchBlobStore = new BatchBlobStore(blobStore) + await batchBlobStore.preload(Array.from(blobHashes)) + const blobs = Array.from(batchBlobStore.blobs.values()).filter(Boolean) + const textBlobs = blobs.filter(b => b.getStringLength() !== null) + const binaryBlobs = blobs.filter(b => b.getStringLength() === null) + const textBlobBytes = sumUpByteLength(textBlobs) + const binaryBlobBytes = sumUpByteLength(binaryBlobs) + res.json({ + projectId, + textBlobBytes, + binaryBlobBytes, + totalBytes: textBlobBytes + binaryBlobBytes, + nTextBlobs: textBlobs.length, + nBinaryBlobs: binaryBlobs.length, + }) +} + async function getProjectBlobsStats(req, res) { const projectIds = req.swagger.params.body.value.projectIds const { blobs } = await getProjectBlobsBatch( @@ -425,5 +450,6 @@ module.exports = { getProjectBlob: expressify(getProjectBlob), headProjectBlob: expressify(headProjectBlob), copyProjectBlob: expressify(copyProjectBlob), + getBlobStats: expressify(getBlobStats), getProjectBlobsStats: expressify(getProjectBlobsStats), } diff --git a/services/history-v1/api/swagger/projects.js b/services/history-v1/api/swagger/projects.js index f008670ad8..fd68d570e9 100644 --- a/services/history-v1/api/swagger/projects.js +++ b/services/history-v1/api/swagger/projects.js @@ -77,6 +77,54 @@ exports.paths = { ], }, }, + '/projects/{project_id}/blob-stats': { + post: { + 'x-swagger-router-controller': 'projects', + operationId: 'getBlobStats', + tags: ['Project'], + description: 'Get specific blob stats for a project.', + consumes: ['application/json'], + parameters: [ + { + name: 'project_id', + in: 'path', + description: 'project id', + required: true, + type: 'string', + }, + { + name: 'body', + in: 'body', + required: true, + schema: { + type: 'object', + properties: { + blobHashes: { + type: 'array', + items: { + type: 'string', + }, + }, + }, + required: ['blobHashes'], + }, + }, + ], + responses: { + 200: { + description: 'Success', + schema: { + $ref: '#/definitions/ProjectBlobStats', + }, + }, + }, + security: [ + { + basic: [], + }, + ], + }, + }, '/projects/{project_id}': { delete: { 'x-swagger-router-controller': 'projects', diff --git a/services/history-v1/test/acceptance/js/api/projects.test.js b/services/history-v1/test/acceptance/js/api/projects.test.js index fbae11fb25..d72ea66ff0 100644 --- a/services/history-v1/test/acceptance/js/api/projects.test.js +++ b/services/history-v1/test/acceptance/js/api/projects.test.js @@ -2,6 +2,7 @@ const { expect } = require('chai') const fs = require('node:fs') +const { Readable } = require('node:stream') const HTTPStatus = require('http-status') const fetch = require('node-fetch') const sinon = require('sinon') @@ -15,6 +16,7 @@ const { BlobStore, persistChanges, redisBuffer, + blobHash, } = require('../../../../storage') const { expectHttpError } = require('./support/expect_response') @@ -145,88 +147,152 @@ describe('project controller', function () { await populateProject(populatedMongoProjectId) }) - it('handles empty postgres project', async function () { - const { body } = - await testServer.basicAuthClient.apis.Project.getProjectBlobsStats({ - body: { projectIds: [emptyPostgresProjectId] }, - }) - expect(body).to.deep.equal([ - { - projectId: emptyPostgresProjectId, - textBlobBytes: 0, - binaryBlobBytes: 0, - totalBytes: 0, - nTextBlobs: 0, - nBinaryBlobs: 0, - }, - ]) - }) - it('handles populated postgres project', async function () { - const { body } = - await testServer.basicAuthClient.apis.Project.getProjectBlobsStats({ - body: { projectIds: [populatedPostgresProjectId] }, - }) - expect(body).to.deep.equal([ - { - projectId: populatedPostgresProjectId, - textBlobBytes: testFiles.HELLO_TXT_BYTE_LENGTH, - binaryBlobBytes: testFiles.GRAPH_PNG_BYTE_LENGTH, - totalBytes: - testFiles.HELLO_TXT_BYTE_LENGTH + testFiles.GRAPH_PNG_BYTE_LENGTH, - nTextBlobs: 1, - nBinaryBlobs: 1, - }, - ]) - }) - - it('handles empty mongo project', async function () { - const { body } = - await testServer.basicAuthClient.apis.Project.getProjectBlobsStats({ - body: { projectIds: [emptyMongoProjectId] }, - }) - expect(body).to.deep.equal([ - { - projectId: emptyMongoProjectId, - textBlobBytes: 0, - binaryBlobBytes: 0, - totalBytes: 0, - nTextBlobs: 0, - nBinaryBlobs: 0, - }, - ]) - }) - it('handles populated mongo project', async function () { - const { body } = - await testServer.basicAuthClient.apis.Project.getProjectBlobsStats({ - body: { projectIds: [populatedMongoProjectId] }, - }) - expect(body).to.deep.equal([ - { - projectId: populatedMongoProjectId, - textBlobBytes: testFiles.HELLO_TXT_BYTE_LENGTH, - binaryBlobBytes: testFiles.GRAPH_PNG_BYTE_LENGTH, - totalBytes: - testFiles.HELLO_TXT_BYTE_LENGTH + testFiles.GRAPH_PNG_BYTE_LENGTH, - nTextBlobs: 1, - nBinaryBlobs: 1, - }, - ]) - }) - - it('handles batch of projects', async function () { - const { body } = - await testServer.basicAuthClient.apis.Project.getProjectBlobsStats({ - body: { - projectIds: [ - populatedPostgresProjectId, - populatedMongoProjectId, - emptyPostgresProjectId, - emptyMongoProjectId, - ], + describe('getProjectBlobsStats', function () { + it('handles empty postgres project', async function () { + const { body } = + await testServer.basicAuthClient.apis.Project.getProjectBlobsStats({ + body: { projectIds: [emptyPostgresProjectId] }, + }) + expect(body).to.deep.equal([ + { + projectId: emptyPostgresProjectId, + textBlobBytes: 0, + binaryBlobBytes: 0, + totalBytes: 0, + nTextBlobs: 0, + nBinaryBlobs: 0, }, + ]) + }) + it('handles populated postgres project', async function () { + const { body } = + await testServer.basicAuthClient.apis.Project.getProjectBlobsStats({ + body: { projectIds: [populatedPostgresProjectId] }, + }) + expect(body).to.deep.equal([ + { + projectId: populatedPostgresProjectId, + textBlobBytes: testFiles.HELLO_TXT_BYTE_LENGTH, + binaryBlobBytes: testFiles.GRAPH_PNG_BYTE_LENGTH, + totalBytes: + testFiles.HELLO_TXT_BYTE_LENGTH + testFiles.GRAPH_PNG_BYTE_LENGTH, + nTextBlobs: 1, + nBinaryBlobs: 1, + }, + ]) + }) + + it('handles empty mongo project', async function () { + const { body } = + await testServer.basicAuthClient.apis.Project.getProjectBlobsStats({ + body: { projectIds: [emptyMongoProjectId] }, + }) + expect(body).to.deep.equal([ + { + projectId: emptyMongoProjectId, + textBlobBytes: 0, + binaryBlobBytes: 0, + totalBytes: 0, + nTextBlobs: 0, + nBinaryBlobs: 0, + }, + ]) + }) + it('handles populated mongo project', async function () { + const { body } = + await testServer.basicAuthClient.apis.Project.getProjectBlobsStats({ + body: { projectIds: [populatedMongoProjectId] }, + }) + expect(body).to.deep.equal([ + { + projectId: populatedMongoProjectId, + textBlobBytes: testFiles.HELLO_TXT_BYTE_LENGTH, + binaryBlobBytes: testFiles.GRAPH_PNG_BYTE_LENGTH, + totalBytes: + testFiles.HELLO_TXT_BYTE_LENGTH + testFiles.GRAPH_PNG_BYTE_LENGTH, + nTextBlobs: 1, + nBinaryBlobs: 1, + }, + ]) + }) + + it('handles batch of projects', async function () { + const { body } = + await testServer.basicAuthClient.apis.Project.getProjectBlobsStats({ + body: { + projectIds: [ + populatedPostgresProjectId, + populatedMongoProjectId, + emptyPostgresProjectId, + emptyMongoProjectId, + ], + }, + }) + expect(body).to.deep.equal([ + { + projectId: populatedPostgresProjectId, + textBlobBytes: testFiles.HELLO_TXT_BYTE_LENGTH, + binaryBlobBytes: testFiles.GRAPH_PNG_BYTE_LENGTH, + totalBytes: + testFiles.HELLO_TXT_BYTE_LENGTH + testFiles.GRAPH_PNG_BYTE_LENGTH, + nTextBlobs: 1, + nBinaryBlobs: 1, + }, + { + projectId: populatedMongoProjectId, + textBlobBytes: testFiles.HELLO_TXT_BYTE_LENGTH, + binaryBlobBytes: testFiles.GRAPH_PNG_BYTE_LENGTH, + totalBytes: + testFiles.HELLO_TXT_BYTE_LENGTH + testFiles.GRAPH_PNG_BYTE_LENGTH, + nTextBlobs: 1, + nBinaryBlobs: 1, + }, + { + projectId: emptyPostgresProjectId, + textBlobBytes: 0, + binaryBlobBytes: 0, + totalBytes: 0, + nTextBlobs: 0, + nBinaryBlobs: 0, + }, + { + projectId: emptyMongoProjectId, + textBlobBytes: 0, + binaryBlobBytes: 0, + totalBytes: 0, + nTextBlobs: 0, + nBinaryBlobs: 0, + }, + ]) + }) + }) + + describe('getBlobStats', function () { + it('handles empty list of hashes', async function () { + const { body } = + await testServer.basicAuthClient.apis.Project.getBlobStats({ + project_id: populatedPostgresProjectId, + body: { blobHashes: [] }, + }) + expect(body).to.deep.equal({ + projectId: populatedPostgresProjectId, + textBlobBytes: 0, + binaryBlobBytes: 0, + totalBytes: 0, + nTextBlobs: 0, + nBinaryBlobs: 0, }) - expect(body).to.deep.equal([ - { + }) + + it('handles a mix of text and binary blobs', async function () { + const { body } = + await testServer.basicAuthClient.apis.Project.getBlobStats({ + project_id: populatedPostgresProjectId, + body: { + blobHashes: [testFiles.HELLO_TXT_HASH, testFiles.GRAPH_PNG_HASH], + }, + }) + expect(body).to.deep.equal({ projectId: populatedPostgresProjectId, textBlobBytes: testFiles.HELLO_TXT_BYTE_LENGTH, binaryBlobBytes: testFiles.GRAPH_PNG_BYTE_LENGTH, @@ -234,33 +300,136 @@ describe('project controller', function () { testFiles.HELLO_TXT_BYTE_LENGTH + testFiles.GRAPH_PNG_BYTE_LENGTH, nTextBlobs: 1, nBinaryBlobs: 1, - }, - { - projectId: populatedMongoProjectId, + }) + }) + + it('handles only text blobs', async function () { + const { body } = + await testServer.basicAuthClient.apis.Project.getBlobStats({ + project_id: populatedPostgresProjectId, + body: { + blobHashes: [testFiles.HELLO_TXT_HASH], + }, + }) + expect(body).to.deep.equal({ + projectId: populatedPostgresProjectId, textBlobBytes: testFiles.HELLO_TXT_BYTE_LENGTH, - binaryBlobBytes: testFiles.GRAPH_PNG_BYTE_LENGTH, - totalBytes: - testFiles.HELLO_TXT_BYTE_LENGTH + testFiles.GRAPH_PNG_BYTE_LENGTH, + binaryBlobBytes: 0, + totalBytes: testFiles.HELLO_TXT_BYTE_LENGTH, nTextBlobs: 1, + nBinaryBlobs: 0, + }) + }) + + it('handles only binary blobs', async function () { + const { body } = + await testServer.basicAuthClient.apis.Project.getBlobStats({ + project_id: populatedPostgresProjectId, + body: { + blobHashes: [testFiles.GRAPH_PNG_HASH], + }, + }) + expect(body).to.deep.equal({ + projectId: populatedPostgresProjectId, + textBlobBytes: 0, + binaryBlobBytes: testFiles.GRAPH_PNG_BYTE_LENGTH, + totalBytes: testFiles.GRAPH_PNG_BYTE_LENGTH, + nTextBlobs: 0, nBinaryBlobs: 1, - }, - { - projectId: emptyPostgresProjectId, + }) + }) + + it('handles non-existent blobs', async function () { + const { body } = + await testServer.basicAuthClient.apis.Project.getBlobStats({ + project_id: populatedPostgresProjectId, + body: { + blobHashes: [testFiles.STRING_AB_HASH], + }, + }) + expect(body).to.deep.equal({ + projectId: populatedPostgresProjectId, textBlobBytes: 0, binaryBlobBytes: 0, totalBytes: 0, nTextBlobs: 0, nBinaryBlobs: 0, - }, - { - projectId: emptyMongoProjectId, - textBlobBytes: 0, - binaryBlobBytes: 0, - totalBytes: 0, - nTextBlobs: 0, - nBinaryBlobs: 0, - }, - ]) + }) + }) + + it('throws an error for bad hashes', async function () { + await expectHttpError( + testServer.basicAuthClient.apis.Project.getBlobStats({ + project_id: populatedPostgresProjectId, + body: { + blobHashes: ['non-existent-hash'], + }, + }), + HTTPStatus.INTERNAL_SERVER_ERROR + ) + }) + + it('handles a request with a large number of blobs', async function () { + const projectId = await testProjects.createEmptyProject() + const blobHashes = [] + let expectedTextBytes = 0 + let expectedBinaryBytes = 0 + const nTextBlobs = 10 + const nBinaryBlobs = 10 + + for (let i = 0; i < nTextBlobs; i++) { + const content = `text blob ${i}` + const hash = blobHash.fromString(content) + blobHashes.push(hash) + expectedTextBytes += content.length + const res = await fetch( + testServer.url(`/api/projects/${projectId}/blobs/${hash}`), + { + method: 'PUT', + body: content, + headers: { Authorization: testServer.basicAuthHeader }, + } + ) + expect(res.status).to.equal(HTTPStatus.CREATED) + } + + for (let i = 0; i < nBinaryBlobs; i++) { + const content = Buffer.from([0, i, i + 1, i + 2]) + const hash = await blobHash.fromStream( + content.length, + Readable.from(content) + ) + blobHashes.push(hash) + expectedBinaryBytes += content.length + const res = await fetch( + testServer.url(`/api/projects/${projectId}/blobs/${hash}`), + { + method: 'PUT', + body: content, + headers: { + Authorization: testServer.basicAuthHeader, + 'Content-Type': 'application/octet-stream', + }, + } + ) + expect(res.status).to.equal(HTTPStatus.CREATED) + } + + const { body } = + await testServer.basicAuthClient.apis.Project.getBlobStats({ + project_id: projectId, + body: { blobHashes }, + }) + + expect(body).to.deep.equal({ + projectId, + textBlobBytes: expectedTextBytes, + binaryBlobBytes: expectedBinaryBytes, + totalBytes: expectedTextBytes + expectedBinaryBytes, + nTextBlobs, + nBinaryBlobs, + }) + }) }) }) diff --git a/services/web/app/src/Features/Errors/ErrorController.mjs b/services/web/app/src/Features/Errors/ErrorController.mjs index 908c9e02db..d608078aee 100644 --- a/services/web/app/src/Features/Errors/ErrorController.mjs +++ b/services/web/app/src/Features/Errors/ErrorController.mjs @@ -89,6 +89,12 @@ async function handleError(error, req, res, next) { if (shouldSendErrorResponse) { HttpErrorHandler.badRequest(req, res, error.message) } + } else if (error instanceof Errors.FileTooLargeError) { + req.logger.setLevel('warn') + if (shouldSendErrorResponse) { + res.status(400) + plainTextResponse(res, error.message) + } } else if (isZodErrorLike(error)) { req.logger.setLevel('warn') res.status(400) diff --git a/services/web/app/src/Features/History/HistoryManager.js b/services/web/app/src/Features/History/HistoryManager.js index 46d8831a55..30be65db38 100644 --- a/services/web/app/src/Features/History/HistoryManager.js +++ b/services/web/app/src/Features/History/HistoryManager.js @@ -307,6 +307,14 @@ async function getHistoryId(projectId) { return historyId } +async function getBlobStats(historyId, blobHashes) { + return await fetchJson(`${HISTORY_V1_URL}/projects/${historyId}/blob-stats`, { + method: 'POST', + basicAuth: HISTORY_V1_BASIC_AUTH, + json: { blobHashes: blobHashes.map(id => id.toString()) }, + }) +} + async function getProjectBlobStats(historyIds) { return await fetchJson(`${HISTORY_V1_URL}/projects/blob-stats`, { method: 'POST', @@ -427,5 +435,6 @@ module.exports = { getLatestHistory, getChanges, getProjectBlobStats, + getBlobStats, }, } diff --git a/services/web/app/src/Features/Project/ProjectDuplicator.mjs b/services/web/app/src/Features/Project/ProjectDuplicator.mjs index 5014eff139..f4788b1242 100644 --- a/services/web/app/src/Features/Project/ProjectDuplicator.mjs +++ b/services/web/app/src/Features/Project/ProjectDuplicator.mjs @@ -20,6 +20,7 @@ import TpdsProjectFlusher from '../ThirdPartyDataStore/TpdsProjectFlusher.js' import _ from 'lodash' import TagsHandler from '../Tags/TagsHandler.js' import ClsiCacheManager from '../Compile/ClsiCacheManager.js' +import Modules from '../../infrastructure/Modules.js' export default { duplicate: callbackify(duplicate), @@ -48,6 +49,15 @@ async function duplicate(owner, originalProjectId, newProjectName, tags = []) { const originalEntries = _getFolderEntries(originalProject.rootFolder[0]) + await Modules.promises.hooks.fire('preDuplicateProject', { + owner, + originalProjectId, + newProjectName, + tags, + originalProject, + originalEntries, + }) + // Pass template ID as analytics segmentation if duplicating project from a template const segmentation = _.pick(originalProject, [ 'fromV1TemplateId', diff --git a/services/web/test/acceptance/src/mocks/MockV1HistoryApi.mjs b/services/web/test/acceptance/src/mocks/MockV1HistoryApi.mjs index 2dfdf4478c..8c9db1de78 100644 --- a/services/web/test/acceptance/src/mocks/MockV1HistoryApi.mjs +++ b/services/web/test/acceptance/src/mocks/MockV1HistoryApi.mjs @@ -31,6 +31,56 @@ class MockV1HistoryApi extends AbstractMockApi { ) }) + this.app.post('/api/projects/:historyId/blob-stats', (req, res, next) => { + const { historyId } = req.params + const { blobHashes } = req.body + + let textBlobBytes = 0 + let binaryBlobBytes = 0 + let nTextBlobs = 0 + let nBinaryBlobs = 0 + + // Calculate actual sizes from uploaded blobs + if (blobHashes && this.blobs[historyId]) { + for (const hash of blobHashes) { + const buf = this.blobs[historyId][hash] + if (buf) { + const size = buf.byteLength + + // Check if the blob content is valid UTF-8 + let isText = false + try { + const decoder = new TextDecoder('utf-8', { fatal: true }) + decoder.decode(buf) + isText = true + } catch (e) { + // Not valid UTF-8, treat as binary + isText = false + } + + if (isText) { + textBlobBytes += size + nTextBlobs++ + } else { + binaryBlobBytes += size + nBinaryBlobs++ + } + } + } + } + + const totalBytes = textBlobBytes + binaryBlobBytes + + res.json({ + projectId: historyId, + textBlobBytes, + binaryBlobBytes, + totalBytes, + nTextBlobs, + nBinaryBlobs, + }) + }) + this.app.get( '/api/projects/:project_id/version/:version/zip', (req, res, next) => { diff --git a/services/web/test/unit/src/Project/ProjectDuplicator.test.mjs b/services/web/test/unit/src/Project/ProjectDuplicator.test.mjs index f7fc10d731..1184706903 100644 --- a/services/web/test/unit/src/Project/ProjectDuplicator.test.mjs +++ b/services/web/test/unit/src/Project/ProjectDuplicator.test.mjs @@ -212,6 +212,13 @@ describe('ProjectDuplicator', function () { flushProjectToTpds: sinon.stub().resolves(), }, } + ctx.Modules = { + promises: { + hooks: { + fire: sinon.stub().resolves([]), + }, + }, + } vi.doMock('../../../../app/src/models/Doc', () => ({ Doc: ctx.Doc, @@ -287,6 +294,10 @@ describe('ProjectDuplicator', function () { default: ctx.HistoryManager, })) + vi.doMock('../../../../app/src/infrastructure/Modules', () => ({ + default: ctx.Modules, + })) + vi.doMock('../../../../app/src/Features/Compile/ClsiCacheManager', () => ({ default: { prepareClsiCache: sinon.stub().rejects(new Error('ignore this')),