Merge pull request #28506 from overleaf/bg-size-limits

add size check when cloning project (logging only)

GitOrigin-RevId: 1f56ed80a2d05b28c44fab8532d751ad8e758943
This commit is contained in:
Brian Gough
2025-09-17 12:04:30 +01:00
committed by Copybot
parent fdc2ed99c7
commit 19e6fc4fe3
8 changed files with 428 additions and 99 deletions

View File

@@ -13,6 +13,7 @@ const logger = require('@overleaf/logger')
const { Chunk, ChunkResponse, Blob } = require('overleaf-editor-core')
const {
BlobStore,
BatchBlobStore,
blobHash,
chunkStore,
redisBuffer,
@@ -377,6 +378,30 @@ function sumUpByteLength(blobs) {
return blobs.reduce((sum, blob) => sum + blob.getByteLength(), 0)
}
/**
 * Report size statistics for an explicit list of blobs in one project.
 *
 * Reads the project id from the path parameters and `blobHashes` from the
 * request body (a missing list is treated as empty), checks every hash with
 * `assert.blobHash`, preloads the blobs in one batch and answers with JSON
 * byte totals and counts split into text and binary blobs. Falsy entries in
 * the batch store's blob map are left out of the statistics.
 *
 * @param {object} req - request carrying the parsed swagger parameters
 * @param {object} res - response; the statistics are sent with `res.json`
 * @returns {Promise<void>}
 */
async function getBlobStats(req, res) {
  const { params } = req.swagger
  const projectId = params.project_id.value
  const blobHashes = params.body.value.blobHashes || []
  for (const candidate of blobHashes) {
    assert.blobHash(candidate, 'bad hash')
  }

  const batchBlobStore = new BatchBlobStore(new BlobStore(projectId))
  await batchBlobStore.preload(Array.from(blobHashes))

  // A blob that reports a string length is text; a null string length means
  // binary.
  const textBlobs = []
  const binaryBlobs = []
  for (const blob of batchBlobStore.blobs.values()) {
    if (!blob) {
      continue
    }
    if (blob.getStringLength() !== null) {
      textBlobs.push(blob)
    } else {
      binaryBlobs.push(blob)
    }
  }

  const textBlobBytes = sumUpByteLength(textBlobs)
  const binaryBlobBytes = sumUpByteLength(binaryBlobs)
  res.json({
    projectId,
    textBlobBytes,
    binaryBlobBytes,
    totalBytes: textBlobBytes + binaryBlobBytes,
    nTextBlobs: textBlobs.length,
    nBinaryBlobs: binaryBlobs.length,
  })
}
async function getProjectBlobsStats(req, res) {
const projectIds = req.swagger.params.body.value.projectIds
const { blobs } = await getProjectBlobsBatch(
@@ -425,5 +450,6 @@ module.exports = {
getProjectBlob: expressify(getProjectBlob),
headProjectBlob: expressify(headProjectBlob),
copyProjectBlob: expressify(copyProjectBlob),
getBlobStats: expressify(getBlobStats),
getProjectBlobsStats: expressify(getProjectBlobsStats),
}

View File

@@ -77,6 +77,54 @@ exports.paths = {
],
},
},
// POST /projects/{project_id}/blob-stats
// Routed to the `getBlobStats` operation of the `projects` controller. The
// JSON body must contain `blobHashes`, an array of strings. A 200 response is
// described by the `ProjectBlobStats` definition. Protected by basic auth.
'/projects/{project_id}/blob-stats': {
post: {
'x-swagger-router-controller': 'projects',
operationId: 'getBlobStats',
tags: ['Project'],
description: 'Get specific blob stats for a project.',
consumes: ['application/json'],
parameters: [
{
name: 'project_id',
in: 'path',
description: 'project id',
required: true,
type: 'string',
},
{
name: 'body',
in: 'body',
required: true,
schema: {
type: 'object',
properties: {
blobHashes: {
type: 'array',
items: {
type: 'string',
},
},
},
required: ['blobHashes'],
},
},
],
responses: {
200: {
description: 'Success',
schema: {
$ref: '#/definitions/ProjectBlobStats',
},
},
},
security: [
{
basic: [],
},
],
},
},
'/projects/{project_id}': {
delete: {
'x-swagger-router-controller': 'projects',

View File

@@ -2,6 +2,7 @@
const { expect } = require('chai')
const fs = require('node:fs')
const { Readable } = require('node:stream')
const HTTPStatus = require('http-status')
const fetch = require('node-fetch')
const sinon = require('sinon')
@@ -15,6 +16,7 @@ const {
BlobStore,
persistChanges,
redisBuffer,
blobHash,
} = require('../../../../storage')
const { expectHttpError } = require('./support/expect_response')
@@ -145,88 +147,152 @@ describe('project controller', function () {
await populateProject(populatedMongoProjectId)
})
it('handles empty postgres project', async function () {
const { body } =
await testServer.basicAuthClient.apis.Project.getProjectBlobsStats({
body: { projectIds: [emptyPostgresProjectId] },
})
expect(body).to.deep.equal([
{
projectId: emptyPostgresProjectId,
textBlobBytes: 0,
binaryBlobBytes: 0,
totalBytes: 0,
nTextBlobs: 0,
nBinaryBlobs: 0,
},
])
})
it('handles populated postgres project', async function () {
const { body } =
await testServer.basicAuthClient.apis.Project.getProjectBlobsStats({
body: { projectIds: [populatedPostgresProjectId] },
})
expect(body).to.deep.equal([
{
projectId: populatedPostgresProjectId,
textBlobBytes: testFiles.HELLO_TXT_BYTE_LENGTH,
binaryBlobBytes: testFiles.GRAPH_PNG_BYTE_LENGTH,
totalBytes:
testFiles.HELLO_TXT_BYTE_LENGTH + testFiles.GRAPH_PNG_BYTE_LENGTH,
nTextBlobs: 1,
nBinaryBlobs: 1,
},
])
})
it('handles empty mongo project', async function () {
const { body } =
await testServer.basicAuthClient.apis.Project.getProjectBlobsStats({
body: { projectIds: [emptyMongoProjectId] },
})
expect(body).to.deep.equal([
{
projectId: emptyMongoProjectId,
textBlobBytes: 0,
binaryBlobBytes: 0,
totalBytes: 0,
nTextBlobs: 0,
nBinaryBlobs: 0,
},
])
})
it('handles populated mongo project', async function () {
const { body } =
await testServer.basicAuthClient.apis.Project.getProjectBlobsStats({
body: { projectIds: [populatedMongoProjectId] },
})
expect(body).to.deep.equal([
{
projectId: populatedMongoProjectId,
textBlobBytes: testFiles.HELLO_TXT_BYTE_LENGTH,
binaryBlobBytes: testFiles.GRAPH_PNG_BYTE_LENGTH,
totalBytes:
testFiles.HELLO_TXT_BYTE_LENGTH + testFiles.GRAPH_PNG_BYTE_LENGTH,
nTextBlobs: 1,
nBinaryBlobs: 1,
},
])
})
it('handles batch of projects', async function () {
const { body } =
await testServer.basicAuthClient.apis.Project.getProjectBlobsStats({
body: {
projectIds: [
populatedPostgresProjectId,
populatedMongoProjectId,
emptyPostgresProjectId,
emptyMongoProjectId,
],
describe('getProjectBlobsStats', function () {
it('handles empty postgres project', async function () {
const { body } =
await testServer.basicAuthClient.apis.Project.getProjectBlobsStats({
body: { projectIds: [emptyPostgresProjectId] },
})
expect(body).to.deep.equal([
{
projectId: emptyPostgresProjectId,
textBlobBytes: 0,
binaryBlobBytes: 0,
totalBytes: 0,
nTextBlobs: 0,
nBinaryBlobs: 0,
},
])
})
it('handles populated postgres project', async function () {
const { body } =
await testServer.basicAuthClient.apis.Project.getProjectBlobsStats({
body: { projectIds: [populatedPostgresProjectId] },
})
expect(body).to.deep.equal([
{
projectId: populatedPostgresProjectId,
textBlobBytes: testFiles.HELLO_TXT_BYTE_LENGTH,
binaryBlobBytes: testFiles.GRAPH_PNG_BYTE_LENGTH,
totalBytes:
testFiles.HELLO_TXT_BYTE_LENGTH + testFiles.GRAPH_PNG_BYTE_LENGTH,
nTextBlobs: 1,
nBinaryBlobs: 1,
},
])
})
it('handles empty mongo project', async function () {
const { body } =
await testServer.basicAuthClient.apis.Project.getProjectBlobsStats({
body: { projectIds: [emptyMongoProjectId] },
})
expect(body).to.deep.equal([
{
projectId: emptyMongoProjectId,
textBlobBytes: 0,
binaryBlobBytes: 0,
totalBytes: 0,
nTextBlobs: 0,
nBinaryBlobs: 0,
},
])
})
it('handles populated mongo project', async function () {
const { body } =
await testServer.basicAuthClient.apis.Project.getProjectBlobsStats({
body: { projectIds: [populatedMongoProjectId] },
})
expect(body).to.deep.equal([
{
projectId: populatedMongoProjectId,
textBlobBytes: testFiles.HELLO_TXT_BYTE_LENGTH,
binaryBlobBytes: testFiles.GRAPH_PNG_BYTE_LENGTH,
totalBytes:
testFiles.HELLO_TXT_BYTE_LENGTH + testFiles.GRAPH_PNG_BYTE_LENGTH,
nTextBlobs: 1,
nBinaryBlobs: 1,
},
])
})
it('handles batch of projects', async function () {
const { body } =
await testServer.basicAuthClient.apis.Project.getProjectBlobsStats({
body: {
projectIds: [
populatedPostgresProjectId,
populatedMongoProjectId,
emptyPostgresProjectId,
emptyMongoProjectId,
],
},
})
expect(body).to.deep.equal([
{
projectId: populatedPostgresProjectId,
textBlobBytes: testFiles.HELLO_TXT_BYTE_LENGTH,
binaryBlobBytes: testFiles.GRAPH_PNG_BYTE_LENGTH,
totalBytes:
testFiles.HELLO_TXT_BYTE_LENGTH + testFiles.GRAPH_PNG_BYTE_LENGTH,
nTextBlobs: 1,
nBinaryBlobs: 1,
},
{
projectId: populatedMongoProjectId,
textBlobBytes: testFiles.HELLO_TXT_BYTE_LENGTH,
binaryBlobBytes: testFiles.GRAPH_PNG_BYTE_LENGTH,
totalBytes:
testFiles.HELLO_TXT_BYTE_LENGTH + testFiles.GRAPH_PNG_BYTE_LENGTH,
nTextBlobs: 1,
nBinaryBlobs: 1,
},
{
projectId: emptyPostgresProjectId,
textBlobBytes: 0,
binaryBlobBytes: 0,
totalBytes: 0,
nTextBlobs: 0,
nBinaryBlobs: 0,
},
{
projectId: emptyMongoProjectId,
textBlobBytes: 0,
binaryBlobBytes: 0,
totalBytes: 0,
nTextBlobs: 0,
nBinaryBlobs: 0,
},
])
})
})
describe('getBlobStats', function () {
it('handles empty list of hashes', async function () {
const { body } =
await testServer.basicAuthClient.apis.Project.getBlobStats({
project_id: populatedPostgresProjectId,
body: { blobHashes: [] },
})
expect(body).to.deep.equal({
projectId: populatedPostgresProjectId,
textBlobBytes: 0,
binaryBlobBytes: 0,
totalBytes: 0,
nTextBlobs: 0,
nBinaryBlobs: 0,
})
expect(body).to.deep.equal([
{
})
it('handles a mix of text and binary blobs', async function () {
const { body } =
await testServer.basicAuthClient.apis.Project.getBlobStats({
project_id: populatedPostgresProjectId,
body: {
blobHashes: [testFiles.HELLO_TXT_HASH, testFiles.GRAPH_PNG_HASH],
},
})
expect(body).to.deep.equal({
projectId: populatedPostgresProjectId,
textBlobBytes: testFiles.HELLO_TXT_BYTE_LENGTH,
binaryBlobBytes: testFiles.GRAPH_PNG_BYTE_LENGTH,
@@ -234,33 +300,136 @@ describe('project controller', function () {
testFiles.HELLO_TXT_BYTE_LENGTH + testFiles.GRAPH_PNG_BYTE_LENGTH,
nTextBlobs: 1,
nBinaryBlobs: 1,
},
{
projectId: populatedMongoProjectId,
})
})
it('handles only text blobs', async function () {
const { body } =
await testServer.basicAuthClient.apis.Project.getBlobStats({
project_id: populatedPostgresProjectId,
body: {
blobHashes: [testFiles.HELLO_TXT_HASH],
},
})
expect(body).to.deep.equal({
projectId: populatedPostgresProjectId,
textBlobBytes: testFiles.HELLO_TXT_BYTE_LENGTH,
binaryBlobBytes: testFiles.GRAPH_PNG_BYTE_LENGTH,
totalBytes:
testFiles.HELLO_TXT_BYTE_LENGTH + testFiles.GRAPH_PNG_BYTE_LENGTH,
binaryBlobBytes: 0,
totalBytes: testFiles.HELLO_TXT_BYTE_LENGTH,
nTextBlobs: 1,
nBinaryBlobs: 0,
})
})
it('handles only binary blobs', async function () {
const { body } =
await testServer.basicAuthClient.apis.Project.getBlobStats({
project_id: populatedPostgresProjectId,
body: {
blobHashes: [testFiles.GRAPH_PNG_HASH],
},
})
expect(body).to.deep.equal({
projectId: populatedPostgresProjectId,
textBlobBytes: 0,
binaryBlobBytes: testFiles.GRAPH_PNG_BYTE_LENGTH,
totalBytes: testFiles.GRAPH_PNG_BYTE_LENGTH,
nTextBlobs: 0,
nBinaryBlobs: 1,
},
{
projectId: emptyPostgresProjectId,
})
})
it('handles non-existent blobs', async function () {
const { body } =
await testServer.basicAuthClient.apis.Project.getBlobStats({
project_id: populatedPostgresProjectId,
body: {
blobHashes: [testFiles.STRING_AB_HASH],
},
})
expect(body).to.deep.equal({
projectId: populatedPostgresProjectId,
textBlobBytes: 0,
binaryBlobBytes: 0,
totalBytes: 0,
nTextBlobs: 0,
nBinaryBlobs: 0,
},
{
projectId: emptyMongoProjectId,
textBlobBytes: 0,
binaryBlobBytes: 0,
totalBytes: 0,
nTextBlobs: 0,
nBinaryBlobs: 0,
},
])
})
})
// The value sent here is a malformed hash string, not a well-formed hash of a
// blob that happens to be missing; the request is expected to fail with
// HTTP 500 (presumably raised by the server's hash validation - confirm).
it('throws an error for bad hashes', async function () {
await expectHttpError(
testServer.basicAuthClient.apis.Project.getBlobStats({
project_id: populatedPostgresProjectId,
body: {
blobHashes: ['non-existent-hash'],
},
}),
HTTPStatus.INTERNAL_SERVER_ERROR
)
})
// Uploads 10 text and 10 binary blobs to a fresh project through the HTTP
// API, then checks that a single getBlobStats call accounts for all of them.
it('handles a request with a large number of blobs', async function () {
  const projectId = await testProjects.createEmptyProject()
  const blobHashes = []
  let expectedTextBytes = 0
  let expectedBinaryBytes = 0
  const nTextBlobs = 10
  const nBinaryBlobs = 10

  // PUT one blob and require a 201 response. Basic auth is always sent;
  // callers add any further headers they need.
  const uploadBlob = async (hash, body, extraHeaders = {}) => {
    const res = await fetch(
      testServer.url(`/api/projects/${projectId}/blobs/${hash}`),
      {
        method: 'PUT',
        body,
        headers: {
          Authorization: testServer.basicAuthHeader,
          ...extraHeaders,
        },
      }
    )
    expect(res.status).to.equal(HTTPStatus.CREATED)
  }

  for (let i = 0; i < nTextBlobs; i++) {
    const content = `text blob ${i}`
    const hash = blobHash.fromString(content)
    blobHashes.push(hash)
    // The assertion below is about bytes, so count encoded bytes rather than
    // UTF-16 code units; the two only coincide for ASCII content.
    expectedTextBytes += Buffer.byteLength(content, 'utf8')
    await uploadBlob(hash, content)
  }

  for (let i = 0; i < nBinaryBlobs; i++) {
    // A leading 0 byte followed by three bytes that vary with i.
    const content = Buffer.from([0, i, i + 1, i + 2])
    const hash = await blobHash.fromStream(
      content.length,
      Readable.from(content)
    )
    blobHashes.push(hash)
    expectedBinaryBytes += content.length
    await uploadBlob(hash, content, {
      'Content-Type': 'application/octet-stream',
    })
  }

  const { body } = await testServer.basicAuthClient.apis.Project.getBlobStats({
    project_id: projectId,
    body: { blobHashes },
  })
  expect(body).to.deep.equal({
    projectId,
    textBlobBytes: expectedTextBytes,
    binaryBlobBytes: expectedBinaryBytes,
    totalBytes: expectedTextBytes + expectedBinaryBytes,
    nTextBlobs,
    nBinaryBlobs,
  })
})
})
})

View File

@@ -89,6 +89,12 @@ async function handleError(error, req, res, next) {
if (shouldSendErrorResponse) {
HttpErrorHandler.badRequest(req, res, error.message)
}
} else if (error instanceof Errors.FileTooLargeError) {
req.logger.setLevel('warn')
if (shouldSendErrorResponse) {
res.status(400)
plainTextResponse(res, error.message)
}
} else if (isZodErrorLike(error)) {
req.logger.setLevel('warn')
res.status(400)

View File

@@ -307,6 +307,14 @@ async function getHistoryId(projectId) {
return historyId
}
/**
 * Request size statistics for specific blobs of one history project.
 *
 * POSTs the hashes (each converted with `toString()`) as JSON to the
 * project's `blob-stats` endpoint under `HISTORY_V1_URL`, using basic auth,
 * and resolves with whatever `fetchJson` returns.
 *
 * @param {*} historyId - id interpolated into the request URL
 * @param {Array} blobHashes - hashes to query; each must provide `toString()`
 * @returns {Promise<*>} the parsed JSON response
 */
async function getBlobStats(historyId, blobHashes) {
  const url = `${HISTORY_V1_URL}/projects/${historyId}/blob-stats`
  const hashStrings = blobHashes.map(hash => hash.toString())
  const options = {
    method: 'POST',
    basicAuth: HISTORY_V1_BASIC_AUTH,
    json: { blobHashes: hashStrings },
  }
  return await fetchJson(url, options)
}
async function getProjectBlobStats(historyIds) {
return await fetchJson(`${HISTORY_V1_URL}/projects/blob-stats`, {
method: 'POST',
@@ -427,5 +435,6 @@ module.exports = {
getLatestHistory,
getChanges,
getProjectBlobStats,
getBlobStats,
},
}

View File

@@ -20,6 +20,7 @@ import TpdsProjectFlusher from '../ThirdPartyDataStore/TpdsProjectFlusher.js'
import _ from 'lodash'
import TagsHandler from '../Tags/TagsHandler.js'
import ClsiCacheManager from '../Compile/ClsiCacheManager.js'
import Modules from '../../infrastructure/Modules.js'
export default {
duplicate: callbackify(duplicate),
@@ -48,6 +49,15 @@ async function duplicate(owner, originalProjectId, newProjectName, tags = []) {
const originalEntries = _getFolderEntries(originalProject.rootFolder[0])
await Modules.promises.hooks.fire('preDuplicateProject', {
owner,
originalProjectId,
newProjectName,
tags,
originalProject,
originalEntries,
})
// Pass template ID as analytics segmentation if duplicating project from a template
const segmentation = _.pick(originalProject, [
'fromV1TemplateId',

View File

@@ -31,6 +31,56 @@ class MockV1HistoryApi extends AbstractMockApi {
)
})
// Mock of the history-v1 blob-stats endpoint. Sizes are taken from the blobs
// previously stored in `this.blobs[historyId]`; requested hashes with no
// stored blob are ignored.
this.app.post('/api/projects/:historyId/blob-stats', (req, res, next) => {
  const { historyId } = req.params
  const { blobHashes } = req.body
  // Only look up the project's blobs when a hash list was actually supplied.
  const storedBlobs = blobHashes && this.blobs[historyId]

  // Content that decodes as strict UTF-8 is counted as text, anything else
  // as binary.
  const decodesAsUtf8 = buf => {
    try {
      new TextDecoder('utf-8', { fatal: true }).decode(buf)
      return true
    } catch (e) {
      return false
    }
  }

  let textBlobBytes = 0
  let binaryBlobBytes = 0
  let nTextBlobs = 0
  let nBinaryBlobs = 0

  if (storedBlobs) {
    for (const hash of blobHashes) {
      const buf = storedBlobs[hash]
      if (!buf) {
        continue
      }
      const size = buf.byteLength
      if (decodesAsUtf8(buf)) {
        textBlobBytes += size
        nTextBlobs++
      } else {
        binaryBlobBytes += size
        nBinaryBlobs++
      }
    }
  }

  res.json({
    projectId: historyId,
    textBlobBytes,
    binaryBlobBytes,
    totalBytes: textBlobBytes + binaryBlobBytes,
    nTextBlobs,
    nBinaryBlobs,
  })
})
this.app.get(
'/api/projects/:project_id/version/:version/zip',
(req, res, next) => {

View File

@@ -212,6 +212,13 @@ describe('ProjectDuplicator', function () {
flushProjectToTpds: sinon.stub().resolves(),
},
}
ctx.Modules = {
promises: {
hooks: {
fire: sinon.stub().resolves([]),
},
},
}
vi.doMock('../../../../app/src/models/Doc', () => ({
Doc: ctx.Doc,
@@ -287,6 +294,10 @@ describe('ProjectDuplicator', function () {
default: ctx.HistoryManager,
}))
vi.doMock('../../../../app/src/infrastructure/Modules', () => ({
default: ctx.Modules,
}))
vi.doMock('../../../../app/src/Features/Compile/ClsiCacheManager', () => ({
default: {
prepareClsiCache: sinon.stub().rejects(new Error('ignore this')),