From c18b3f95b227c8dce355a8204ee13e8b9b7b528f Mon Sep 17 00:00:00 2001 From: Eric Mc Sween <5454374+emcsween@users.noreply.github.com> Date: Wed, 21 May 2025 11:34:01 -0400 Subject: [PATCH] Merge pull request #25492 from overleaf/em-paginate-changes-1 Paginate history changes endpoint GitOrigin-RevId: 2b48044d64244404efcd2e090b682c1f571a5567 --- .../history-v1/api/controllers/projects.js | 32 ++--- .../storage/lib/chunk_store/index.js | 6 +- .../test/acceptance/js/api/projects.test.js | 132 ++++++++++-------- .../src/Features/History/HistoryController.js | 34 ++++- .../js/infrastructure/project-snapshot.ts | 49 ++++++- .../infrastructure/project-snapshot.test.ts | 12 +- 6 files changed, 176 insertions(+), 89 deletions(-) diff --git a/services/history-v1/api/controllers/projects.js b/services/history-v1/api/controllers/projects.js index b5c3b629a9..47a1d959ad 100644 --- a/services/history-v1/api/controllers/projects.js +++ b/services/history-v1/api/controllers/projects.js @@ -152,29 +152,27 @@ async function getChanges(req, res, next) { }) } - const changes = [] - let chunk = await chunkStore.loadLatest(projectId) - - if (since > chunk.getEndVersion()) { - return res.status(400).json({ - error: `Version out of bounds: ${since}`, + let chunk + try { + chunk = await chunkStore.loadAtVersion(projectId, since, { + preferNewer: true, }) + } catch (err) { + if (err instanceof Chunk.VersionNotFoundError) { + return res.status(400).json({ + error: `Version out of bounds: ${since}`, + }) + } + throw err } - // Fetch all chunks that come after the chunk that contains the start version - while (chunk.getStartVersion() > since) { - const changesInChunk = chunk.getChanges() - changes.unshift(...changesInChunk) - chunk = await chunkStore.loadAtVersion(projectId, chunk.getStartVersion()) - } + const latestChunkMetadata = await chunkStore.getLatestChunkMetadata(projectId) // Extract the relevant changes from the chunk that contains the start version - const changesInChunk = chunk - .getChanges() - .slice(since - chunk.getStartVersion()) - changes.unshift(...changesInChunk) + const changes = chunk.getChanges().slice(since - chunk.getStartVersion()) + const hasMore = latestChunkMetadata.endVersion > chunk.getEndVersion() - res.json(changes.map(change => change.toRaw())) + res.json({ changes: changes.map(change => change.toRaw()), hasMore }) } async function getZip(req, res, next) { diff --git a/services/history-v1/storage/lib/chunk_store/index.js b/services/history-v1/storage/lib/chunk_store/index.js index b026a073d4..6dab84f929 100644 --- a/services/history-v1/storage/lib/chunk_store/index.js +++ b/services/history-v1/storage/lib/chunk_store/index.js @@ -141,6 +141,8 @@ async function loadLatest(projectId, opts = {}) { * @param {number} version * @param {object} [opts] * @param {boolean} [opts.persistedOnly] - only include persisted changes + * @param {boolean} [opts.preferNewer] - If the version is at the boundary of + * two chunks, return the newer chunk. */ async function loadAtVersion(projectId, version, opts = {}) { assert.projectId(projectId, 'bad projectId') @@ -150,7 +152,9 @@ async function loadAtVersion(projectId, version, opts = {}) { const blobStore = new BlobStore(projectId) const batchBlobStore = new BatchBlobStore(blobStore) - const chunkRecord = await backend.getChunkForVersion(projectId, version) + const chunkRecord = await backend.getChunkForVersion(projectId, version, { + preferNewer: opts.preferNewer, + }) const rawHistory = await historyStore.loadRaw(projectId, chunkRecord.id) const history = History.fromRaw(rawHistory) diff --git a/services/history-v1/test/acceptance/js/api/projects.test.js b/services/history-v1/test/acceptance/js/api/projects.test.js index 3c333d8698..7654829516 100644 --- a/services/history-v1/test/acceptance/js/api/projects.test.js +++ b/services/history-v1/test/acceptance/js/api/projects.test.js @@ -10,7 +10,7 @@ const cleanup = require('../storage/support/cleanup') const fixtures = require('../storage/support/fixtures') const testFiles = require('../storage/support/test_files') -const { zipStore, persistChanges } = require('../../../../storage') +const { zipStore, BlobStore, persistChanges } = require('../../../../storage') const { expectHttpError } = require('./support/expect_response') const testServer = require('./support/test_server') @@ -155,12 +155,13 @@ describe('project controller', function () { project_id: projectId, }) expect(response.status).to.equal(HTTPStatus.OK) - const changes = response.obj + const { changes, hasMore } = response.obj expect(changes.length).to.equal(2) const filenames = changes .flatMap(change => change.operations) .map(operation => operation.pathname) expect(filenames).to.deep.equal(['test.tex', 'other.tex']) + expect(hasMore).to.be.false }) it('returns only requested changes', async function () { @@ -170,12 +171,13 @@ describe('project controller', function () { since: 1, }) expect(response.status).to.equal(HTTPStatus.OK) - const changes = response.obj + const { changes, hasMore } = response.obj expect(changes.length).to.equal(1) const filenames = changes .flatMap(change => change.operations) .map(operation => operation.pathname) expect(filenames).to.deep.equal(['other.tex']) + expect(hasMore).to.be.false }) it('rejects negative versions', async function () { @@ -196,68 +198,84 @@ describe('project controller', function () { ).to.be.rejectedWith('Bad Request') }) }) - }) - describe('project with many chunks', function () { - let projectId + describe('project with many chunks', function () { + let projectId, changes - beforeEach(async function () { - // used to provide a limit which forces us to persist all of the changes. - const farFuture = new Date() - farFuture.setTime(farFuture.getTime() + 7 * 24 * 3600 * 1000) - const limits = { - minChangeTimestamp: farFuture, - maxChangeTimestamp: farFuture, - maxChunkChanges: 5, - } - const changes = [ - new Change( - [new AddFileOperation('test.tex', File.fromString(''))], - new Date(), - [] - ), - ] - - for (let i = 0; i < 20; i++) { - const textOperation = new TextOperation() - textOperation.retain(i) - textOperation.insert('x') - changes.push( + beforeEach(async function () { + // used to provide a limit which forces us to persist all of the changes. + const farFuture = new Date() + farFuture.setTime(farFuture.getTime() + 7 * 24 * 3600 * 1000) + const limits = { + minChangeTimestamp: farFuture, + maxChangeTimestamp: farFuture, + maxChunkChanges: 5, + } + projectId = await createEmptyProject() + const blobStore = new BlobStore(projectId) + const blob = await blobStore.putString('') + changes = [ new Change( - [new EditFileOperation('test.tex', textOperation)], + [new AddFileOperation('test.tex', File.createLazyFromBlobs(blob))], new Date(), [] + ), + ] + + for (let i = 0; i < 20; i++) { + const textOperation = new TextOperation() + textOperation.retain(i) + textOperation.insert('x') + changes.push( + new Change( + [new EditFileOperation('test.tex', textOperation)], + new Date(), + [] + ) ) - ) - } - - projectId = await createEmptyProject() - await persistChanges(projectId, changes, limits, 0) - }) - - it('returns all changes when not given a limit', async function () { - const response = await testServer.basicAuthClient.apis.Project.getChanges( - { - project_id: projectId, } - ) - expect(response.status).to.equal(HTTPStatus.OK) - const changes = response.obj - expect(changes.length).to.equal(21) - expect(changes[10].operations[0].textOperation).to.deep.equal([9, 'x']) - }) - it('returns only requested changes', async function () { - const response = await testServer.basicAuthClient.apis.Project.getChanges( - { - project_id: projectId, - since: 10, - } - ) - expect(response.status).to.equal(HTTPStatus.OK) - const changes = response.obj - expect(changes.length).to.equal(11) - expect(changes[2].operations[0].textOperation).to.deep.equal([11, 'x']) + await persistChanges(projectId, changes, limits, 0) + }) + + it('returns the first chunk when not given a limit', async function () { + const response = + await testServer.basicAuthClient.apis.Project.getChanges({ + project_id: projectId, + }) + + expect(response.status).to.equal(HTTPStatus.OK) + expect(response.obj).to.deep.equal({ + changes: changes.slice(0, 5).map(c => c.toRaw()), + hasMore: true, + }) + }) + + it('returns only requested changes', async function () { + const response = + await testServer.basicAuthClient.apis.Project.getChanges({ + project_id: projectId, + since: 12, + }) + expect(response.status).to.equal(HTTPStatus.OK) + expect(response.obj).to.deep.equal({ + changes: changes.slice(12, 15).map(c => c.toRaw()), + hasMore: true, + }) + }) + + it('returns changes in the latest chunk', async function () { + const response = + await testServer.basicAuthClient.apis.Project.getChanges({ + project_id: projectId, + since: 20, + }) + expect(response.status).to.equal(HTTPStatus.OK) + expect(response.obj).to.deep.equal({ + changes: changes.slice(20).map(c => c.toRaw()), + hasMore: false, + }) + }) }) }) diff --git a/services/web/app/src/Features/History/HistoryController.js b/services/web/app/src/Features/History/HistoryController.js index a0f0183f44..be2a44c39e 100644 --- a/services/web/app/src/Features/History/HistoryController.js +++ b/services/web/app/src/Features/History/HistoryController.js @@ -465,9 +465,37 @@ async function getLatestHistory(req, res, next) { async function getChanges(req, res, next) { const projectId = req.params.project_id - const since = req.query.since - const changes = await HistoryManager.promises.getChanges(projectId, { since }) - res.json(changes) + let since = req.query.since + // TODO: Transition flag; remove after a while + const paginated = req.query.paginated === 'true' + + if (paginated) { + const changes = await HistoryManager.promises.getChanges(projectId, { + since, + }) + return res.json(changes) + } else { + // TODO: Remove this code path after a while + let hasMore = true + const allChanges = [] + while (hasMore) { + const response = await HistoryManager.promises.getChanges(projectId, { + since, + }) + + let changes + if (Array.isArray(response)) { + changes = response + hasMore = false + } else { + changes = response.changes + hasMore = response.hasMore + since += changes.length + } + allChanges.push(...changes) + } + return res.json(allChanges) + } } function isPrematureClose(err) { diff --git a/services/web/frontend/js/infrastructure/project-snapshot.ts b/services/web/frontend/js/infrastructure/project-snapshot.ts index eb7c768adf..976eb9d63f 100644 --- a/services/web/frontend/js/infrastructure/project-snapshot.ts +++ b/services/web/frontend/js/infrastructure/project-snapshot.ts @@ -124,9 +124,15 @@ export class ProjectSnapshot { */ private async loadChanges() { await flushHistory(this.projectId) - const changes = await fetchLatestChanges(this.projectId, this.version) - this.snapshot.applyAll(changes) - this.version += changes.length + let hasMore = true + while (hasMore) { + const response = await fetchLatestChanges(this.projectId, this.version) + const changes = response.changes + this.snapshot.applyAll(changes) + this.version += changes.length + hasMore = response.hasMore + } + await this.loadDocs() } @@ -181,14 +187,43 @@ async function fetchLatestChunk(projectId: string): Promise { return Chunk.fromRaw(response.chunk) } +type FetchLatestChangesResponse = { + changes: Change[] + hasMore: boolean +} + +type FetchLatestChangesApiResponse = + | RawChange[] + | { + changes: RawChange[] + hasMore: boolean + } + async function fetchLatestChanges( projectId: string, version: number -): Promise { - const response = await getJSON( - `/project/${projectId}/changes?since=${version}` +): Promise { + // TODO: The paginated flag is a transition flag. It can be removed after this + // code has been deployed for a few weeks. + const response = await getJSON( + `/project/${projectId}/changes?since=${version}&paginated=true` ) - return response.map(Change.fromRaw).filter(change => change != null) + + let changes, hasMore + if (Array.isArray(response)) { + // deprecated response format is a simple array of changes + // TODO: Remove this branch after the transition + changes = response + hasMore = false + } else { + changes = response.changes + hasMore = response.hasMore + } + + return { + changes: changes.map(Change.fromRaw).filter(change => change != null), + hasMore, + } } async function fetchBlob(projectId: string, hash: string): Promise { diff --git a/services/web/test/frontend/infrastructure/project-snapshot.test.ts b/services/web/test/frontend/infrastructure/project-snapshot.test.ts index 313436a3f5..b3a78efed7 100644 --- a/services/web/test/frontend/infrastructure/project-snapshot.test.ts +++ b/services/web/test/frontend/infrastructure/project-snapshot.test.ts @@ -119,10 +119,14 @@ describe('ProjectSnapshot', function () { } function mockChanges() { - fetchMock.getOnce(`/project/${projectId}/changes?since=1`, changes, { - name: 'changes-1', - }) - fetchMock.get(`/project/${projectId}/changes?since=2`, [], { + fetchMock.getOnce( + `/project/${projectId}/changes?since=1&paginated=true`, + changes, + { + name: 'changes-1', + } + ) + fetchMock.get(`/project/${projectId}/changes?since=2&paginated=true`, [], { name: 'changes-2', }) }