Merge pull request #25492 from overleaf/em-paginate-changes-1

Paginate history changes endpoint

GitOrigin-RevId: 2b48044d64244404efcd2e090b682c1f571a5567
This commit is contained in:
Eric Mc Sween
2025-05-21 11:34:01 -04:00
committed by Copybot
parent 23e24627d5
commit c18b3f95b2
6 changed files with 176 additions and 89 deletions

View File

@@ -152,29 +152,27 @@ async function getChanges(req, res, next) {
})
}
const changes = []
let chunk = await chunkStore.loadLatest(projectId)
if (since > chunk.getEndVersion()) {
return res.status(400).json({
error: `Version out of bounds: ${since}`,
let chunk
try {
chunk = await chunkStore.loadAtVersion(projectId, since, {
preferNewer: true,
})
} catch (err) {
if (err instanceof Chunk.VersionNotFoundError) {
return res.status(400).json({
error: `Version out of bounds: ${since}`,
})
}
throw err
}
// Fetch all chunks that come after the chunk that contains the start version
while (chunk.getStartVersion() > since) {
const changesInChunk = chunk.getChanges()
changes.unshift(...changesInChunk)
chunk = await chunkStore.loadAtVersion(projectId, chunk.getStartVersion())
}
const latestChunkMetadata = await chunkStore.getLatestChunkMetadata(projectId)
// Extract the relevant changes from the chunk that contains the start version
const changesInChunk = chunk
.getChanges()
.slice(since - chunk.getStartVersion())
changes.unshift(...changesInChunk)
const changes = chunk.getChanges().slice(since - chunk.getStartVersion())
const hasMore = latestChunkMetadata.endVersion > chunk.getEndVersion()
res.json(changes.map(change => change.toRaw()))
res.json({ changes: changes.map(change => change.toRaw()), hasMore })
}
async function getZip(req, res, next) {

View File

@@ -141,6 +141,8 @@ async function loadLatest(projectId, opts = {}) {
* @param {number} version
* @param {object} [opts]
* @param {boolean} [opts.persistedOnly] - only include persisted changes
* @param {boolean} [opts.preferNewer] - If the version is at the boundary of
* two chunks, return the newer chunk.
*/
async function loadAtVersion(projectId, version, opts = {}) {
assert.projectId(projectId, 'bad projectId')
@@ -150,7 +152,9 @@ async function loadAtVersion(projectId, version, opts = {}) {
const blobStore = new BlobStore(projectId)
const batchBlobStore = new BatchBlobStore(blobStore)
const chunkRecord = await backend.getChunkForVersion(projectId, version)
const chunkRecord = await backend.getChunkForVersion(projectId, version, {
preferNewer: opts.preferNewer,
})
const rawHistory = await historyStore.loadRaw(projectId, chunkRecord.id)
const history = History.fromRaw(rawHistory)

View File

@@ -10,7 +10,7 @@ const cleanup = require('../storage/support/cleanup')
const fixtures = require('../storage/support/fixtures')
const testFiles = require('../storage/support/test_files')
const { zipStore, persistChanges } = require('../../../../storage')
const { zipStore, BlobStore, persistChanges } = require('../../../../storage')
const { expectHttpError } = require('./support/expect_response')
const testServer = require('./support/test_server')
@@ -155,12 +155,13 @@ describe('project controller', function () {
project_id: projectId,
})
expect(response.status).to.equal(HTTPStatus.OK)
const changes = response.obj
const { changes, hasMore } = response.obj
expect(changes.length).to.equal(2)
const filenames = changes
.flatMap(change => change.operations)
.map(operation => operation.pathname)
expect(filenames).to.deep.equal(['test.tex', 'other.tex'])
expect(hasMore).to.be.false
})
it('returns only requested changes', async function () {
@@ -170,12 +171,13 @@ describe('project controller', function () {
since: 1,
})
expect(response.status).to.equal(HTTPStatus.OK)
const changes = response.obj
const { changes, hasMore } = response.obj
expect(changes.length).to.equal(1)
const filenames = changes
.flatMap(change => change.operations)
.map(operation => operation.pathname)
expect(filenames).to.deep.equal(['other.tex'])
expect(hasMore).to.be.false
})
it('rejects negative versions', async function () {
@@ -196,68 +198,84 @@ describe('project controller', function () {
).to.be.rejectedWith('Bad Request')
})
})
})
describe('project with many chunks', function () {
let projectId
describe('project with many chunks', function () {
let projectId, changes
beforeEach(async function () {
// used to provide a limit which forces us to persist all of the changes.
const farFuture = new Date()
farFuture.setTime(farFuture.getTime() + 7 * 24 * 3600 * 1000)
const limits = {
minChangeTimestamp: farFuture,
maxChangeTimestamp: farFuture,
maxChunkChanges: 5,
}
const changes = [
new Change(
[new AddFileOperation('test.tex', File.fromString(''))],
new Date(),
[]
),
]
for (let i = 0; i < 20; i++) {
const textOperation = new TextOperation()
textOperation.retain(i)
textOperation.insert('x')
changes.push(
beforeEach(async function () {
// used to provide a limit which forces us to persist all of the changes.
const farFuture = new Date()
farFuture.setTime(farFuture.getTime() + 7 * 24 * 3600 * 1000)
const limits = {
minChangeTimestamp: farFuture,
maxChangeTimestamp: farFuture,
maxChunkChanges: 5,
}
projectId = await createEmptyProject()
const blobStore = new BlobStore(projectId)
const blob = await blobStore.putString('')
changes = [
new Change(
[new EditFileOperation('test.tex', textOperation)],
[new AddFileOperation('test.tex', File.createLazyFromBlobs(blob))],
new Date(),
[]
),
]
for (let i = 0; i < 20; i++) {
const textOperation = new TextOperation()
textOperation.retain(i)
textOperation.insert('x')
changes.push(
new Change(
[new EditFileOperation('test.tex', textOperation)],
new Date(),
[]
)
)
)
}
projectId = await createEmptyProject()
await persistChanges(projectId, changes, limits, 0)
})
it('returns all changes when not given a limit', async function () {
const response = await testServer.basicAuthClient.apis.Project.getChanges(
{
project_id: projectId,
}
)
expect(response.status).to.equal(HTTPStatus.OK)
const changes = response.obj
expect(changes.length).to.equal(21)
expect(changes[10].operations[0].textOperation).to.deep.equal([9, 'x'])
})
it('returns only requested changes', async function () {
const response = await testServer.basicAuthClient.apis.Project.getChanges(
{
project_id: projectId,
since: 10,
}
)
expect(response.status).to.equal(HTTPStatus.OK)
const changes = response.obj
expect(changes.length).to.equal(11)
expect(changes[2].operations[0].textOperation).to.deep.equal([11, 'x'])
await persistChanges(projectId, changes, limits, 0)
})
it('returns the first chunk when not given a limit', async function () {
const response =
await testServer.basicAuthClient.apis.Project.getChanges({
project_id: projectId,
})
expect(response.status).to.equal(HTTPStatus.OK)
expect(response.obj).to.deep.equal({
changes: changes.slice(0, 5).map(c => c.toRaw()),
hasMore: true,
})
})
it('returns only requested changes', async function () {
const response =
await testServer.basicAuthClient.apis.Project.getChanges({
project_id: projectId,
since: 12,
})
expect(response.status).to.equal(HTTPStatus.OK)
expect(response.obj).to.deep.equal({
changes: changes.slice(12, 15).map(c => c.toRaw()),
hasMore: true,
})
})
it('returns changes in the latest chunk', async function () {
const response =
await testServer.basicAuthClient.apis.Project.getChanges({
project_id: projectId,
since: 20,
})
expect(response.status).to.equal(HTTPStatus.OK)
expect(response.obj).to.deep.equal({
changes: changes.slice(20).map(c => c.toRaw()),
hasMore: false,
})
})
})
})

View File

@@ -465,9 +465,37 @@ async function getLatestHistory(req, res, next) {
async function getChanges(req, res, next) {
const projectId = req.params.project_id
const since = req.query.since
const changes = await HistoryManager.promises.getChanges(projectId, { since })
res.json(changes)
let since = req.query.since
// TODO: Transition flag; remove after a while
const paginated = req.query.paginated === 'true'
if (paginated) {
const changes = await HistoryManager.promises.getChanges(projectId, {
since,
})
return res.json(changes)
} else {
// TODO: Remove this code path after a while
let hasMore = true
const allChanges = []
while (hasMore) {
const response = await HistoryManager.promises.getChanges(projectId, {
since,
})
let changes
if (Array.isArray(response)) {
changes = response
hasMore = false
} else {
changes = response.changes
hasMore = response.hasMore
since += changes.length
}
allChanges.push(...changes)
}
return res.json(allChanges)
}
}
function isPrematureClose(err) {

View File

@@ -124,9 +124,15 @@ export class ProjectSnapshot {
*/
private async loadChanges() {
await flushHistory(this.projectId)
const changes = await fetchLatestChanges(this.projectId, this.version)
this.snapshot.applyAll(changes)
this.version += changes.length
let hasMore = true
while (hasMore) {
const response = await fetchLatestChanges(this.projectId, this.version)
const changes = response.changes
this.snapshot.applyAll(changes)
this.version += changes.length
hasMore = response.hasMore
}
await this.loadDocs()
}
@@ -181,14 +187,43 @@ async function fetchLatestChunk(projectId: string): Promise<Chunk> {
return Chunk.fromRaw(response.chunk)
}
type FetchLatestChangesResponse = {
changes: Change[]
hasMore: boolean
}
type FetchLatestChangesApiResponse =
| RawChange[]
| {
changes: RawChange[]
hasMore: boolean
}
async function fetchLatestChanges(
projectId: string,
version: number
): Promise<Change[]> {
const response = await getJSON<RawChange[]>(
`/project/${projectId}/changes?since=${version}`
): Promise<FetchLatestChangesResponse> {
// TODO: The paginated flag is a transition flag. It can be removed after this
// code has been deployed for a few weeks.
const response = await getJSON<FetchLatestChangesApiResponse>(
`/project/${projectId}/changes?since=${version}&paginated=true`
)
return response.map(Change.fromRaw).filter(change => change != null)
let changes, hasMore
if (Array.isArray(response)) {
// deprecated response format is a simple array of changes
// TODO: Remove this branch after the transition
changes = response
hasMore = false
} else {
changes = response.changes
hasMore = response.hasMore
}
return {
changes: changes.map(Change.fromRaw).filter(change => change != null),
hasMore,
}
}
async function fetchBlob(projectId: string, hash: string): Promise<string> {

View File

@@ -119,10 +119,14 @@ describe('ProjectSnapshot', function () {
}
function mockChanges() {
fetchMock.getOnce(`/project/${projectId}/changes?since=1`, changes, {
name: 'changes-1',
})
fetchMock.get(`/project/${projectId}/changes?since=2`, [], {
fetchMock.getOnce(
`/project/${projectId}/changes?since=1&paginated=true`,
changes,
{
name: 'changes-1',
}
)
fetchMock.get(`/project/${projectId}/changes?since=2&paginated=true`, [], {
name: 'changes-2',
})
}