Merge pull request #22825 from overleaf/em-move-project-snapshot

Move full project on client code to the main web tree

GitOrigin-RevId: a2afd0d7fceaef213841e662df0b20587e9fef69
This commit is contained in:
Eric Mc Sween
2025-01-14 07:48:57 -05:00
committed by Copybot
parent 8f21f58ad5
commit 5a72d08be2
4 changed files with 482 additions and 117 deletions

View File

@@ -0,0 +1,166 @@
// @ts-check
import Settings from '@overleaf/settings'
import { Joi, validate } from '../../infrastructure/Validation.js'
import { RateLimiter } from '../../infrastructure/RateLimiter.js'
import AuthenticationController from '../Authentication/AuthenticationController.js'
import AuthorizationMiddleware from '../Authorization/AuthorizationMiddleware.js'
import RateLimiterMiddleware from '../Security/RateLimiterMiddleware.js'
import HistoryController from './HistoryController.js'
// Rate limiter windows, expressed in seconds
const SECONDS_PER_MINUTE = 60
const SECONDS_PER_HOUR = 60 * 60

/**
 * Rate limiters used by the history routes registered in this module.
 */
const rateLimiters = {
  // Zip downloads of a project version: 30 per hour
  downloadProjectRevision: new RateLimiter('download-project-revision', {
    points: 30,
    duration: SECONDS_PER_HOUR,
  }),

  // Download project in full once per hour
  getProjectBlob: new RateLimiter('get-project-blob', {
    points: Settings.maxEntitiesPerProject,
    duration: SECONDS_PER_HOUR,
  }),

  // History flushes: 30 per minute
  flushHistory: new RateLimiter('flush-project-history', {
    points: 30,
    duration: SECONDS_PER_MINUTE,
  }),
}
/**
 * Register the project history routes.
 *
 * Routes are attached to the given routers in place, in the order they appear
 * below. Every route goes on `webRouter` except the history resync route,
 * which goes on `privateApiRouter`.
 *
 * @param webRouter - router that receives the blob, diff, restore, download,
 *   flush, label and snapshot routes
 * @param privateApiRouter - router that receives the history resync route
 */
function apply(webRouter, privateApiRouter) {
  const {
    blockRestrictedUserFromProject,
    ensureUserCanReadProject,
    ensureUserCanWriteProjectContent,
  } = AuthorizationMiddleware

  // Middleware chain shared by the read-only history endpoints
  const readHistory = [blockRestrictedUserFromProject, ensureUserCanReadProject]

  // Builds a fresh validator for the blob endpoints (one per route)
  const validateBlobRequest = () =>
    validate({
      params: Joi.object({
        project_id: Joi.objectId().required(),
        hash: Joi.string().required().hex().length(40),
      }),
      query: Joi.object({
        fallback: Joi.objectId().optional(),
      }),
    })

  // Blobs
  const blobPath = '/project/:project_id/blob/:hash'
  webRouter.head(
    blobPath,
    validateBlobRequest(),
    RateLimiterMiddleware.rateLimit(rateLimiters.getProjectBlob),
    ensureUserCanReadProject,
    HistoryController.headBlob
  )
  webRouter.get(
    blobPath,
    validateBlobRequest(),
    RateLimiterMiddleware.rateLimit(rateLimiters.getProjectBlob),
    ensureUserCanReadProject,
    HistoryController.getBlob
  )

  // History diffs
  webRouter.get(
    '/project/:Project_id/updates',
    ...readHistory,
    HistoryController.proxyToHistoryApiAndInjectUserDetails
  )
  webRouter.get(
    '/project/:Project_id/doc/:doc_id/diff',
    ...readHistory,
    HistoryController.proxyToHistoryApi
  )
  webRouter.get(
    '/project/:Project_id/diff',
    ...readHistory,
    HistoryController.proxyToHistoryApiAndInjectUserDetails
  )
  webRouter.get(
    '/project/:Project_id/filetree/diff',
    ...readHistory,
    HistoryController.proxyToHistoryApi
  )

  // File and project restore
  webRouter.post(
    '/project/:project_id/restore_file',
    ensureUserCanWriteProjectContent,
    HistoryController.restoreFileFromV2
  )
  webRouter.post(
    '/project/:project_id/revert_file',
    ensureUserCanWriteProjectContent,
    HistoryController.revertFile
  )
  webRouter.post(
    '/project/:project_id/revert-project',
    ensureUserCanWriteProjectContent,
    HistoryController.revertProject
  )

  // History download
  webRouter.get(
    '/project/:project_id/version/:version/zip',
    RateLimiterMiddleware.rateLimit(rateLimiters.downloadProjectRevision),
    ...readHistory,
    HistoryController.downloadZipOfVersion
  )

  // History flush and resync
  webRouter.post(
    '/project/:Project_id/flush',
    RateLimiterMiddleware.rateLimit(rateLimiters.flushHistory),
    ...readHistory,
    HistoryController.proxyToHistoryApi
  )
  privateApiRouter.post(
    '/project/:Project_id/history/resync',
    AuthenticationController.requirePrivateApiAuth(),
    HistoryController.resyncProjectHistory
  )

  // History labels
  webRouter.get(
    '/project/:Project_id/labels',
    ...readHistory,
    HistoryController.getLabels
  )
  webRouter.post(
    '/project/:Project_id/labels',
    ensureUserCanWriteProjectContent,
    HistoryController.createLabel
  )
  webRouter.delete(
    '/project/:Project_id/labels/:label_id',
    ensureUserCanWriteProjectContent,
    HistoryController.deleteLabel
  )

  // History snapshot
  webRouter.get(
    '/project/:project_id/latest/history',
    ...readHistory,
    HistoryController.proxyToHistoryApi
  )
  webRouter.get(
    '/project/:project_id/changes',
    ...readHistory,
    HistoryController.proxyToHistoryApi
  )
}

export default { apply }

View File

@@ -32,6 +32,7 @@ import ProjectDownloadsController from './Features/Downloads/ProjectDownloadsCon
import FileStoreController from './Features/FileStore/FileStoreController.mjs'
import DocumentUpdaterController from './Features/DocumentUpdater/DocumentUpdaterController.mjs'
import HistoryController from './Features/History/HistoryController.js'
import HistoryRouter from './Features/History/HistoryRouter.mjs'
import ExportsController from './Features/Exports/ExportsController.mjs'
import PasswordResetRouter from './Features/PasswordReset/PasswordResetRouter.mjs'
import StaticPagesRouter from './Features/StaticPages/StaticPagesRouter.mjs'
@@ -120,24 +121,6 @@ const rateLimiters = {
points: 10,
duration: 60,
}),
downloadProjectRevision: new RateLimiter('download-project-revision', {
points: 30,
duration: 60 * 60,
}),
flushHistory: new RateLimiter('flush-project-history', {
// Allow flushing once every 30s-1s (allow for network jitter).
points: 1,
duration: 30 - 1,
}),
getProjectBlob: new RateLimiter('get-project-blob', {
// Download project in full once per hour
points: Settings.maxEntitiesPerProject,
duration: 60 * 60,
}),
getHistorySnapshot: new RateLimiter(
'get-history-snapshot',
openProjectRateLimiter.getOptions()
),
endorseEmail: new RateLimiter('endorse-email', {
points: 30,
duration: 60,
@@ -552,36 +535,10 @@ async function initialize(webRouter, privateApiRouter, publicApiRouter) {
HistoryController.fileToBlobRedirectMiddleware,
FileStoreController.getFile
)
webRouter.head(
'/project/:project_id/blob/:hash',
validate({
params: Joi.object({
project_id: Joi.objectId().required(),
hash: Joi.string().required().hex().length(40),
}),
query: Joi.object({
fallback: Joi.objectId().optional(),
}),
}),
RateLimiterMiddleware.rateLimit(rateLimiters.getProjectBlob),
AuthorizationMiddleware.ensureUserCanReadProject,
HistoryController.headBlob
)
webRouter.get(
'/project/:project_id/blob/:hash',
validate({
params: Joi.object({
project_id: Joi.objectId().required(),
hash: Joi.string().required().hex().length(40),
}),
query: Joi.object({
fallback: Joi.objectId().optional(),
}),
}),
RateLimiterMiddleware.rateLimit(rateLimiters.getProjectBlob),
AuthorizationMiddleware.ensureUserCanReadProject,
HistoryController.getBlob
)
// Has to be applied after any route using fileToBlobRedirectMiddleware
HistoryRouter.apply(webRouter, privateApiRouter)
webRouter.get(
'/Project/:Project_id/doc/:Doc_id/download', // "download" suffix to avoid conflict with private API route at doc/:doc_id
AuthorizationMiddleware.ensureUserCanReadProject,
@@ -801,75 +758,6 @@ async function initialize(webRouter, privateApiRouter, publicApiRouter) {
AuthorizationMiddleware.ensureUserCanAdminProject,
ProjectController.renameProject
)
webRouter.get(
'/project/:Project_id/updates',
AuthorizationMiddleware.blockRestrictedUserFromProject,
AuthorizationMiddleware.ensureUserCanReadProject,
HistoryController.proxyToHistoryApiAndInjectUserDetails
)
webRouter.get(
'/project/:Project_id/doc/:doc_id/diff',
AuthorizationMiddleware.blockRestrictedUserFromProject,
AuthorizationMiddleware.ensureUserCanReadProject,
HistoryController.proxyToHistoryApi
)
webRouter.get(
'/project/:Project_id/diff',
AuthorizationMiddleware.blockRestrictedUserFromProject,
AuthorizationMiddleware.ensureUserCanReadProject,
HistoryController.proxyToHistoryApiAndInjectUserDetails
)
webRouter.get(
'/project/:Project_id/filetree/diff',
AuthorizationMiddleware.blockRestrictedUserFromProject,
AuthorizationMiddleware.ensureUserCanReadProject,
HistoryController.proxyToHistoryApi
)
webRouter.post(
'/project/:project_id/restore_file',
AuthorizationMiddleware.ensureUserCanWriteProjectContent,
HistoryController.restoreFileFromV2
)
webRouter.post(
'/project/:project_id/revert_file',
AuthorizationMiddleware.ensureUserCanWriteProjectContent,
HistoryController.revertFile
)
webRouter.post(
'/project/:project_id/revert-project',
AuthorizationMiddleware.ensureUserCanWriteProjectContent,
HistoryController.revertProject
)
webRouter.get(
'/project/:project_id/version/:version/zip',
RateLimiterMiddleware.rateLimit(rateLimiters.downloadProjectRevision),
AuthorizationMiddleware.blockRestrictedUserFromProject,
AuthorizationMiddleware.ensureUserCanReadProject,
HistoryController.downloadZipOfVersion
)
privateApiRouter.post(
'/project/:Project_id/history/resync',
AuthenticationController.requirePrivateApiAuth(),
HistoryController.resyncProjectHistory
)
webRouter.get(
'/project/:Project_id/labels',
AuthorizationMiddleware.blockRestrictedUserFromProject,
AuthorizationMiddleware.ensureUserCanReadProject,
HistoryController.getLabels
)
webRouter.post(
'/project/:Project_id/labels',
AuthorizationMiddleware.ensureUserCanWriteProjectContent,
HistoryController.createLabel
)
webRouter.delete(
'/project/:Project_id/labels/:label_id',
AuthorizationMiddleware.ensureUserCanWriteProjectContent,
HistoryController.deleteLabel
)
webRouter.post(
'/project/:project_id/export/:brand_variation_id',
AuthorizationMiddleware.ensureUserCanWriteProjectContent,

View File

@@ -0,0 +1,124 @@
import pLimit from 'p-limit'
import { Change, Chunk, Snapshot } from 'overleaf-editor-core'
import { RawChange, RawChunk } from 'overleaf-editor-core/lib/types'
import { FetchError, getJSON, postJSON } from '@/infrastructure/fetch-json'
const DOWNLOAD_BLOBS_CONCURRENCY = 10
/**
 * Project snapshot container with on-demand refresh
 *
 * Holds a client-side copy of the project's history snapshot. Call
 * `refresh()` to bring it up to date, then read the editable files through
 * `getDocPaths()` and `getDocContents()`.
 */
export class ProjectSnapshot {
  private projectId: string
  private snapshot: Snapshot
  private version: number
  // 'init': nothing has been fetched yet
  // 'refreshing': a refresh is currently in flight
  // 'ready': the snapshot has been populated at least once
  private state: 'init' | 'refreshing' | 'ready'
  private blobStore: SimpleBlobStore

  constructor(projectId: string) {
    this.projectId = projectId
    this.snapshot = new Snapshot()
    this.version = 0
    this.state = 'init'
    this.blobStore = new SimpleBlobStore(this.projectId)
  }

  /**
   * Bring the snapshot up to date with the project history.
   *
   * Flushes the project history, then either fetches the latest chunk (first
   * refresh) or the changes since the current version (later refreshes), and
   * finally loads the contents of the editable files.
   *
   * A call made while another refresh is in flight returns immediately
   * without waiting for it. Errors from the underlying requests are
   * propagated to the caller.
   */
  async refresh() {
    if (this.state === 'refreshing') {
      // Prevent concurrent refreshes
      return
    }

    const previousState = this.state
    // State to restore once this refresh settles. It only becomes 'ready'
    // after the snapshot and version have been updated, so a failed first
    // refresh fetches the full chunk again on the next attempt.
    let settledState: 'init' | 'ready' = previousState
    this.state = 'refreshing'

    try {
      await flushHistory(this.projectId)

      if (previousState === 'init') {
        const chunk = await fetchLatestChunk(this.projectId)
        this.snapshot = chunk.getSnapshot()
        this.snapshot.applyAll(chunk.getChanges())
        this.version = chunk.getEndVersion()
      } else {
        const changes = await fetchLatestChanges(this.projectId, this.version)
        this.snapshot.applyAll(changes)
        this.version += changes.length
      }

      settledState = 'ready'
      await this.loadDocs()
    } finally {
      // Always leave the 'refreshing' state, even on failure, so that later
      // refreshes are not blocked forever.
      this.state = settledState
    }
  }

  /**
   * List the pathnames of the files in the snapshot that report themselves
   * as editable.
   */
  getDocPaths(): string[] {
    const allPaths = this.snapshot.getFilePathnames()
    return allPaths.filter(path => this.snapshot.getFile(path)?.isEditable())
  }

  /**
   * Get the contents of the file at the given path.
   *
   * @returns the file contents, or null when there is no file at that path
   *   or the file has no content available
   */
  getDocContents(path: string): string | null {
    const file = this.snapshot.getFile(path)
    if (file == null) {
      return null
    }
    return file.getContent() ?? null
  }

  /**
   * Eagerly load every editable file through the blob store, with at most
   * DOWNLOAD_BLOBS_CONCURRENCY loads running at the same time.
   */
  private async loadDocs() {
    const paths = this.getDocPaths()
    const limit = pLimit(DOWNLOAD_BLOBS_CONCURRENCY)
    await Promise.all(
      paths.map(path =>
        limit(async () => {
          const file = this.snapshot.getFile(path)
          await file?.load('eager', this.blobStore)
        })
      )
    )
  }
}
/**
 * Minimal blob store backed by the project's blob endpoint
 *
 * Exposes the two accessors used when loading files: the raw blob text and
 * the blob parsed as JSON.
 */
class SimpleBlobStore {
  private projectId: string

  constructor(projectId: string) {
    this.projectId = projectId
  }

  /**
   * Fetch the blob with the given hash and return its body as text.
   */
  async getString(hash: string): Promise<string> {
    const contents = await fetchBlob(this.projectId, hash)
    return contents
  }

  /**
   * Fetch the blob with the given hash and return it parsed as JSON.
   */
  async getObject(hash: string) {
    return JSON.parse(await this.getString(hash))
  }
}
/**
 * Ask the server to flush the history of the given project.
 *
 * Resolves once the POST request has completed; the response body is ignored.
 */
async function flushHistory(projectId: string) {
  const flushUrl = `/project/${projectId}/flush`
  await postJSON(flushUrl)
}
/**
 * Fetch the latest history chunk of the given project and turn the raw
 * payload into a Chunk.
 */
async function fetchLatestChunk(projectId: string): Promise<Chunk> {
  const historyUrl = `/project/${projectId}/latest/history`
  const { chunk: rawChunk } = await getJSON<{ chunk: RawChunk }>(historyUrl)
  return Chunk.fromRaw(rawChunk)
}
/**
 * Fetch the changes made to a project since the given version.
 *
 * @param projectId - id of the project
 * @param version - version after which changes are requested
 * @returns the changes in the order the server returned them; raw entries
 *   that do not convert to a Change are dropped
 */
async function fetchLatestChanges(
  projectId: string,
  version: number
): Promise<Change[]> {
  const rawChanges = await getJSON<RawChange[]>(
    `/project/${projectId}/changes?since=${version}`
  )

  const changes: Change[] = []
  for (const rawChange of rawChanges) {
    // Call fromRaw as a method with a single argument. Passing it directly to
    // Array.prototype.map would detach it from Change and also hand it the
    // element index and the whole array.
    const change = Change.fromRaw(rawChange)
    if (change != null) {
      changes.push(change)
    }
  }
  return changes
}
/**
 * Download a blob of the given project and return its body as text.
 *
 * @throws FetchError when the server answers with a non-OK status
 */
async function fetchBlob(projectId: string, hash: string): Promise<string> {
  const blobUrl = `/project/${projectId}/blob/${hash}`
  const response = await fetch(blobUrl)
  if (response.ok) {
    return await response.text()
  }
  throw new FetchError('Failed to fetch blob', blobUrl, undefined, response)
}

View File

@@ -0,0 +1,187 @@
import { expect } from 'chai'
import fetchMock from 'fetch-mock'
import { ProjectSnapshot } from '@/infrastructure/project-snapshot'
describe('ProjectSnapshot', function () {
  let snapshot: ProjectSnapshot
  const projectId = 'project-id'

  beforeEach(function () {
    snapshot = new ProjectSnapshot(projectId)
  })

  // Make sure no mocked route survives a test, even when one of the setup
  // helpers below fails before reaching its own reset.
  afterEach(function () {
    fetchMock.reset()
  })

  describe('before initialization', function () {
    describe('getDocPaths()', function () {
      it('returns an empty array', function () {
        expect(snapshot.getDocPaths()).to.deep.equal([])
      })
    })

    describe('getDocContents()', function () {
      it('returns null', function () {
        expect(snapshot.getDocContents('main.tex')).to.be.null
      })
    })
  })

  // Text files used by the fixtures below, with the blob hash each one is
  // served under
  const files = {
    'main.tex': {
      contents: '\\documentclass{article}\netc.',
      hash: 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb',
    },
    'hello.txt': {
      contents: 'Hello history!',
      hash: 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
    },
    'goodbye.txt': {
      contents: "We're done here",
      hash: 'dddddddddddddddddddddddddddddddddddddddd',
    },
  }

  // Latest chunk returned on the first refresh: an empty snapshot followed by
  // one change that adds two text files and one binary file
  const chunk = {
    history: {
      snapshot: {
        files: {},
      },
      changes: [
        {
          operations: [
            {
              pathname: 'hello.txt',
              file: {
                hash: files['hello.txt'].hash,
                stringLength: files['hello.txt'].contents.length,
              },
            },
            {
              pathname: 'main.tex',
              file: {
                hash: files['main.tex'].hash,
                stringLength: files['main.tex'].contents.length,
              },
            },
            {
              pathname: 'frog.jpg',
              file: {
                hash: 'cccccccccccccccccccccccccccccccccccccccc',
                byteLength: 97080,
              },
            },
          ],
          timestamp: '2025-01-01T12:00:00.000Z',
        },
      ],
    },
    startVersion: 0,
  }

  // First refresh: flush, fetch the latest chunk, then download the blobs of
  // the two text files
  async function initializeSnapshot() {
    fetchMock.postOnce(`/project/${projectId}/flush`, 200)
    fetchMock.getOnce(`/project/${projectId}/latest/history`, { chunk })
    fetchMock.getOnce(
      `/project/${projectId}/blob/${files['main.tex'].hash}`,
      files['main.tex'].contents
    )
    fetchMock.getOnce(
      `/project/${projectId}/blob/${files['hello.txt'].hash}`,
      files['hello.txt'].contents
    )
    await snapshot.refresh()
    expect(fetchMock.done()).to.be.true
    fetchMock.reset()
  }

  describe('after initialization', function () {
    beforeEach(initializeSnapshot)

    describe('getDocPaths()', function () {
      it('returns the editable docs', function () {
        expect(snapshot.getDocPaths()).to.have.members([
          'main.tex',
          'hello.txt',
        ])
      })
    })

    describe('getDocContents()', function () {
      it('returns the doc contents', function () {
        expect(snapshot.getDocContents('main.tex')).to.equal(
          files['main.tex'].contents
        )
      })

      it('returns null for binary files', function () {
        expect(snapshot.getDocContents('frog.jpg')).to.be.null
      })

      it('returns null for inexistent files', function () {
        expect(snapshot.getDocContents('does-not-exist.txt')).to.be.null
      })
    })
  })

  // Changes returned on the second refresh: an edit to hello.txt and a new
  // text file
  const changes = [
    {
      operations: [
        {
          pathname: 'hello.txt',
          textOperation: ['Quote: ', files['hello.txt'].contents.length],
        },
        {
          pathname: 'goodbye.txt',
          file: {
            hash: files['goodbye.txt'].hash,
            stringLength: files['goodbye.txt'].contents.length,
          },
        },
      ],
      timestamp: '2025-01-01T13:00:00.000Z',
    },
  ]

  // Second refresh: flush, fetch the changes since version 1, then download
  // the blob of the newly added file
  async function refreshSnapshot() {
    // A refresh flushes exactly once, which is what done() checks below
    fetchMock.postOnce(`/project/${projectId}/flush`, 200)
    fetchMock.getOnce(`/project/${projectId}/changes?since=1`, changes)
    fetchMock.getOnce(
      `/project/${projectId}/blob/${files['goodbye.txt'].hash}`,
      files['goodbye.txt'].contents
    )
    await snapshot.refresh()
    expect(fetchMock.done()).to.be.true
    fetchMock.reset()
  }

  describe('after refresh', function () {
    beforeEach(initializeSnapshot)
    beforeEach(refreshSnapshot)

    describe('getDocPaths()', function () {
      it('returns the editable docs', function () {
        expect(snapshot.getDocPaths()).to.have.members([
          'main.tex',
          'hello.txt',
          'goodbye.txt',
        ])
      })
    })

    describe('getDocContents()', function () {
      it('returns the up to date content', function () {
        expect(snapshot.getDocContents('hello.txt')).to.equal(
          `Quote: ${files['hello.txt'].contents}`
        )
      })

      it('returns contents of new files', function () {
        expect(snapshot.getDocContents('goodbye.txt')).to.equal(
          files['goodbye.txt'].contents
        )
      })
    })
  })
})