Merge pull request #28672 from overleaf/em-reapply-restore-optimization

Reapply file restore optimization

GitOrigin-RevId: c4479141d890e3e0af746f406a0186bf21f3d918
This commit is contained in:
Eric Mc Sween
2025-09-24 07:49:22 -04:00
committed by Copybot
parent 75cbf3826c
commit 0015f2cc6c
9 changed files with 178 additions and 75 deletions

View File

@@ -294,6 +294,8 @@ class Change {
if (this.v2DocVersions) {
snapshot.updateV2DocVersions(this.v2DocVersions)
}
snapshot.setTimestamp(this.timestamp)
}
/**

View File

@@ -36,7 +36,8 @@ class Snapshot {
return new Snapshot(
FileMap.fromRaw(raw.files),
raw.projectVersion,
V2DocVersions.fromRaw(raw.v2DocVersions)
V2DocVersions.fromRaw(raw.v2DocVersions),
raw.timestamp ? new Date(raw.timestamp) : undefined
)
}
@@ -45,8 +46,15 @@ class Snapshot {
const raw = {
files: this.fileMap.toRaw(),
}
if (this.projectVersion) raw.projectVersion = this.projectVersion
if (this.v2DocVersions) raw.v2DocVersions = this.v2DocVersions.toRaw()
if (this.projectVersion) {
raw.projectVersion = this.projectVersion
}
if (this.v2DocVersions) {
raw.v2DocVersions = this.v2DocVersions.toRaw()
}
if (this.timestamp != null) {
raw.timestamp = this.timestamp.toISOString()
}
return raw
}
@@ -54,13 +62,15 @@ class Snapshot {
* @param {FileMap} [fileMap]
* @param {string} [projectVersion]
* @param {V2DocVersions} [v2DocVersions]
* @param {Date} [timestamp]
*/
constructor(fileMap, projectVersion, v2DocVersions) {
constructor(fileMap, projectVersion, v2DocVersions, timestamp) {
assert.maybe.instance(fileMap, FileMap, 'bad fileMap')
this.fileMap = fileMap || new FileMap({})
this.projectVersion = projectVersion
this.v2DocVersions = v2DocVersions
this.timestamp = timestamp ?? null
}
/**
@@ -109,6 +119,17 @@ class Snapshot {
v2DocVersions.applyTo(this)
}
getTimestamp() {
return this.timestamp
}
/**
* @param {Date} timestamp
*/
setTimestamp(timestamp) {
this.timestamp = timestamp
}
/**
* The underlying file map.
* @return {FileMap}
@@ -268,6 +289,7 @@ class Snapshot {
files: rawFiles,
projectVersion,
v2DocVersions: rawV2DocVersions,
timestamp: this.getTimestamp() ?? undefined,
}
}

View File

@@ -74,6 +74,7 @@ export type RawSnapshot = {
files: RawFileMap
projectVersion?: string
v2DocVersions?: RawV2DocVersions | null
timestamp?: string
}
export type RawHistory = {

View File

@@ -1,10 +1,15 @@
'use strict'
const { expect } = require('chai')
const core = require('..')
const Change = core.Change
const File = core.File
const Operation = core.Operation
const {
Change,
File,
Operation,
AddFileOperation,
Snapshot,
Origin,
RestoreFileOrigin,
} = require('..')
describe('Change', function () {
describe('findBlobHashes', function () {
@@ -37,9 +42,9 @@ describe('Change', function () {
describe('RestoreFileOrigin', function () {
it('should convert to and from raw', function () {
const origin = new core.RestoreFileOrigin(1, 'path', new Date())
const origin = new RestoreFileOrigin(1, 'path', new Date())
const raw = origin.toRaw()
const newOrigin = core.Origin.fromRaw(raw)
const newOrigin = Origin.fromRaw(raw)
expect(newOrigin).to.eql(origin)
})
@@ -56,7 +61,19 @@ describe('Change', function () {
},
})
expect(change.getOrigin()).to.be.an.instanceof(core.RestoreFileOrigin)
expect(change.getOrigin()).to.be.an.instanceof(RestoreFileOrigin)
})
})
describe('applyTo', function () {
it('sets the timestamp on the snapshot', function () {
const snapshot = new Snapshot()
snapshot.addFile('main.tex', File.fromString(''))
const operation = new AddFileOperation('main.tex', File.fromString(''))
const now = new Date()
const change = new Change([operation], now)
change.applyTo(snapshot)
expect(snapshot.getTimestamp().toISOString()).to.equal(now.toISOString())
})
})
})

View File

@@ -28,6 +28,7 @@ const withTmpDir = require('./with_tmp_dir')
const StreamSizeLimit = require('./stream_size_limit')
const { getProjectBlobsBatch } = require('../../storage/lib/blob_store')
const assert = require('../../storage/lib/assert')
const { getChunkMetadataForVersion } = require('../../storage/lib/chunk_store')
const pipeline = promisify(Stream.pipeline)
@@ -396,7 +397,28 @@ async function getSnapshotAtVersion(projectId, version) {
chunk.getChanges(),
chunk.getEndVersion() - version
)
snapshot.applyAll(changes)
if (changes.length > 0) {
snapshot.applyAll(changes)
} else {
// There are no changes in this chunk; we need to look at the previous chunk
// to get the snapshot's timestamp
let chunkMetadata
try {
chunkMetadata = await getChunkMetadataForVersion(projectId, version)
} catch (err) {
if (err instanceof Chunk.VersionNotFoundError) {
// The snapshot is the first snapshot of the first chunk, so we can't
// find a timestamp. This shouldn't happen often. Ignore the error and
// leave the timestamp empty.
} else {
throw err
}
}
snapshot.setTimestamp(chunkMetadata.endTimestamp)
}
return snapshot
}

View File

@@ -108,6 +108,7 @@ describe('persistChanges', function () {
content: '',
},
},
timestamp: thirdChange.getTimestamp().toISOString(),
})
const history = new History(snapshot, [thirdChange])
const currentChunk = new Chunk(history, 2)

View File

@@ -1,6 +1,7 @@
import { expect } from 'chai'
import mongodb from 'mongodb-legacy'
import nock from 'nock'
import { readFileSync } from 'node:fs'
import * as ProjectHistoryClient from './helpers/ProjectHistoryClient.js'
import * as ProjectHistoryApp from './helpers/ProjectHistoryApp.js'
const { ObjectId } = mongodb
@@ -50,6 +51,12 @@ describe('LatestSnapshot', function () {
.get(`/api/projects/${this.historyId}/latest/history`)
.replyWithFile(200, fixture('chunks/0-3.json'))
const fixtureData = JSON.parse(
readFileSync(fixture('chunks/0-3.json'), 'utf8')
)
const changes = fixtureData.chunk.history.changes
const lastTimestamp = changes[changes.length - 1].timestamp
ProjectHistoryClient.getLatestSnapshot(this.projectId, (error, body) => {
if (error) {
throw error
@@ -69,6 +76,7 @@ describe('LatestSnapshot', function () {
operations: [{ textOperation: [26, '\n\nFour five six'] }],
},
},
timestamp: lastTimestamp,
},
version: 3,
})

View File

@@ -7,7 +7,6 @@ import EditorController from '../Editor/EditorController.js'
import Errors from '../Errors/Errors.js'
import moment from 'moment'
import { callbackifyAll } from '@overleaf/promise-utils'
import { fetchJson } from '@overleaf/fetch-utils'
import ProjectLocator from '../Project/ProjectLocator.js'
import DocumentUpdaterHandler from '../DocumentUpdater/DocumentUpdaterHandler.js'
import ChatApiHandler from '../Chat/ChatApiHandler.js'
@@ -69,6 +68,13 @@ const RestoreManager = {
)
const snapshot = Snapshot.fromRaw(snapshotRaw)
const origin = options.origin ?? {
kind: 'file-restore',
path: pathname,
version,
timestamp: snapshot.getTimestamp()?.toISOString(),
}
return await RestoreManager._revertSingleFile(
userId,
projectId,
@@ -76,7 +82,7 @@ const RestoreManager = {
pathname,
threadIds,
snapshot,
options
{ origin }
)
},
@@ -131,17 +137,9 @@ const RestoreManager = {
})
.catch(() => null)
const updates = await RestoreManager._getUpdatesFromHistory(
projectId,
version
)
const updateAtVersion = updates.find(update => update.toV === version)
const origin = options.origin || {
kind: 'file-restore',
path: pathname,
version,
timestamp: new Date(updateAtVersion.meta.end_ts).toISOString(),
const snapshotFile = projectSnapshotAtVersion.getFile(pathname)
if (!snapshotFile) {
throw new OError('file not found in snapshot', { pathname })
}
const importInfo = await FileSystemImportManager.promises.importFile(
@@ -162,7 +160,7 @@ const RestoreManager = {
projectId,
file.element._id,
file.type,
origin,
options.origin,
userId
)
@@ -177,25 +175,24 @@ const RestoreManager = {
threadIds.delete(file.element._id.toString())
}
const snapshotFile = projectSnapshotAtVersion.getFile(pathname)
if (!snapshotFile) {
throw new OError('file not found in snapshot', { pathname })
}
// Look for metadata indicating a linked file.
const fileMetadata = snapshotFile.getMetadata()
const isFileMetadata = fileMetadata && 'provider' in fileMetadata
logger.debug({ fileMetadata }, 'metadata from history')
if (importInfo.type === 'file' || isFileMetadata) {
if (
!snapshotFile.isEditable() ||
importInfo.type === 'file' ||
isFileMetadata
) {
const newFile = await EditorController.promises.upsertFile(
projectId,
parentFolderId,
basename,
fsPath,
fileMetadata,
origin,
options.origin,
userId
)
@@ -296,13 +293,17 @@ const RestoreManager = {
newCommentThreadData
)
const lines = snapshotFile
.getContent({ filterTrackedDeletes: true })
.split('\n')
const { _id } = await EditorController.promises.addDocWithRanges(
projectId,
parentFolderId,
basename,
importInfo.lines,
lines,
newRanges,
origin,
options.origin,
userId
)
@@ -362,31 +363,21 @@ const RestoreManager = {
throw new OError('project does not have ranges support', { projectId })
}
// Get project paths at version
const pathsAtPastVersion = await RestoreManager._getProjectPathsAtVersion(
projectId,
version
)
const updates = await RestoreManager._getUpdatesFromHistory(
projectId,
version
)
const updateAtVersion = updates.find(update => update.toV === version)
const origin = {
kind: 'project-restore',
version,
timestamp: new Date(updateAtVersion.meta.end_ts).toISOString(),
}
const threadIds = await getCommentThreadIds(projectId)
const snapshotRaw = await HistoryManager.promises.getContentAtVersion(
projectId,
version
)
const snapshot = Snapshot.fromRaw(snapshotRaw)
const pathsAtPastVersion = snapshot.getFilePathnames()
const origin = {
kind: 'project-restore',
version,
timestamp: snapshot.getTimestamp()?.toISOString(),
}
const threadIds = await getCommentThreadIds(projectId)
const reverted = []
for (const pathname of pathsAtPastVersion) {
const res = await RestoreManager._revertSingleFile(
@@ -437,18 +428,6 @@ const RestoreManager = {
}/project/${projectId}/version/${version}/${encodeURIComponent(pathname)}`
return await FileWriter.promises.writeUrlToDisk(projectId, url)
},
async _getUpdatesFromHistory(projectId, version) {
const url = `${Settings.apis.project_history.url}/project/${projectId}/updates?before=${version}&min_count=1`
const res = await fetchJson(url)
return res.updates
},
async _getProjectPathsAtVersion(projectId, version) {
const url = `${Settings.apis.project_history.url}/project/${projectId}/paths/version/${version}`
const res = await fetchJson(url)
return res.paths
},
}
export default { ...callbackifyAll(RestoreManager), promises: RestoreManager }

View File

@@ -69,6 +69,7 @@ describe('RestoreManager', function () {
},
},
},
timestamp: new Date().toISOString(),
}),
},
}),
@@ -161,12 +162,22 @@ describe('RestoreManager', function () {
getFile: pathname => ({
getStringLength: sinon.stub().returns(100),
getByteLength: sinon.stub().returns(100),
getContent: sinon.stub().returns('file content'),
getContent: sinon.stub().returns('foo\nbar\nbaz'),
isEditable: sinon.stub().returns(true),
getMetadata: sinon
.stub()
.returns(snapshotData?.files?.[pathname]?.metadata),
}),
getFilePathnames: sinon
.stub()
.returns(Object.keys(snapshotData.files || {})),
getTimestamp: sinon
.stub()
.returns(
snapshotData?.timestamp
? new Date(snapshotData.timestamp)
: new Date()
),
})),
},
getDocUpdaterCompatibleRanges: (ctx.getDocUpdaterCompatibleRanges = sinon
@@ -189,9 +200,7 @@ describe('RestoreManager', function () {
meta: { end_ts: new Date('2024-01-01T00:00:00.000Z') },
},
])
ctx.RestoreManager.promises._getProjectPathsAtVersion = sinon
.stub()
.resolves([])
ctx.RestoreManager.promises._writeFileVersionToDisk = sinon
.stub()
.resolves('/tmp/mock-file-path')
@@ -699,9 +708,17 @@ describe('RestoreManager', function () {
{
getFile: sinon.stub().returns({
getMetadata: sinon.stub().returns(undefined),
getContent: sinon.stub().returns('file content'),
getContent: sinon.stub().returns('foo\nbar\nbaz'),
isEditable: sinon.stub().returns(true),
}),
},
{
origin: {
kind: 'file-restore',
path: 'foo.tex',
timestamp: new Date(ctx.endTs).toISOString(),
version: 42,
},
}
)
})
@@ -770,9 +787,17 @@ describe('RestoreManager', function () {
{
getFile: sinon.stub().returns({
getMetadata: sinon.stub().returns(undefined),
getContent: sinon.stub().returns('file content'),
getContent: sinon.stub().returns('foo\nbar\nbaz'),
isEditable: sinon.stub().returns(true),
}),
},
{
origin: {
kind: 'file-restore',
path: 'foo.tex',
timestamp: new Date(ctx.endTs).toISOString(),
version: 42,
},
}
)
})
@@ -1128,15 +1153,41 @@ describe('RestoreManager', function () {
describe('for a project with overlap in current files and old files', function () {
beforeEach(async function (ctx) {
ctx.HistoryManager.promises.getContentAtVersion = sinon
.stub()
.resolves({
files: {
'main.tex': {
hash: 'abcdef1234567890abcdef1234567890abcdef12',
stringLength: 100,
metadata: {
editorId: 'test-editor',
},
},
'figures/image.png': {
hash: 'abcdef1234567890abcdef1234567890abcdef12',
stringLength: 100,
metadata: {
provider: 'bar',
},
},
'since-deleted.tex': {
hash: 'abcdef1234567890abcdef1234567890abcdef12',
stringLength: 100,
metadata: {
editorId: 'test-editor',
},
},
},
timestamp: new Date().toISOString(),
})
ctx.ProjectEntityHandler.promises.getAllEntities = sinon
.stub()
.resolves({
docs: [{ path: '/main.tex' }, { path: '/new-file.tex' }],
files: [{ path: '/figures/image.png' }],
})
ctx.RestoreManager.promises._getProjectPathsAtVersion = sinon
.stub()
.resolves(['main.tex', 'figures/image.png', 'since-deleted.tex'])
await ctx.RestoreManager.promises.revertProject(
ctx.user_id,