mirror of
https://github.com/yu-i-i/overleaf-cep.git
synced 2026-05-23 17:19:37 +02:00
Merge pull request #23398 from overleaf/em-log-doc-hash-mismatches
Validate content hashes in history (log only) GitOrigin-RevId: ed772fc4e4d0aa9e980f9693a759647bd937e13a
This commit is contained in:
@@ -100,6 +100,9 @@ class Change {
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* @return {Operation[]}
|
||||
*/
|
||||
getOperations() {
|
||||
return this.operations
|
||||
}
|
||||
@@ -248,6 +251,24 @@ class Change {
|
||||
* @param {boolean} [opts.strict] - Do not ignore recoverable errors
|
||||
*/
|
||||
applyTo(snapshot, opts = {}) {
|
||||
// eslint-disable-next-line no-unused-vars
|
||||
for (const operation of this.iterativelyApplyTo(snapshot, opts)) {
|
||||
// Nothing to do: we're just consuming the iterator for the side effects
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Generator that applies this change to a snapshot and yields each
|
||||
* operation after it has been applied.
|
||||
*
|
||||
* Recoverable errors (caused by historical bad data) are ignored unless
|
||||
* opts.strict is true
|
||||
*
|
||||
* @param {Snapshot} snapshot modified in place
|
||||
* @param {object} opts
|
||||
* @param {boolean} [opts.strict] - Do not ignore recoverable errors
|
||||
*/
|
||||
*iterativelyApplyTo(snapshot, opts = {}) {
|
||||
assert.object(snapshot, 'bad snapshot')
|
||||
|
||||
for (const operation of this.operations) {
|
||||
@@ -261,6 +282,7 @@ class Change {
|
||||
throw err
|
||||
}
|
||||
}
|
||||
yield operation
|
||||
}
|
||||
|
||||
// update project version if present in change
|
||||
|
||||
18
services/history-v1/storage/lib/content_hash.js
Normal file
18
services/history-v1/storage/lib/content_hash.js
Normal file
@@ -0,0 +1,18 @@
|
||||
// @ts-check
|
||||
|
||||
const { createHash } = require('node:crypto')
|
||||
|
||||
/**
|
||||
* Compute a SHA-1 hash of the content
|
||||
*
|
||||
* This is used to validate incoming updates.
|
||||
*
|
||||
* @param {string} content
|
||||
*/
|
||||
function getContentHash(content) {
|
||||
const hash = createHash('sha-1')
|
||||
hash.update(content)
|
||||
return hash.digest('hex')
|
||||
}
|
||||
|
||||
module.exports = { getContentHash }
|
||||
5
services/history-v1/storage/lib/errors.js
Normal file
5
services/history-v1/storage/lib/errors.js
Normal file
@@ -0,0 +1,5 @@
|
||||
const OError = require('@overleaf/o-error')
|
||||
|
||||
class InvalidChangeError extends OError {}
|
||||
|
||||
module.exports = { InvalidChangeError }
|
||||
@@ -3,6 +3,7 @@
|
||||
'use strict'
|
||||
|
||||
const _ = require('lodash')
|
||||
const logger = require('@overleaf/logger')
|
||||
|
||||
const core = require('overleaf-editor-core')
|
||||
const Chunk = core.Chunk
|
||||
@@ -10,6 +11,9 @@ const History = core.History
|
||||
|
||||
const assert = require('./assert')
|
||||
const chunkStore = require('./chunk_store')
|
||||
const { BlobStore } = require('./blob_store')
|
||||
const { InvalidChangeError } = require('./errors')
|
||||
const { getContentHash } = require('./content_hash')
|
||||
|
||||
function countChangeBytes(change) {
|
||||
// Note: This is not quite accurate, because the raw change may contain raw
|
||||
@@ -59,10 +63,18 @@ async function persistChanges(projectId, allChanges, limits, clientEndVersion) {
|
||||
assert.maybe.object(limits)
|
||||
assert.integer(clientEndVersion)
|
||||
|
||||
const blobStore = new BlobStore(projectId)
|
||||
|
||||
let currentChunk
|
||||
// currentSnapshot tracks the latest change that we're applying; we use it to
|
||||
// check that the changes we are persisting are valid.
|
||||
|
||||
/**
|
||||
* currentSnapshot tracks the latest change that we're applying; we use it to
|
||||
* check that the changes we are persisting are valid.
|
||||
*
|
||||
* @type {core.Snapshot}
|
||||
*/
|
||||
let currentSnapshot
|
||||
|
||||
let originalEndVersion
|
||||
let changesToPersist
|
||||
|
||||
@@ -83,22 +95,95 @@ async function persistChanges(projectId, allChanges, limits, clientEndVersion) {
|
||||
}
|
||||
}
|
||||
|
||||
function fillChunk(chunk, changes) {
|
||||
/**
|
||||
* Add changes to a chunk until the chunk is full
|
||||
*
|
||||
* The chunk is full if it reaches a certain number of changes or a certain
|
||||
* size in bytes
|
||||
*
|
||||
* @param {core.Chunk} chunk
|
||||
* @param {core.Change[]} changes
|
||||
*/
|
||||
async function fillChunk(chunk, changes) {
|
||||
let totalBytes = totalChangeBytes(chunk.getChanges())
|
||||
let changesPushed = false
|
||||
while (changes.length > 0) {
|
||||
if (chunk.getChanges().length >= limits.maxChunkChanges) break
|
||||
const changeBytes = countChangeBytes(changes[0])
|
||||
if (totalBytes + changeBytes > limits.maxChunkChangeBytes) break
|
||||
const changesToFill = changes.splice(0, 1)
|
||||
currentSnapshot.applyAll(changesToFill, { strict: true })
|
||||
chunk.pushChanges(changesToFill)
|
||||
if (chunk.getChanges().length >= limits.maxChunkChanges) {
|
||||
break
|
||||
}
|
||||
|
||||
const change = changes[0]
|
||||
const changeBytes = countChangeBytes(change)
|
||||
|
||||
if (totalBytes + changeBytes > limits.maxChunkChangeBytes) {
|
||||
break
|
||||
}
|
||||
|
||||
for (const operation of change.iterativelyApplyTo(currentSnapshot, {
|
||||
strict: true,
|
||||
})) {
|
||||
try {
|
||||
await validateContentHash(operation)
|
||||
} catch (err) {
|
||||
// Temporary: skip validation errors
|
||||
if (err instanceof InvalidChangeError) {
|
||||
logger.warn(
|
||||
{ err, projectId },
|
||||
'content snapshot mismatch (ignored)'
|
||||
)
|
||||
} else {
|
||||
throw err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
chunk.pushChanges([change])
|
||||
changes.shift()
|
||||
totalBytes += changeBytes
|
||||
changesPushed = true
|
||||
}
|
||||
return changesPushed
|
||||
}
|
||||
|
||||
/**
|
||||
* Check that the operation is valid and can be incorporated to the history.
|
||||
*
|
||||
* For now, this checks content hashes when they are provided.
|
||||
*
|
||||
* @param {core.Operation} operation
|
||||
*/
|
||||
async function validateContentHash(operation) {
|
||||
if (operation instanceof core.EditFileOperation) {
|
||||
const editOperation = operation.getOperation()
|
||||
if (
|
||||
editOperation instanceof core.TextOperation &&
|
||||
editOperation.contentHash != null
|
||||
) {
|
||||
const path = operation.getPathname()
|
||||
const file = currentSnapshot.getFile(path)
|
||||
if (file == null) {
|
||||
throw new InvalidChangeError('file not found for hash validation', {
|
||||
projectId,
|
||||
path,
|
||||
})
|
||||
}
|
||||
await file.load('eager', blobStore)
|
||||
const content = file.getContent({ filterTrackedDeletes: true })
|
||||
const expectedHash = editOperation.contentHash
|
||||
const actualHash = content != null ? getContentHash(content) : null
|
||||
logger.debug({ expectedHash, actualHash }, 'validating content hash')
|
||||
if (actualHash !== expectedHash) {
|
||||
throw new InvalidChangeError('content hash mismatch', {
|
||||
projectId,
|
||||
path,
|
||||
expectedHash,
|
||||
actualHash,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async function extendLastChunkIfPossible() {
|
||||
const latestChunk = await chunkStore.loadLatest(projectId)
|
||||
|
||||
@@ -115,7 +200,11 @@ async function persistChanges(projectId, allChanges, limits, clientEndVersion) {
|
||||
const timer = new Timer()
|
||||
currentSnapshot.applyAll(latestChunk.getChanges())
|
||||
|
||||
if (!fillChunk(currentChunk, changesToPersist)) return
|
||||
const changesPushed = await fillChunk(currentChunk, changesToPersist)
|
||||
if (!changesPushed) {
|
||||
return
|
||||
}
|
||||
|
||||
checkElapsedTime(timer)
|
||||
|
||||
await chunkStore.update(projectId, originalEndVersion, currentChunk)
|
||||
@@ -127,7 +216,9 @@ async function persistChanges(projectId, allChanges, limits, clientEndVersion) {
|
||||
const history = new History(currentSnapshot.clone(), [])
|
||||
const chunk = new Chunk(history, endVersion)
|
||||
const timer = new Timer()
|
||||
if (fillChunk(chunk, changesToPersist)) {
|
||||
|
||||
const changesPushed = await fillChunk(chunk, changesToPersist)
|
||||
if (changesPushed) {
|
||||
checkElapsedTime(timer)
|
||||
currentChunk = chunk
|
||||
await chunkStore.create(projectId, chunk)
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
'use strict'
|
||||
|
||||
const { createHash } = require('node:crypto')
|
||||
const { expect } = require('chai')
|
||||
|
||||
const cleanup = require('./support/cleanup')
|
||||
@@ -11,6 +12,8 @@ const persistChanges = storage.persistChanges
|
||||
|
||||
const core = require('overleaf-editor-core')
|
||||
const AddFileOperation = core.AddFileOperation
|
||||
const EditFileOperation = core.EditFileOperation
|
||||
const TextOperation = core.TextOperation
|
||||
const Change = core.Change
|
||||
const Chunk = core.Chunk
|
||||
const File = core.File
|
||||
@@ -28,7 +31,7 @@ describe('persistChanges', function () {
|
||||
farFuture.setTime(farFuture.getTime() + 7 * 24 * 3600 * 1000)
|
||||
})
|
||||
|
||||
it('persists changes', function () {
|
||||
it('persists changes', async function () {
|
||||
const limitsToPersistImmediately = {
|
||||
minChangeTimestamp: farFuture,
|
||||
maxChangeTimestamp: farFuture,
|
||||
@@ -41,29 +44,29 @@ describe('persistChanges', function () {
|
||||
)
|
||||
const changes = [change]
|
||||
|
||||
return chunkStore
|
||||
.initializeProject(projectId)
|
||||
.then(() => {
|
||||
return persistChanges(projectId, changes, limitsToPersistImmediately, 0)
|
||||
})
|
||||
.then(result => {
|
||||
const history = new History(new Snapshot(), changes)
|
||||
const currentChunk = new Chunk(history, 0)
|
||||
expect(result).to.deep.equal({
|
||||
numberOfChangesPersisted: 1,
|
||||
originalEndVersion: 0,
|
||||
currentChunk,
|
||||
})
|
||||
return chunkStore.loadLatest(projectId)
|
||||
})
|
||||
.then(chunk => {
|
||||
expect(chunk.getStartVersion()).to.equal(0)
|
||||
expect(chunk.getEndVersion()).to.equal(1)
|
||||
expect(chunk.getChanges().length).to.equal(1)
|
||||
})
|
||||
await chunkStore.initializeProject(projectId)
|
||||
const result = await persistChanges(
|
||||
projectId,
|
||||
changes,
|
||||
limitsToPersistImmediately,
|
||||
0
|
||||
)
|
||||
|
||||
const history = new History(new Snapshot(), changes)
|
||||
const currentChunk = new Chunk(history, 0)
|
||||
expect(result).to.deep.equal({
|
||||
numberOfChangesPersisted: 1,
|
||||
originalEndVersion: 0,
|
||||
currentChunk,
|
||||
})
|
||||
|
||||
const chunk = await chunkStore.loadLatest(projectId)
|
||||
expect(chunk.getStartVersion()).to.equal(0)
|
||||
expect(chunk.getEndVersion()).to.equal(1)
|
||||
expect(chunk.getChanges().length).to.equal(1)
|
||||
})
|
||||
|
||||
it('persists changes in two chunks', function () {
|
||||
it('persists changes in two chunks', async function () {
|
||||
const limitsToPersistImmediately = {
|
||||
maxChunkChanges: 1,
|
||||
minChangeTimestamp: farFuture,
|
||||
@@ -82,36 +85,36 @@ describe('persistChanges', function () {
|
||||
)
|
||||
const changes = [firstChange, secondChange]
|
||||
|
||||
return chunkStore
|
||||
.initializeProject(projectId)
|
||||
.then(() => {
|
||||
return persistChanges(projectId, changes, limitsToPersistImmediately, 0)
|
||||
})
|
||||
.then(result => {
|
||||
const snapshot = Snapshot.fromRaw({
|
||||
files: {
|
||||
'a.tex': {
|
||||
content: '',
|
||||
},
|
||||
},
|
||||
})
|
||||
const history = new History(snapshot, [secondChange])
|
||||
const currentChunk = new Chunk(history, 1)
|
||||
expect(result).to.deep.equal({
|
||||
numberOfChangesPersisted: 2,
|
||||
originalEndVersion: 0,
|
||||
currentChunk,
|
||||
})
|
||||
return chunkStore.loadLatest(projectId)
|
||||
})
|
||||
.then(chunk => {
|
||||
expect(chunk.getStartVersion()).to.equal(1)
|
||||
expect(chunk.getEndVersion()).to.equal(2)
|
||||
expect(chunk.getChanges().length).to.equal(1)
|
||||
})
|
||||
await chunkStore.initializeProject(projectId)
|
||||
const result = await persistChanges(
|
||||
projectId,
|
||||
changes,
|
||||
limitsToPersistImmediately,
|
||||
0
|
||||
)
|
||||
|
||||
const snapshot = Snapshot.fromRaw({
|
||||
files: {
|
||||
'a.tex': {
|
||||
content: '',
|
||||
},
|
||||
},
|
||||
})
|
||||
const history = new History(snapshot, [secondChange])
|
||||
const currentChunk = new Chunk(history, 1)
|
||||
expect(result).to.deep.equal({
|
||||
numberOfChangesPersisted: 2,
|
||||
originalEndVersion: 0,
|
||||
currentChunk,
|
||||
})
|
||||
|
||||
const chunk = await chunkStore.loadLatest(projectId)
|
||||
expect(chunk.getStartVersion()).to.equal(1)
|
||||
expect(chunk.getEndVersion()).to.equal(2)
|
||||
expect(chunk.getChanges().length).to.equal(1)
|
||||
})
|
||||
|
||||
it('persists the snapshot at the start of the chunk', function () {
|
||||
it('persists the snapshot at the start of the chunk', async function () {
|
||||
const limitsToPersistImmediately = {
|
||||
maxChunkChanges: 2,
|
||||
minChangeTimestamp: farFuture,
|
||||
@@ -130,29 +133,29 @@ describe('persistChanges', function () {
|
||||
)
|
||||
const changes = [firstChange, secondChange]
|
||||
|
||||
return chunkStore
|
||||
.initializeProject(projectId)
|
||||
.then(() => {
|
||||
return persistChanges(projectId, changes, limitsToPersistImmediately, 0)
|
||||
})
|
||||
.then(result => {
|
||||
const history = new History(new Snapshot(), changes)
|
||||
const currentChunk = new Chunk(history, 0)
|
||||
expect(result).to.deep.equal({
|
||||
numberOfChangesPersisted: 2,
|
||||
originalEndVersion: 0,
|
||||
currentChunk,
|
||||
})
|
||||
return chunkStore.loadLatest(projectId)
|
||||
})
|
||||
.then(chunk => {
|
||||
expect(chunk.getStartVersion()).to.equal(0)
|
||||
expect(chunk.getEndVersion()).to.equal(2)
|
||||
expect(chunk.getChanges().length).to.equal(2)
|
||||
})
|
||||
await chunkStore.initializeProject(projectId)
|
||||
const result = await persistChanges(
|
||||
projectId,
|
||||
changes,
|
||||
limitsToPersistImmediately,
|
||||
0
|
||||
)
|
||||
|
||||
const history = new History(new Snapshot(), changes)
|
||||
const currentChunk = new Chunk(history, 0)
|
||||
expect(result).to.deep.equal({
|
||||
numberOfChangesPersisted: 2,
|
||||
originalEndVersion: 0,
|
||||
currentChunk,
|
||||
})
|
||||
|
||||
const chunk = await chunkStore.loadLatest(projectId)
|
||||
expect(chunk.getStartVersion()).to.equal(0)
|
||||
expect(chunk.getEndVersion()).to.equal(2)
|
||||
expect(chunk.getChanges().length).to.equal(2)
|
||||
})
|
||||
|
||||
it("errors if the version doesn't match the latest chunk", function () {
|
||||
it("errors if the version doesn't match the latest chunk", async function () {
|
||||
const limitsToPersistImmediately = {
|
||||
minChangeTimestamp: farFuture,
|
||||
maxChangeTimestamp: farFuture,
|
||||
@@ -169,18 +172,82 @@ describe('persistChanges', function () {
|
||||
[]
|
||||
)
|
||||
const changes = [firstChange, secondChange]
|
||||
return chunkStore
|
||||
.initializeProject(projectId)
|
||||
.then(() => {
|
||||
return persistChanges(projectId, changes, limitsToPersistImmediately, 1)
|
||||
})
|
||||
.then(() => {
|
||||
expect.fail()
|
||||
})
|
||||
.catch(err => {
|
||||
expect(err.message).to.equal(
|
||||
'client sent updates with end_version 1 but latest chunk has end_version 0'
|
||||
)
|
||||
})
|
||||
|
||||
await chunkStore.initializeProject(projectId)
|
||||
await expect(
|
||||
persistChanges(projectId, changes, limitsToPersistImmediately, 1)
|
||||
).to.be.rejectedWith(
|
||||
'client sent updates with end_version 1 but latest chunk has end_version 0'
|
||||
)
|
||||
})
|
||||
|
||||
describe('content hash validation', function () {
|
||||
it('acccepts a change with a valid hash', async function () {
|
||||
const limitsToPersistImmediately = {
|
||||
minChangeTimestamp: farFuture,
|
||||
maxChangeTimestamp: farFuture,
|
||||
}
|
||||
|
||||
const projectId = fixtures.docs.uninitializedProject.id
|
||||
await chunkStore.initializeProject(projectId)
|
||||
const textOperation = new TextOperation()
|
||||
textOperation.insert('hello ')
|
||||
textOperation.retain(5)
|
||||
textOperation.contentHash = hashString('hello world')
|
||||
const change = new Change(
|
||||
[
|
||||
new AddFileOperation('a.tex', File.fromString('world')),
|
||||
new EditFileOperation('a.tex', textOperation),
|
||||
],
|
||||
new Date(),
|
||||
[]
|
||||
)
|
||||
const changes = [change]
|
||||
|
||||
const result = await persistChanges(
|
||||
projectId,
|
||||
changes,
|
||||
limitsToPersistImmediately,
|
||||
0
|
||||
)
|
||||
expect(result.numberOfChangesPersisted).to.equal(1)
|
||||
})
|
||||
|
||||
it('acccepts a change with an invalid hash (only logs for now)', async function () {
|
||||
const limitsToPersistImmediately = {
|
||||
minChangeTimestamp: farFuture,
|
||||
maxChangeTimestamp: farFuture,
|
||||
}
|
||||
|
||||
const projectId = fixtures.docs.uninitializedProject.id
|
||||
await chunkStore.initializeProject(projectId)
|
||||
const textOperation = new TextOperation()
|
||||
textOperation.insert('hello ')
|
||||
textOperation.retain(5)
|
||||
textOperation.contentHash = hashString('bad hash')
|
||||
const change = new Change(
|
||||
[
|
||||
new AddFileOperation('a.tex', File.fromString('world')),
|
||||
new EditFileOperation('a.tex', textOperation),
|
||||
],
|
||||
new Date(),
|
||||
[]
|
||||
)
|
||||
const changes = [change]
|
||||
|
||||
const result = await persistChanges(
|
||||
projectId,
|
||||
changes,
|
||||
limitsToPersistImmediately,
|
||||
0
|
||||
)
|
||||
expect(result.numberOfChangesPersisted).to.equal(1)
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
function hashString(s) {
|
||||
const hash = createHash('sha-1')
|
||||
hash.update(s)
|
||||
return hash.digest('hex')
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user