mirror of
https://github.com/yu-i-i/overleaf-cep.git
synced 2026-05-23 09:09:36 +02:00
Merge pull request #32780 from overleaf/bg-history-v1-check-chunk-script
add script for checking chunks with resyncs GitOrigin-RevId: bf17bf262cf0a691a66c0d23256d53b606007461
This commit is contained in:
290
services/history-v1/storage/scripts/check_chunk.mjs
Normal file
290
services/history-v1/storage/scripts/check_chunk.mjs
Normal file
@@ -0,0 +1,290 @@
|
||||
import commandLineArgs from 'command-line-args'
|
||||
import {
|
||||
loadAtVersion,
|
||||
getProjectChunksFromVersion,
|
||||
} from '../lib/chunk_store/index.js'
|
||||
import { client } from '../lib/mongodb.js'
|
||||
import knex from '../lib/knex.js'
|
||||
import redis from '../lib/redis.js'
|
||||
import { loadGlobalBlobs, BlobStore } from '../lib/blob_store/index.js'
|
||||
import { getContentHash } from '../lib/content_hash.js'
|
||||
import core from 'overleaf-editor-core'
|
||||
import Events from 'node:events'
|
||||
// Called without any target emitters, so this changes the process-wide
// default listener limit (to 20) rather than the limit of one emitter.
// NOTE(review): presumably this silences MaxListenersExceededWarning caused by
// the storage clients imported above — confirm which emitter needs it.
Events.setMaxListeners(20)
|
||||
|
||||
// File-data classes from overleaf-editor-core, used for the `instanceof`
// checks in ensureFileLoaded (lazy data) and checkFileTrackedChanges (loaded
// string data).
const { StringFileData, LazyStringFileData } = core
|
||||
|
||||
// Command-line flags accepted by this script (parsed by command-line-args):
//   --historyId / -p      history id of the project to check (required by main)
//   --version / -v        when given, only the chunk loaded at this version is
//                         checked; otherwise every chunk from version 0 is
//   --persistedOnly / -o  forwarded to loadAtVersion as its `persistedOnly`
//                         option
const optionDefinitions = [
  {
    name: 'historyId',
    alias: 'p',
    type: String,
  },
  {
    name: 'version',
    alias: 'v',
    type: Number,
  },
  {
    name: 'persistedOnly',
    alias: 'o',
    type: Boolean,
  },
]
|
||||
|
||||
/**
 * Eagerly loads a file whose data is still a LazyStringFileData instance.
 *
 * Files that have no `load` method, or whose data is not lazy string data, are
 * left untouched. For files that are loaded, the function logs whether a
 * rangesHash is present, calls `file.load('eager', blobStore)` and then logs
 * the file's raw representation.
 *
 * @param {object} file - file taken from a snapshot
 * @param {object} blobStore - blob store handed to `file.load`
 * @param {number} currentVersion - version, used only in log messages
 * @param {string} path - pathname of the file, used only in log messages
 * @returns {Promise<void>}
 */
async function ensureFileLoaded(file, blobStore, currentVersion, path) {
  const canLoad = typeof file.load === 'function'
  if (!canLoad || !(file.data instanceof LazyStringFileData)) {
    return
  }

  const { rangesHash } = file.data
  const message = rangesHash
    ? `Loading rangesHash ${rangesHash} for ${path} at version ${currentVersion}`
    : `No rangesHash found for ${path} at version ${currentVersion}`
  console.log(message)

  await file.load('eager', blobStore)
  console.log('=> file', file.toRaw())
}
|
||||
|
||||
/**
 * Checks the tracked changes of one file for ordering and overlap problems.
 *
 * Only files whose data is a StringFileData instance are inspected; any other
 * file is reported as clean with no positions. Each consecutive pair of
 * tracked changes is compared: a pair whose first range starts after the
 * second is logged as "Unsorted", and a pair for which
 * `prev.range.overlaps(next.range)` is true is logged as "Overlapping".
 *
 * @param {string} path - pathname of the file, used in log messages
 * @param {object} file - file exposing `data` and `getTrackedChanges()`
 * @param {number} currentVersion - version, used in log messages
 * @returns {{ violations: boolean, positions: string[] }} whether any
 *   violation was logged, and one "(start, end)" string per tracked change
 */
function checkFileTrackedChanges(path, file, currentVersion) {
  const positions = []
  let violations = false

  if (!(file.data instanceof StringFileData)) {
    return { violations, positions }
  }

  let tcList
  try {
    tcList = file.getTrackedChanges()
  } catch (err) {
    // Report the failure instead of hiding it: otherwise a file whose tracked
    // changes cannot be read is indistinguishable from a clean file. The
    // return value is unchanged so callers keep going with the other files.
    console.error(
      `Failed to read tracked changes for ${path} at version ${currentVersion}:`,
      err
    )
    return { violations, positions }
  }

  if (!tcList) return { violations, positions }

  // NOTE(review): assumes the tracked-change list is iterable — confirm
  // against overleaf-editor-core.
  let prevTc = null
  for (const tc of Array.from(tcList)) {
    positions.push(`(${tc.range.start}, ${tc.range.end})`)

    if (prevTc) {
      if (prevTc.range.start > tc.range.start) {
        console.error(
          `VIOLATION: Unsorted ranges in ${path} at version ${currentVersion}: [${prevTc.range.start}, ${prevTc.range.end}] comes before [${tc.range.start}, ${tc.range.end}]`
        )
        violations = true
      }
      if (prevTc.range.overlaps(tc.range)) {
        console.error(
          `VIOLATION: Overlapping ranges in ${path} at version ${currentVersion}: [${prevTc.range.start}, ${prevTc.range.end}] overlaps [${tc.range.start}, ${tc.range.end}]`
        )
        violations = true
      }
    }

    prevTc = tc
  }

  return { violations, positions }
}
|
||||
|
||||
/**
 * Runs the tracked-changes check over every file of a snapshot.
 *
 * Each file is first passed to ensureFileLoaded; a file that fails to load is
 * logged and skipped. When at least one file has tracked changes, a per-file
 * summary of their positions is printed for the version.
 *
 * @param {object} snapshot - snapshot providing `getFilePathnames()` and
 *   `getFile(pathname)`
 * @param {object} blobStore - blob store used to load lazy files
 * @param {number} currentVersion - version, used in log messages
 * @returns {Promise<boolean>} true when any file reported a violation
 */
async function checkSnapshot(snapshot, blobStore, currentVersion) {
  const summaryLines = []
  let foundViolation = false

  for (const pathname of snapshot.getFilePathnames()) {
    const file = snapshot.getFile(pathname)
    if (!file) continue

    try {
      await ensureFileLoaded(file, blobStore, currentVersion, pathname)
    } catch (err) {
      console.error(
        `Failed to load file ${pathname} at version ${currentVersion}:`,
        err
      )
      continue
    }

    const result = checkFileTrackedChanges(pathname, file, currentVersion)
    if (result.violations) {
      foundViolation = true
    }
    if (result.positions.length > 0) {
      summaryLines.push(
        ` ${pathname}: changes at [${result.positions.join(', ')}]`
      )
    }
  }

  if (summaryLines.length > 0) {
    console.log(`Version ${currentVersion} tracked changes summary:`)
    console.log(summaryLines.join('\n'))
  }

  return foundViolation
}
|
||||
|
||||
/**
 * Validates the content hash carried by a text edit operation, if any.
 *
 * Only `core.EditFileOperation`s that wrap a `core.TextOperation` with a
 * non-null `contentHash` are checked; everything else passes. The file at the
 * operation's pathname is loaded, its content is read with
 * `filterTrackedDeletes: true`, hashed with getContentHash, and compared with
 * the hash on the operation.
 *
 * @param {object} operation - operation to validate
 * @param {object} snapshot - snapshot the file is looked up in
 * @param {number} currentVersion - version, used in log messages
 * @param {object} blobStore - blob store used to load lazy files
 * @returns {Promise<boolean>} true when a problem was found (file missing,
 *   file failed to load, or hash mismatch), false otherwise
 */
async function validateContentHash(
  operation,
  snapshot,
  currentVersion,
  blobStore
) {
  if (!(operation instanceof core.EditFileOperation)) {
    return false
  }

  const editOperation = operation.getOperation()
  const carriesHash =
    editOperation instanceof core.TextOperation &&
    editOperation.contentHash != null
  if (!carriesHash) {
    return false
  }

  const pathname = operation.getPathname()
  const file = snapshot.getFile(pathname)
  if (file == null) {
    console.error(
      `VIOLATION: file ${pathname} not found for hash validation at version ${currentVersion}`
    )
    return true
  }

  try {
    await ensureFileLoaded(file, blobStore, currentVersion, pathname)
  } catch (err) {
    console.error(
      `Failed to load file ${pathname} for hash validation at version ${currentVersion}:`,
      err
    )
    return true
  }

  const content = file.getContent({ filterTrackedDeletes: true })
  const expectedHash = editOperation.contentHash
  let actualHash = null
  if (content != null) {
    actualHash = getContentHash(content)
  }

  if (actualHash === expectedHash) {
    return false
  }

  console.error(
    `VIOLATION: content hash mismatch in ${pathname} at version ${currentVersion}: expected ${expectedHash}, got ${actualHash}`
  )
  return true
}
|
||||
|
||||
/**
 * Replays the changes of a chunk on a clone of its snapshot and checks the
 * result after every change.
 *
 * The starting snapshot is checked first. Then, for each change, the change
 * is logged, its operations are applied one at a time (each operation is
 * logged and passed to validateContentHash), and the resulting snapshot is
 * checked again. A change that throws while being applied is logged and
 * skipped. Findings are only printed; nothing is returned.
 *
 * @param {string} historyId - history id used to build the BlobStore
 * @param {object} chunk - chunk providing `getSnapshot()`, `getChanges()` and
 *   `getStartVersion()`
 * @returns {Promise<void>}
 */
async function checkChunkChanges(historyId, chunk) {
  // Work on a clone so the chunk's own snapshot is not modified by the replay.
  const workingSnapshot = chunk.getSnapshot().clone()
  const blobStore = new BlobStore(historyId)

  const changes = chunk.getChanges()
  let version = chunk.getStartVersion()
  console.log(
    `Checking chunk starting at version ${version} with ${changes.length} changes.`
  )

  if (await checkSnapshot(workingSnapshot, blobStore, version)) {
    console.log(
      `Tracked changes corrupted at initial snapshot version ${version}`
    )
  }

  for (const change of changes) {
    version += 1

    const isResync = change?.origin?.kind === 'history-resync'
    if (isResync) {
      console.log('-'.repeat(16), 'history-resync', '-'.repeat(16))
    }
    console.log(`Version ${version} change:`, JSON.stringify(change.toRaw()))
    if (isResync) {
      // NOTE(review): the whole process stops (with the default exit code) at
      // the first history-resync change, before that change is applied and
      // without returning to the caller — confirm this is intended.
      process.exit()
    }

    let hasViolation = false
    try {
      const appliedOperations = change.iterativelyApplyTo(workingSnapshot, {
        strict: true,
      })
      for (const operation of appliedOperations) {
        console.log(
          `Version ${version} operation:`,
          JSON.stringify(operation.toRaw())
        )
        const hashProblem = await validateContentHash(
          operation,
          workingSnapshot,
          version,
          blobStore
        )
        if (hashProblem) hasViolation = true
      }
    } catch (err) {
      console.error(`Failed to apply change at version ${version}:`, err)
      continue
    }

    if (await checkSnapshot(workingSnapshot, blobStore, version)) {
      hasViolation = true
    }
    if (!hasViolation) continue

    console.log(
      `Tracked changes corrupted or hash mismatch at version ${version}`
    )
    console.log('Change was:', JSON.stringify(change.toRaw(), null, 2))
  }
}
|
||||
|
||||
/**
 * Parses the command line and checks either one chunk or every chunk of a
 * project.
 *
 * `--historyId` is mandatory; without it the process exits with code 1. With
 * `--version`, only the chunk loaded at that version is checked (exit code 1
 * when it cannot be found). Without it, every chunk record from version 0
 * onwards is loaded and checked, and chunks that fail to load are logged and
 * skipped.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const { historyId, version, persistedOnly } =
    commandLineArgs(optionDefinitions)

  if (!historyId) {
    console.error('Error: --historyId is required.')
    process.exit(1)
  }

  await loadGlobalBlobs()

  // Every load uses the same persistedOnly setting, defaulting to false.
  const loadChunk = startVersion =>
    loadAtVersion(historyId, startVersion, {
      persistedOnly: persistedOnly || false,
    })

  if (version != null) {
    const chunk = await loadChunk(version)
    if (!chunk) {
      console.error(`Chunk not found at version ${version}`)
      process.exit(1)
    }
    await checkChunkChanges(historyId, chunk)
    return
  }

  let checkedCount = 0
  for await (const chunkRecord of getProjectChunksFromVersion(historyId, 0)) {
    const { startVersion } = chunkRecord
    const chunk = await loadChunk(startVersion)
    if (!chunk) {
      console.error(`Failed to load chunk starting at ${startVersion}`)
      continue
    }
    checkedCount += 1
    await checkChunkChanges(historyId, chunk)
  }

  if (checkedCount === 0) {
    console.log(`No chunks found for project ${historyId}`)
  }
}
|
||||
|
||||
// Script entry point: run main(), report the outcome, then close the
// Postgres, MongoDB and Redis connections.
main()
  .then(() => console.log('Done.'))
  .catch(err => {
    console.error('Error:', err)
    // Record the failure instead of calling process.exit(1) here: exiting
    // immediately would skip the cleanup below and can cut off output that is
    // still being written. The process ends with this code once the
    // connections are closed.
    process.exitCode = 1
  })
  .finally(() =>
    // Return the combined promise so the chain waits for every close attempt.
    // Each attempt logs its own failure, so one failing close does not hide
    // the others.
    Promise.all([
      knex
        .destroy()
        .catch(err => console.error('Error closing Postgres:', err)),
      client
        .close()
        .catch(err => console.error('Error closing MongoDB:', err)),
      redis
        .disconnect()
        .catch(err => console.error('Error disconnecting Redis:', err)),
    ])
  )
|
||||
Reference in New Issue
Block a user