mirror of
https://github.com/yu-i-i/overleaf-cep.git
synced 2026-06-07 08:09:01 +02:00
[history-v1] fix remaining files with missing hashes (#22985)
GitOrigin-RevId: df382732954538f9db177180aee0a44c5d5679ca
This commit is contained in:
@@ -5,6 +5,7 @@ import Stream from 'node:stream'
|
||||
import { ObjectId } from 'mongodb'
|
||||
import logger from '@overleaf/logger'
|
||||
import OError from '@overleaf/o-error'
|
||||
import { Blob } from 'overleaf-editor-core'
|
||||
import {
|
||||
BlobStore,
|
||||
getStringLengthOfFile,
|
||||
@@ -25,7 +26,6 @@ Events.setMaxListeners(20)
|
||||
ObjectId.cacheHexString = true
|
||||
|
||||
/**
|
||||
* @typedef {import("overleaf-editor-core").Blob} Blob
|
||||
* @typedef {import("mongodb").Collection} Collection
|
||||
* @typedef {import("mongodb").Collection<Project>} ProjectsCollection
|
||||
* @typedef {import("mongodb").Collection<{project: Project}>} DeletedProjectsCollection
|
||||
@@ -51,13 +51,14 @@ ObjectId.cacheHexString = true
|
||||
*/
|
||||
|
||||
/**
|
||||
* @return {{FIX_NOT_FOUND: boolean, FIX_HASH_MISMATCH: boolean, FIX_DELETE_PERMISSION: boolean, LOGS: string}}
|
||||
* @return {{FIX_NOT_FOUND: boolean, FIX_HASH_MISMATCH: boolean, FIX_DELETE_PERMISSION: boolean, FIX_MISSING_HASH: boolean, LOGS: string}}
|
||||
*/
|
||||
function parseArgs() {
|
||||
const args = commandLineArgs([
|
||||
{ name: 'fixNotFound', type: String, defaultValue: 'true' },
|
||||
{ name: 'fixDeletePermission', type: String, defaultValue: 'true' },
|
||||
{ name: 'fixHashMismatch', type: String, defaultValue: 'true' },
|
||||
{ name: 'fixMissingHash', type: String, defaultValue: 'true' },
|
||||
{ name: 'logs', type: String, defaultValue: '' },
|
||||
])
|
||||
/**
|
||||
@@ -74,12 +75,18 @@ function parseArgs() {
|
||||
FIX_HASH_MISMATCH: boolVal('fixNotFound'),
|
||||
FIX_DELETE_PERMISSION: boolVal('fixDeletePermission'),
|
||||
FIX_NOT_FOUND: boolVal('fixHashMismatch'),
|
||||
FIX_MISSING_HASH: boolVal('fixMissingHash'),
|
||||
LOGS: args.logs,
|
||||
}
|
||||
}
|
||||
|
||||
const { FIX_HASH_MISMATCH, FIX_DELETE_PERMISSION, FIX_NOT_FOUND, LOGS } =
|
||||
parseArgs()
|
||||
const {
|
||||
FIX_HASH_MISMATCH,
|
||||
FIX_DELETE_PERMISSION,
|
||||
FIX_NOT_FOUND,
|
||||
FIX_MISSING_HASH,
|
||||
LOGS,
|
||||
} = parseArgs()
|
||||
if (!LOGS) {
|
||||
throw new Error('--logs parameter missing')
|
||||
}
|
||||
@@ -326,37 +333,75 @@ async function importRestoredFilestoreFile(projectId, fileId, historyId) {
|
||||
/**
|
||||
* @param {string} projectId
|
||||
* @param {string} fileId
|
||||
* @return {Promise<string>}
|
||||
* @param {string} path
|
||||
* @return {Promise<Blob>}
|
||||
*/
|
||||
async function computeFilestoreFileHash(projectId, fileId) {
|
||||
async function bufferFilestoreFileToDisk(projectId, fileId, path) {
|
||||
const filestoreKey = `${projectId}/${fileId}`
|
||||
const path = `${BUFFER_DIR}/${projectId}_${fileId}`
|
||||
try {
|
||||
let s
|
||||
try {
|
||||
s = await filestorePersistor.getObjectStream(
|
||||
await Stream.promises.pipeline(
|
||||
await filestorePersistor.getObjectStream(
|
||||
USER_FILES_BUCKET_NAME,
|
||||
filestoreKey
|
||||
)
|
||||
} catch (err) {
|
||||
if (err instanceof NotFoundError) {
|
||||
throw new OError('missing blob, need to restore filestore file', {
|
||||
filestoreKey,
|
||||
})
|
||||
}
|
||||
throw err
|
||||
}
|
||||
await Stream.promises.pipeline(
|
||||
s,
|
||||
),
|
||||
fs.createWriteStream(path, { highWaterMark: STREAM_HIGH_WATER_MARK })
|
||||
)
|
||||
const blob = await makeBlobForFile(path)
|
||||
blob.setStringLength(
|
||||
await getStringLengthOfFile(blob.getByteLength(), path)
|
||||
)
|
||||
return blob
|
||||
} catch (err) {
|
||||
if (err instanceof NotFoundError) {
|
||||
throw new OError('missing blob, need to restore filestore file', {
|
||||
filestoreKey,
|
||||
})
|
||||
}
|
||||
throw err
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} projectId
|
||||
* @param {string} fileId
|
||||
* @return {Promise<string>}
|
||||
*/
|
||||
async function computeFilestoreFileHash(projectId, fileId) {
|
||||
const path = `${BUFFER_DIR}/${projectId}_${fileId}`
|
||||
try {
|
||||
const blob = await bufferFilestoreFileToDisk(projectId, fileId, path)
|
||||
return blob.getHash()
|
||||
} finally {
|
||||
await fs.promises.rm(path, { force: true })
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} projectId
|
||||
* @param {string} fileId
|
||||
* @return {Promise<void>}
|
||||
*/
|
||||
async function uploadFilestoreFile(projectId, fileId) {
|
||||
const path = `${BUFFER_DIR}/${projectId}_${fileId}`
|
||||
try {
|
||||
const blob = await bufferFilestoreFileToDisk(projectId, fileId, path)
|
||||
const hash = blob.getHash()
|
||||
try {
|
||||
await ensureBlobExistsForFileAndUploadToAWS(projectId, fileId, hash)
|
||||
} catch (err) {
|
||||
if (!(err instanceof Blob.NotFoundError)) throw err
|
||||
|
||||
const { project } = await getProject(projectId)
|
||||
const historyId = project.overleaf.history.id.toString()
|
||||
const blobStore = new BlobStore(historyId)
|
||||
await blobStore.putBlob(path, blob)
|
||||
await ensureBlobExistsForFileAndUploadToAWS(projectId, fileId, hash)
|
||||
}
|
||||
} finally {
|
||||
await fs.promises.rm(path, { force: true })
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} line
|
||||
* @return {Promise<boolean>}
|
||||
@@ -468,6 +513,23 @@ async function fixDeletePermission(line) {
|
||||
return await ensureBlobExistsForFileAndUploadToAWS(projectId, fileId, hash)
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} line
|
||||
* @return {Promise<boolean>}
|
||||
*/
|
||||
async function fixMissingHash(line) {
|
||||
let { projectId, _id: fileId } = JSON.parse(line)
|
||||
const {
|
||||
fileRef: { hash },
|
||||
} = await findFile(projectId, fileId)
|
||||
if (hash) {
|
||||
// processed, double check
|
||||
return await ensureBlobExistsForFileAndUploadToAWS(projectId, fileId, hash)
|
||||
}
|
||||
await uploadFilestoreFile(projectId, fileId)
|
||||
return true
|
||||
}
|
||||
|
||||
const CASES = {
|
||||
'not found': {
|
||||
match: 'NotFoundError',
|
||||
@@ -484,6 +546,11 @@ const CASES = {
|
||||
flag: FIX_DELETE_PERMISSION,
|
||||
action: fixDeletePermission,
|
||||
},
|
||||
'missing file hash': {
|
||||
match: '"bad file hash"',
|
||||
flag: FIX_MISSING_HASH,
|
||||
action: fixMissingHash,
|
||||
},
|
||||
}
|
||||
|
||||
const STATS = {
|
||||
@@ -513,7 +580,15 @@ async function processLog() {
|
||||
nextLine: for await (const line of rl) {
|
||||
if (gracefulShutdownInitiated) break
|
||||
STATS.processedLines++
|
||||
if (!line.includes('"failed to process file"')) continue
|
||||
if (
|
||||
!(
|
||||
line.includes('"failed to process file"') ||
|
||||
// Process missing hashes as flagged by find_malformed_filetrees.mjs
|
||||
line.includes('"bad file-tree path"')
|
||||
)
|
||||
) {
|
||||
continue
|
||||
}
|
||||
|
||||
for (const [name, { match, flag, action }] of Object.entries(CASES)) {
|
||||
if (!line.includes(match)) continue
|
||||
|
||||
@@ -113,6 +113,8 @@ describe('back_fill_file_hash_fix_up script', function () {
|
||||
const fileIdRestoreFromFilestore0 = objectIdFromTime('2017-02-01T00:10:00Z')
|
||||
const fileIdRestoreFromFilestore1 = objectIdFromTime('2017-02-01T00:11:00Z')
|
||||
const fileIdMissing2 = objectIdFromTime('2017-02-01T00:12:00Z')
|
||||
const fileIdHashMissing0 = objectIdFromTime('2017-02-01T00:13:00Z')
|
||||
const fileIdHashMissing1 = objectIdFromTime('2017-02-01T00:14:00Z')
|
||||
const contentCorruptedBlob = 'string that produces another hash'
|
||||
const contentDoesNotExistAsBlob = 'does not exist as blob'
|
||||
const hashDoesNotExistAsBlob = gitBlobHashBuffer(
|
||||
@@ -145,6 +147,16 @@ describe('back_fill_file_hash_fix_up script', function () {
|
||||
historyId: historyId0,
|
||||
fileId: fileIdRestoreFromFilestore1,
|
||||
},
|
||||
{
|
||||
projectId: projectId0,
|
||||
historyId: historyId0,
|
||||
fileId: fileIdHashMissing0,
|
||||
},
|
||||
{
|
||||
projectId: projectId0,
|
||||
historyId: historyId0,
|
||||
fileId: fileIdHashMissing1,
|
||||
},
|
||||
{
|
||||
projectId: projectIdDeleted0,
|
||||
historyId: historyIdDeleted0,
|
||||
@@ -263,6 +275,19 @@ describe('back_fill_file_hash_fix_up script', function () {
|
||||
err: { message: 'some other error' },
|
||||
msg: 'failed to process file',
|
||||
},
|
||||
// from find_malformed_filetrees.mjs
|
||||
{
|
||||
projectId: projectId0,
|
||||
_id: fileIdHashMissing0,
|
||||
reason: 'bad file hash',
|
||||
msg: 'bad file-tree path',
|
||||
},
|
||||
{
|
||||
projectId: projectId0,
|
||||
_id: fileIdHashMissing1,
|
||||
reason: 'bad file hash',
|
||||
msg: 'bad file-tree path',
|
||||
},
|
||||
]
|
||||
if (PRINT_IDS_AND_HASHES_FOR_DEBUGGING) {
|
||||
const fileIds = {
|
||||
@@ -279,6 +304,8 @@ describe('back_fill_file_hash_fix_up script', function () {
|
||||
fileIdWithDifferentHashRestore,
|
||||
fileIdRestoreFromFilestore0,
|
||||
fileIdRestoreFromFilestore1,
|
||||
fileIdHashMissing0,
|
||||
fileIdHashMissing1,
|
||||
}
|
||||
console.log({
|
||||
projectId0,
|
||||
@@ -316,6 +343,19 @@ describe('back_fill_file_hash_fix_up script', function () {
|
||||
`${projectId0}/${fileIdRestoreFromFilestore1}`,
|
||||
Stream.Readable.from([fileIdRestoreFromFilestore1.toString()])
|
||||
)
|
||||
await FILESTORE_PERSISTOR.sendStream(
|
||||
USER_FILES_BUCKET_NAME,
|
||||
`${projectId0}/${fileIdHashMissing0}`,
|
||||
Stream.Readable.from([fileIdHashMissing0.toString()])
|
||||
)
|
||||
await FILESTORE_PERSISTOR.sendStream(
|
||||
USER_FILES_BUCKET_NAME,
|
||||
`${projectId0}/${fileIdHashMissing1}`,
|
||||
Stream.Readable.from([fileIdHashMissing1.toString()])
|
||||
)
|
||||
await new BlobStore(historyId0.toString()).putString(
|
||||
fileIdHashMissing1.toString() // partially processed
|
||||
)
|
||||
await new BlobStore(historyId0.toString()).putString(
|
||||
fileIdBlobExistsInGCS0.toString()
|
||||
)
|
||||
@@ -362,6 +402,8 @@ describe('back_fill_file_hash_fix_up script', function () {
|
||||
{ _id: fileIdMissing0 },
|
||||
{ _id: fileIdMissing0 }, // bad file-tree, duplicated fileRef.
|
||||
{ _id: fileIdMissing2 },
|
||||
{ _id: fileIdHashMissing0 },
|
||||
{ _id: fileIdHashMissing1 },
|
||||
{
|
||||
_id: fileIdWithDifferentHashFound,
|
||||
hash: gitBlobHash(fileIdInGoodState),
|
||||
@@ -506,8 +548,8 @@ describe('back_fill_file_hash_fix_up script', function () {
|
||||
})
|
||||
it('should print stats', function () {
|
||||
expect(stats).to.contain({
|
||||
processedLines: 14,
|
||||
success: 9,
|
||||
processedLines: 16,
|
||||
success: 11,
|
||||
alreadyProcessed: 0,
|
||||
fileDeleted: 0,
|
||||
skipped: 0,
|
||||
@@ -518,9 +560,9 @@ describe('back_fill_file_hash_fix_up script', function () {
|
||||
it('should handle re-run on same logs', async function () {
|
||||
;({ stats } = await runScriptWithLogs())
|
||||
expect(stats).to.contain({
|
||||
processedLines: 14,
|
||||
processedLines: 16,
|
||||
success: 0,
|
||||
alreadyProcessed: 6,
|
||||
alreadyProcessed: 8,
|
||||
fileDeleted: 3,
|
||||
skipped: 0,
|
||||
failed: 3,
|
||||
@@ -577,6 +619,16 @@ describe('back_fill_file_hash_fix_up script', function () {
|
||||
// { _id: fileIdMissing0 },
|
||||
// Removed
|
||||
// { _id: fileIdMissing2 },
|
||||
// Added hash
|
||||
{
|
||||
_id: fileIdHashMissing0,
|
||||
hash: gitBlobHash(fileIdHashMissing0),
|
||||
},
|
||||
// Added hash
|
||||
{
|
||||
_id: fileIdHashMissing1,
|
||||
hash: gitBlobHash(fileIdHashMissing1),
|
||||
},
|
||||
// No change, should warn about the find.
|
||||
{
|
||||
_id: fileIdWithDifferentHashFound,
|
||||
@@ -639,7 +691,7 @@ describe('back_fill_file_hash_fix_up script', function () {
|
||||
],
|
||||
overleaf: { history: { id: historyId0 } },
|
||||
// Incremented when removing file/updating hash
|
||||
version: 6,
|
||||
version: 8,
|
||||
},
|
||||
])
|
||||
expect(await deletedProjectsCollection.find({}).toArray()).to.deep.equal([
|
||||
|
||||
Reference in New Issue
Block a user