mirror of
https://github.com/yu-i-i/overleaf-cep.git
synced 2026-06-06 23:59:01 +02:00
Merge pull request #22870 from overleaf/jpa-back-fill-fix-up
[history-v1] add script for fixing up back-fill errors GitOrigin-RevId: 118992a32c1f6da4289cd35399ddd07a741da4ee
This commit is contained in:
@@ -0,0 +1,569 @@
|
||||
// @ts-check
|
||||
import Events from 'node:events'
|
||||
import fs from 'node:fs'
|
||||
import Stream from 'node:stream'
|
||||
import { ObjectId } from 'mongodb'
|
||||
import logger from '@overleaf/logger'
|
||||
import OError from '@overleaf/o-error'
|
||||
import {
|
||||
BlobStore,
|
||||
getStringLengthOfFile,
|
||||
GLOBAL_BLOBS,
|
||||
makeBlobForFile,
|
||||
} from '../lib/blob_store/index.js'
|
||||
import { db } from '../lib/mongodb.js'
|
||||
import commandLineArgs from 'command-line-args'
|
||||
import readline from 'node:readline'
|
||||
import { _blobIsBackedUp, backupBlob } from '../lib/backupBlob.mjs'
|
||||
import { NotFoundError } from '@overleaf/object-persistor/src/Errors.js'
|
||||
import filestorePersistor from '../lib/persistor.js'
|
||||
|
||||
// Silence warning.
|
||||
Events.setMaxListeners(20)
|
||||
|
||||
// Enable caching for ObjectId.toString()
|
||||
ObjectId.cacheHexString = true
|
||||
|
||||
/**
|
||||
* @typedef {import("overleaf-editor-core").Blob} Blob
|
||||
* @typedef {import("mongodb").Collection} Collection
|
||||
* @typedef {import("mongodb").Collection<Project>} ProjectsCollection
|
||||
* @typedef {import("mongodb").Collection<{project: Project}>} DeletedProjectsCollection
|
||||
*/
|
||||
|
||||
/**
|
||||
* @typedef {Object} FileRef
|
||||
* @property {ObjectId} _id
|
||||
* @property {string} hash
|
||||
*/
|
||||
|
||||
/**
|
||||
* @typedef {Object} Folder
|
||||
* @property {Array<Folder>} folders
|
||||
* @property {Array<FileRef>} fileRefs
|
||||
*/
|
||||
|
||||
/**
|
||||
* @typedef {Object} Project
|
||||
* @property {ObjectId} _id
|
||||
* @property {Array<Folder>} rootFolder
|
||||
* @property {{history: {id: (number|string)}}} overleaf
|
||||
*/
|
||||
|
||||
/**
|
||||
* @return {{FIX_NOT_FOUND: boolean, FIX_HASH_MISMATCH: boolean, FIX_DELETE_PERMISSION: boolean, LOGS: string}}
|
||||
*/
|
||||
function parseArgs() {
|
||||
const args = commandLineArgs([
|
||||
{ name: 'fixNotFound', type: String, defaultValue: 'true' },
|
||||
{ name: 'fixDeletePermission', type: String, defaultValue: 'true' },
|
||||
{ name: 'fixHashMismatch', type: String, defaultValue: 'true' },
|
||||
{ name: 'logs', type: String, defaultValue: '' },
|
||||
])
|
||||
/**
|
||||
* commandLineArgs cannot handle --foo=false, so go the long way
|
||||
* @param {string} name
|
||||
* @return {boolean}
|
||||
*/
|
||||
function boolVal(name) {
|
||||
const v = args[name]
|
||||
if (['true', 'false'].includes(v)) return v === 'true'
|
||||
throw new Error(`expected "true" or "false" for boolean option ${name}`)
|
||||
}
|
||||
return {
|
||||
FIX_HASH_MISMATCH: boolVal('fixNotFound'),
|
||||
FIX_DELETE_PERMISSION: boolVal('fixDeletePermission'),
|
||||
FIX_NOT_FOUND: boolVal('fixHashMismatch'),
|
||||
LOGS: args.logs,
|
||||
}
|
||||
}
|
||||
|
||||
const { FIX_HASH_MISMATCH, FIX_DELETE_PERMISSION, FIX_NOT_FOUND, LOGS } =
|
||||
parseArgs()
|
||||
if (!LOGS) {
|
||||
throw new Error('--logs parameter missing')
|
||||
}
|
||||
const BUFFER_DIR = fs.mkdtempSync(
|
||||
process.env.BUFFER_DIR_PREFIX || '/tmp/back_fill_file_hash-'
|
||||
)
|
||||
const USER_FILES_BUCKET_NAME = process.env.USER_FILES_BUCKET_NAME || ''
|
||||
if (!USER_FILES_BUCKET_NAME) {
|
||||
throw new Error('env var USER_FILES_BUCKET_NAME is missing')
|
||||
}
|
||||
// https://nodejs.org/api/stream.html#streamgetdefaulthighwatermarkobjectmode
|
||||
const STREAM_HIGH_WATER_MARK = parseInt(
|
||||
process.env.STREAM_HIGH_WATER_MARK || (64 * 1024).toString(),
|
||||
10
|
||||
)
|
||||
|
||||
/** @type {ProjectsCollection} */
|
||||
const projectsCollection = db.collection('projects')
|
||||
/** @type {DeletedProjectsCollection} */
|
||||
const deletedProjectsCollection = db.collection('deletedProjects')
|
||||
|
||||
let gracefulShutdownInitiated = false
|
||||
|
||||
process.on('SIGINT', handleSignal)
|
||||
process.on('SIGTERM', handleSignal)
|
||||
|
||||
function handleSignal() {
|
||||
gracefulShutdownInitiated = true
|
||||
console.warn('graceful shutdown initiated, draining queue')
|
||||
}
|
||||
|
||||
class FileDeletedError extends OError {}
|
||||
|
||||
/** @type {Map<string,{project: Project, projectSoftDeleted: boolean}>} */
|
||||
const PROJECT_CACHE = new Map()
|
||||
|
||||
/**
|
||||
* @param {string} projectId
|
||||
* @return {Promise<{project: Project, projectSoftDeleted: boolean}>}
|
||||
*/
|
||||
async function getProject(projectId) {
|
||||
const cached = PROJECT_CACHE.get(projectId)
|
||||
if (cached) return cached
|
||||
|
||||
let projectSoftDeleted
|
||||
let project = await projectsCollection.findOne({
|
||||
_id: new ObjectId(projectId),
|
||||
})
|
||||
if (project) {
|
||||
projectSoftDeleted = false
|
||||
} else {
|
||||
const softDeleted = await deletedProjectsCollection.findOne({
|
||||
'deleterData.deletedProjectId': new ObjectId(projectId),
|
||||
project: { $exists: true },
|
||||
})
|
||||
if (!softDeleted) {
|
||||
throw new OError('project hard-deleted')
|
||||
}
|
||||
project = softDeleted.project
|
||||
projectSoftDeleted = true
|
||||
}
|
||||
PROJECT_CACHE.set(projectId, { projectSoftDeleted, project })
|
||||
return { projectSoftDeleted, project }
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {Folder} folder
|
||||
* @param {string} fileId
|
||||
* @return {{path: string, fileRef: FileRef, folder: Folder}|null}
|
||||
*/
|
||||
function getFileTreePath(folder, fileId) {
|
||||
if (!folder) return null
|
||||
let idx = 0
|
||||
if (Array.isArray(folder.fileRefs)) {
|
||||
for (const fileRef of folder.fileRefs) {
|
||||
if (fileRef?._id.toString() === fileId) {
|
||||
return {
|
||||
fileRef,
|
||||
path: `.fileRefs.${idx}`,
|
||||
folder,
|
||||
}
|
||||
}
|
||||
idx++
|
||||
}
|
||||
}
|
||||
idx = 0
|
||||
if (Array.isArray(folder.folders)) {
|
||||
for (const child of folder.folders) {
|
||||
const match = getFileTreePath(child, fileId)
|
||||
if (match) {
|
||||
return {
|
||||
fileRef: match.fileRef,
|
||||
folder: match.folder,
|
||||
path: `.folders.${idx}${match.path}`,
|
||||
}
|
||||
}
|
||||
idx++
|
||||
}
|
||||
}
|
||||
return null
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} projectId
|
||||
* @param {string} fileId
|
||||
* @return {Promise<{fileRef: FileRef, folder: Folder, fullPath: string, query: Object, projectSoftDeleted: boolean}>}
|
||||
*/
|
||||
async function findFile(projectId, fileId) {
|
||||
const { projectSoftDeleted, project } = await getProject(projectId)
|
||||
const match = getFileTreePath(project.rootFolder[0], fileId)
|
||||
if (!match) {
|
||||
throw new FileDeletedError('file not found in file-tree', {
|
||||
projectSoftDeleted,
|
||||
})
|
||||
}
|
||||
const { path, fileRef, folder } = match
|
||||
let fullPath
|
||||
let query
|
||||
if (projectSoftDeleted) {
|
||||
fullPath = `project.rootFolder.0${path}`
|
||||
query = {
|
||||
'deleterData.deletedProjectId': new ObjectId(projectId),
|
||||
[`${fullPath}._id`]: new ObjectId(fileId),
|
||||
}
|
||||
} else {
|
||||
fullPath = `rootFolder.0${path}`
|
||||
query = {
|
||||
_id: new ObjectId(projectId),
|
||||
[`${fullPath}._id`]: new ObjectId(fileId),
|
||||
}
|
||||
}
|
||||
return {
|
||||
projectSoftDeleted,
|
||||
query,
|
||||
fullPath,
|
||||
fileRef,
|
||||
folder,
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} line
|
||||
* @return {Promise<boolean>}
|
||||
*/
|
||||
async function fixNotFound(line) {
|
||||
const { projectId, fileId, bucketName } = JSON.parse(line)
|
||||
if (bucketName !== USER_FILES_BUCKET_NAME) {
|
||||
throw new OError('not found case for another bucket')
|
||||
}
|
||||
|
||||
const { projectSoftDeleted, query, fullPath, fileRef, folder } =
|
||||
await findFile(projectId, fileId)
|
||||
logger.info({ projectId, fileId, fileRef }, 'removing fileRef')
|
||||
// Copied from _removeElementFromMongoArray (https://github.com/overleaf/internal/blob/11e09528c153de6b7766d18c3c90d94962190371/services/web/app/src/Features/Project/ProjectEntityMongoUpdateHandler.js)
|
||||
const nonArrayPath = fullPath.slice(0, fullPath.lastIndexOf('.'))
|
||||
let result
|
||||
if (projectSoftDeleted) {
|
||||
result = await deletedProjectsCollection.updateOne(query, {
|
||||
$pull: { [nonArrayPath]: { _id: new ObjectId(fileId) } },
|
||||
$inc: { 'project.version': 1 },
|
||||
})
|
||||
} else {
|
||||
result = await projectsCollection.updateOne(query, {
|
||||
$pull: { [nonArrayPath]: { _id: new ObjectId(fileId) } },
|
||||
$inc: { version: 1 },
|
||||
})
|
||||
}
|
||||
if (result.matchedCount !== 1) {
|
||||
throw new OError('file-tree write did not match', { result })
|
||||
}
|
||||
// Update the cache. The mongo-path of the next file will be off otherwise.
|
||||
folder.fileRefs = folder.fileRefs.filter(f => !f._id.equals(fileId))
|
||||
return true
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} projectId
|
||||
* @param {string} fileId
|
||||
* @param {string} hash
|
||||
* @return {Promise<void>}
|
||||
*/
|
||||
async function setHashInMongo(projectId, fileId, hash) {
|
||||
const { projectSoftDeleted, query, fullPath, fileRef } = await findFile(
|
||||
projectId,
|
||||
fileId
|
||||
)
|
||||
if (fileRef.hash === hash) return
|
||||
logger.info({ projectId, fileId, fileRef, hash }, 'setting fileRef hash')
|
||||
let result
|
||||
if (projectSoftDeleted) {
|
||||
result = await deletedProjectsCollection.updateOne(query, {
|
||||
$set: { [`${fullPath}.hash`]: hash },
|
||||
$inc: { 'project.version': 1 },
|
||||
})
|
||||
} else {
|
||||
result = await projectsCollection.updateOne(query, {
|
||||
$set: { [`${fullPath}.hash`]: hash },
|
||||
$inc: { version: 1 },
|
||||
})
|
||||
}
|
||||
if (result.matchedCount !== 1) {
|
||||
throw new OError('file-tree write did not match', { result })
|
||||
}
|
||||
fileRef.hash = hash // Update cache for completeness.
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} projectId
|
||||
* @param {string} fileId
|
||||
* @param {string} historyId
|
||||
* @return {Promise<void>}
|
||||
*/
|
||||
async function importRestoredFilestoreFile(projectId, fileId, historyId) {
|
||||
const filestoreKey = `${projectId}/${fileId}`
|
||||
const path = `${BUFFER_DIR}/${projectId}_${fileId}`
|
||||
try {
|
||||
let s
|
||||
try {
|
||||
s = await filestorePersistor.getObjectStream(
|
||||
USER_FILES_BUCKET_NAME,
|
||||
filestoreKey
|
||||
)
|
||||
} catch (err) {
|
||||
if (err instanceof NotFoundError) {
|
||||
throw new OError('missing blob, need to restore filestore file', {
|
||||
filestoreKey,
|
||||
})
|
||||
}
|
||||
throw err
|
||||
}
|
||||
await Stream.promises.pipeline(
|
||||
s,
|
||||
fs.createWriteStream(path, { highWaterMark: STREAM_HIGH_WATER_MARK })
|
||||
)
|
||||
const blobStore = new BlobStore(historyId)
|
||||
const blob = await blobStore.putFile(path)
|
||||
await backupBlob(historyId, blob, path)
|
||||
await setHashInMongo(projectId, fileId, blob.getHash())
|
||||
} finally {
|
||||
await fs.promises.rm(path, { force: true })
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} projectId
|
||||
* @param {string} fileId
|
||||
* @return {Promise<string>}
|
||||
*/
|
||||
async function computeFilestoreFileHash(projectId, fileId) {
|
||||
const filestoreKey = `${projectId}/${fileId}`
|
||||
const path = `${BUFFER_DIR}/${projectId}_${fileId}`
|
||||
try {
|
||||
let s
|
||||
try {
|
||||
s = await filestorePersistor.getObjectStream(
|
||||
USER_FILES_BUCKET_NAME,
|
||||
filestoreKey
|
||||
)
|
||||
} catch (err) {
|
||||
if (err instanceof NotFoundError) {
|
||||
throw new OError('missing blob, need to restore filestore file', {
|
||||
filestoreKey,
|
||||
})
|
||||
}
|
||||
throw err
|
||||
}
|
||||
await Stream.promises.pipeline(
|
||||
s,
|
||||
fs.createWriteStream(path, { highWaterMark: STREAM_HIGH_WATER_MARK })
|
||||
)
|
||||
const blob = await makeBlobForFile(path)
|
||||
return blob.getHash()
|
||||
} finally {
|
||||
await fs.promises.rm(path, { force: true })
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} line
|
||||
* @return {Promise<boolean>}
|
||||
*/
|
||||
async function fixHashMismatch(line) {
|
||||
const {
|
||||
projectId,
|
||||
fileId,
|
||||
hash: computedHash,
|
||||
entry: {
|
||||
hash: fileTreeHash,
|
||||
ctx: { historyId },
|
||||
},
|
||||
} = JSON.parse(line)
|
||||
const blobStore = new BlobStore(historyId)
|
||||
if (await blobStore.getBlob(fileTreeHash)) {
|
||||
throw new OError('found blob with computed filestore object hash')
|
||||
}
|
||||
if (!(await blobStore.getBlob(computedHash))) {
|
||||
await importRestoredFilestoreFile(projectId, fileId, historyId)
|
||||
return true
|
||||
}
|
||||
return await ensureBlobExistsForFileAndUploadToAWS(
|
||||
projectId,
|
||||
fileId,
|
||||
computedHash
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} projectId
|
||||
* @param {string} fileId
|
||||
* @param {string} hash
|
||||
* @return {Promise<boolean>}
|
||||
*/
|
||||
async function hashAlreadyUpdatedInFileTree(projectId, fileId, hash) {
|
||||
const { fileRef } = await findFile(projectId, fileId)
|
||||
return fileRef.hash === hash
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} projectId
|
||||
* @param {string} hash
|
||||
* @return {Promise<boolean>}
|
||||
*/
|
||||
async function needsBackingUpToAWS(projectId, hash) {
|
||||
if (GLOBAL_BLOBS.has(hash)) return false
|
||||
return !(await _blobIsBackedUp(projectId, hash))
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} projectId
|
||||
* @param {string} fileId
|
||||
* @param {string} hash
|
||||
* @return {Promise<boolean>}
|
||||
*/
|
||||
async function ensureBlobExistsForFileAndUploadToAWS(projectId, fileId, hash) {
|
||||
const { project } = await getProject(projectId)
|
||||
const historyId = project.overleaf.history.id.toString()
|
||||
const blobStore = new BlobStore(historyId)
|
||||
if (
|
||||
(await hashAlreadyUpdatedInFileTree(projectId, fileId, hash)) &&
|
||||
(await blobStore.getBlob(hash)) &&
|
||||
!(await needsBackingUpToAWS(projectId, hash))
|
||||
) {
|
||||
return false // already processed
|
||||
}
|
||||
|
||||
const stream = await blobStore.getStream(hash)
|
||||
const path = `${BUFFER_DIR}/${historyId}_${hash}`
|
||||
try {
|
||||
await Stream.promises.pipeline(
|
||||
stream,
|
||||
fs.createWriteStream(path, {
|
||||
highWaterMark: STREAM_HIGH_WATER_MARK,
|
||||
})
|
||||
)
|
||||
|
||||
const writtenBlob = await makeBlobForFile(path)
|
||||
writtenBlob.setStringLength(
|
||||
await getStringLengthOfFile(writtenBlob.getByteLength(), path)
|
||||
)
|
||||
if (writtenBlob.getHash() !== hash) {
|
||||
// Double check download, better safe than sorry.
|
||||
throw new OError('blob corrupted', { writtenBlob })
|
||||
}
|
||||
|
||||
let blob = await blobStore.getBlob(hash)
|
||||
if (!blob) {
|
||||
// Calling blobStore.putBlob would result in the same error again.
|
||||
// HACK: Skip upload to GCS and finalize putBlob operation directly.
|
||||
await blobStore.backend.insertBlob(historyId, writtenBlob)
|
||||
}
|
||||
await backupBlob(historyId, writtenBlob, path)
|
||||
} finally {
|
||||
await fs.promises.rm(path, { force: true })
|
||||
}
|
||||
await setHashInMongo(projectId, fileId, hash)
|
||||
return true
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} line
|
||||
* @return {Promise<boolean>}
|
||||
*/
|
||||
async function fixDeletePermission(line) {
|
||||
let { projectId, fileId, hash } = JSON.parse(line)
|
||||
if (!hash) hash = await computeFilestoreFileHash(projectId, fileId)
|
||||
return await ensureBlobExistsForFileAndUploadToAWS(projectId, fileId, hash)
|
||||
}
|
||||
|
||||
const CASES = {
|
||||
'not found': {
|
||||
match: 'NotFoundError',
|
||||
flag: FIX_NOT_FOUND,
|
||||
action: fixNotFound,
|
||||
},
|
||||
'hash mismatch': {
|
||||
match: 'OError: hash mismatch',
|
||||
flag: FIX_HASH_MISMATCH,
|
||||
action: fixHashMismatch,
|
||||
},
|
||||
'delete permission': {
|
||||
match: 'storage.objects.delete',
|
||||
flag: FIX_DELETE_PERMISSION,
|
||||
action: fixDeletePermission,
|
||||
},
|
||||
}
|
||||
|
||||
const STATS = {
|
||||
processedLines: 0,
|
||||
success: 0,
|
||||
alreadyProcessed: 0,
|
||||
fileDeleted: 0,
|
||||
skipped: 0,
|
||||
failed: 0,
|
||||
unmatched: 0,
|
||||
}
|
||||
function logStats() {
|
||||
console.log(
|
||||
JSON.stringify({
|
||||
time: new Date(),
|
||||
gracefulShutdownInitiated,
|
||||
...STATS,
|
||||
})
|
||||
)
|
||||
}
|
||||
setInterval(logStats, 10_000)
|
||||
|
||||
async function processLog() {
|
||||
const rl = readline.createInterface({
|
||||
input: fs.createReadStream(LOGS),
|
||||
})
|
||||
nextLine: for await (const line of rl) {
|
||||
if (gracefulShutdownInitiated) break
|
||||
STATS.processedLines++
|
||||
if (!line.includes('"failed to process file"')) continue
|
||||
|
||||
for (const [name, { match, flag, action }] of Object.entries(CASES)) {
|
||||
if (!line.includes(match)) continue
|
||||
if (flag) {
|
||||
try {
|
||||
if (await action(line)) {
|
||||
STATS.success++
|
||||
} else {
|
||||
STATS.alreadyProcessed++
|
||||
}
|
||||
} catch (err) {
|
||||
if (err instanceof FileDeletedError) {
|
||||
STATS.fileDeleted++
|
||||
logger.info({ err, line }, 'file deleted, skipping')
|
||||
} else {
|
||||
STATS.failed++
|
||||
logger.error({ err, line }, `failed to fix ${name}`)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
STATS.skipped++
|
||||
}
|
||||
continue nextLine
|
||||
}
|
||||
STATS.unmatched++
|
||||
logger.warn({ line }, 'unknown fatal error')
|
||||
}
|
||||
}
|
||||
|
||||
async function main() {
|
||||
try {
|
||||
await processLog()
|
||||
} finally {
|
||||
logStats()
|
||||
try {
|
||||
await fs.promises.rm(BUFFER_DIR, { recursive: true, force: true })
|
||||
} catch (err) {
|
||||
console.error(`Cleanup of BUFFER_DIR=${BUFFER_DIR} failed`, err)
|
||||
}
|
||||
}
|
||||
const { skipped, failed, unmatched } = STATS
|
||||
if (failed > 0) {
|
||||
process.exit(Math.min(failed, 99))
|
||||
} else if (unmatched > 0) {
|
||||
process.exit(100)
|
||||
} else if (skipped > 0) {
|
||||
process.exit(101)
|
||||
} else {
|
||||
process.exit(0)
|
||||
}
|
||||
}
|
||||
|
||||
await main()
|
||||
@@ -0,0 +1,761 @@
|
||||
import fs from 'node:fs'
|
||||
import Crypto from 'node:crypto'
|
||||
import Stream from 'node:stream'
|
||||
import { promisify } from 'node:util'
|
||||
import { Binary, ObjectId } from 'mongodb'
|
||||
import { Blob } from 'overleaf-editor-core'
|
||||
import { backedUpBlobs, blobs, db } from '../../../../storage/lib/mongodb.js'
|
||||
import cleanup from './support/cleanup.js'
|
||||
import testProjects from '../api/support/test_projects.js'
|
||||
import { execFile } from 'node:child_process'
|
||||
import { expect } from 'chai'
|
||||
import config from 'config'
|
||||
import { WritableBuffer } from '@overleaf/stream-utils'
|
||||
import {
|
||||
backupPersistor,
|
||||
projectBlobsBucket,
|
||||
} from '../../../../storage/lib/backupPersistor.mjs'
|
||||
import projectKey from '../../../../storage/lib/project_key.js'
|
||||
import {
|
||||
BlobStore,
|
||||
makeProjectKey,
|
||||
} from '../../../../storage/lib/blob_store/index.js'
|
||||
import ObjectPersistor from '@overleaf/object-persistor'
|
||||
|
||||
const TIMEOUT = 20 * 1_000
|
||||
|
||||
const { deksBucket } = config.get('backupStore')
|
||||
const { tieringStorageClass } = config.get('backupPersistor')
|
||||
|
||||
const projectsCollection = db.collection('projects')
|
||||
const deletedProjectsCollection = db.collection('deletedProjects')
|
||||
|
||||
const FILESTORE_PERSISTOR = ObjectPersistor({
|
||||
backend: 'gcs',
|
||||
gcs: {
|
||||
endpoint: {
|
||||
apiEndpoint: process.env.GCS_API_ENDPOINT,
|
||||
projectId: process.env.GCS_PROJECT_ID,
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
/**
|
||||
* @param {ObjectId} objectId
|
||||
* @return {string}
|
||||
*/
|
||||
function gitBlobHash(objectId) {
|
||||
return gitBlobHashBuffer(Buffer.from(objectId.toString()))
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {Buffer} buf
|
||||
* @return {string}
|
||||
*/
|
||||
function gitBlobHashBuffer(buf) {
|
||||
const sha = Crypto.createHash('sha1')
|
||||
sha.update(`blob ${buf.byteLength}\x00`)
|
||||
sha.update(buf)
|
||||
return sha.digest('hex')
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} gitBlobHash
|
||||
* @return {Binary}
|
||||
*/
|
||||
function binaryForGitBlobHash(gitBlobHash) {
|
||||
return new Binary(Buffer.from(gitBlobHash, 'hex'))
|
||||
}
|
||||
|
||||
async function listS3Bucket(bucket, wantStorageClass) {
|
||||
const client = backupPersistor._getClientForBucket(bucket)
|
||||
const response = await client.listObjectsV2({ Bucket: bucket }).promise()
|
||||
|
||||
for (const object of response.Contents || []) {
|
||||
expect(object).to.have.property('StorageClass', wantStorageClass)
|
||||
}
|
||||
|
||||
return (response.Contents || []).map(item => item.Key || '')
|
||||
}
|
||||
|
||||
function objectIdFromTime(timestamp) {
|
||||
return ObjectId.createFromTime(new Date(timestamp).getTime() / 1000)
|
||||
}
|
||||
|
||||
const PRINT_IDS_AND_HASHES_FOR_DEBUGGING = false
|
||||
|
||||
describe('back_fill_file_hash_fix_up script', function () {
|
||||
this.timeout(TIMEOUT)
|
||||
const USER_FILES_BUCKET_NAME = 'fake-user-files-gcs'
|
||||
|
||||
const projectId0 = objectIdFromTime('2017-01-01T00:00:00Z')
|
||||
const projectIdDeleted0 = objectIdFromTime('2017-01-01T00:04:00Z')
|
||||
const historyId0 = 42 // stored as number is mongo
|
||||
const historyIdDeleted0 = projectIdDeleted0.toString()
|
||||
const fileIdWithDifferentHashFound = objectIdFromTime('2017-02-01T00:00:00Z')
|
||||
const fileIdInGoodState = objectIdFromTime('2017-02-01T00:01:00Z')
|
||||
const fileIdBlobExistsInGCS0 = objectIdFromTime('2017-02-01T00:02:00Z')
|
||||
const fileIdWithDifferentHashNotFound0 = objectIdFromTime(
|
||||
'2017-02-01T00:03:00Z'
|
||||
)
|
||||
const fileIdWithDifferentHashNotFound1 = objectIdFromTime(
|
||||
'2017-02-01T00:04:00Z'
|
||||
)
|
||||
const fileIdBlobExistsInGCSCorrupted = objectIdFromTime(
|
||||
'2017-02-01T00:05:00Z'
|
||||
)
|
||||
const fileIdMissing0 = objectIdFromTime('2024-02-01T00:06:00Z')
|
||||
const fileIdMissing1 = objectIdFromTime('2017-02-01T00:07:00Z')
|
||||
const fileIdWithDifferentHashRestore = objectIdFromTime(
|
||||
'2017-02-01T00:08:00Z'
|
||||
)
|
||||
const fileIdBlobExistsInGCS1 = objectIdFromTime('2017-02-01T00:09:00Z')
|
||||
const fileIdRestoreFromFilestore0 = objectIdFromTime('2017-02-01T00:10:00Z')
|
||||
const fileIdRestoreFromFilestore1 = objectIdFromTime('2017-02-01T00:11:00Z')
|
||||
const fileIdMissing2 = objectIdFromTime('2017-02-01T00:12:00Z')
|
||||
const contentCorruptedBlob = 'string that produces another hash'
|
||||
const contentDoesNotExistAsBlob = 'does not exist as blob'
|
||||
const hashDoesNotExistAsBlob = gitBlobHashBuffer(
|
||||
Buffer.from(contentDoesNotExistAsBlob)
|
||||
)
|
||||
const deleteProjectsRecordId0 = new ObjectId()
|
||||
const writtenBlobs = [
|
||||
{
|
||||
projectId: projectId0,
|
||||
historyId: historyId0,
|
||||
fileId: fileIdBlobExistsInGCS0,
|
||||
},
|
||||
{
|
||||
projectId: projectId0,
|
||||
historyId: historyId0,
|
||||
fileId: fileIdBlobExistsInGCS1,
|
||||
},
|
||||
{
|
||||
projectId: projectId0,
|
||||
historyId: historyId0,
|
||||
fileId: fileIdWithDifferentHashNotFound0,
|
||||
},
|
||||
{
|
||||
projectId: projectId0,
|
||||
historyId: historyId0,
|
||||
fileId: fileIdRestoreFromFilestore0,
|
||||
},
|
||||
{
|
||||
projectId: projectId0,
|
||||
historyId: historyId0,
|
||||
fileId: fileIdRestoreFromFilestore1,
|
||||
},
|
||||
{
|
||||
projectId: projectIdDeleted0,
|
||||
historyId: historyIdDeleted0,
|
||||
fileId: fileIdWithDifferentHashNotFound1,
|
||||
},
|
||||
]
|
||||
const logs = [
|
||||
{
|
||||
projectId: projectId0,
|
||||
fileId: fileIdWithDifferentHashFound,
|
||||
err: { message: 'OError: hash mismatch' },
|
||||
hash: gitBlobHash(fileIdMissing0), // does not matter
|
||||
entry: {
|
||||
ctx: { historyId: historyId0.toString() },
|
||||
hash: gitBlobHash(fileIdInGoodState),
|
||||
},
|
||||
msg: 'failed to process file',
|
||||
},
|
||||
{
|
||||
projectId: projectId0,
|
||||
fileId: fileIdWithDifferentHashRestore,
|
||||
err: { message: 'OError: hash mismatch' },
|
||||
hash: hashDoesNotExistAsBlob,
|
||||
entry: {
|
||||
ctx: { historyId: historyId0.toString() },
|
||||
hash: gitBlobHash(fileIdMissing0), // does not matter
|
||||
},
|
||||
msg: 'failed to process file',
|
||||
},
|
||||
{
|
||||
projectId: projectId0,
|
||||
fileId: fileIdWithDifferentHashNotFound0,
|
||||
err: { message: 'OError: hash mismatch' },
|
||||
hash: gitBlobHash(fileIdWithDifferentHashNotFound0),
|
||||
entry: {
|
||||
ctx: { historyId: historyId0.toString() },
|
||||
hash: hashDoesNotExistAsBlob,
|
||||
},
|
||||
msg: 'failed to process file',
|
||||
},
|
||||
{
|
||||
projectId: projectId0,
|
||||
fileId: fileIdRestoreFromFilestore0,
|
||||
err: { message: 'OError: hash mismatch' },
|
||||
hash: gitBlobHash(fileIdRestoreFromFilestore0),
|
||||
entry: {
|
||||
ctx: { historyId: historyId0.toString() },
|
||||
hash: hashDoesNotExistAsBlob,
|
||||
},
|
||||
msg: 'failed to process file',
|
||||
},
|
||||
{
|
||||
projectId: projectIdDeleted0,
|
||||
fileId: fileIdWithDifferentHashNotFound1,
|
||||
err: { message: 'OError: hash mismatch' },
|
||||
hash: gitBlobHash(fileIdWithDifferentHashNotFound1),
|
||||
entry: {
|
||||
ctx: { historyId: historyIdDeleted0.toString() },
|
||||
hash: hashDoesNotExistAsBlob,
|
||||
},
|
||||
msg: 'failed to process file',
|
||||
},
|
||||
{
|
||||
projectId: projectId0,
|
||||
fileId: fileIdMissing0,
|
||||
bucketName: USER_FILES_BUCKET_NAME,
|
||||
err: { message: 'NotFoundError' },
|
||||
msg: 'failed to process file',
|
||||
},
|
||||
{
|
||||
projectId: projectId0,
|
||||
fileId: fileIdMissing2,
|
||||
bucketName: USER_FILES_BUCKET_NAME,
|
||||
err: { message: 'NotFoundError' },
|
||||
msg: 'failed to process file',
|
||||
},
|
||||
{
|
||||
projectId: projectId0,
|
||||
fileId: fileIdBlobExistsInGCS0,
|
||||
hash: gitBlobHash(fileIdBlobExistsInGCS0),
|
||||
err: { message: 'storage.objects.delete' },
|
||||
msg: 'failed to process file',
|
||||
},
|
||||
{
|
||||
projectId: projectId0,
|
||||
fileId: fileIdBlobExistsInGCSCorrupted,
|
||||
hash: gitBlobHash(fileIdBlobExistsInGCSCorrupted),
|
||||
err: { message: 'storage.objects.delete' },
|
||||
msg: 'failed to process file',
|
||||
},
|
||||
{
|
||||
projectId: projectId0,
|
||||
fileId: fileIdBlobExistsInGCS1,
|
||||
hash: gitBlobHash(fileIdBlobExistsInGCS1),
|
||||
err: { message: 'storage.objects.delete' },
|
||||
msg: 'failed to process file',
|
||||
},
|
||||
{
|
||||
projectId: projectId0,
|
||||
fileId: fileIdRestoreFromFilestore1,
|
||||
err: { message: 'storage.objects.delete' },
|
||||
msg: 'failed to process file',
|
||||
},
|
||||
{
|
||||
projectId: projectIdDeleted0,
|
||||
fileId: fileIdMissing1,
|
||||
bucketName: USER_FILES_BUCKET_NAME,
|
||||
err: { message: 'NotFoundError' },
|
||||
msg: 'failed to process file',
|
||||
},
|
||||
{
|
||||
err: { message: 'spurious error' },
|
||||
msg: 'failed to process file, trying again',
|
||||
},
|
||||
{
|
||||
err: { message: 'some other error' },
|
||||
msg: 'failed to process file',
|
||||
},
|
||||
]
|
||||
if (PRINT_IDS_AND_HASHES_FOR_DEBUGGING) {
|
||||
const fileIds = {
|
||||
fileIdWithDifferentHashFound,
|
||||
fileIdInGoodState,
|
||||
fileIdBlobExistsInGCS0,
|
||||
fileIdBlobExistsInGCS1,
|
||||
fileIdWithDifferentHashNotFound0,
|
||||
fileIdWithDifferentHashNotFound1,
|
||||
fileIdBlobExistsInGCSCorrupted,
|
||||
fileIdMissing0,
|
||||
fileIdMissing1,
|
||||
fileIdMissing2,
|
||||
fileIdWithDifferentHashRestore,
|
||||
fileIdRestoreFromFilestore0,
|
||||
fileIdRestoreFromFilestore1,
|
||||
}
|
||||
console.log({
|
||||
projectId0,
|
||||
projectIdDeleted0,
|
||||
historyId0,
|
||||
historyIdDeleted0,
|
||||
...fileIds,
|
||||
hashDoesNotExistAsBlob,
|
||||
})
|
||||
for (const [name, v] of Object.entries(fileIds)) {
|
||||
console.log(
|
||||
name,
|
||||
gitBlobHash(v),
|
||||
Array.from(binaryForGitBlobHash(gitBlobHash(v)).value())
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
before(cleanup.everything)
|
||||
before('cleanup s3 buckets', async function () {
|
||||
await backupPersistor.deleteDirectory(deksBucket, '')
|
||||
await backupPersistor.deleteDirectory(projectBlobsBucket, '')
|
||||
expect(await listS3Bucket(deksBucket)).to.have.length(0)
|
||||
expect(await listS3Bucket(projectBlobsBucket)).to.have.length(0)
|
||||
})
|
||||
|
||||
before('populate blobs/GCS', async function () {
|
||||
await FILESTORE_PERSISTOR.sendStream(
|
||||
USER_FILES_BUCKET_NAME,
|
||||
`${projectId0}/${fileIdRestoreFromFilestore0}`,
|
||||
Stream.Readable.from([fileIdRestoreFromFilestore0.toString()])
|
||||
)
|
||||
await FILESTORE_PERSISTOR.sendStream(
|
||||
USER_FILES_BUCKET_NAME,
|
||||
`${projectId0}/${fileIdRestoreFromFilestore1}`,
|
||||
Stream.Readable.from([fileIdRestoreFromFilestore1.toString()])
|
||||
)
|
||||
await new BlobStore(historyId0.toString()).putString(
|
||||
fileIdBlobExistsInGCS0.toString()
|
||||
)
|
||||
await new BlobStore(historyId0.toString()).putString(
|
||||
fileIdBlobExistsInGCS1.toString()
|
||||
)
|
||||
await new BlobStore(historyId0.toString()).putString(
|
||||
fileIdRestoreFromFilestore1.toString()
|
||||
)
|
||||
const path = '/tmp/test-blob-corrupted'
|
||||
try {
|
||||
await fs.promises.writeFile(path, contentCorruptedBlob)
|
||||
await new BlobStore(historyId0.toString()).putBlob(
|
||||
path,
|
||||
new Blob(gitBlobHash(fileIdBlobExistsInGCSCorrupted), 42)
|
||||
)
|
||||
} finally {
|
||||
await fs.promises.rm(path, { force: true })
|
||||
}
|
||||
await cleanup.postgres()
|
||||
await cleanup.mongo()
|
||||
await Promise.all([
|
||||
testProjects.createEmptyProject(historyId0.toString()),
|
||||
testProjects.createEmptyProject(historyIdDeleted0),
|
||||
])
|
||||
await new BlobStore(historyId0.toString()).putString(
|
||||
fileIdWithDifferentHashNotFound0.toString()
|
||||
)
|
||||
await new BlobStore(historyIdDeleted0.toString()).putString(
|
||||
fileIdWithDifferentHashNotFound1.toString()
|
||||
)
|
||||
await new BlobStore(historyId0.toString()).putString(
|
||||
fileIdInGoodState.toString()
|
||||
)
|
||||
})
|
||||
|
||||
before('populate mongo', async function () {
|
||||
await projectsCollection.insertMany([
|
||||
{
|
||||
_id: projectId0,
|
||||
rootFolder: [
|
||||
{
|
||||
fileRefs: [
|
||||
{ _id: fileIdMissing0 },
|
||||
{ _id: fileIdMissing0 }, // bad file-tree, duplicated fileRef.
|
||||
{ _id: fileIdMissing2 },
|
||||
{
|
||||
_id: fileIdWithDifferentHashFound,
|
||||
hash: gitBlobHash(fileIdInGoodState),
|
||||
},
|
||||
{
|
||||
_id: fileIdWithDifferentHashRestore,
|
||||
hash: gitBlobHash(fileIdMissing0),
|
||||
},
|
||||
],
|
||||
folders: [
|
||||
{
|
||||
docs: [],
|
||||
},
|
||||
null,
|
||||
{
|
||||
fileRefs: [
|
||||
null,
|
||||
{
|
||||
_id: fileIdInGoodState,
|
||||
hash: gitBlobHash(fileIdInGoodState),
|
||||
},
|
||||
{
|
||||
_id: fileIdWithDifferentHashNotFound0,
|
||||
hash: hashDoesNotExistAsBlob,
|
||||
},
|
||||
{
|
||||
_id: fileIdRestoreFromFilestore0,
|
||||
hash: hashDoesNotExistAsBlob,
|
||||
},
|
||||
{
|
||||
_id: fileIdRestoreFromFilestore1,
|
||||
},
|
||||
{
|
||||
_id: fileIdBlobExistsInGCS0,
|
||||
hash: gitBlobHash(fileIdBlobExistsInGCS0),
|
||||
},
|
||||
{
|
||||
_id: fileIdBlobExistsInGCSCorrupted,
|
||||
hash: gitBlobHash(fileIdBlobExistsInGCSCorrupted),
|
||||
},
|
||||
{ _id: fileIdBlobExistsInGCS1 },
|
||||
],
|
||||
folders: [],
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
overleaf: { history: { id: historyId0 } },
|
||||
version: 0,
|
||||
},
|
||||
])
|
||||
await deletedProjectsCollection.insertMany([
|
||||
{
|
||||
_id: deleteProjectsRecordId0,
|
||||
project: {
|
||||
_id: projectIdDeleted0,
|
||||
rootFolder: [
|
||||
{
|
||||
fileRefs: [
|
||||
{
|
||||
_id: fileIdWithDifferentHashNotFound1,
|
||||
hash: hashDoesNotExistAsBlob,
|
||||
},
|
||||
],
|
||||
folders: [
|
||||
{
|
||||
fileRefs: [],
|
||||
folders: [
|
||||
{ fileRefs: [{ _id: fileIdMissing1 }], folders: [] },
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
overleaf: { history: { id: historyIdDeleted0 } },
|
||||
version: 100,
|
||||
},
|
||||
deleterData: {
|
||||
deletedProjectId: projectIdDeleted0,
|
||||
},
|
||||
},
|
||||
])
|
||||
})
|
||||
|
||||
/**
|
||||
* @param {Array<string>} args
|
||||
* @param {Record<string, string>} env
|
||||
* @return {Promise<{ stdout: string, stderr: string, status: number }>}
|
||||
*/
|
||||
async function tryRunScript(args = [], env = {}) {
|
||||
let result
|
||||
try {
|
||||
result = await promisify(execFile)(
|
||||
process.argv0,
|
||||
['storage/scripts/back_fill_file_hash_fix_up.mjs', ...args],
|
||||
{
|
||||
encoding: 'utf-8',
|
||||
timeout: TIMEOUT - 500,
|
||||
env: {
|
||||
...process.env,
|
||||
USER_FILES_BUCKET_NAME,
|
||||
...env,
|
||||
LOG_LEVEL: 'warn', // Override LOG_LEVEL of acceptance tests
|
||||
},
|
||||
}
|
||||
)
|
||||
result.status = 0
|
||||
} catch (err) {
|
||||
const { stdout, stderr, code } = err
|
||||
if (typeof code !== 'number') {
|
||||
console.log(err)
|
||||
}
|
||||
result = { stdout, stderr, status: code }
|
||||
}
|
||||
expect((await fs.promises.readdir('/tmp')).join(';')).to.not.match(
|
||||
/back_fill_file_hash/
|
||||
)
|
||||
return result
|
||||
}
|
||||
async function runScriptWithLogs() {
|
||||
const logsPath = '/tmp/test-script-logs'
|
||||
let result
|
||||
try {
|
||||
await fs.promises.writeFile(
|
||||
logsPath,
|
||||
logs.map(e => JSON.stringify(e)).join('\n')
|
||||
)
|
||||
result = await tryRunScript([`--logs=${logsPath}`])
|
||||
} finally {
|
||||
await fs.promises.rm(logsPath, { force: true })
|
||||
}
|
||||
const stats = JSON.parse(result.stdout.trim().split('\n').pop())
|
||||
return {
|
||||
result,
|
||||
stats,
|
||||
}
|
||||
}
|
||||
|
||||
let result, stats
|
||||
before(async function () {
|
||||
;({ result, stats } = await runScriptWithLogs())
|
||||
})
|
||||
it('should print stats', function () {
|
||||
expect(stats).to.contain({
|
||||
processedLines: 14,
|
||||
success: 9,
|
||||
alreadyProcessed: 0,
|
||||
fileDeleted: 0,
|
||||
skipped: 0,
|
||||
failed: 3,
|
||||
unmatched: 1,
|
||||
})
|
||||
})
|
||||
it('should handle re-run on same logs', async function () {
|
||||
;({ stats } = await runScriptWithLogs())
|
||||
expect(stats).to.contain({
|
||||
processedLines: 14,
|
||||
success: 0,
|
||||
alreadyProcessed: 6,
|
||||
fileDeleted: 3,
|
||||
skipped: 0,
|
||||
failed: 3,
|
||||
unmatched: 1,
|
||||
})
|
||||
})
|
||||
it('should flag the unknown fatal error', function () {
|
||||
const unknown = result.stdout
|
||||
.split('\n')
|
||||
.filter(l => l.includes('unknown fatal error'))
|
||||
expect(unknown).to.have.length(1)
|
||||
const [line] = unknown
|
||||
expect(line).to.exist
|
||||
expect(line).to.include('some other error')
|
||||
})
|
||||
it('should flag the unexpected blob on mismatched hash', function () {
|
||||
const line = result.stdout
|
||||
.split('\n')
|
||||
.find(l => l.includes('found blob with computed filestore object hash'))
|
||||
expect(line).to.exist
|
||||
expect(line).to.include(projectId0.toString())
|
||||
expect(line).to.include(fileIdWithDifferentHashFound.toString())
|
||||
expect(line).to.include(gitBlobHash(fileIdInGoodState))
|
||||
})
|
||||
it('should flag the need to restore', function () {
|
||||
const line = result.stdout
|
||||
.split('\n')
|
||||
.find(l => l.includes('missing blob, need to restore filestore file'))
|
||||
expect(line).to.exist
|
||||
expect(line).to.include(projectId0.toString())
|
||||
expect(line).to.include(fileIdWithDifferentHashRestore.toString())
|
||||
expect(line).to.include(hashDoesNotExistAsBlob)
|
||||
})
|
||||
it('should flag the corrupted blob', function () {
|
||||
const line = result.stdout
|
||||
.split('\n')
|
||||
.find(l => l.includes('blob corrupted'))
|
||||
expect(line).to.exist
|
||||
expect(line).to.include(projectId0.toString())
|
||||
expect(line).to.include(fileIdBlobExistsInGCSCorrupted.toString())
|
||||
expect(line).to.include(
|
||||
gitBlobHashBuffer(Buffer.from(contentCorruptedBlob))
|
||||
)
|
||||
expect(line).to.include(gitBlobHash(fileIdBlobExistsInGCSCorrupted))
|
||||
})
|
||||
it('should update mongo', async function () {
|
||||
expect(await projectsCollection.find({}).toArray()).to.deep.equal([
|
||||
{
|
||||
_id: projectId0,
|
||||
rootFolder: [
|
||||
{
|
||||
fileRefs: [
|
||||
// Removed
|
||||
// { _id: fileIdMissing0 },
|
||||
// Removed
|
||||
// { _id: fileIdMissing2 },
|
||||
// No change, should warn about the find.
|
||||
{
|
||||
_id: fileIdWithDifferentHashFound,
|
||||
hash: gitBlobHash(fileIdInGoodState),
|
||||
},
|
||||
// No change, should warn about the need to restore.
|
||||
{
|
||||
_id: fileIdWithDifferentHashRestore,
|
||||
hash: gitBlobHash(fileIdMissing0),
|
||||
},
|
||||
],
|
||||
folders: [
|
||||
{
|
||||
docs: [],
|
||||
},
|
||||
null,
|
||||
{
|
||||
fileRefs: [
|
||||
null,
|
||||
// No change
|
||||
{
|
||||
_id: fileIdInGoodState,
|
||||
hash: gitBlobHash(fileIdInGoodState),
|
||||
},
|
||||
// Updated hash
|
||||
{
|
||||
_id: fileIdWithDifferentHashNotFound0,
|
||||
hash: gitBlobHash(fileIdWithDifferentHashNotFound0),
|
||||
},
|
||||
// Updated hash
|
||||
{
|
||||
_id: fileIdRestoreFromFilestore0,
|
||||
hash: gitBlobHash(fileIdRestoreFromFilestore0),
|
||||
},
|
||||
// Added hash
|
||||
{
|
||||
_id: fileIdRestoreFromFilestore1,
|
||||
hash: gitBlobHash(fileIdRestoreFromFilestore1),
|
||||
},
|
||||
// No change, blob created
|
||||
{
|
||||
_id: fileIdBlobExistsInGCS0,
|
||||
hash: gitBlobHash(fileIdBlobExistsInGCS0),
|
||||
},
|
||||
// No change, flagged
|
||||
{
|
||||
_id: fileIdBlobExistsInGCSCorrupted,
|
||||
hash: gitBlobHash(fileIdBlobExistsInGCSCorrupted),
|
||||
},
|
||||
// Added hash
|
||||
{
|
||||
_id: fileIdBlobExistsInGCS1,
|
||||
hash: gitBlobHash(fileIdBlobExistsInGCS1),
|
||||
},
|
||||
],
|
||||
folders: [],
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
overleaf: { history: { id: historyId0 } },
|
||||
// Incremented when removing file/updating hash
|
||||
version: 6,
|
||||
},
|
||||
])
|
||||
expect(await deletedProjectsCollection.find({}).toArray()).to.deep.equal([
|
||||
{
|
||||
_id: deleteProjectsRecordId0,
|
||||
project: {
|
||||
_id: projectIdDeleted0,
|
||||
rootFolder: [
|
||||
{
|
||||
fileRefs: [
|
||||
// Updated hash
|
||||
{
|
||||
_id: fileIdWithDifferentHashNotFound1,
|
||||
hash: gitBlobHash(fileIdWithDifferentHashNotFound1),
|
||||
},
|
||||
],
|
||||
folders: [
|
||||
{
|
||||
fileRefs: [],
|
||||
folders: [
|
||||
{
|
||||
fileRefs: [
|
||||
// Removed
|
||||
// { _id: fileIdMissing1 },
|
||||
],
|
||||
folders: [],
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
overleaf: { history: { id: historyIdDeleted0 } },
|
||||
// Incremented when removing file/updating hash
|
||||
version: 102,
|
||||
},
|
||||
deleterData: {
|
||||
deletedProjectId: projectIdDeleted0,
|
||||
},
|
||||
},
|
||||
])
|
||||
const writtenBlobsByProject = new Map()
|
||||
for (const { projectId, fileId } of writtenBlobs) {
|
||||
writtenBlobsByProject.set(
|
||||
projectId,
|
||||
(writtenBlobsByProject.get(projectId) || []).concat([fileId])
|
||||
)
|
||||
}
|
||||
expect(
|
||||
(await backedUpBlobs.find({}, { sort: { _id: 1 } }).toArray()).map(
|
||||
entry => {
|
||||
// blobs are pushed unordered into mongo. Sort the list for consistency.
|
||||
entry.blobs.sort()
|
||||
return entry
|
||||
}
|
||||
)
|
||||
).to.deep.equal(
|
||||
Array.from(writtenBlobsByProject.entries()).map(
|
||||
([projectId, fileIds]) => {
|
||||
return {
|
||||
_id: projectId,
|
||||
blobs: fileIds
|
||||
.map(fileId => binaryForGitBlobHash(gitBlobHash(fileId)))
|
||||
.sort(),
|
||||
}
|
||||
}
|
||||
)
|
||||
)
|
||||
})
|
||||
it('should have backed up all the files', async function () {
|
||||
expect(tieringStorageClass).to.exist
|
||||
const objects = await listS3Bucket(projectBlobsBucket, tieringStorageClass)
|
||||
expect(objects.sort()).to.deep.equal(
|
||||
writtenBlobs
|
||||
.map(({ historyId, fileId, hash }) =>
|
||||
makeProjectKey(historyId, hash || gitBlobHash(fileId))
|
||||
)
|
||||
.sort()
|
||||
)
|
||||
for (let { historyId, fileId } of writtenBlobs) {
|
||||
const hash = gitBlobHash(fileId.toString())
|
||||
const s = await backupPersistor.getObjectStream(
|
||||
projectBlobsBucket,
|
||||
makeProjectKey(historyId, hash),
|
||||
{ autoGunzip: true }
|
||||
)
|
||||
const buf = new WritableBuffer()
|
||||
await Stream.promises.pipeline(s, buf)
|
||||
expect(gitBlobHashBuffer(buf.getContents())).to.equal(hash)
|
||||
const id = buf.getContents().toString('utf-8')
|
||||
expect(id).to.equal(fileId.toString())
|
||||
// double check we are not comparing 'undefined' or '[object Object]' above
|
||||
expect(id).to.match(/^[a-f0-9]{24}$/)
|
||||
}
|
||||
const deks = await listS3Bucket(deksBucket, 'STANDARD')
|
||||
expect(deks.sort()).to.deep.equal(
|
||||
Array.from(
|
||||
new Set(
|
||||
writtenBlobs.map(
|
||||
({ historyId }) => projectKey.format(historyId) + '/dek'
|
||||
)
|
||||
)
|
||||
).sort()
|
||||
)
|
||||
})
|
||||
it('should have written the back filled files to history v1', async function () {
|
||||
for (const { historyId, fileId } of writtenBlobs) {
|
||||
const blobStore = new BlobStore(historyId.toString())
|
||||
const hash = gitBlobHash(fileId.toString())
|
||||
const blob = await blobStore.getBlob(hash)
|
||||
expect(blob).to.exist
|
||||
expect(blob.getByteLength()).to.equal(24)
|
||||
const id = await blobStore.getString(hash)
|
||||
expect(id).to.equal(fileId.toString())
|
||||
// double check we are not comparing 'undefined' or '[object Object]' above
|
||||
expect(id).to.match(/^[a-f0-9]{24}$/)
|
||||
}
|
||||
})
|
||||
})
|
||||
Reference in New Issue
Block a user