Add script for verifying sampled sets of projects

GitOrigin-RevId: 23a599ea269a528201c67cda66bd2d00c0d376e4
This commit is contained in:
Andrew Rumble
2025-03-05 11:02:31 +00:00
committed by Copybot
parent 061d67ee4b
commit afedce1b0e
6 changed files with 533 additions and 0 deletions

View File

@@ -0,0 +1,61 @@
// @ts-check
import { objectIdFromDate } from './utils.mjs'
import { db } from '../storage/lib/mongodb.js'
import config from 'config'
const projectsCollection = db.collection('projects')
const HAS_PROJECTS_WITHOUT_HISTORY =
config.get('hasProjectsWithoutHistory') === 'true'
/**
* @param {Date} start
* @param {Date} end
* @param {number} N
* @return {Promise<Array<string>>}
*/
export async function selectProjectsInDateRange(start, end, N) {
let projects = await projectsCollection
.aggregate([
{
$match: {
_id: {
$gt: objectIdFromDate(start),
$lte: objectIdFromDate(end),
},
},
},
{ $sample: { size: N } },
{ $project: { 'overleaf.history.id': 1 } },
])
.toArray()
if (HAS_PROJECTS_WITHOUT_HISTORY) {
projects = projects.filter(p => Boolean(p.overleaf?.history?.id))
if (projects.length === 0) {
// Very unlucky sample. Try again.
return await selectProjectsInDateRange(start, end, N)
}
}
return projects.map(p => p.overleaf.history.id.toString())
}
export async function* getSampleProjectsCursor(N) {
const cursor = projectsCollection.aggregate([
{ $sample: { size: N } },
{ $project: { 'overleaf.history.id': 1 } },
])
let validProjects = 0
for await (const project of cursor) {
if (HAS_PROJECTS_WITHOUT_HISTORY) {
continue
}
validProjects++
yield project.overleaf.history.id.toString()
}
if (validProjects === 0) {
yield* getSampleProjectsCursor(N)
}
}

View File

@@ -0,0 +1,272 @@
// @ts-check
import {
BackupCorruptedError,
BackupCorruptedInvalidBlobError,
BackupCorruptedMissingBlobError,
BackupRPOViolationChunkNotBackedUpError,
BackupRPOViolationError,
verifyProjectWithErrorContext,
} from '../storage/lib/backupVerifier.mjs'
import { promiseMapSettledWithLimit } from '@overleaf/promise-utils'
import logger from '@overleaf/logger'
import metrics from '@overleaf/metrics'
import {
getSampleProjectsCursor,
selectProjectsInDateRange,
} from './ProjectSampler.mjs'
import OError from '@overleaf/o-error'
const MS_PER_30_DAYS = 30 * 24 * 60 * 60 * 1000
const METRICS = {
backup_project_verification_failed: 'backup_project_verification_failed',
backup_project_verification_succeeded:
'backup_project_verification_succeeded',
}
let WRITE_METRICS = false
/**
* Allows writing metrics to be enabled or disabled.
* @param {Boolean} writeMetrics
*/
export function setWriteMetrics(writeMetrics) {
WRITE_METRICS = writeMetrics
}
/**
*
* @param {Error|unknown} error
* @param {string} historyId
*/
function handleVerificationError(error, historyId) {
let errorType = 'unknown'
// Additional detail for verbose/development
if (error instanceof Error) {
if (error instanceof BackupCorruptedInvalidBlobError) {
errorType = 'invalid-blob'
} else if (error instanceof BackupCorruptedMissingBlobError) {
errorType = 'missing-blob'
} else if (error instanceof BackupCorruptedError) {
errorType = 'backup-corrupted-unknown'
} else if (error instanceof BackupRPOViolationChunkNotBackedUpError) {
errorType = 'rpo-violation-chunk-not-backed-up'
} else if (error instanceof BackupRPOViolationError) {
errorType = 'rpo-violation'
}
}
logger.error(
{ errorType, historyId, error },
'error verifying project backup'
)
WRITE_METRICS &&
metrics.inc(METRICS.backup_project_verification_failed, 1, { errorType })
return errorType
}
/**
*
* @param {Date} startDate
* @param {Date} endDate
* @param {number} interval
* @returns {Array<VerificationJobSpecification>}
*/
function splitJobs(startDate, endDate, interval) {
/** @type {Array<VerificationJobSpecification>} */
const jobs = []
while (startDate < endDate) {
const nextStart = new Date(
Math.min(startDate.getTime() + interval, endDate.getTime())
)
jobs.push({ startDate, endDate: nextStart })
startDate = nextStart
}
return jobs
}
/**
*
* @param {Array<string>} historyIds
* @return {Promise<VerificationJobStatus>}
*/
async function verifyProjects(historyIds) {
let verified = 0
const errorTypes = []
for (const historyId of historyIds) {
try {
await verifyProjectWithErrorContext(historyId)
logger.debug({ historyId }, 'verified project backup successfully')
WRITE_METRICS &&
metrics.inc(METRICS.backup_project_verification_succeeded)
verified++
} catch (error) {
errorTypes.push(handleVerificationError(error, historyId))
}
}
return {
verified,
errorTypes,
hasFailure: errorTypes.length > 0,
total: historyIds.length,
}
}
/**
*
* @param {number} nProjectsToSample
* @return {Promise<VerificationJobStatus>}
*/
export async function verifyRandomProjectSample(nProjectsToSample) {
const historyIds = await getSampleProjectsCursor(nProjectsToSample)
const errorTypes = []
let verified = 0
let total = 0
for await (const historyId of historyIds) {
total++
try {
await verifyProjectWithErrorContext(historyId)
logger.debug({ historyId }, 'verified project backup successfully')
WRITE_METRICS &&
metrics.inc(METRICS.backup_project_verification_succeeded)
verified++
} catch (error) {
errorTypes.push(handleVerificationError(error, historyId))
}
}
return {
verified,
total,
errorTypes,
hasFailure: errorTypes.length > 0,
}
}
/**
* Samples projects with history IDs between the specified dates and verifies them.
*
* @param {Date} startDate
* @param {Date} endDate
* @param {number} projectsPerRange
* @return {Promise<VerificationJobStatus>}
*/
async function verifyRange(startDate, endDate, projectsPerRange) {
logger.info({ startDate, endDate }, 'verifying range')
const historyIds = await selectProjectsInDateRange(
startDate,
endDate,
projectsPerRange
)
if (historyIds.length === 0) {
logger.debug(
{ start: startDate, end: endDate },
'No projects found in range'
)
return {
startDate,
endDate,
verified: 0,
total: 0,
hasFailure: false,
errorTypes: [],
}
}
logger.debug(
{ startDate, endDate, total: historyIds.length },
'Verifying projects in range'
)
const { errorTypes, hasFailure, verified } = await verifyProjects(historyIds)
const jobStatus = {
verified,
total: historyIds.length,
hasFailure,
startDate,
endDate,
errorTypes,
}
logger.debug(jobStatus, 'verified range')
return jobStatus
}
/**
* @typedef {Object} VerificationJobSpecification
* @property {Date} startDate
* @property {Date} endDate
*/
/**
* @typedef {import('./types.d.ts').VerificationJobStatus} VerificationJobStatus
*/
/**
* @typedef {Object} VerifyDateRangeOptions
* @property {Date} startDate
* @property {Date} endDate
* @property {number} [interval]
* @property {number} [projectsPerRange]
* @property {number} [concurrency]
*/
/**
*
* @param {VerifyDateRangeOptions} options
* @return {Promise<VerificationJobStatus>}
*/
export async function verifyProjectsInDateRange({
concurrency = 0,
projectsPerRange = 10,
startDate,
endDate,
interval = MS_PER_30_DAYS,
}) {
const jobs = splitJobs(startDate, endDate, interval)
if (jobs.length === 0) {
throw new OError('Time range could not be split into jobs', {
start: startDate,
end: endDate,
interval,
})
}
const settlements = await promiseMapSettledWithLimit(
concurrency,
jobs,
({ startDate, endDate }) =>
verifyRange(startDate, endDate, projectsPerRange)
)
return settlements.reduce(
/**
*
* @param {VerificationJobStatus} acc
* @param settlement
* @return {VerificationJobStatus}
*/
(acc, settlement) => {
if (settlement.status !== 'rejected') {
if (settlement.value.hasFailure) {
acc.hasFailure = true
}
acc.total += settlement.value.total
acc.verified += settlement.value.verified
acc.errorTypes = acc.errorTypes.concat(settlement.value.errorTypes)
} else {
logger.error({ ...settlement.reason }, 'Error processing range')
}
return acc
},
/** @type {VerificationJobStatus} */
{
startDate,
endDate,
verified: 0,
total: 0,
hasFailure: false,
errorTypes: [],
}
)
}

View File

@@ -0,0 +1,8 @@
export type VerificationJobStatus = {
verified: number
total: number
startDate?: Date
endDate?: Date
hasFailure: boolean
errorTypes: Array<string>
}

View File

@@ -0,0 +1,9 @@
import { ObjectId } from 'mongodb'
/**
* @param {Date} time
* @return {ObjectId}
*/
export function objectIdFromDate(time) {
return ObjectId.createFromTime(time.getTime() / 1000)
}

View File

@@ -30,6 +30,7 @@
"zipStore": {
"zipTimeoutMs": "360000"
},
"hasProjectsWithoutHistory": false,
"minSoftDeletionPeriodDays": "90",
"maxDeleteKeys": "1000",
"useDeleteObjects": "true",

View File

@@ -0,0 +1,182 @@
// @ts-check
import commandLineArgs from 'command-line-args'
import {
setWriteMetrics,
verifyProjectsInDateRange,
verifyRandomProjectSample,
} from '../../backupVerifier/ProjectVerifier.mjs'
import knex from '../lib/knex.js'
import { client } from '../lib/mongodb.js'
import { setTimeout } from 'node:timers/promises'
import logger from '@overleaf/logger'
logger.logger.level('fatal')
const usageMessage = [
'Usage: node verify_range.mjs [--startDate <start>] [--endDate <end>] [--nProjects <n>] [--verbose] [--usage] [--writeMetrics] [--concurrency <n>] [--strategy <range|random>]',
'strategy: defaults to "range"; startDate and endDate are required for "range" strategy',
].join('\n')
/**
* Gracefully shutdown the process
* @param code
* @return {Promise<void>}
*/
async function gracefulShutdown(code = process.exitCode) {
await knex.destroy()
await client.close()
await setTimeout(1_000)
process.exit(code)
}
const STATS = {
verifiable: 0,
unverifiable: 0,
}
/**
* @typedef {Object} CLIOptions
* @property {() => Promise<VerificationJobStatus>} projectVerifier
* @property {boolean} verbose
*/
/**
* @typedef {import('../../backupVerifier/types.d.ts').VerificationJobStatus} VerificationJobStatus
*/
/**
*
* @return {CLIOptions}
*/
function getOptions() {
const {
startDate,
endDate,
concurrency,
writeMetrics,
verbose,
nProjects,
strategy,
usage,
} = commandLineArgs([
{ name: 'startDate', type: String },
{ name: 'endDate', type: String },
{ name: 'concurrency', type: Number, defaultValue: 1 },
{ name: 'verbose', type: Boolean, defaultValue: false },
{ name: 'nProjects', type: Number, defaultValue: 10 },
{ name: 'usage', type: Boolean, defaultValue: false },
{ name: 'writeMetrics', type: Boolean, defaultValue: false },
{ name: 'strategy', type: String, defaultValue: 'range' },
])
if (usage) {
console.log(usageMessage)
process.exit(0)
}
if (!['range', 'random'].includes(strategy)) {
throw new Error(`Invalid strategy: ${strategy}`)
}
setWriteMetrics(writeMetrics)
switch (strategy) {
case 'random':
console.log('Verifying random projects')
return {
verbose,
projectVerifier: () => verifyRandomProjectSample(nProjects),
}
case 'range':
default: {
if (!startDate || !endDate) {
throw new Error(usageMessage)
}
const start = Date.parse(startDate)
const end = Date.parse(endDate)
if (Number.isNaN(start)) {
throw new Error(`Invalid start date: ${startDate}`)
}
if (Number.isNaN(end)) {
throw new Error(`Invalid end date: ${endDate}`)
}
if (verbose) {
console.log(`Verifying from ${startDate} to ${endDate}`)
console.log(`Concurrency: ${concurrency}`)
}
STATS.ranges = 0
return {
projectVerifier: () =>
verifyProjectsInDateRange({
startDate: new Date(start),
endDate: new Date(end),
projectsPerRange: nProjects,
concurrency,
}),
verbose,
}
}
}
}
/**
* @type {CLIOptions}
*/
let options
try {
options = getOptions()
} catch (error) {
console.error(error)
process.exitCode = 1
await gracefulShutdown(1)
process.exit() // just here so the type checker knows that the process will exit
}
const { projectVerifier, verbose } = options
if (verbose) {
logger.logger.level('debug')
} else {
logger.logger.level('error')
}
/**
*
* @param {Array<string>} array
* @param {string} matchString
* @return {*}
*/
function sumStringInstances(array, matchString) {
return array.reduce((total, string) => {
return string === matchString ? total + 1 : total
}, 0)
}
/**
*
* @param {VerificationJobStatus} stats
*/
function displayStats(stats) {
console.log(`Verified projects: ${stats.verified}`)
console.log(`Total projects sampled: ${stats.total}`)
if (stats.errorTypes.length > 0) {
console.log('Errors:')
for (const error of new Set(stats.errorTypes)) {
console.log(`${error}: ${sumStringInstances(stats.errorTypes, error)}`)
}
}
}
try {
const stats = await projectVerifier()
displayStats(stats)
console.log(`completed`)
} catch (error) {
console.error(error)
console.log('completed with errors')
process.exitCode = 1
} finally {
console.log('shutting down')
await gracefulShutdown()
}