mirror of
https://github.com/yu-i-i/overleaf-cep.git
synced 2026-05-31 21:01:33 +02:00
Add new strategy to verify_sampled_projects
GitOrigin-RevId: d967da41250bb5945d5b8668b212d4a61b4f9d69
This commit is contained in:
@@ -1,18 +1,12 @@
|
||||
// @ts-check
|
||||
import {
|
||||
BackupCorruptedError,
|
||||
BackupCorruptedInvalidBlobError,
|
||||
BackupCorruptedMissingBlobError,
|
||||
BackupRPOViolationChunkNotBackedUpError,
|
||||
BackupRPOViolationError,
|
||||
verifyProjectWithErrorContext,
|
||||
} from '../storage/lib/backupVerifier.mjs'
|
||||
import { verifyProjectWithErrorContext } from '../storage/lib/backupVerifier.mjs'
|
||||
import { promiseMapSettledWithLimit } from '@overleaf/promise-utils'
|
||||
import logger from '@overleaf/logger'
|
||||
import metrics from '@overleaf/metrics'
|
||||
import {
|
||||
getSampleProjectsCursor,
|
||||
selectProjectsInDateRange,
|
||||
getProjectsCreatedInDateRangeCursor,
|
||||
getProjectsUpdatedInDateRangeCursor,
|
||||
} from './ProjectSampler.mjs'
|
||||
import OError from '@overleaf/o-error'
|
||||
|
||||
@@ -71,43 +65,14 @@ function splitJobs(startDate, endDate, interval) {
|
||||
|
||||
/**
|
||||
*
|
||||
* @param {Array<string>} historyIds
|
||||
* @return {Promise<VerificationJobStatus>}
|
||||
* @param historyIdCursor
|
||||
* @return {Promise<{verified: number, total: number, errorTypes: *[], hasFailure: boolean}>}
|
||||
*/
|
||||
async function verifyProjects(historyIds) {
  // Verifies every history id one at a time, in the order given. A failed
  // verification does not stop the run: the error is passed to
  // handleVerificationError and whatever that returns is recorded, then the
  // loop moves on to the next id.
  const failures = []
  let successCount = 0

  for (const historyId of historyIds) {
    try {
      await verifyProjectWithErrorContext(historyId)
      logger.debug({ historyId }, 'verified project backup successfully')
      if (WRITE_METRICS) {
        metrics.inc(METRICS.backup_project_verification_succeeded)
      }
      successCount += 1
    } catch (error) {
      failures.push(handleVerificationError(error, historyId))
    }
  }

  // Summary: number of successes, the recorded failure values, whether any
  // verification failed, and how many ids were supplied.
  return {
    verified: successCount,
    errorTypes: failures,
    hasFailure: failures.length > 0,
    total: historyIds.length,
  }
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param {number} nProjectsToSample
|
||||
* @return {Promise<VerificationJobStatus>}
|
||||
*/
|
||||
export async function verifyRandomProjectSample(nProjectsToSample) {
|
||||
const historyIds = await getSampleProjectsCursor(nProjectsToSample)
|
||||
|
||||
async function verifyProjectsFromCursor(historyIdCursor) {
|
||||
const errorTypes = []
|
||||
let verified = 0
|
||||
let total = 0
|
||||
for await (const historyId of historyIds) {
|
||||
for await (const historyId of historyIdCursor) {
|
||||
total++
|
||||
try {
|
||||
await verifyProjectWithErrorContext(historyId)
|
||||
@@ -127,6 +92,16 @@ export async function verifyRandomProjectSample(nProjectsToSample) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Obtains a sample of projects from getSampleProjectsCursor and verifies the
 * backup of each one via verifyProjectsFromCursor.
 *
 * @param {number} nProjectsToSample - forwarded to getSampleProjectsCursor
 * @return {Promise<VerificationJobStatus>}
 */
export async function verifyRandomProjectSample(nProjectsToSample) {
  const sampledHistoryIds = await getSampleProjectsCursor(nProjectsToSample)
  const jobStatus = await verifyProjectsFromCursor(sampledHistoryIds)
  return jobStatus
}
|
||||
|
||||
/**
|
||||
* Samples projects with history IDs between the specified dates and verifies them.
|
||||
*
|
||||
@@ -137,42 +112,28 @@ export async function verifyRandomProjectSample(nProjectsToSample) {
|
||||
*/
|
||||
async function verifyRange(startDate, endDate, projectsPerRange) {
  // Verifies the projects yielded by getProjectsCreatedInDateRangeCursor for
  // the given window and returns the verification counters together with the
  // window bounds.
  logger.info({ startDate, endDate }, 'verifying range')

  // The cursor is consumed by verifyProjectsFromCursor, so the number of
  // projects in the range is only known from the returned `total`.
  const results = await verifyProjectsFromCursor(
    getProjectsCreatedInDateRangeCursor(startDate, endDate, projectsPerRange)
  )

  if (results.total === 0) {
    logger.debug(
      { start: startDate, end: endDate },
      'No projects found in range'
    )
  }

  const jobStatus = {
    ...results,
    startDate,
    endDate,
  }

  // Only the log entry collapses repeated error types; the returned status
  // keeps the full list.
  logger.debug(
    { ...jobStatus, errorTypes: Array.from(new Set(jobStatus.errorTypes)) },
    'Verified range'
  )
  return jobStatus
}
|
||||
|
||||
@@ -200,7 +161,7 @@ async function verifyRange(startDate, endDate, projectsPerRange) {
|
||||
* @param {VerifyDateRangeOptions} options
|
||||
* @return {Promise<VerificationJobStatus>}
|
||||
*/
|
||||
export async function verifyProjectsInDateRange({
|
||||
export async function verifyProjectsCreatedInDateRange({
|
||||
concurrency = 0,
|
||||
projectsPerRange = 10,
|
||||
startDate,
|
||||
@@ -252,3 +213,44 @@ export async function verifyProjectsInDateRange({
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
/**
 * Verifies the backups of the projects yielded by
 * getProjectsUpdatedInDateRangeCursor for the given window, and returns the
 * verification counters together with the window bounds.
 *
 * @param {Date} startDate - start of the window passed to the cursor
 * @param {Date} endDate - end of the window passed to the cursor
 * @param {number} nProjects - forwarded to the cursor (presumably the sample
 *   size; confirm against ProjectSampler)
 * @return {Promise<VerificationJobStatus>}
 */
export async function verifyProjectsUpdatedInDateRange(
  startDate,
  endDate,
  nProjects
) {
  logger.debug(
    { startDate, endDate, nProjects },
    'Sampling projects updated in date range'
  )

  const updatedProjectsCursor = getProjectsUpdatedInDateRangeCursor(
    startDate,
    endDate,
    nProjects
  )
  const outcome = await verifyProjectsFromCursor(updatedProjectsCursor)

  const nothingToVerify = outcome.total === 0
  if (nothingToVerify) {
    logger.debug(
      { start: startDate, end: endDate },
      'No projects updated recently'
    )
  }

  const jobStatus = { ...outcome, startDate, endDate }

  // Repeated error types are collapsed for the log entry only; the returned
  // status keeps the full list.
  const distinctErrorTypes = [...new Set(jobStatus.errorTypes)]
  logger.debug(
    { ...jobStatus, errorTypes: distinctErrorTypes },
    'Verified recently updated projects'
  )
  return jobStatus
}
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
import { ObjectId } from 'mongodb'
|
||||
import config from 'config'
|
||||
|
||||
export const RPO = parseInt(config.get('backupRPOInMS'), 10)
|
||||
|
||||
/**
|
||||
* @param {Date} time
|
||||
@@ -7,3 +10,18 @@ import { ObjectId } from 'mongodb'
|
||||
export function objectIdFromDate(time) {
  // ObjectId.createFromTime expects a whole number of seconds since the Unix
  // epoch, while Date#getTime() yields milliseconds. The quotient is floored
  // so the driver never receives a fractional value.
  const epochSeconds = Math.floor(time.getTime() / 1000)
  return ObjectId.createFromTime(epochSeconds)
}
|
||||
|
||||
/**
 * Builds a time window that ends at the RPO horizon (the current time minus
 * RPO milliseconds) and reaches `offset` seconds further into the past.
 *
 * @param {number} offset - Length of the window, in seconds
 * @return {{endDate: Date, startDate: Date}}
 */
export function getDatesBeforeRPO(offset) {
  const windowLengthMs = offset * 1000
  const endDate = new Date(Date.now() - RPO)
  const startDate = new Date(endDate.getTime() - windowLengthMs)
  return { endDate, startDate }
}
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
// @ts-check
|
||||
import config from 'config'
|
||||
import OError from '@overleaf/o-error'
|
||||
import chunkStore from '../lib/chunk_store/index.js'
|
||||
import {
|
||||
@@ -16,8 +15,7 @@ import path from 'node:path'
|
||||
import projectKey from './project_key.js'
|
||||
import streams from './streams.js'
|
||||
import objectPersistor from '@overleaf/object-persistor'
|
||||
|
||||
const RPO = parseInt(config.get('backupRPOInMS'), 10)
|
||||
import { RPO } from '../../backupVerifier/utils.mjs'
|
||||
|
||||
/**
|
||||
* @typedef {import("@overleaf/object-persistor/src/PerProjectEncryptedS3Persistor.js").CachedPerProjectEncryptedS3Persistor} CachedPerProjectEncryptedS3Persistor
|
||||
|
||||
@@ -2,14 +2,16 @@
|
||||
import commandLineArgs from 'command-line-args'
|
||||
import {
|
||||
setWriteMetrics,
|
||||
verifyProjectsInDateRange,
|
||||
verifyProjectsCreatedInDateRange,
|
||||
verifyRandomProjectSample,
|
||||
verifyProjectsUpdatedInDateRange,
|
||||
} from '../../backupVerifier/ProjectVerifier.mjs'
|
||||
import knex from '../lib/knex.js'
|
||||
import { client } from '../lib/mongodb.js'
|
||||
import { setTimeout } from 'node:timers/promises'
|
||||
import logger from '@overleaf/logger'
|
||||
import { loadGlobalBlobs } from '../lib/blob_store/index.js'
|
||||
import { getDatesBeforeRPO } from '../../backupVerifier/utils.mjs'
|
||||
|
||||
logger.logger.level('fatal')
|
||||
|
||||
@@ -75,7 +77,7 @@ function getOptions() {
|
||||
process.exit(0)
|
||||
}
|
||||
|
||||
if (!['range', 'random'].includes(strategy)) {
|
||||
if (!['range', 'random', 'recent'].includes(strategy)) {
|
||||
throw new Error(`Invalid strategy: ${strategy}`)
|
||||
}
|
||||
|
||||
@@ -88,6 +90,18 @@ function getOptions() {
|
||||
verbose,
|
||||
projectVerifier: () => verifyRandomProjectSample(nProjects),
|
||||
}
|
||||
case 'recent':
|
||||
return {
|
||||
verbose,
|
||||
projectVerifier: async () => {
|
||||
const { startDate, endDate } = getDatesBeforeRPO(3 * 3600)
|
||||
return await verifyProjectsUpdatedInDateRange(
|
||||
startDate,
|
||||
endDate,
|
||||
nProjects
|
||||
)
|
||||
},
|
||||
}
|
||||
case 'range':
|
||||
default: {
|
||||
if (!startDate || !endDate) {
|
||||
@@ -109,7 +123,7 @@ function getOptions() {
|
||||
STATS.ranges = 0
|
||||
return {
|
||||
projectVerifier: () =>
|
||||
verifyProjectsInDateRange({
|
||||
verifyProjectsCreatedInDateRange({
|
||||
startDate: new Date(start),
|
||||
endDate: new Date(end),
|
||||
projectsPerRange: nProjects,
|
||||
|
||||
Reference in New Issue
Block a user