mirror of
https://github.com/yu-i-i/overleaf-cep.git
synced 2026-05-23 17:19:37 +02:00
Merge pull request #33046 from overleaf/mj-batch-history-resync-script
[web] Add script for batch history resyncs GitOrigin-RevId: 2409475fa1ba12dadfaae9641a5fafdaa6c88e47
This commit is contained in:
committed by
Copybot
parent
e59cbc61cf
commit
92bbba5e50
344
services/web/scripts/history/resync_projects.mjs
Normal file
344
services/web/scripts/history/resync_projects.mjs
Normal file
@@ -0,0 +1,344 @@
|
||||
// @ts-check
|
||||
|
||||
import minimist from 'minimist'
|
||||
import { scriptRunner } from '../lib/ScriptRunner.mjs'
|
||||
import logger from '@overleaf/logger'
|
||||
import ProjectGetter from '../../app/src/Features/Project/ProjectGetter.mjs'
|
||||
import {
|
||||
db,
|
||||
ObjectId,
|
||||
READ_PREFERENCE_SECONDARY,
|
||||
} from '../../app/src/infrastructure/mongodb.mjs'
|
||||
import HistoryManager from '../../app/src/Features/History/HistoryManager.mjs'
|
||||
import DocstoreManager from '../../app/src/Features/Docstore/DocstoreManager.mjs'
|
||||
import DocumentUpdaterHandler from '../../app/src/Features/DocumentUpdater/DocumentUpdaterHandler.mjs'
|
||||
|
||||
function usage() {
|
||||
console.error(`Usage: resync_projects.mjs [OPTIONS]
|
||||
|
||||
Options:
|
||||
|
||||
--help Print this help
|
||||
--project-id Migrate this project
|
||||
--min-id Migrate projects from this id
|
||||
--max-id Migrate projects to this id
|
||||
--last-updated-after Migrate projects last updated after this date
|
||||
--last-updated-before Migrate projects last updated before this date
|
||||
--commit Actually perform the resync, instead of just checking which projects would be resynced
|
||||
--skip-metadata-checks Skip doing Mongo/Redis-level only checks to determine if the projects needs resyncing (has ranges or linked file data)
|
||||
--concurrency How many jobs to run in parallel
|
||||
`)
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @returns {{ projectIds?: string[]; minId?: string; maxId?: string; concurrency: number; commit: boolean; skipMetadataChecks: boolean; lastUpdatedAfter?: string; lastUpdatedBefore?: string; }}
|
||||
*/
|
||||
function parseArgs() {
|
||||
const args = minimist(process.argv.slice(2), {
|
||||
boolean: ['help', 'commit', 'skip-metadata-checks'],
|
||||
string: [
|
||||
'project-id',
|
||||
'min-id',
|
||||
'max-id',
|
||||
'last-updated-after',
|
||||
'last-updated-before',
|
||||
],
|
||||
})
|
||||
|
||||
if (args.help) {
|
||||
usage()
|
||||
process.exit(0)
|
||||
}
|
||||
|
||||
const projectIds = arrayOpt(args['project-id'])
|
||||
const minId = args['min-id']
|
||||
const maxId = args['max-id']
|
||||
const lastUpdatedAfter = args['last-updated-after']
|
||||
const lastUpdatedBefore = args['last-updated-before']
|
||||
const concurrency = parseInt(args.concurrency, 10) || 1
|
||||
const commit = args.commit
|
||||
const skipMetadataChecks = args['skip-metadata-checks']
|
||||
|
||||
if (
|
||||
projectIds == null &&
|
||||
minId == null &&
|
||||
maxId == null &&
|
||||
lastUpdatedAfter == null &&
|
||||
lastUpdatedBefore == null
|
||||
) {
|
||||
console.error('Please specify at least one filter\n')
|
||||
usage()
|
||||
process.exit(1)
|
||||
}
|
||||
|
||||
return {
|
||||
projectIds,
|
||||
minId,
|
||||
maxId,
|
||||
concurrency,
|
||||
commit,
|
||||
skipMetadataChecks,
|
||||
lastUpdatedAfter,
|
||||
lastUpdatedBefore,
|
||||
}
|
||||
}
|
||||
|
||||
async function main() {
|
||||
const {
|
||||
projectIds,
|
||||
minId,
|
||||
maxId,
|
||||
concurrency,
|
||||
commit,
|
||||
skipMetadataChecks,
|
||||
lastUpdatedAfter,
|
||||
lastUpdatedBefore,
|
||||
} = parseArgs()
|
||||
|
||||
// skip projects that don't have full project history
|
||||
/** @type {any[]} */
|
||||
const clauses = [{ 'overleaf.history.id': { $exists: true } }]
|
||||
|
||||
if (projectIds != null) {
|
||||
clauses.push({ _id: { $in: projectIds.map(id => new ObjectId(id)) } })
|
||||
}
|
||||
if (minId) {
|
||||
clauses.push({ _id: { $gte: new ObjectId(minId) } })
|
||||
}
|
||||
if (maxId) {
|
||||
clauses.push({ _id: { $lte: new ObjectId(maxId) } })
|
||||
}
|
||||
if (lastUpdatedAfter) {
|
||||
clauses.push({ lastUpdated: { $gt: new Date(lastUpdatedAfter) } })
|
||||
}
|
||||
if (lastUpdatedBefore) {
|
||||
clauses.push({ lastUpdated: { $lt: new Date(lastUpdatedBefore) } })
|
||||
}
|
||||
const filter = { $and: clauses }
|
||||
|
||||
const projects = db.projects
|
||||
.find(filter, {
|
||||
readPreference: READ_PREFERENCE_SECONDARY,
|
||||
projection: { _id: 1, overleaf: 1 },
|
||||
})
|
||||
.sort({ _id: -1 })
|
||||
|
||||
/** @type {{ skipped: number; resync: number; total: number;}} */
|
||||
const projectsProcessed = {
|
||||
skipped: 0,
|
||||
resync: 0,
|
||||
total: 0,
|
||||
}
|
||||
/** @type {Map<string, Promise<void>>} */
|
||||
const jobsByProjectId = new Map()
|
||||
let errors = 0
|
||||
|
||||
let terminating = false
|
||||
/**
|
||||
* @param {any} signal
|
||||
*/
|
||||
const handleSignal = signal => {
|
||||
logger.info({ signal }, 'History resync job received signal')
|
||||
terminating = true
|
||||
}
|
||||
process.on('SIGINT', handleSignal)
|
||||
process.on('SIGTERM', handleSignal)
|
||||
|
||||
for await (const project of projects) {
|
||||
if (terminating) {
|
||||
break
|
||||
}
|
||||
|
||||
const projectId = project._id.toString()
|
||||
if (jobsByProjectId.size >= concurrency) {
|
||||
// Wait until the next job finishes
|
||||
await Promise.race(jobsByProjectId.values())
|
||||
}
|
||||
const job = processProject(projectId, { commit, skipMetadataChecks })
|
||||
.then(
|
||||
/** @param {'skipped' | 'resync'} migrationType */ migrationType => {
|
||||
jobsByProjectId.delete(projectId)
|
||||
projectsProcessed[migrationType] += 1
|
||||
projectsProcessed.total += 1
|
||||
logger.debug(
|
||||
{
|
||||
projectId,
|
||||
projectsProcessed,
|
||||
errors,
|
||||
migrationType,
|
||||
},
|
||||
'History resync'
|
||||
)
|
||||
if (projectsProcessed.total % 10000 === 0) {
|
||||
logger.info(
|
||||
{ projectsProcessed, errors, lastProjectId: projectId },
|
||||
'History resync progress'
|
||||
)
|
||||
}
|
||||
}
|
||||
)
|
||||
.catch(
|
||||
/** @param {any} err */ err => {
|
||||
jobsByProjectId.delete(projectId)
|
||||
errors += 1
|
||||
logger.error(
|
||||
{ err, projectId, projectsProcessed, errors },
|
||||
'Failed to resync project history'
|
||||
)
|
||||
}
|
||||
)
|
||||
|
||||
jobsByProjectId.set(projectId, job)
|
||||
}
|
||||
|
||||
// Clear the remaining backlog of jobs
|
||||
await Promise.all(jobsByProjectId.values())
|
||||
logger.info({ projectsProcessed, errors }, 'History resync completion')
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param {string} projectId
|
||||
* @param {{skipMetadataChecks: boolean, commit: boolean}} opts
|
||||
* @returns
|
||||
*/
|
||||
async function processProject(projectId, opts) {
|
||||
const shouldProceed = opts.skipMetadataChecks
|
||||
? true
|
||||
: await hasHistoryMetadata(projectId)
|
||||
|
||||
if (!shouldProceed) {
|
||||
logger.debug(
|
||||
{ projectId },
|
||||
'Skipping project as it has no history relevant data in Mongo'
|
||||
)
|
||||
return 'skipped'
|
||||
}
|
||||
|
||||
if (opts.commit) {
|
||||
logger.debug({ projectId }, 'Resyncing project')
|
||||
await HistoryManager.promises.flushProject(projectId)
|
||||
await HistoryManager.promises.resyncProject(projectId)
|
||||
} else {
|
||||
logger.debug({ projectId }, 'Project would be resynced')
|
||||
}
|
||||
|
||||
return 'resync'
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param {string} projectId
|
||||
* @returns
|
||||
*/
|
||||
async function hasHistoryMetadata(projectId) {
|
||||
try {
|
||||
const blockSuccess =
|
||||
await DocumentUpdaterHandler.promises.blockProject(projectId)
|
||||
if (!blockSuccess) {
|
||||
logger.debug(
|
||||
{ projectId },
|
||||
'Project is currently active, so we cannot skip'
|
||||
)
|
||||
return true
|
||||
}
|
||||
} catch (err) {
|
||||
logger.warn(
|
||||
{ projectId, err },
|
||||
'Error thrown while acquiring block for project'
|
||||
)
|
||||
return true
|
||||
}
|
||||
|
||||
try {
|
||||
if (await hasLinkedFileData(projectId)) {
|
||||
return true
|
||||
}
|
||||
if (await DocstoreManager.promises.projectHasRanges(projectId)) {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
} catch (err) {
|
||||
logger.warn(
|
||||
{ projectId, err },
|
||||
'Error checking for history data in Mongo, proceeding with resync just in case'
|
||||
)
|
||||
} finally {
|
||||
try {
|
||||
await DocumentUpdaterHandler.promises.unblockProject(projectId)
|
||||
} catch (err) {
|
||||
logger.warn(
|
||||
{ projectId, err },
|
||||
'Error unblocking project after checking for history data in Mongo'
|
||||
)
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param {string} projectId
|
||||
* @returns {Promise<boolean>}
|
||||
*/
|
||||
async function hasLinkedFileData(projectId) {
|
||||
const project = await ProjectGetter.promises.getProjectWithoutLock(
|
||||
projectId,
|
||||
{
|
||||
rootFolder: 1,
|
||||
}
|
||||
)
|
||||
if (!project) {
|
||||
return false
|
||||
}
|
||||
return hasLinkedDataInFileTree(project.rootFolder?.[0])
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param {any} folder
|
||||
* @returns {boolean}
|
||||
*/
|
||||
function hasLinkedDataInFileTree(folder) {
|
||||
if (!folder) {
|
||||
return false
|
||||
}
|
||||
if (Array.isArray(folder.fileRefs)) {
|
||||
for (const fileRef of folder.fileRefs) {
|
||||
if (fileRef.linkedFileData) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
if (Array.isArray(folder.folders)) {
|
||||
for (const subfolder of folder.folders) {
|
||||
if (hasLinkedDataInFileTree(subfolder)) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {any} value
|
||||
* @returns {Array<any> | undefined}
|
||||
*/
|
||||
function arrayOpt(value) {
|
||||
if (typeof value === 'string') {
|
||||
return [value]
|
||||
} else if (Array.isArray(value)) {
|
||||
return value
|
||||
} else {
|
||||
return undefined
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
await scriptRunner(main)
|
||||
process.exit(0)
|
||||
} catch (error) {
|
||||
console.error(error)
|
||||
process.exit(1)
|
||||
}
|
||||
Reference in New Issue
Block a user