Files
overleaf-cep/services/web/scripts/deactivate_projects.mjs
Jakob Ackermann 28a7aa5e19 [web] deactivate_projects: spread archiving jobs over the cron interval (#31212)
* [web] deactivate_projects: spread archiving jobs over the cron interval

Co-authored-by: Brian Gough <brian.gough@overleaf.com>

* [web] deactivate_projects: stop processing after graceful shutdown

Co-authored-by: Brian Gough <brian.gough@overleaf.com>

* [web] deactivate_projects: exit w/ code 1 when aborting due to max-time

Co-authored-by: Brian Gough <brian.gough@overleaf.com>

* [web] deactivate_projects: adjust bail out for timeout

Co-authored-by: Brian Gough <brian.gough@overleaf.com>

* [web] deactivate_projects: handle small number of projects better

Co-authored-by: Brian Gough <brian.gough@overleaf.com>

* add comment about use of --maxTime option

---------

Co-authored-by: Brian Gough <brian.gough@overleaf.com>
GitOrigin-RevId: c62c66725233d391fd2c8d86ce95275cea88f36c
2026-02-04 09:05:59 +00:00

240 lines
7.0 KiB
JavaScript
Executable File

#!/usr/bin/env node
import minimist from 'minimist'
import PQueue from 'p-queue'
import InactiveProjectManager from '../app/src/Features/InactiveData/InactiveProjectManager.mjs'
import { gracefulShutdown } from '../app/src/infrastructure/GracefulShutdown.mjs'
import logger from '@overleaf/logger'
import { setTimeout } from 'node:timers/promises'
// ---- Fixed values ----------------------------------------------------------
// Wall-clock time (ms since epoch) captured when this module is evaluated;
// the runtime limit is measured against it.
const SCRIPT_START_TIME = Date.now()
// `null` stands for "no runtime limit".
const MAX_RUNTIME_DEFAULT = null
// Compared against the remaining runtime (in milliseconds) when pacing jobs.
// NOTE(review): presumably an upper estimate for processing one project - confirm.
const MAX_PROJECT_ESTIMATE = 30_000

// ---- Run-time settings and flags ------------------------------------------
let MAX_RUNTIME = MAX_RUNTIME_DEFAULT // in milliseconds
let DRY_RUN = false
let gracefulShutdownInitiated = false

// ---- Counters and progress markers reported in the logs --------------------
let jobCount = 0
let succeededCount = 0
let skippedCount = 0
let failedCount = 0
let currentAgeInDays = null
let currentLastOpened = null
// Configure signal handling: SIGINT and SIGTERM share the same handler.
for (const signalName of ['SIGINT', 'SIGTERM']) {
  process.on(signalName, handleSignal)
}

/**
 * Signal handler: raises the module-level `gracefulShutdownInitiated` flag and
 * logs a warning the first time it runs. Later invocations are no-ops.
 */
function handleSignal() {
  if (!gracefulShutdownInitiated) {
    gracefulShutdownInitiated = true
    logger.warn(
      { gracefulShutdownInitiated },
      'graceful shutdown initiated, draining queue'
    )
  }
}
/**
 * Reports whether the script has been running for at least MAX_RUNTIME
 * milliseconds (measured from SCRIPT_START_TIME).
 *
 * The first time the limit is found to be exceeded (and no shutdown has been
 * requested yet) this also sets `process.exitCode` to 1, raises the
 * `gracefulShutdownInitiated` flag and logs a warning.
 *
 * @returns {boolean} false when MAX_RUNTIME is null or not yet reached
 */
function hasMaxRuntimeExceeded() {
  if (MAX_RUNTIME === null) {
    return false
  }

  const elapsedMs = Date.now() - SCRIPT_START_TIME
  const limitReached = elapsedMs >= MAX_RUNTIME
  if (!limitReached) {
    return false
  }

  if (!gracefulShutdownInitiated) {
    // Exit with code 1 eventually. The cron heartbeat script will alert us.
    process.exitCode = 1
    gracefulShutdownInitiated = true
    logger.warn(
      { elapsedTimeMs: elapsedMs, maxRuntimeMs: MAX_RUNTIME },
      'maximum runtime exceeded, initiating graceful shutdown'
    )
  }
  return true
}
/**
 * Calculates how many days have passed since `lastOpened`.
 *
 * @param {Date|string|number} lastOpened - anything accepted by `new Date()`
 * @returns {number} age in days, rounded to two decimal places (NaN when the
 *   input cannot be parsed as a date)
 */
function getAgeFromLastOpened(lastOpened) {
  const MS_PER_DAY = 24 * 60 * 60 * 1000
  const openedAtMs = new Date(lastOpened).getTime()
  const elapsedMs = Date.now() - openedAtMs
  const ageInDays = elapsedMs / MS_PER_DAY
  return Number(ageInDays.toFixed(2))
}
/**
 * Deactivates a single project and handles errors.
 *
 * Side effects on module-level state:
 *  - increments `jobCount` for every call;
 *  - when the project has a `lastOpened` value, records it in
 *    `currentLastOpened` and its age in `currentAgeInDays`;
 *  - increments `succeededCount` or `failedCount` depending on the outcome.
 *
 * In dry-run mode the project is only logged and counted as succeeded;
 * `InactiveProjectManager` is never called.
 *
 * Errors from the deactivation are logged and counted, never rethrown.
 *
 * @param {{ _id: *, lastOpened?: * }} project - project record from the cursor
 * @returns {Promise<void>}
 */
async function deactivateSingleProject(project) {
  const { _id: projectId, lastOpened } = project
  jobCount++
  if (lastOpened) {
    currentLastOpened = lastOpened
    currentAgeInDays = getAgeFromLastOpened(lastOpened)
  }

  // Periodic progress logging
  if (jobCount % 1000 === 0) {
    logger.info(
      { jobCount, failedCount, currentAgeInDays },
      'project deactivation in progress'
    )
  }

  // Debug level detail logging
  logger.debug(
    { projectId, jobCount, failedCount, dryRun: DRY_RUN },
    'attempting to deactivate project'
  )

  // Dry run handling: stop here so that nothing is actually deactivated and
  // the project is counted as succeeded exactly once.
  if (DRY_RUN) {
    logger.info({ projectId }, '[DRY RUN] would deactivate project')
    succeededCount++
    return
  }

  // Actual deactivation with error handling
  try {
    await InactiveProjectManager.promises.deactivateProject(projectId)
    logger.debug({ projectId }, 'successfully deactivated project')
    succeededCount++
  } catch (error) {
    failedCount++
    logger.error({ projectId, err: error }, 'failed to deactivate project')
  }
}
/**
 * Centralized project processing function.
 *
 * First reads the cursor into an array, then feeds the collected projects one
 * by one into a PQueue with the given concurrency. Both phases stop early
 * (bumping `skippedCount` once) as soon as a graceful shutdown has been
 * requested or the maximum runtime has been exceeded. Resolves once the queue
 * is idle.
 *
 * @param {AsyncIterable<object>} projectCursor - projects to deactivate
 * @param {number} concurrency - passed to PQueue as its `concurrency` option
 * @returns {Promise<void>}
 */
async function processProjects(projectCursor, concurrency) {
  const queue = new PQueue({ concurrency })
  const shouldStop = () =>
    gracefulShutdownInitiated || hasMaxRuntimeExceeded()

  // Phase 1: collect the projects from the cursor.
  const collected = []
  for await (const project of projectCursor) {
    if (shouldStop()) {
      skippedCount += 1
      break
    }
    collected.push(project)
  }

  // Phase 2: schedule the collected projects.
  const pacingStart = Date.now()
  const total = collected.length
  const isSteadyStateProcessing = total < 10_000
  for (let idx = 0; idx < total; idx++) {
    const project = collected[idx]

    if (MAX_RUNTIME > 0) {
      // If the job has to run in a finite time (e.g. when running as the cron job)
      // then spread the work evenly over the runtime duration. Otherwise, process
      // all the outstanding projects without any delay, subject to the concurrency.
      const timeLeft = MAX_RUNTIME - (Date.now() - pacingStart)
      if (isSteadyStateProcessing && timeLeft > MAX_PROJECT_ESTIMATE) {
        const projectsLeft = total - idx
        // Handle small number of projects better (don't wait for all of timeLeft to pass).
        await setTimeout(timeLeft / (projectsLeft + 1))
      }
    }

    await queue.onEmpty()

    if (shouldStop()) {
      skippedCount += 1
      break
    }

    logger.debug(
      { queueSize: queue.size, queuePending: queue.pending },
      'queue size before adding new job'
    )
    const job = async () => {
      await deactivateSingleProject(project)
    }
    queue.add(job)
  }

  await queue.onIdle()
}
// Help text printed by main() when --help is given or the script is started
// without any command line arguments.
const usage = `
Usage: scripts/deactivate_projects.mjs [options]
Options:
--limit <number> Max number of projects to process (default: 10)
--daysOld <number> Min age in days for a project to be considered inactive (default: 7)
--concurrency <number> Number of deactivations to run in parallel (default: 1)
--max-time <number> Maximum runtime in seconds before graceful shutdown (default: no limit)
--dry-run, -n Simulate deactivation without making changes (default: false)
--help Display this usage message
`
/**
 * Script entry point.
 *
 * Parses the command line, stores the dry-run flag and runtime limit in the
 * module-level `DRY_RUN` / `MAX_RUNTIME` variables, looks up inactive projects
 * via `InactiveProjectManager.findInactiveProjects(limit, daysOld)` and hands
 * the resulting cursor to `processProjects`. A summary of the counters is
 * logged in all cases; an error during lookup or processing is logged and
 * turns the exit code into 1.
 *
 * Prints the usage text and exits with code 0 when `--help` is passed or the
 * script is started without arguments.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const argv = minimist(process.argv.slice(2), {
    string: ['limit', 'daysOld', 'concurrency', 'maxTime'],
    boolean: ['dryRun', 'help'],
    alias: {
      dryRun: ['dry-run', 'n'],
      maxTime: 'max-time',
      help: 'h',
    },
    default: {
      limit: '10',
      daysOld: '7',
      concurrency: '1',
      maxTime: '',
      dryRun: false,
    },
  })

  if (argv.help || process.argv.length <= 2) {
    console.log(usage)
    process.exit(0)
  }

  const limit = parseInt(argv.limit, 10)
  const daysOld = parseInt(argv.daysOld, 10)
  const concurrency = parseInt(argv.concurrency, 10)
  const maxRuntimeInSeconds = parseInt(argv.maxTime, 10)
  DRY_RUN = argv.dryRun
  // Without a (numeric) --max-time the parsed value is NaN. Keep the explicit
  // "no limit" sentinel in that case instead of storing NaN, so that the
  // `MAX_RUNTIME === null` check keeps its meaning.
  MAX_RUNTIME = Number.isNaN(maxRuntimeInSeconds)
    ? MAX_RUNTIME_DEFAULT
    : maxRuntimeInSeconds * 1000 // Convert seconds to milliseconds

  if (DRY_RUN) {
    logger.info(
      {},
      'DRY RUN MODE ENABLED: No actual deactivations will be performed'
    )
  }

  logger.info(
    {
      limit,
      daysOld,
      concurrency,
      dryRun: DRY_RUN,
      maxRuntimeSeconds: maxRuntimeInSeconds || 'unlimited',
    },
    'finding inactive projects'
  )

  try {
    // Find projects to deactivate
    const projectCursor = await InactiveProjectManager.findInactiveProjects(
      limit,
      daysOld
    )
    // Process the projects
    await processProjects(projectCursor, concurrency)
  } catch (error) {
    logger.error({ err: error }, 'critical error during script execution')
    process.exitCode = 1
  } finally {
    // Always report the final counters, also after a failure.
    logger.info(
      {
        jobCount,
        succeededCount,
        failedCount,
        skippedCount,
        currentAgeInDays,
        currentLastOpened,
        elapsedTimeInSeconds: Math.floor(
          (Date.now() - SCRIPT_START_TIME) / 1000
        ),
        maxRuntimeInSeconds: maxRuntimeInSeconds || 'unlimited',
      },
      'project deactivation process completed'
    )
  }
}
// Logs an unexpected rejection from the run below and terminates with exit code 1.
const exitOnFatalError = err => {
  logger.fatal({ err }, 'unhandled error in main execution')
  process.exit(1)
}

// Run the script, then call gracefulShutdown() once main() has resolved.
main()
  .then(() => gracefulShutdown())
  .catch(exitOnFatalError)