[web] deactivate_projects: spread archiving jobs over the cron interval (#31212)

* [web] deactivate_projects: spread archiving jobs over the cron interval

Co-authored-by: Brian Gough <brian.gough@overleaf.com>

* [web] deactivate_projects: stop processing after graceful shutdown

Co-authored-by: Brian Gough <brian.gough@overleaf.com>

* [web] deactivate_projects: exit with code 1 when aborting due to the max runtime being exceeded

Co-authored-by: Brian Gough <brian.gough@overleaf.com>

* [web] deactivate_projects: adjust bail out for timeout

Co-authored-by: Brian Gough <brian.gough@overleaf.com>

* [web] deactivate_projects: handle a small number of projects better (don't wait for all of the remaining time to pass)

Co-authored-by: Brian Gough <brian.gough@overleaf.com>

* add comment about use of --maxTime option

---------

Co-authored-by: Brian Gough <brian.gough@overleaf.com>
GitOrigin-RevId: c62c66725233d391fd2c8d86ce95275cea88f36c
This commit is contained in:
Jakob Ackermann
2026-02-03 10:01:21 +01:00
committed by Copybot
parent 94b79aac8c
commit 28a7aa5e19

View File

@@ -4,6 +4,7 @@ import PQueue from 'p-queue'
import InactiveProjectManager from '../app/src/Features/InactiveData/InactiveProjectManager.mjs'
import { gracefulShutdown } from '../app/src/infrastructure/GracefulShutdown.mjs'
import logger from '@overleaf/logger'
import { setTimeout } from 'node:timers/promises'
// Global variables for tracking job and error counts
let jobCount = 0
@@ -17,6 +18,7 @@ let gracefulShutdownInitiated = false
// Timestamp (ms) captured when the script is loaded; the elapsed runtime is
// computed as Date.now() - SCRIPT_START_TIME.
const SCRIPT_START_TIME = Date.now()
// null means no runtime limit is configured by default.
const MAX_RUNTIME_DEFAULT = null
let MAX_RUNTIME = MAX_RUNTIME_DEFAULT // in milliseconds
// In milliseconds (it is compared against the remaining runtime). Delays for
// spreading jobs are only inserted while more than this much runtime remains.
// NOTE(review): presumably an upper estimate of the time needed to process a
// single project -- confirm.
const MAX_PROJECT_ESTIMATE = 30_000
// Configure signal handling
process.on('SIGINT', handleSignal)
@@ -36,6 +38,8 @@ function hasMaxRuntimeExceeded() {
const elapsedTime = Date.now() - SCRIPT_START_TIME
const hasExceeded = elapsedTime >= MAX_RUNTIME
if (hasExceeded && !gracefulShutdownInitiated) {
// Exit with code 1 eventually. The cron heartbeat script will alert us.
process.exitCode = 1
gracefulShutdownInitiated = true
logger.warn(
{ elapsedTimeMs: elapsedTime, maxRuntimeMs: MAX_RUNTIME },
@@ -96,12 +100,33 @@ async function deactivateSingleProject(project) {
// Centralized project processing function
async function processProjects(projectCursor, concurrency) {
const queue = new PQueue({ concurrency })
const projects = []
for await (const project of projectCursor) {
if (gracefulShutdownInitiated || hasMaxRuntimeExceeded()) {
skippedCount++
break
}
projects.push(project)
}
const start = Date.now()
const isSteadyStateProcessing = projects.length < 10_000
for (const [idx, project] of projects.entries()) {
if (MAX_RUNTIME > 0) {
// If the job has to run in a finite time (e.g. when running as the cron job)
// then spread the work evenly over the runtime duration. Otherwise, process
// all the outstanding projects without any delay, subject to the concurrency.
const remainingTime = MAX_RUNTIME - (Date.now() - start)
if (isSteadyStateProcessing && remainingTime > MAX_PROJECT_ESTIMATE) {
const remainingProjects = projects.length - idx
// Divide by remainingProjects + 1 (not remainingProjects) so that the last project does not
// wait for all of remainingTime to pass. This handles a small number of projects better.
await setTimeout(remainingTime / (remainingProjects + 1))
}
}
await queue.onEmpty()
if (gracefulShutdownInitiated || hasMaxRuntimeExceeded()) {
skippedCount++
break
}
logger.debug(
{ queueSize: queue.size, queuePending: queue.pending },
'queue size before adding new job'