From efd55ffe97e7be46af87361acc34b4aaa6e8706d Mon Sep 17 00:00:00 2001
From: Brian Gough
Date: Tue, 20 May 2025 09:47:19 +0100
Subject: [PATCH] Merge pull request #25743 from overleaf/bg-deactivate-projects-script

add deactivate projects script

GitOrigin-RevId: 5acf4b980d8980457930ee488571362da2a8014c
---
 .../InactiveData/InactiveProjectManager.js   |  54 +++--
 .../src/infrastructure/GracefulShutdown.js   |  28 +--
 services/web/scripts/deactivate_projects.mjs | 216 ++++++++++++++++++
 3 files changed, 263 insertions(+), 35 deletions(-)
 create mode 100755 services/web/scripts/deactivate_projects.mjs

diff --git a/services/web/app/src/Features/InactiveData/InactiveProjectManager.js b/services/web/app/src/Features/InactiveData/InactiveProjectManager.js
index 818fe70c08..54bd81a500 100644
--- a/services/web/app/src/Features/InactiveData/InactiveProjectManager.js
+++ b/services/web/app/src/Features/InactiveData/InactiveProjectManager.js
@@ -11,6 +11,27 @@ const { callbackifyAll } = require('@overleaf/promise-utils')
 const Metrics = require('@overleaf/metrics')
 
 const MILISECONDS_IN_DAY = 86400000
+
+function findInactiveProjects(limit, daysOld) {
+  const oldProjectDate = new Date() - MILISECONDS_IN_DAY * daysOld
+  try {
+    // use $not $gt to catch non-opened projects where lastOpened is null
+    // return a cursor instead of executing the query
+    return Project.find({
+      lastOpened: { $not: { $gt: oldProjectDate } },
+    })
+      .where('active')
+      .equals(true)
+      .select(['_id', 'lastOpened'])
+      .limit(limit)
+      .read(READ_PREFERENCE_SECONDARY)
+      .cursor()
+  } catch (err) {
+    logger.err({ err }, 'could not get projects for deactivating')
+    throw err // Re-throw the error to be handled by the caller
+  }
+}
+
 const InactiveProjectManager = {
   async reactivateProjectIfRequired(projectId) {
     let project
@@ -53,30 +74,13 @@ const InactiveProjectManager = {
     if (daysOld == null) {
       daysOld = 360
     }
-    const oldProjectDate = new Date() - MILISECONDS_IN_DAY * daysOld
 
-    let projects
-    try {
-      // use $not $gt to catch non-opened projects where lastOpened is null
-      projects = await Project.find({
-        lastOpened: { $not: { $gt: oldProjectDate } },
-      })
-        .where('active')
-        .equals(true)
-        .select('_id')
-        .limit(limit)
-        .read(READ_PREFERENCE_SECONDARY)
-        .exec()
-    } catch (err) {
-      logger.err({ err }, 'could not get projects for deactivating')
-    }
+    logger.debug('deactivating projects')
 
-    logger.debug(
-      { numberOfProjects: projects && projects.length },
-      'deactivating projects'
-    )
+    const processedProjects = []
 
-    for (const project of projects) {
+    for await (const project of findInactiveProjects(limit, daysOld)) {
+      processedProjects.push(project)
       try {
         await InactiveProjectManager.deactivateProject(project._id)
       } catch (err) {
@@ -87,7 +91,12 @@ const InactiveProjectManager = {
       }
     }
 
-    return projects
+    logger.debug(
+      { numberOfProjects: processedProjects.length },
+      'finished deactivating projects'
+    )
+
+    return processedProjects
   },
 
   async deactivateProject(projectId) {
@@ -126,4 +135,5 @@ const InactiveProjectManager = {
 module.exports = {
   ...callbackifyAll(InactiveProjectManager),
   promises: InactiveProjectManager,
+  findInactiveProjects,
 }
diff --git a/services/web/app/src/infrastructure/GracefulShutdown.js b/services/web/app/src/infrastructure/GracefulShutdown.js
index 2446397b1d..b4b345fb95 100644
--- a/services/web/app/src/infrastructure/GracefulShutdown.js
+++ b/services/web/app/src/infrastructure/GracefulShutdown.js
@@ -65,20 +65,22 @@ async function gracefulShutdown(server, signal) {
     true
   )
 
-  await sleep(Settings.gracefulShutdownDelayInMs)
-  try {
-    await new Promise((resolve, reject) => {
-      logger.warn({}, 'graceful shutdown: closing http server')
-      server.close(err => {
-        if (err) {
-          reject(OError.tag(err, 'http.Server.close failed'))
-        } else {
-          resolve()
-        }
+  if (server) {
+    await sleep(Settings.gracefulShutdownDelayInMs)
+    try {
+      await new Promise((resolve, reject) => {
+        logger.warn({}, 'graceful shutdown: closing http server')
+        server.close(err => {
+          if (err) {
+            reject(OError.tag(err, 'http.Server.close failed'))
+          } else {
+            resolve()
+          }
+        })
       })
-    })
-  } catch (err) {
-    throw OError.tag(err, 'stop traffic')
+    } catch (err) {
+      throw OError.tag(err, 'stop traffic')
+    }
   }
 
   await runHandlers(
diff --git a/services/web/scripts/deactivate_projects.mjs b/services/web/scripts/deactivate_projects.mjs
new file mode 100755
index 0000000000..b229af649d
--- /dev/null
+++ b/services/web/scripts/deactivate_projects.mjs
@@ -0,0 +1,216 @@
+#!/usr/bin/env node
+import minimist from 'minimist'
+import PQueue from 'p-queue'
+import InactiveProjectManager from '../app/src/Features/InactiveData/InactiveProjectManager.js'
+import { gracefulShutdown } from '../app/src/infrastructure/GracefulShutdown.js'
+import logger from '@overleaf/logger'
+
+// Global variables for tracking job and error counts
+let jobCount = 0
+let succeededCount = 0
+let skippedCount = 0
+let failedCount = 0
+let currentAgeInDays = null
+let currentLastOpened = null
+let DRY_RUN = false
+let gracefulShutdownInitiated = false
+const SCRIPT_START_TIME = Date.now()
+const MAX_RUNTIME_DEFAULT = null
+let MAX_RUNTIME = MAX_RUNTIME_DEFAULT // in milliseconds
+
+// Configure signal handling
+process.on('SIGINT', handleSignal)
+process.on('SIGTERM', handleSignal)
+function handleSignal() {
+  if (gracefulShutdownInitiated) return
+  gracefulShutdownInitiated = true
+  logger.warn(
+    { gracefulShutdownInitiated },
+    'graceful shutdown initiated, draining queue'
+  )
+}
+
+// Check if max runtime has been exceeded
+function hasMaxRuntimeExceeded() {
+  if (MAX_RUNTIME === null) return false
+  const elapsedTime = Date.now() - SCRIPT_START_TIME
+  const hasExceeded = elapsedTime >= MAX_RUNTIME
+  if (hasExceeded && !gracefulShutdownInitiated) {
+    gracefulShutdownInitiated = true
+    logger.warn(
+      { elapsedTimeMs: elapsedTime, maxRuntimeMs: MAX_RUNTIME },
+      'maximum runtime exceeded, initiating graceful shutdown'
+    )
+  }
+  return hasExceeded
+}
+
+// Calculates the age in days since the provided lastOpened date.
+function getAgeFromLastOpened(lastOpened) {
+  const lastOpenedDate = new Date(lastOpened)
+  const now = new Date()
+  return Number(((now - lastOpenedDate) / (1000 * 60 * 60 * 24)).toFixed(2))
+}
+
+// Deactivates a single project and handles errors
+async function deactivateSingleProject(project) {
+  const { _id: projectId, lastOpened } = project
+  jobCount++
+
+  if (lastOpened) {
+    currentLastOpened = lastOpened
+    currentAgeInDays = getAgeFromLastOpened(lastOpened)
+  }
+
+  // Periodic progress logging
+  if (jobCount % 1000 === 0) {
+    logger.info(
+      { jobCount, failedCount, currentAgeInDays },
+      'project deactivation in progress'
+    )
+  }
+
+  // Debug level detail logging
+  logger.debug(
+    { projectId, jobCount, failedCount, dryRun: DRY_RUN },
+    'attempting to deactivate project'
+  )
+
+  // Dry run handling
+  if (DRY_RUN) {
+    logger.info({ projectId }, '[DRY RUN] would deactivate project')
+    succeededCount++
+    // Stop here so a dry run never reaches the real deactivation below
+    return
+  }
+
+  // Actual deactivation with error handling
+  try {
+    await InactiveProjectManager.promises.deactivateProject(projectId)
+    logger.debug({ projectId }, 'successfully deactivated project')
+    succeededCount++
+  } catch (error) {
+    failedCount++
+    logger.error({ projectId, err: error }, 'failed to deactivate project')
+  }
+}
+
+// Centralized project processing function
+async function processProjects(projectCursor, concurrency) {
+  const queue = new PQueue({ concurrency })
+  for await (const project of projectCursor) {
+    if (gracefulShutdownInitiated || hasMaxRuntimeExceeded()) {
+      skippedCount++
+      break
+    }
+    await queue.onEmpty()
+    logger.debug(
+      { queueSize: queue.size, queuePending: queue.pending },
+      'queue size before adding new job'
+    )
+    queue.add(async () => {
+      await deactivateSingleProject(project)
+    })
+  }
+  await queue.onIdle()
+}
+
+const usage = `
+Usage: scripts/deactivate_projects.mjs [options]
+
+Options:
+  --limit           Max number of projects to process (default: 10)
+  --daysOld         Min age in days for a project to be considered inactive (default: 7)
+  --concurrency     Number of deactivations to run in parallel (default: 1)
+  --max-time        Maximum runtime in seconds before graceful shutdown (default: no limit)
+  --dry-run, -n     Simulate deactivation without making changes (default: false)
+  --help            Display this usage message
+`
+
+async function main() {
+  const argv = minimist(process.argv.slice(2), {
+    string: ['limit', 'daysOld', 'concurrency', 'maxTime'],
+    boolean: ['dryRun', 'help'],
+    alias: {
+      dryRun: ['dry-run', 'n'],
+      maxTime: 'max-time',
+      help: 'h',
+    },
+    default: {
+      limit: '10',
+      daysOld: '7',
+      concurrency: '1',
+      maxTime: '',
+      dryRun: false,
+    },
+  })
+
+  if (argv.help || process.argv.length <= 2) {
+    console.log(usage)
+    process.exit(0)
+  }
+
+  const limit = parseInt(argv.limit, 10)
+  const daysOld = parseInt(argv.daysOld, 10)
+  const concurrency = parseInt(argv.concurrency, 10)
+  const maxRuntimeInSeconds = parseInt(argv.maxTime, 10)
+  DRY_RUN = argv.dryRun
+  MAX_RUNTIME = maxRuntimeInSeconds * 1000 // Convert seconds to milliseconds
+
+  if (DRY_RUN) {
+    logger.info(
+      {},
+      'DRY RUN MODE ENABLED: No actual deactivations will be performed'
+    )
+  }
+
+  logger.info(
+    {
+      limit,
+      daysOld,
+      concurrency,
+      dryRun: DRY_RUN,
+      maxRuntimeSeconds: maxRuntimeInSeconds || 'unlimited',
+    },
+    'finding inactive projects'
+  )
+
+  try {
+    // Find projects to deactivate
+    const projectCursor = await InactiveProjectManager.findInactiveProjects(
+      limit,
+      daysOld
+    )
+
+    // Process the projects
+    await processProjects(projectCursor, concurrency)
+  } catch (error) {
+    logger.error({ err: error }, 'critical error during script execution')
+    process.exitCode = 1
+  } finally {
+    logger.info(
+      {
+        jobCount,
+        succeededCount,
+        failedCount,
+        skippedCount,
+        currentAgeInDays,
+        currentLastOpened,
+        elapsedTimeInSeconds: Math.floor(
+          (Date.now() - SCRIPT_START_TIME) / 1000
+        ),
+        maxRuntimeInSeconds: maxRuntimeInSeconds || 'unlimited',
+      },
+      'project deactivation process completed'
+    )
+  }
+}
+
+main()
+  .then(async () => {
+    await gracefulShutdown()
+  })
+  .catch(err => {
+    logger.fatal({ err }, 'unhandled error in main execution')
+    process.exit(1)
+  })