/**
 * List projects that have never been backed up and are older than the
 * specified interval.
 *
 * Project age is derived from the timestamp embedded in the project's `_id`:
 * only projects whose `_id` sorts below an ObjectId built from the cutoff time
 * are returned.
 *
 * @param {number} [timeIntervalMs=0] - minimum project age, in milliseconds
 * @param {number|null} [limit=null] - maximum number of projects to return;
 *   any falsy value (null, undefined, 0) leaves the query unlimited
 * @return the result of `projects.find(...)` (the original code names it a
 *   cursor), projecting only `_id` and sorted by ascending `_id`
 */
function listUninitializedBackups(timeIntervalMs = 0, limit = null) {
  // ObjectId.createFromTime is specified in whole seconds, so drop the
  // fractional part explicitly instead of relying on implicit truncation.
  const cutoffTimeInSeconds = Math.floor((Date.now() - timeIntervalMs) / 1000)
  const options = {
    projection: { _id: 1 },
    sort: { _id: 1 },
  }
  // Apply limit if provided
  if (limit) {
    options.limit = limit
  }
  return projects.find(
    {
      // An equality match on null selects documents where the field is
      // missing as well as documents where it is explicitly null.
      'overleaf.backup.lastBackedUpVersion': null,
      _id: {
        $lt: ObjectId.createFromTime(cutoffTimeInSeconds),
      },
    },
    options
  )
}
/**
 * Helper to list pending and uninitialized backups.
 *
 * Combines the two listings into a single async generator: everything from
 * listPendingBackups is yielded first, followed by everything from
 * listUninitializedBackups.
 *
 * When `limit` is a positive number it bounds the TOTAL number of projects
 * yielded across both listings, not each listing separately. Any other value
 * is forwarded unchanged to both listings.
 *
 * @param {number} timeIntervalMs - age threshold, forwarded to both listings
 * @param {number|null} limit - maximum number of projects to yield overall
 */
async function* pendingCursor(timeIntervalMs, limit) {
  const hasCap = typeof limit === 'number' && limit > 0
  // Budget left for the second listing; only decremented when a cap applies.
  let remaining = limit

  for await (const project of listPendingBackups(timeIntervalMs, limit)) {
    yield project
    if (hasCap) {
      remaining -= 1
    }
  }

  // Stop here once the budget is spent. Forwarding a remaining budget of 0
  // would not work: listUninitializedBackups only applies a truthy limit, so
  // 0 would turn into an unlimited query.
  if (hasCap && remaining <= 0) {
    return
  }

  for await (const project of listUninitializedBackups(
    timeIntervalMs,
    remaining
  )) {
    yield project
  }
}