From ac19f994dd4fb5d42f6249fa820f43105dffc396 Mon Sep 17 00:00:00 2001 From: Eric Mc Sween <5454374+emcsween@users.noreply.github.com> Date: Thu, 20 Feb 2025 08:16:06 -0500 Subject: [PATCH] Merge pull request #23725 from overleaf/em-find-dangling-comments-archived-docs Look at archived docs when looking for dangling comments GitOrigin-RevId: fb04b9428ce83802b6e658153c5bbe70e983de65 --- .../web/scripts/find_dangling_comments.mjs | 30 ++++++++++++++++--- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/services/web/scripts/find_dangling_comments.mjs b/services/web/scripts/find_dangling_comments.mjs index 5d13c67970..72563c8a68 100644 --- a/services/web/scripts/find_dangling_comments.mjs +++ b/services/web/scripts/find_dangling_comments.mjs @@ -6,6 +6,8 @@ import { ObjectId, READ_PREFERENCE_SECONDARY, } from '../app/src/infrastructure/mongodb.js' +import DocstoreManager from '../app/src/Features/Docstore/DocstoreManager.js' +import { NotFoundError } from '../app/src/Features/Errors/Errors.js' const OPTS = parseArgs() @@ -47,7 +49,7 @@ async function main() { ) projectsFound += 1 } - if (projectsProcessed % 100000 === 0) { + if (projectsProcessed % 10000 === 0) { console.log( `${projectsProcessed} projects processed - Last project: ${projectId}` ) @@ -60,7 +62,7 @@ async function* fetchThreadIdsByProject() { const clauses = [] clauses.push({ deleted: { $ne: true }, - 'ranges.comments.0': { $exists: true }, + $or: [{ 'ranges.comments.0': { $exists: true } }, { inS3: true }], }) if (OPTS.minProjectId != null) { clauses.push({ project_id: { $gte: new ObjectId(OPTS.minProjectId) } }) @@ -72,7 +74,7 @@ async function* fetchThreadIdsByProject() { { $and: clauses }, { sort: { project_id: 1 }, - projection: { project_id: 1, 'ranges.comments': 1 }, + projection: { project_id: 1, 'ranges.comments': 1, inS3: 1 }, readPreference: READ_PREFERENCE_SECONDARY, } ) @@ -87,7 +89,27 @@ async function* fetchThreadIdsByProject() { projectId = doc.project_id - for (const comment of doc.ranges.comments) { + let comments = [] + if (doc.inS3) { + try { + const archivedDoc = await DocstoreManager.promises.getDoc( + projectId, + doc._id, + { peek: true } + ) + comments = archivedDoc.ranges?.comments ?? [] + } catch (err) { + if (err instanceof NotFoundError) { + console.warn(`Doc ${doc._id} in project ${projectId} not found`) + } else { + throw err + } + } + } else { + comments = doc.ranges?.comments + } + + for (const comment of comments) { threadIds.add(comment.op.t.toString()) } }