From b507c8e1bb3c64f8f9129943211b8fe8894c852a Mon Sep 17 00:00:00 2001 From: Eric Mc Sween <5454374+emcsween@users.noreply.github.com> Date: Mon, 17 Feb 2025 07:43:08 -0500 Subject: [PATCH] Merge pull request #23612 from overleaf/em-find-dangling-comments Add script to find projects with dangling comments GitOrigin-RevId: 31048defa2f33c2bf23d14ddc643366775df3104 --- .../web/scripts/find_dangling_comments.mjs | 125 ++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 services/web/scripts/find_dangling_comments.mjs diff --git a/services/web/scripts/find_dangling_comments.mjs b/services/web/scripts/find_dangling_comments.mjs new file mode 100644 index 0000000000..5d13c67970 --- /dev/null +++ b/services/web/scripts/find_dangling_comments.mjs @@ -0,0 +1,125 @@ +// @ts-check + +import minimist from 'minimist' +import { + db, + ObjectId, + READ_PREFERENCE_SECONDARY, +} from '../app/src/infrastructure/mongodb.js' + +const OPTS = parseArgs() + +function parseArgs() { + const args = minimist(process.argv.slice(2), { + string: ['min-project-id', 'max-project-id'], + boolean: ['help'], + }) + + if (args.help) { + usage() + process.exit(0) + } + + return { + minProjectId: args['min-project-id'] ?? null, + maxProjectId: args['max-project-id'] ?? null, + } +} + +function usage() { + console.log(`Usage: find_dangling_comments.mjs [OPTS] + +Options: + + --min-project-id Start scanning at this project id + --max-project-id Stop scanning at this project id`) +} + +async function main() { + let projectsProcessed = 0 + let projectsFound = 0 + for await (const { projectId, threadIds } of fetchThreadIdsByProject()) { + projectsProcessed += 1 + const danglingThreadIds = await findDanglingThreadIds(projectId, threadIds) + if (danglingThreadIds.length > 0) { + console.log( + `Project ${projectId} has dangling threads: ${danglingThreadIds.join(', ')}` + ) + projectsFound += 1 + } + if (projectsProcessed % 100000 === 0) { + console.log( + `${projectsProcessed} projects processed - Last project: ${projectId}` + ) + } + } + console.log(`${projectsFound} projects with dangling comments found`) +} + +async function* fetchThreadIdsByProject() { + const clauses = [] + clauses.push({ + deleted: { $ne: true }, + 'ranges.comments.0': { $exists: true }, + }) + if (OPTS.minProjectId != null) { + clauses.push({ project_id: { $gte: new ObjectId(OPTS.minProjectId) } }) + } + if (OPTS.maxProjectId != null) { + clauses.push({ project_id: { $lte: new ObjectId(OPTS.maxProjectId) } }) + } + const docs = db.docs.find( + { $and: clauses }, + { + sort: { project_id: 1 }, + projection: { project_id: 1, 'ranges.comments': 1 }, + readPreference: READ_PREFERENCE_SECONDARY, + } + ) + let projectId + let threadIds = new Set() + for await (const doc of docs) { + if (projectId !== doc.project_id) { + yield { projectId, threadIds } + projectId = doc.project_id + threadIds = new Set() + } + + projectId = doc.project_id + + for (const comment of doc.ranges.comments) { + threadIds.add(comment.op.t.toString()) + } + } + yield { projectId, threadIds } +} /** + * @param {string} projectId + * @param {Set} threadIds + */ +async function findDanglingThreadIds(projectId, threadIds) { + const rooms = await db.rooms.find( + { project_id: projectId, thread_id: { $exists: true } }, + { readPreference: READ_PREFERENCE_SECONDARY } + ) + const existingThreadIds = new Set() + for await (const room of rooms) { + existingThreadIds.add(room.thread_id.toString()) + } + + const danglingThreadIds = [] + for (const threadId of threadIds) { + if (!existingThreadIds.has(threadId)) { + danglingThreadIds.push(threadId) + } + } + + return danglingThreadIds +} + +try { + await main() + process.exit(0) +} catch (err) { + console.error(err) + process.exit(1) +}