From c51d6f46d443a7149ebc65e699450eb79571fdc2 Mon Sep 17 00:00:00 2001
From: Antoine Clausse
Date: Wed, 30 Apr 2025 11:39:21 +0200
Subject: [PATCH] Add script to remove emails with commas, taken from a CSV file (#25107)

* Add script to remove emails with commas and replace them emails to support with encoded former emails

* Enhance RemoveEmailsWithCommasScriptTest to verify unchanged user data

GitOrigin-RevId: 6961995f2a143ac1c53bc2eeb183808a4be7dd02
---
 .../web/scripts/remove_emails_with_commas.mjs | 159 ++++++++++++
 .../src/RemoveEmailsWithCommasScriptTest.mjs  | 236 ++++++++++++++++++
 2 files changed, 395 insertions(+)
 create mode 100644 services/web/scripts/remove_emails_with_commas.mjs
 create mode 100644 services/web/test/acceptance/src/RemoveEmailsWithCommasScriptTest.mjs

diff --git a/services/web/scripts/remove_emails_with_commas.mjs b/services/web/scripts/remove_emails_with_commas.mjs
new file mode 100644
index 0000000000..29d78b129c
--- /dev/null
+++ b/services/web/scripts/remove_emails_with_commas.mjs
@@ -0,0 +1,159 @@
+// @ts-check
+
+import minimist from 'minimist'
+import fs from 'node:fs/promises'
+import * as csv from 'csv'
+import { promisify } from 'node:util'
+import UserAuditLogHandler from '../app/src/Features/User/UserAuditLogHandler.js'
+import { db } from '../app/src/infrastructure/mongodb.js'
+
+const CSV_FILENAME = '/tmp/emails-with-commas.csv'
+
+/**
+ * Promise-returning wrapper around the callback-style `csv.parse`.
+ *
+ * @type {(csvString: string) => Promise}
+ */
+const parseAsync = promisify(csv.parse)
+
+/**
+ * Print the command-line help and exit with status 0.
+ */
+function usage() {
+  console.log('Usage: node remove_emails_with_commas.mjs')
+  console.log(`Read emails from ${CSV_FILENAME} and remove them from users.`)
+  console.log('Add support+<encoded-email>@overleaf.com instead.')
+  console.log('Options:')
+  console.log(' --commit apply the changes\n')
+  process.exit(0)
+}
+
+const { commit, help } = minimist(process.argv.slice(2), {
+  boolean: ['commit', 'help'],
+  alias: { help: 'h' },
+  default: { commit: false },
+})
+
+/**
+ * Encode an email so that it can be embedded in the local part of a
+ * replacement address.
+ *
+ * `_` is the escape character, so it must be replaced first; otherwise the
+ * underscores introduced by the later replacements would be escaped again.
+ * The codes for `_`, `@` and `,` are hexadecimal ASCII values, while the codes
+ * for `<` and `>` are decimal ASCII values.
+ *
+ * @param {string} email
+ * @returns {string}
+ */
+function encodeEmail(email) {
+  return email
+    .replaceAll('_', '_5f')
+    .replaceAll('@', '_40')
+    .replaceAll(',', '_2c')
+    .replaceAll('<', '_60')
+    .replaceAll('>', '_62')
+}
+
+/**
+ * Read the emails listed in the first column of CSV_FILENAME and, for each
+ * user whose primary email matches, replace that email with
+ * `support+<encoded email>@overleaf.com` in both `email` and `emails`.
+ *
+ * Changes are only written when `--commit` is passed; otherwise the planned
+ * replacements are just logged.
+ *
+ * @throws {Error} if any email in the CSV does not contain a comma
+ */
+async function consumeCsvFileAndUpdate() {
+  console.time('remove_emails_with_commas')
+
+  const csvContent = await fs.readFile(CSV_FILENAME, 'utf8')
+  const rows = await parseAsync(csvContent)
+  const emailsWithComma = rows.map(row => row[0])
+
+  console.log('Total emails in the CSV:', emailsWithComma.length)
+
+  const unexpectedValidEmails = emailsWithComma.filter(
+    str => !str.includes(',')
+  )
+  if (unexpectedValidEmails.length > 0) {
+    // Report only the offending entries, not the whole CSV.
+    throw new Error(
+      'CSV file contains unexpected valid emails: ' +
+        JSON.stringify(unexpectedValidEmails)
+    )
+  }
+
+  let updatedUsersCount = 0
+  for (const oldEmail of emailsWithComma) {
+    const newEmail = `support+${encodeEmail(oldEmail)}@overleaf.com`
+
+    console.log(oldEmail, '->', newEmail)
+
+    const user = await db.users.findOne({ email: oldEmail })
+
+    if (!user) {
+      console.log('User not found for email:', oldEmail)
+      continue
+    }
+
+    if (commit) {
+      // The old entry is pulled and the new one added in two separate
+      // updates because MongoDB rejects a single update that applies two
+      // operators to the same field.
+      await db.users.updateOne(
+        { _id: user._id },
+        {
+          $set: { email: newEmail },
+          $pull: { emails: { email: oldEmail } },
+        }
+      )
+      await db.users.updateOne(
+        { _id: user._id },
+        {
+          $addToSet: {
+            emails: {
+              email: newEmail,
+              // A Date rather than the number returned by Date.now(), matching
+              // how the acceptance test fixtures store createdAt.
+              createdAt: new Date(),
+              reversedHostname: 'moc.faelrevo',
+            },
+          },
+        }
+      )
+
+      await UserAuditLogHandler.promises.addEntry(
+        user._id,
+        'remove-email',
+        undefined,
+        undefined,
+        {
+          removedEmail: oldEmail,
+          script: true,
+          note: 'remove primary email containing commas',
+        }
+      )
+      updatedUsersCount++
+    }
+  }
+
+  console.log('Updated users:', updatedUsersCount)
+
+  if (!commit) {
+    console.log('Note: this was a dry-run. No changes were made.')
+  }
+  console.log()
+  console.timeEnd('remove_emails_with_commas')
+  console.log()
+}
+
+try {
+  if (help) usage()
+  else await consumeCsvFileAndUpdate()
+  process.exit(0)
+} catch (error) {
+  console.error(error)
+  process.exit(1)
+}
diff --git a/services/web/test/acceptance/src/RemoveEmailsWithCommasScriptTest.mjs b/services/web/test/acceptance/src/RemoveEmailsWithCommasScriptTest.mjs
new file mode 100644
index 0000000000..f50f8f19df
--- /dev/null
+++ b/services/web/test/acceptance/src/RemoveEmailsWithCommasScriptTest.mjs
@@ -0,0 +1,236 @@
+import { promisify } from 'node:util'
+import { exec } from 'node:child_process'
+import { expect } from 'chai'
+import { filterOutput } from './helpers/settings.mjs'
+import { db, ObjectId } from '../../../app/src/infrastructure/mongodb.js'
+import fs from 'node:fs/promises'
+
+const CSV_FILENAME = '/tmp/emails-with-commas.csv'
+
+/**
+ * Run the script in a child process and return its result, with stdout split
+ * into lines and passed through `filterOutput`.
+ *
+ * @param {boolean} commit - pass `--commit` to the script when true
+ */
+async function runScript(commit) {
+  const result = await promisify(exec)(
+    ['node', 'scripts/remove_emails_with_commas.mjs', commit && '--commit']
+      .filter(Boolean)
+      .join(' ')
+  )
+  return {
+    ...result,
+    stdout: result.stdout.split('\n').filter(filterOutput),
+  }
+}
+
+/**
+ * Build a minimal user document with a fresh ObjectId.
+ */
+function createUser(email, emails) {
+  return {
+    _id: new ObjectId(),
+    email,
+    emails,
+  }
+}
+
+describe('scripts/remove_emails_with_commas', function () {
+  let user, unchangedUser
+
+  beforeEach(async function () {
+    await fs.writeFile(
+      CSV_FILENAME,
+      '"user,email@test.com"\n"user,another@test.com"\n'
+    )
+  })
+
+  afterEach(async function () {
+    // `force` makes the removal a no-op when the file does not exist, while
+    // still surfacing any other failure.
+    await fs.rm(CSV_FILENAME, { force: true })
+  })
+
+  describe('when removing email addresses with commas', function () {
+    beforeEach(async function () {
+      user = createUser('user,email@test.com', [
+        {
+          email: 'user,email@test.com',
+          createdAt: new Date(),
+          reversedHostname: 'moc.tset',
+        },
+      ])
+      await db.users.insertOne(user)
+
+      unchangedUser = createUser('john.doe@example.com', [
+        {
+          email: 'john.doe@example.com',
+          createdAt: new Date(),
+          reversedHostname: 'moc.elpmaxe',
+        },
+      ])
+      await db.users.insertOne(unchangedUser)
+    })
+
+    afterEach(async function () {
+      // Remove both fixtures so that nothing leaks into other tests.
+      await db.users.deleteMany({
+        _id: { $in: [user._id, unchangedUser._id] },
+      })
+    })
+
+    it('should replace emails with commas with encoded support emails', async function () {
+      const r = await runScript(true)
+
+      expect(r.stdout).to.include(
+        'user,email@test.com -> support+user_2cemail_40test.com@overleaf.com'
+      )
+      expect(r.stdout).to.include('Updated users: 1')
+
+      const updatedUser = await db.users.findOne({ _id: user._id })
+      expect(updatedUser.email).to.equal(
+        'support+user_2cemail_40test.com@overleaf.com'
+      )
+      expect(updatedUser.emails).to.have.length(1)
+      expect(updatedUser.emails[0].email).to.equal(
+        'support+user_2cemail_40test.com@overleaf.com'
+      )
+      expect(updatedUser.emails[0].reversedHostname).to.equal('moc.faelrevo')
+      expect(updatedUser.emails[0].createdAt).to.be.an.instanceOf(Date)
+
+      const unchanged = await db.users.findOne({ _id: unchangedUser._id })
+
+      expect(unchanged.emails).to.have.length(1)
+      expect(unchanged.email).to.equal('john.doe@example.com')
+      expect(unchanged.emails[0].email).to.equal('john.doe@example.com')
+    })
+
+    it('should not modify anything in dry run mode', async function () {
+      const r = await runScript(false)
+
+      expect(r.stdout).to.include(
+        'user,email@test.com -> support+user_2cemail_40test.com@overleaf.com'
+      )
+      expect(r.stdout).to.include(
+        'Note: this was a dry-run. No changes were made.'
+      )
+
+      const updatedUser = await db.users.findOne({ _id: user._id })
+      expect(updatedUser.email).to.equal('user,email@test.com')
+      expect(updatedUser.emails).to.have.length(1)
+      expect(updatedUser.emails[0].email).to.equal('user,email@test.com')
+    })
+  })
+
+  describe('when handling multiple email replacements', function () {
+    beforeEach(async function () {
+      user = createUser('user,email@test.com', [
+        {
+          email: 'user,email@test.com',
+          createdAt: new Date(),
+          reversedHostname: 'moc.tset',
+        },
+        {
+          email: 'normal@test.com',
+          createdAt: new Date(),
+          reversedHostname: 'moc.tset',
+        },
+      ])
+      await db.users.insertOne(user)
+    })
+
+    afterEach(async function () {
+      await db.users.deleteOne({ _id: user._id })
+    })
+
+    it('should only replace primary email with comma and keep other emails', async function () {
+      const r = await runScript(true)
+
+      expect(r.stdout).to.include(
+        'user,email@test.com -> support+user_2cemail_40test.com@overleaf.com'
+      )
+      expect(r.stdout).to.include('Updated users: 1')
+
+      const updatedUser = await db.users.findOne({ _id: user._id })
+      expect(updatedUser.email).to.equal(
+        'support+user_2cemail_40test.com@overleaf.com'
+      )
+      expect(updatedUser.emails).to.have.length(2)
+      expect(updatedUser.emails[0].email).to.equal('normal@test.com')
+      expect(updatedUser.emails[1].email).to.equal(
+        'support+user_2cemail_40test.com@overleaf.com'
+      )
+    })
+  })
+
+  describe('when handling special characters in emails', function () {
+    let user2
+
+    beforeEach(async function () {
+      await fs.writeFile(
+        CSV_FILENAME,
+        '"user,email@test.com"\n",<user@test.com>"\n"user_special@test.co,"\n'
+      )
+
+      user = createUser('user,email@test.com', [
+        {
+          email: 'user,email@test.com',
+          createdAt: new Date(),
+          reversedHostname: 'moc.tset',
+        },
+      ])
+
+      await db.users.insertOne(user)
+
+      user2 = createUser('user<>@test.com', [
+        {
+          email: 'user<>@test.com',
+          createdAt: new Date(),
+          reversedHostname: 'moc.tset',
+        },
+      ])
+
+      await db.users.insertOne(user2)
+    })
+
+    afterEach(async function () {
+      // Delete by _id: user2's email is not listed in the CSV, so it is never
+      // rewritten and a lookup by replacement email would leave it behind.
+      await db.users.deleteMany({
+        _id: { $in: [user._id, user2._id] },
+      })
+    })
+
+    it('should correctly encode various special characters', async function () {
+      const r = await runScript(true)
+
+      expect(r.stdout).to.include(
+        'user,email@test.com -> support+user_2cemail_40test.com@overleaf.com'
+      )
+      expect(r.stdout).to.include(
+        ',<user@test.com> -> support+_2c_60user_40test.com_62@overleaf.com'
+      )
+
+      const updatedUser1 = await db.users.findOne({ _id: user._id })
+      expect(updatedUser1.email).to.equal(
+        'support+user_2cemail_40test.com@overleaf.com'
+      )
+    })
+  })
+
+  describe('when user does not exist', function () {
+    beforeEach(async function () {
+      await fs.writeFile(CSV_FILENAME, '"nonexistent,email@test.com"\n')
+    })
+
+    it('should handle missing users gracefully', async function () {
+      const r = await runScript(true)
+
+      expect(r.stdout).to.include(
+        'User not found for email: nonexistent,email@test.com'
+      )
+      expect(r.stdout).to.include('Updated users: 0')
+    })
+  })
+})