Add script to remove emails with commas, taken from a CSV file (#25107)

* Add script to remove emails with commas and replace them with support emails that contain the encoded former emails

* Enhance RemoveEmailsWithCommasScriptTest to verify unchanged user data

GitOrigin-RevId: 6961995f2a143ac1c53bc2eeb183808a4be7dd02
This commit is contained in:
Antoine Clausse
2025-04-30 11:39:21 +02:00
committed by Copybot
parent 73476180d4
commit c51d6f46d4
2 changed files with 350 additions and 0 deletions

View File

@@ -0,0 +1,124 @@
// @ts-check
import minimist from 'minimist'
import fs from 'node:fs/promises'
import * as csv from 'csv'
import { promisify } from 'node:util'
import UserAuditLogHandler from '../app/src/Features/User/UserAuditLogHandler.js'
import { db } from '../app/src/infrastructure/mongodb.js'
// Fixed path of the input CSV that this script reads.
const CSV_FILENAME = '/tmp/emails-with-commas.csv'
/**
 * Promise-returning wrapper around the callback-style `csv.parse`.
 * Resolves with the parsed rows, each row being an array of column strings.
 *
 * @type {(csvString: string) => Promise<string[][]>}
 */
const parseAsync = promisify(csv.parse)
/**
 * Print the command-line help text, one line per `console.log` call, and then
 * terminate the process with exit code 0.
 */
function usage() {
  const helpLines = [
    'Usage: node remove_emails_with_commas.mjs',
    `Read emails from ${CSV_FILENAME} and remove them from users.`,
    'Add support+<encoded_email>@overleaf.com instead.',
    'Options:',
    ' --commit apply the changes\n',
  ]
  for (const helpLine of helpLines) {
    console.log(helpLine)
  }
  process.exit(0)
}
// Parse the CLI flags. `--commit` is a boolean that defaults to false, so the
// script runs as a dry-run unless it is passed; `-h` is an alias for `--help`.
const { commit, help } = minimist(process.argv.slice(2), {
boolean: ['commit', 'help'],
alias: { help: 'h' },
default: { commit: false },
})
/**
 * Read the CSV at CSV_FILENAME and replace each listed primary email with a
 * `support+<encoded_email>@overleaf.com` placeholder address.
 *
 * Only the first column of every row is used. Every value must contain a
 * comma; otherwise the function throws before any user is looked up. Users are
 * found by their primary `email` field. Database writes and the audit log
 * entry only happen when `--commit` was passed; without it the old -> new
 * mapping is printed and nothing is changed (dry-run).
 *
 * @returns {Promise<void>}
 * @throws {Error} if the CSV contains a value without a comma
 */
async function consumeCsvFileAndUpdate() {
  console.time('remove_emails_with_commas')

  const csvContent = await fs.readFile(CSV_FILENAME, 'utf8')
  const rows = await parseAsync(csvContent)
  const emailsWithComma = rows.map(row => row[0])
  console.log('Total emails in the CSV:', emailsWithComma.length)

  // Safety net: the CSV is expected to list only comma-containing addresses.
  const unexpectedValidEmails = emailsWithComma.filter(
    str => !str.includes(',')
  )
  if (unexpectedValidEmails.length > 0) {
    // Report only the offending entries, not the whole CSV.
    throw new Error(
      'CSV file contains unexpected valid emails: ' +
        JSON.stringify(unexpectedValidEmails)
    )
  }

  let updatedUsersCount = 0
  for (const oldEmail of emailsWithComma) {
    // `_` is the escape prefix, so it has to be encoded before the other
    // characters introduce new underscores.
    // NOTE(review): `_5f`, `_40` and `_2c` are hexadecimal code points while
    // `_60` and `_62` are the decimal code points of `<` and `>`. Left as-is
    // because the expected strings in the script's test use these values.
    const encodedEmail = oldEmail
      .replaceAll('_', '_5f')
      .replaceAll('@', '_40')
      .replaceAll(',', '_2c')
      .replaceAll('<', '_60')
      .replaceAll('>', '_62')
    const newEmail = `support+${encodedEmail}@overleaf.com`
    console.log(oldEmail, '->', newEmail)

    const user = await db.users.findOne({ email: oldEmail })
    if (!user) {
      console.log('User not found for email:', oldEmail)
      continue
    }

    // Dry-run: only print the mapping above.
    if (!commit) {
      continue
    }

    // The removal and the insertion both target `emails`, so they are issued
    // as two separate updates (MongoDB rejects two operators on the same path
    // within a single update).
    await db.users.updateOne(
      { _id: user._id },
      {
        $set: { email: newEmail },
        $pull: { emails: { email: oldEmail } },
      }
    )
    await db.users.updateOne(
      { _id: user._id },
      {
        $addToSet: {
          emails: {
            email: newEmail,
            // Store a Date object rather than a millisecond number.
            createdAt: new Date(),
            // 'overleaf.com' reversed character by character.
            reversedHostname: 'moc.faelrevo',
          },
        },
      }
    )
    // NOTE(review): the two `undefined` arguments are presumably the
    // initiator id and IP address, which a script does not have — confirm
    // against UserAuditLogHandler.
    await UserAuditLogHandler.promises.addEntry(
      user._id,
      'remove-email',
      undefined,
      undefined,
      {
        removedEmail: oldEmail,
        script: true,
        note: 'remove primary email containing commas',
      }
    )
    updatedUsersCount++
  }

  console.log('Updated users:', updatedUsersCount)
  if (!commit) {
    console.log('Note: this was a dry-run. No changes were made.')
  }
  console.log()
  console.timeEnd('remove_emails_with_commas')
  console.log()
}
// Entry point: print the help text when requested, otherwise process the CSV.
// The process exits with code 0 on success and 1 if anything throws.
try {
  if (help) {
    usage()
  } else {
    await consumeCsvFileAndUpdate()
  }
  process.exit(0)
} catch (err) {
  console.error(err)
  process.exit(1)
}

View File

@@ -0,0 +1,226 @@
import { promisify } from 'node:util'
import { exec } from 'node:child_process'
import { expect } from 'chai'
import { filterOutput } from './helpers/settings.mjs'
import { db, ObjectId } from '../../../app/src/infrastructure/mongodb.js'
import fs from 'node:fs/promises'
// Path of the CSV fixture written before each test; the script under test
// reads its input from this same hard-coded location.
const CSV_FILENAME = '/tmp/emails-with-commas.csv'
/**
 * Run scripts/remove_emails_with_commas.mjs in a child process.
 *
 * @param {boolean} commit - when truthy, `--commit` is appended to the command
 * @returns {Promise<object>} the exec result, with `stdout` replaced by an
 *   array of its lines that pass `filterOutput`
 */
async function runScript(commit) {
  const commandParts = ['node', 'scripts/remove_emails_with_commas.mjs']
  if (commit) {
    commandParts.push('--commit')
  }
  const execAsync = promisify(exec)
  const result = await execAsync(commandParts.join(' '))
  const stdoutLines = result.stdout.split('\n').filter(filterOutput)
  return { ...result, stdout: stdoutLines }
}
/**
 * Build a minimal user document with a freshly generated ObjectId.
 *
 * @param {string} email - value for the primary `email` field
 * @param {Array<object>} emails - value for the `emails` field
 * @returns {{_id: ObjectId, email: string, emails: Array<object>}}
 */
function createUser(email, emails) {
  const _id = new ObjectId()
  return { _id, email, emails }
}
// Acceptance tests for scripts/remove_emails_with_commas.mjs. Each test writes
// a CSV fixture, inserts user documents, runs the script in a child process
// and then inspects both the script output and the stored users.
describe('scripts/remove_emails_with_commas', function () {
  let user, unchangedUser

  beforeEach(async function () {
    // Default fixture; individual suites may overwrite it.
    await fs.writeFile(
      CSV_FILENAME,
      '"user,email@test.com"\n"user,another@test.com"\n'
    )
  })

  afterEach(async function () {
    try {
      await fs.unlink(CSV_FILENAME)
    } catch (err) {
      // A missing file is fine; any other failure should surface.
      if (err.code !== 'ENOENT') {
        throw err
      }
    }
  })

  describe('when removing email addresses with commas', function () {
    beforeEach(async function () {
      user = createUser('user,email@test.com', [
        {
          email: 'user,email@test.com',
          createdAt: new Date(),
          reversedHostname: 'moc.tset',
        },
      ])
      await db.users.insertOne(user)

      unchangedUser = createUser('john.doe@example.com', [
        {
          email: 'john.doe@example.com',
          createdAt: new Date(),
          reversedHostname: 'moc.elpmaxe',
        },
      ])
      await db.users.insertOne(unchangedUser)
    })

    afterEach(async function () {
      // Remove both fixtures, including the user the script must not touch.
      await db.users.deleteMany({
        _id: { $in: [user._id, unchangedUser._id] },
      })
    })

    it('should replace emails with commas with encoded support emails', async function () {
      const r = await runScript(true)

      expect(r.stdout).to.include(
        'user,email@test.com -> support+user_2cemail_40test.com@overleaf.com'
      )
      expect(r.stdout).to.include('Updated users: 1')

      const updatedUser = await db.users.findOne({ _id: user._id })
      expect(updatedUser.email).to.equal(
        'support+user_2cemail_40test.com@overleaf.com'
      )
      expect(updatedUser.emails).to.have.length(1)
      expect(updatedUser.emails[0].email).to.equal(
        'support+user_2cemail_40test.com@overleaf.com'
      )
      expect(updatedUser.emails[0].reversedHostname).to.equal('moc.faelrevo')

      const unchanged = await db.users.findOne({ _id: unchangedUser._id })
      expect(unchanged.emails).to.have.length(1)
      expect(unchanged.email).to.equal('john.doe@example.com')
      expect(unchanged.emails[0].email).to.equal('john.doe@example.com')
    })

    it('should not modify anything in dry run mode', async function () {
      const r = await runScript(false)

      expect(r.stdout).to.include(
        'user,email@test.com -> support+user_2cemail_40test.com@overleaf.com'
      )
      expect(r.stdout).to.include(
        'Note: this was a dry-run. No changes were made.'
      )

      const updatedUser = await db.users.findOne({ _id: user._id })
      expect(updatedUser.email).to.equal('user,email@test.com')
      expect(updatedUser.emails).to.have.length(1)
      expect(updatedUser.emails[0].email).to.equal('user,email@test.com')
    })
  })

  describe('when handling multiple email replacements', function () {
    beforeEach(async function () {
      user = createUser('user,email@test.com', [
        {
          email: 'user,email@test.com',
          createdAt: new Date(),
          reversedHostname: 'moc.tset',
        },
        {
          email: 'normal@test.com',
          createdAt: new Date(),
          reversedHostname: 'moc.tset',
        },
      ])
      await db.users.insertOne(user)
    })

    afterEach(async function () {
      await db.users.deleteOne({ _id: user._id })
    })

    it('should only replace primary email with comma and keep other emails', async function () {
      const r = await runScript(true)

      expect(r.stdout).to.include(
        'user,email@test.com -> support+user_2cemail_40test.com@overleaf.com'
      )
      expect(r.stdout).to.include('Updated users: 1')

      const updatedUser = await db.users.findOne({ _id: user._id })
      expect(updatedUser.email).to.equal(
        'support+user_2cemail_40test.com@overleaf.com'
      )
      // The old entry is pulled first and the new one appended afterwards.
      expect(updatedUser.emails).to.have.length(2)
      expect(updatedUser.emails[0].email).to.equal('normal@test.com')
      expect(updatedUser.emails[1].email).to.equal(
        'support+user_2cemail_40test.com@overleaf.com'
      )
    })
  })

  describe('when handling special characters in emails', function () {
    let user2

    beforeEach(async function () {
      await fs.writeFile(
        CSV_FILENAME,
        '"user,email@test.com"\n",<user@test.com>"\n"user_special@test.co,"\n'
      )

      user = createUser('user,email@test.com', [
        {
          email: 'user,email@test.com',
          createdAt: new Date(),
          reversedHostname: 'moc.tset',
        },
      ])
      await db.users.insertOne(user)

      user2 = createUser('user<>@test.com', [
        {
          email: 'user<>@test.com',
          createdAt: new Date(),
          reversedHostname: 'moc.tset',
        },
      ])
      await db.users.insertOne(user2)
    })

    afterEach(async function () {
      // Delete by _id: user2's address is not listed in the CSV, so the script
      // never renames it and an email-based cleanup would leave it behind.
      await db.users.deleteMany({
        _id: { $in: [user._id, user2._id] },
      })
    })

    it('should correctly encode various special characters', async function () {
      const r = await runScript(true)

      expect(r.stdout).to.include(
        'user,email@test.com -> support+user_2cemail_40test.com@overleaf.com'
      )
      expect(r.stdout).to.include(
        ',<user@test.com> -> support+_2c_60user_40test.com_62@overleaf.com'
      )

      const updatedUser1 = await db.users.findOne({ _id: user._id })
      expect(updatedUser1.email).to.equal(
        'support+user_2cemail_40test.com@overleaf.com'
      )
    })
  })

  describe('when user does not exist', function () {
    beforeEach(async function () {
      await fs.writeFile(CSV_FILENAME, '"nonexistent,email@test.com"\n')
    })

    it('should handle missing users gracefully', async function () {
      const r = await runScript(true)

      expect(r.stdout).to.include(
        'User not found for email: nonexistent,email@test.com'
      )
      expect(r.stdout).to.include('Updated users: 0')
    })
  })
})