mirror of
https://github.com/yu-i-i/overleaf-cep.git
synced 2026-06-11 23:20:47 +02:00
Create script: remove_unconfirmed_emails.mjs (#23079)
* Create script: remove_unconfirmed_emails.mjs * Update script remove_unconfirmed_emails.mjs after pairing Co-authored-by: Rebeka <rebeka.dekany@overleaf.com> * Add user counts. Add timing * Revert previous changes: just remove the fully unconfirmed emails * Add tests on scripts/remove_unconfirmed_emails.mjs * Update audit log so `removedEmail` is a string and doesn't break the admin-panel audit logs list * Update remove_unconfirmed_emails to have a `--generate` and a `--consume` mode * Update tests on remove_unconfirmed_emails * Add tests checking that `--consume` doesn't delete any email * Update script so `--consume` checks that emails shouldn't be deleted again! * Update CSV path to `/tmp/...` * Add test cases: deleted users, deleted email, comma in email --------- Co-authored-by: Rebeka <rebeka.dekany@overleaf.com> GitOrigin-RevId: 8c60b56bcdfa33bc6143d66c32a5f430fb76f6d7
This commit is contained in:
@@ -2,10 +2,11 @@ const OError = require('@overleaf/o-error')
|
||||
const { UserAuditLogEntry } = require('../../models/UserAuditLogEntry')
|
||||
const { callbackify } = require('util')
|
||||
|
||||
function _canHaveNoIpAddressId(operation) {
|
||||
function _canHaveNoIpAddressId(operation, info) {
|
||||
if (operation === 'join-group-subscription') return true
|
||||
if (operation === 'leave-group-subscription') return true
|
||||
if (operation === 'must-reset-password-set') return true
|
||||
if (operation === 'remove-email' && info.script) return true
|
||||
return false
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,262 @@
|
||||
// @ts-check
|
||||
|
||||
import minimist from 'minimist'
|
||||
import { batchedUpdate } from '@overleaf/mongo-utils/batchedUpdate.js'
|
||||
import { db, ObjectId } from '../app/src/infrastructure/mongodb.js'
|
||||
import UserAuditLogHandler from '../app/src/Features/User/UserAuditLogHandler.js'
|
||||
import fs from 'node:fs/promises'
|
||||
import * as csv from 'csv'
|
||||
import { promisify } from 'node:util'
|
||||
import _ from 'lodash'
|
||||
|
||||
const CSV_FILENAME = '/tmp/remove_unconfirmed_emails.csv'
|
||||
/**
|
||||
* @type {(records: string[][]) => Promise<string>}
|
||||
*/
|
||||
const stringifyAsync = promisify(csv.stringify)
|
||||
/**
|
||||
* @type {(csvString: string) => Promise<string[][]>}
|
||||
*/
|
||||
const parseAsync = promisify(csv.parse)
|
||||
|
||||
function usage() {
|
||||
console.log('Usage: node remove_unconfirmed_emails.mjs')
|
||||
console.log('Removes unconfirmed emails from users')
|
||||
console.log('Options:')
|
||||
console.log(
|
||||
'' +
|
||||
' --generate generate the CSV file (remove_unconfirmed_emails.csv) containing the emails to remove\n' +
|
||||
' --consume consume the CSV file (remove_unconfirmed_emails.csv) and remove the emails (by default it is a dry-run)\n' +
|
||||
' --commit apply the changes (to be used with --consume)\n'
|
||||
)
|
||||
process.exit(0)
|
||||
}
|
||||
|
||||
const { generate, consume, commit, help } = minimist(process.argv.slice(2), {
|
||||
boolean: ['generate', 'consume', 'commit', 'help'],
|
||||
alias: { help: 'h' },
|
||||
default: { generate: false, consume: false, commit: false },
|
||||
})
|
||||
|
||||
async function generateCsvFile() {
|
||||
console.time('generate_csv')
|
||||
|
||||
let processedUsersCount = 0
|
||||
let skippedUnconfirmedPrimaries = 0
|
||||
let totalEmailsToRemove = 0
|
||||
let totalUsersInCsv = 0
|
||||
|
||||
const records = [['User ID', 'Email', 'Sign Up Date']]
|
||||
|
||||
await batchedUpdate(
|
||||
db.users,
|
||||
{
|
||||
$and: [
|
||||
{ emails: { $exists: true } },
|
||||
{ emails: { $not: { $size: 0 } } },
|
||||
// Warning: this also matches unconfirmed primary emails
|
||||
{
|
||||
emails: {
|
||||
$elemMatch: {
|
||||
$or: [{ confirmedAt: { $exists: false } }, { confirmedAt: null }],
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
async users => {
|
||||
console.log('Process', users.length, 'users')
|
||||
processedUsersCount += users.length
|
||||
|
||||
for (const user of users) {
|
||||
const unconfirmedSecondaries = user.emails.filter(
|
||||
email => !email.confirmedAt && email.email !== user.email
|
||||
)
|
||||
|
||||
if (unconfirmedSecondaries.length === 0) {
|
||||
// Users can have been selected because of their unconfirmed primary email
|
||||
// we don't want to remove those
|
||||
skippedUnconfirmedPrimaries++
|
||||
continue
|
||||
}
|
||||
|
||||
for (const email of unconfirmedSecondaries) {
|
||||
records.push([
|
||||
user._id.toString(),
|
||||
email.email,
|
||||
user.signUpDate.toISOString(),
|
||||
])
|
||||
}
|
||||
|
||||
totalUsersInCsv++
|
||||
totalEmailsToRemove += unconfirmedSecondaries.length
|
||||
}
|
||||
},
|
||||
{ _id: 1, signUpDate: 1, emails: 1, email: 1 }
|
||||
)
|
||||
|
||||
const csvContent = await stringifyAsync(records)
|
||||
await fs.writeFile(CSV_FILENAME, csvContent)
|
||||
|
||||
console.log()
|
||||
console.log('Processed users:', processedUsersCount)
|
||||
console.log()
|
||||
console.log('Generated CSV file:', CSV_FILENAME)
|
||||
console.log('Total emails in the CSV:', totalEmailsToRemove)
|
||||
console.log('Total users in the CSV:', totalUsersInCsv)
|
||||
console.log(
|
||||
'Unconfirmed primary emails (skipped):',
|
||||
skippedUnconfirmedPrimaries
|
||||
)
|
||||
console.log()
|
||||
console.timeEnd('generate_csv')
|
||||
console.log()
|
||||
}
|
||||
|
||||
async function consumeCsvFile() {
|
||||
console.time('consume_csv')
|
||||
|
||||
const csvContent = await fs.readFile(CSV_FILENAME, 'utf8')
|
||||
const rows = await parseAsync(csvContent)
|
||||
rows.shift() // Remove header row
|
||||
const emailsByUserId = {}
|
||||
|
||||
for (const [userId, email] of rows) {
|
||||
if (!emailsByUserId[userId]) {
|
||||
emailsByUserId[userId] = []
|
||||
}
|
||||
emailsByUserId[userId].push(email)
|
||||
}
|
||||
|
||||
const userIds = Object.keys(emailsByUserId)
|
||||
let processedUsersCount = 0
|
||||
let removedEmailsCount = 0
|
||||
let totalModifiedUsersCount = 0
|
||||
const skippedEmail = {
|
||||
userNotFound: 0,
|
||||
nowConfirmed: 0,
|
||||
nowPrimary: 0,
|
||||
nowRemoved: 0,
|
||||
}
|
||||
|
||||
console.log('Total emails in the CSV:', rows.length)
|
||||
console.log('Total users in the CSV:', userIds.length)
|
||||
|
||||
for (const userId of userIds) {
|
||||
const emailsToRemove = emailsByUserId[userId]
|
||||
|
||||
const user = await db.users.findOne({ _id: new ObjectId(userId) })
|
||||
if (!user) {
|
||||
skippedEmail.userNotFound += emailsToRemove.length
|
||||
continue
|
||||
}
|
||||
|
||||
const emailsToRemoveNow = emailsToRemove.filter(email => {
|
||||
const currentEmail = user.emails.find(e => e.email === email)
|
||||
if (!currentEmail) {
|
||||
skippedEmail.nowRemoved++
|
||||
return false
|
||||
}
|
||||
if (currentEmail.confirmedAt) {
|
||||
skippedEmail.nowConfirmed++
|
||||
return false
|
||||
}
|
||||
if (currentEmail.email === user.email) {
|
||||
skippedEmail.nowPrimary++
|
||||
return false
|
||||
}
|
||||
return true
|
||||
})
|
||||
|
||||
removedEmailsCount += emailsToRemoveNow.length
|
||||
|
||||
if (commit && emailsToRemoveNow.length > 0) {
|
||||
for (const email of emailsToRemove) {
|
||||
await UserAuditLogHandler.promises.addEntry(
|
||||
userId,
|
||||
'remove-email',
|
||||
undefined,
|
||||
undefined,
|
||||
{
|
||||
removedEmail: email,
|
||||
script: true,
|
||||
note: 'remove unconfirmed secondary emails',
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
const updated = await db.users.updateOne(
|
||||
{ _id: new ObjectId(userId) },
|
||||
{ $pull: { emails: { email: { $in: emailsToRemove } } } }
|
||||
)
|
||||
totalModifiedUsersCount += updated.modifiedCount
|
||||
}
|
||||
|
||||
processedUsersCount++
|
||||
if (processedUsersCount % 100 === 0) {
|
||||
console.log('Processed', processedUsersCount, 'users')
|
||||
}
|
||||
}
|
||||
|
||||
console.log()
|
||||
if (!commit) {
|
||||
console.log('Dry-run, use --commit to apply changes')
|
||||
console.log('This would be the result:')
|
||||
console.log()
|
||||
}
|
||||
|
||||
console.log('Total emails in the CSV:', rows.length)
|
||||
console.log('Total users in the CSV:', userIds.length)
|
||||
console.log('Total users processed:', processedUsersCount)
|
||||
console.log('Total emails removed:', removedEmailsCount)
|
||||
console.log('Skipped emails:', _.sum(Object.values(skippedEmail)))
|
||||
console.log(' - User not found:', skippedEmail.userNotFound)
|
||||
console.log(' - Email now confirmed:', skippedEmail.nowConfirmed)
|
||||
console.log(' - Email now primary:', skippedEmail.nowPrimary)
|
||||
console.log(' - Email now removed:', skippedEmail.nowRemoved)
|
||||
console.log()
|
||||
|
||||
if (commit) {
|
||||
console.log('Total users modified:', totalModifiedUsersCount)
|
||||
} else {
|
||||
console.log('Note: this was a dry-run. No changes were made.')
|
||||
}
|
||||
console.log()
|
||||
console.timeEnd('consume_csv')
|
||||
console.log()
|
||||
}
|
||||
|
||||
async function main() {
|
||||
if (help) {
|
||||
return usage()
|
||||
}
|
||||
|
||||
if (!generate && !consume) {
|
||||
console.error('Error: Either --generate or --consume must be specified')
|
||||
return usage()
|
||||
}
|
||||
|
||||
if (generate && consume) {
|
||||
console.error('Error: Cannot use both --generate and --consume together')
|
||||
return usage()
|
||||
}
|
||||
|
||||
if (commit && !consume) {
|
||||
console.error('Error: --commit can only be used with --consume')
|
||||
return usage()
|
||||
}
|
||||
|
||||
if (generate) {
|
||||
await generateCsvFile()
|
||||
} else if (consume) {
|
||||
await consumeCsvFile()
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
await main()
|
||||
process.exit(0)
|
||||
} catch (error) {
|
||||
console.error(error)
|
||||
process.exit(1)
|
||||
}
|
||||
@@ -0,0 +1,290 @@
|
||||
import { promisify } from 'node:util'
|
||||
import { exec } from 'node:child_process'
|
||||
import { expect } from 'chai'
|
||||
import { filterOutput } from './helpers/settings.mjs'
|
||||
import { db, ObjectId } from '../../../app/src/infrastructure/mongodb.js'
|
||||
import fs from 'node:fs/promises'
|
||||
|
||||
const CSV_FILENAME = '/tmp/remove_unconfirmed_emails.csv'
|
||||
|
||||
async function runScript(mode, commit) {
|
||||
const result = await promisify(exec)(
|
||||
[
|
||||
'node',
|
||||
'scripts/remove_unconfirmed_emails.mjs',
|
||||
mode === 'generate' ? '--generate' : '--consume',
|
||||
commit && '--commit',
|
||||
]
|
||||
.filter(Boolean)
|
||||
.join(' ')
|
||||
)
|
||||
return {
|
||||
...result,
|
||||
stdout: result.stdout.split('\n').filter(filterOutput),
|
||||
}
|
||||
}
|
||||
|
||||
function createUser(signUpDate, emails, userIdx) {
|
||||
const email = `primary${userIdx ?? ''}@overleaf.com`
|
||||
return {
|
||||
_id: new ObjectId(),
|
||||
email,
|
||||
emails,
|
||||
signUpDate,
|
||||
}
|
||||
}
|
||||
|
||||
describe('scripts/remove_unconfirmed_emails', function () {
|
||||
let user
|
||||
|
||||
afterEach(async function () {
|
||||
try {
|
||||
await fs.unlink(CSV_FILENAME)
|
||||
} catch (err) {
|
||||
// Ignore errors if file doesn't exist
|
||||
}
|
||||
})
|
||||
|
||||
describe('when removing unconfirmed secondary emails', function () {
|
||||
beforeEach(async function () {
|
||||
user = createUser(new Date('2000-01-01'), [
|
||||
{ email: 'primary@overleaf.com', confirmedAt: new Date() },
|
||||
{ email: 'unconfirmed1@overleaf.com' },
|
||||
{ email: 'unconfirmed-special-,\'"@overleaf.com' },
|
||||
])
|
||||
await db.users.insertOne(user)
|
||||
})
|
||||
|
||||
it('should remove all unconfirmed secondary emails', async function () {
|
||||
await runScript('generate')
|
||||
const r = await runScript('consume', true)
|
||||
|
||||
expect(r.stdout).to.include('Total emails in the CSV: 2')
|
||||
expect(r.stdout).to.include('Total users processed: 1')
|
||||
|
||||
const updatedUser = await db.users.findOne({ _id: user._id })
|
||||
expect(updatedUser.emails).to.have.length(1)
|
||||
expect(updatedUser.emails[0].email).to.equal(user.email)
|
||||
})
|
||||
|
||||
it('should not modify anything in dry run mode', async function () {
|
||||
await runScript('generate')
|
||||
const r = await runScript('consume', false)
|
||||
|
||||
expect(r.stdout).to.include('Total emails in the CSV: 2')
|
||||
expect(r.stdout).to.include('Total users processed: 1')
|
||||
expect(r.stdout).to.include(
|
||||
'Note: this was a dry-run. No changes were made.'
|
||||
)
|
||||
|
||||
const updatedUser = await db.users.findOne({ _id: user._id })
|
||||
expect(updatedUser.emails).to.have.length(3)
|
||||
})
|
||||
})
|
||||
|
||||
describe('when handling confirmed secondary emails', function () {
|
||||
beforeEach(async function () {
|
||||
user = createUser(new Date('2000-01-01'), [
|
||||
{ email: 'primary@overleaf.com', confirmedAt: new Date() },
|
||||
{ email: 'confirmed@overleaf.com', confirmedAt: new Date() },
|
||||
])
|
||||
await db.users.insertOne(user)
|
||||
})
|
||||
|
||||
it('should preserve confirmed secondary emails', async function () {
|
||||
await runScript('generate')
|
||||
const r = await runScript('consume', true)
|
||||
|
||||
expect(r.stdout).to.include('Total emails in the CSV: 0')
|
||||
expect(r.stdout).to.include('Total users processed: 0')
|
||||
|
||||
const updatedUser = await db.users.findOne({ _id: user._id })
|
||||
expect(updatedUser.emails).to.have.length(2)
|
||||
expect(updatedUser.emails[1].confirmedAt).to.exist
|
||||
})
|
||||
})
|
||||
|
||||
describe('when handling unconfirmed primary emails', function () {
|
||||
beforeEach(async function () {
|
||||
user = createUser(new Date('2000-01-01'), [
|
||||
{ email: 'primary@overleaf.com' },
|
||||
])
|
||||
await db.users.insertOne(user)
|
||||
})
|
||||
|
||||
it('should not remove unconfirmed primary emails', async function () {
|
||||
await runScript('generate')
|
||||
const r = await runScript('consume', true)
|
||||
|
||||
expect(r.stdout).to.include('Total emails in the CSV: 0')
|
||||
expect(r.stdout).to.include('Total users processed: 0')
|
||||
|
||||
const updatedUser = await db.users.findOne({ _id: user._id })
|
||||
expect(updatedUser.emails).to.have.length(1)
|
||||
expect(updatedUser.emails[0].email).to.equal('primary@overleaf.com')
|
||||
})
|
||||
})
|
||||
|
||||
describe('when users confirmed their email in between', function () {
|
||||
beforeEach(async function () {
|
||||
user = createUser(new Date('2000-01-01'), [
|
||||
{ email: 'primary@overleaf.com' },
|
||||
{ email: 'secondary@overleaf.com' },
|
||||
])
|
||||
await db.users.insertOne(user)
|
||||
})
|
||||
|
||||
it('should not remove emails from users who confirmed their email in between', async function () {
|
||||
await runScript('generate')
|
||||
|
||||
await db.users.updateOne(
|
||||
{ _id: user._id },
|
||||
{ $set: { 'emails.1.confirmedAt': new Date() } }
|
||||
)
|
||||
|
||||
const r = await runScript('consume', true)
|
||||
|
||||
expect(r.stdout).to.include('Total emails in the CSV: 1')
|
||||
expect(r.stdout).to.include('Skipped emails: 1')
|
||||
expect(r.stdout).to.include(' - Email now confirmed: 1')
|
||||
|
||||
const updatedUser = await db.users.findOne({ _id: user._id })
|
||||
expect(updatedUser.emails).to.have.length(2)
|
||||
})
|
||||
})
|
||||
|
||||
describe('when users changed their primary email in between', function () {
|
||||
beforeEach(async function () {
|
||||
user = createUser(new Date('2000-01-01'), [
|
||||
{ email: 'primary@overleaf.com' },
|
||||
{ email: 'secondary@overleaf.com' },
|
||||
])
|
||||
await db.users.insertOne(user)
|
||||
})
|
||||
|
||||
it('should not remove emails from users who changed their primary email in between', async function () {
|
||||
await runScript('generate')
|
||||
|
||||
await db.users.updateOne(
|
||||
{ _id: user._id },
|
||||
{ $set: { email: 'secondary@overleaf.com' } }
|
||||
)
|
||||
const r = await runScript('consume', true)
|
||||
|
||||
expect(r.stdout).to.include('Total emails in the CSV: 1')
|
||||
expect(r.stdout).to.include('Skipped emails: 1')
|
||||
expect(r.stdout).to.include(' - Email now primary: 1')
|
||||
|
||||
const updatedUser = await db.users.findOne({ _id: user._id })
|
||||
expect(updatedUser.emails).to.have.length(2)
|
||||
})
|
||||
})
|
||||
|
||||
describe('when users account was deleted in between', function () {
|
||||
beforeEach(async function () {
|
||||
user = createUser(new Date('2000-01-01'), [
|
||||
{ email: 'primary@overleaf.com' },
|
||||
{ email: 'secondary@overleaf.com' },
|
||||
])
|
||||
await db.users.insertOne(user)
|
||||
})
|
||||
|
||||
it('should skip emails from users whose account was deleted', async function () {
|
||||
await runScript('generate')
|
||||
|
||||
// Delete the user
|
||||
await db.users.deleteOne({ _id: user._id })
|
||||
|
||||
const r = await runScript('consume', true)
|
||||
|
||||
expect(r.stdout).to.include('Total emails in the CSV: 1')
|
||||
expect(r.stdout).to.include('Skipped emails: 1')
|
||||
expect(r.stdout).to.include(' - User not found: 1')
|
||||
})
|
||||
})
|
||||
|
||||
describe('when users email was deleted in between', function () {
|
||||
beforeEach(async function () {
|
||||
user = createUser(new Date('2000-01-01'), [
|
||||
{ email: 'primary@overleaf.com' },
|
||||
{ email: 'secondary@overleaf.com' },
|
||||
])
|
||||
await db.users.insertOne(user)
|
||||
})
|
||||
|
||||
it('should skip emails that were already removed', async function () {
|
||||
await runScript('generate')
|
||||
|
||||
// Remove the secondary email
|
||||
await db.users.updateOne(
|
||||
{ _id: user._id },
|
||||
{ $pull: { emails: { email: 'secondary@overleaf.com' } } }
|
||||
)
|
||||
|
||||
const r = await runScript('consume', true)
|
||||
|
||||
expect(r.stdout).to.include('Total emails in the CSV: 1')
|
||||
expect(r.stdout).to.include('Skipped emails: 1')
|
||||
expect(r.stdout).to.include(' - Email now removed: 1')
|
||||
|
||||
const updatedUser = await db.users.findOne({ _id: user._id })
|
||||
expect(updatedUser.emails).to.have.length(1)
|
||||
expect(updatedUser.emails[0].email).to.equal('primary@overleaf.com')
|
||||
})
|
||||
})
|
||||
|
||||
describe('when handling confirmation field edge cases', function () {
|
||||
beforeEach(async function () {
|
||||
user = createUser(new Date('2000-01-01'), [
|
||||
{ email: 'primary@overleaf.com', confirmedAt: new Date() },
|
||||
{ email: 'secondary1@overleaf.com', confirmedAt: null },
|
||||
{ email: 'secondary2@overleaf.com' },
|
||||
])
|
||||
await db.users.insertOne(user)
|
||||
})
|
||||
|
||||
it('should remove emails with both missing and null confirmedAt', async function () {
|
||||
await runScript('generate')
|
||||
const r = await runScript('consume', true)
|
||||
|
||||
expect(r.stdout).to.include('Total emails in the CSV: 2')
|
||||
expect(r.stdout).to.include('Total users processed: 1')
|
||||
|
||||
const updatedUser = await db.users.findOne({ _id: user._id })
|
||||
expect(updatedUser.emails).to.have.length(1)
|
||||
expect(updatedUser.emails[0].email).to.equal(user.email)
|
||||
})
|
||||
})
|
||||
|
||||
describe('CSV file generation', function () {
|
||||
beforeEach(async function () {
|
||||
user = createUser(new Date('2000-01-01'), [
|
||||
{ email: 'primary@overleaf.com', confirmedAt: new Date() },
|
||||
{ email: 'unconfirmed1@overleaf.com' },
|
||||
{ email: 'confirmed1@overleaf.com', confirmedAt: new Date() },
|
||||
{ email: 'unconfirmed2@overleaf.com' },
|
||||
{ email: '!,@overleaf.com' },
|
||||
{ email: "!'@overleaf.com" },
|
||||
{ email: '!,\'"@overleaf.com' },
|
||||
])
|
||||
await db.users.insertOne(user)
|
||||
})
|
||||
|
||||
it('should generate a valid CSV file', async function () {
|
||||
const r = await runScript('generate')
|
||||
|
||||
expect(r.stdout).to.include(
|
||||
'Generated CSV file: /tmp/remove_unconfirmed_emails.csv'
|
||||
)
|
||||
expect(r.stdout).to.include('Total emails in the CSV: 5')
|
||||
const csvContent = await fs.readFile(CSV_FILENAME, 'utf8')
|
||||
expect(csvContent).to.equal(`User ID,Email,Sign Up Date
|
||||
${user._id},unconfirmed1@overleaf.com,2000-01-01T00:00:00.000Z
|
||||
${user._id},unconfirmed2@overleaf.com,2000-01-01T00:00:00.000Z
|
||||
${user._id},"!,@overleaf.com",2000-01-01T00:00:00.000Z
|
||||
${user._id},!'@overleaf.com,2000-01-01T00:00:00.000Z
|
||||
${user._id},"!,'""@overleaf.com",2000-01-01T00:00:00.000Z
|
||||
`)
|
||||
})
|
||||
})
|
||||
})
|
||||
Reference in New Issue
Block a user