Files
overleaf-cep/services/web/scripts/get_emails_by_ids.mjs
Olzhas Askar 6a93b6e76a Merge pull request #30093 from overleaf/oa-sub-marketing
[web] Enrich subscription data with details for marketing

GitOrigin-RevId: b2a07264e516ed4fac4643d0a64a8a1656c6fd13
2025-12-08 09:06:01 +00:00

129 lines
2.9 KiB
JavaScript

import { scriptRunner } from './lib/ScriptRunner.mjs'
import fs from 'node:fs'
import readline from 'node:readline'
import minimist from 'minimist'
import {
db,
ObjectId,
READ_PREFERENCE_SECONDARY,
} from '../app/src/infrastructure/mongodb.mjs'
function usage() {
console.log(
`
This script extracts user emails given a list of newline separated IDs, outputs to /tmp/emails.txt
Usage:
- Locally:
docker compose exec web bash
node scripts/get_emails_by_ids.js [--inputPath=<path>] [--outputPath=<path>] [--batchSize=<number>]
- On the server:
rake run:pod[staging,web]
node scripts/get_emails_by_ids.js [--inputPath=<path>] [--outputPath=<path>] [--batchSize=<number>]
exit
kubectl cp web-standalone-prod-XXXXX:/tmp/emails.txt ~/emails.txt
Options:
--help Show this screen
--inputPath=<path> Input file path (default: ids.txt)
--outputPath=<path> Output file path (default: /tmp/emails.txt)
--batchSize=<number> Number of emails to be fetched in one query
`
)
}
function parseArgs() {
const argv = minimist(process.argv.slice(2), {
string: ['inputPath', 'outputPath'],
bool: ['help'],
number: ['batchSize'],
default: {
help: false,
inputPath: 'ids.txt',
outputPath: '/tmp/emails.txt',
batchSize: 1000,
},
})
if (argv.help) {
usage()
process.exit(0)
}
return argv
}
async function processBatch(idBatch, writeStream) {
try {
const cursor = db.users.find(
{
_id: { $in: idBatch },
},
{
projection: {
_id: 0,
email: 1,
},
readPreference: READ_PREFERENCE_SECONDARY,
}
)
for await (const doc of cursor) {
if (doc.email) {
writeStream.write(doc.email + '\n')
}
}
} catch (err) {
console.error('Error processing batch:', err)
}
}
async function main(trackProgress) {
const args = parseArgs()
const readStream = fs.createReadStream(args.inputPath)
const writeStream = fs.createWriteStream(args.outputPath)
const rl = readline.createInterface({
input: readStream,
crlfDelay: Infinity,
})
let idBatch = []
for await (const line of rl) {
const id = line.trim()
if (id) {
try {
idBatch.push(new ObjectId(id))
} catch (e) {
console.warn(`Skipping invalid ObjectId: ${id}`)
}
}
if (idBatch.length >= args.batchSize) {
await processBatch(idBatch, writeStream)
idBatch = []
}
}
if (idBatch.length > 0) {
await processBatch(idBatch, writeStream)
}
writeStream.end()
console.log(`✅ Success! Found emails written to ${args.outputPath}`)
await trackProgress('Job finished')
}
try {
await scriptRunner(main)
process.exit(0)
} catch (error) {
console.error(error)
process.exit(1)
}