mirror of
https://github.com/yu-i-i/overleaf-cep.git
synced 2026-05-23 17:19:37 +02:00
[web] Enrich subscription data with details for marketing GitOrigin-RevId: b2a07264e516ed4fac4643d0a64a8a1656c6fd13
129 lines
2.9 KiB
JavaScript
129 lines
2.9 KiB
JavaScript
import { scriptRunner } from './lib/ScriptRunner.mjs'
|
|
import fs from 'node:fs'
|
|
import readline from 'node:readline'
|
|
import minimist from 'minimist'
|
|
import {
|
|
db,
|
|
ObjectId,
|
|
READ_PREFERENCE_SECONDARY,
|
|
} from '../app/src/infrastructure/mongodb.mjs'
|
|
|
|
function usage() {
|
|
console.log(
|
|
`
|
|
This script extracts user emails given a list of newline separated IDs, outputs to /tmp/emails.txt
|
|
|
|
Usage:
|
|
- Locally:
|
|
docker compose exec web bash
|
|
node scripts/get_emails_by_ids.js [--inputPath=<path>] [--outputPath=<path>] [--batchSize=<number>]
|
|
- On the server:
|
|
rake run:pod[staging,web]
|
|
node scripts/get_emails_by_ids.js [--inputPath=<path>] [--outputPath=<path>] [--batchSize=<number>]
|
|
exit
|
|
kubectl cp web-standalone-prod-XXXXX:/tmp/emails.txt ~/emails.txt
|
|
|
|
Options:
|
|
--help Show this screen
|
|
|
|
--inputPath=<path> Input file path (default: ids.txt)
|
|
|
|
--outputPath=<path> Output file path (default: /tmp/emails.txt)
|
|
|
|
--batchSize=<number> Number of emails to be fetched in one query
|
|
`
|
|
)
|
|
}
|
|
|
|
function parseArgs() {
|
|
const argv = minimist(process.argv.slice(2), {
|
|
string: ['inputPath', 'outputPath'],
|
|
bool: ['help'],
|
|
number: ['batchSize'],
|
|
default: {
|
|
help: false,
|
|
inputPath: 'ids.txt',
|
|
outputPath: '/tmp/emails.txt',
|
|
batchSize: 1000,
|
|
},
|
|
})
|
|
|
|
if (argv.help) {
|
|
usage()
|
|
process.exit(0)
|
|
}
|
|
|
|
return argv
|
|
}
|
|
|
|
async function processBatch(idBatch, writeStream) {
|
|
try {
|
|
const cursor = db.users.find(
|
|
{
|
|
_id: { $in: idBatch },
|
|
},
|
|
{
|
|
projection: {
|
|
_id: 0,
|
|
email: 1,
|
|
},
|
|
readPreference: READ_PREFERENCE_SECONDARY,
|
|
}
|
|
)
|
|
|
|
for await (const doc of cursor) {
|
|
if (doc.email) {
|
|
writeStream.write(doc.email + '\n')
|
|
}
|
|
}
|
|
} catch (err) {
|
|
console.error('Error processing batch:', err)
|
|
}
|
|
}
|
|
|
|
async function main(trackProgress) {
|
|
const args = parseArgs()
|
|
|
|
const readStream = fs.createReadStream(args.inputPath)
|
|
const writeStream = fs.createWriteStream(args.outputPath)
|
|
const rl = readline.createInterface({
|
|
input: readStream,
|
|
crlfDelay: Infinity,
|
|
})
|
|
|
|
let idBatch = []
|
|
|
|
for await (const line of rl) {
|
|
const id = line.trim()
|
|
if (id) {
|
|
try {
|
|
idBatch.push(new ObjectId(id))
|
|
} catch (e) {
|
|
console.warn(`Skipping invalid ObjectId: ${id}`)
|
|
}
|
|
}
|
|
|
|
if (idBatch.length >= args.batchSize) {
|
|
await processBatch(idBatch, writeStream)
|
|
idBatch = []
|
|
}
|
|
}
|
|
|
|
if (idBatch.length > 0) {
|
|
await processBatch(idBatch, writeStream)
|
|
}
|
|
|
|
writeStream.end()
|
|
|
|
console.log(`✅ Success! Found emails written to ${args.outputPath}`)
|
|
await trackProgress('Job finished')
|
|
}
|
|
|
|
try {
|
|
await scriptRunner(main)
|
|
process.exit(0)
|
|
} catch (error) {
|
|
console.error(error)
|
|
process.exit(1)
|
|
}
|