mirror of
https://github.com/yu-i-i/overleaf-cep.git
synced 2026-05-25 02:00:10 +02:00
Merge pull request #24102 from overleaf/bg-backup-stats-script
add backup sampling script GitOrigin-RevId: cfd546d421cdcfb9f8ad2111703a437a7272bd54
This commit is contained in:
169
services/history-v1/storage/scripts/backup_sample.mjs
Normal file
169
services/history-v1/storage/scripts/backup_sample.mjs
Normal file
@@ -0,0 +1,169 @@
|
||||
// @ts-check
|
||||
import { ObjectId } from 'mongodb'
|
||||
import { READ_PREFERENCE_SECONDARY } from '@overleaf/mongo-utils/batchedUpdate.js'
|
||||
import { db, client } from '../lib/mongodb.js'
|
||||
|
||||
const projectsCollection = db.collection('projects')

// Enable caching for ObjectId.toString()
ObjectId.cacheHexString = true

// Configuration, taken from optional positional CLI arguments:
//   argv[2] - number of projects requested per sampling iteration
//             (default 10000)
//   argv[3] - target margin of error, in percentage points (default 5.0)
// Values are validated up front. An unparsable sample size would otherwise
// reach the $sample stage as NaN, and an unparsable target would make the
// sampling loop's `currentError > target` comparison false immediately, so
// the script would report statistics without sampling anything.
const SAMPLE_SIZE_PER_ITERATION = process.argv[2]
  ? parseInt(process.argv[2], 10)
  : 10000
if (
  !Number.isInteger(SAMPLE_SIZE_PER_ITERATION) ||
  SAMPLE_SIZE_PER_ITERATION <= 0
) {
  throw new Error(
    `Invalid sample size "${process.argv[2]}": expected a positive integer`
  )
}

const TARGET_ERROR_PERCENTAGE = process.argv[3]
  ? parseFloat(process.argv[3])
  : 5.0
if (!Number.isFinite(TARGET_ERROR_PERCENTAGE) || TARGET_ERROR_PERCENTAGE <= 0) {
  throw new Error(
    `Invalid target margin of error "${process.argv[3]}": expected a positive number`
  )
}

// Set by handleSignal(); the sampling loop checks it before each iteration.
let gracefulShutdownInitiated = false

process.on('SIGINT', handleSignal)
process.on('SIGTERM', handleSignal)
|
||||
|
||||
/**
 * Handler registered for SIGINT and SIGTERM.
 *
 * It does not stop the process itself: it only raises the module-level
 * shutdown flag and emits a warning.
 */
function handleSignal() {
  gracefulShutdownInitiated = true
  console.warn('graceful shutdown initiated')
}
|
||||
|
||||
/**
 * Draw one random sample from the projects collection and count how many of
 * the sampled projects have `overleaf.backup.lastBackedUpVersion` set.
 *
 * The work is done in a single aggregation read with the secondary read
 * preference: `$sample` picks the documents, `$project` reduces each one to
 * a boolean `hasBackup` flag, and `$group` tallies the totals.
 *
 * @param {number} sampleSize - number of documents requested from `$sample`
 * @returns {Promise<{totalSampled: number, backedUp: number}>} how many
 *   documents were actually sampled and how many of them had a backup version
 */
async function takeSample(sampleSize) {
  const results = await projectsCollection
    .aggregate(
      [
        { $sample: { size: sampleSize } },
        {
          $project: {
            _id: 0,
            // Strict boolean: true when the field is present and non-null.
            // Feeding the raw version number to $cond would treat a
            // lastBackedUpVersion of 0 as false and undercount backups.
            hasBackup: {
              $ne: [
                { $ifNull: ['$overleaf.backup.lastBackedUpVersion', null] },
                null,
              ],
            },
          },
        },
        {
          $group: {
            _id: null,
            totalSampled: { $sum: 1 },
            backedUp: { $sum: { $cond: ['$hasBackup', 1, 0] } },
          },
        },
      ],
      { readPreference: READ_PREFERENCE_SECONDARY }
    )
    .toArray()

  // No group document came back (e.g. nothing was sampled): report zeros.
  if (results.length === 0) {
    return { totalSampled: 0, backedUp: 0 }
  }

  const [{ totalSampled, backedUp }] = results
  return { totalSampled, backedUp }
}
|
||||
|
||||
/**
 * Estimate the backed-up proportion and its 95% confidence interval from the
 * cumulative sample counts.
 *
 * The standard error of the proportion is scaled by a finite population
 * correction. The correction is clamped at zero so that a sample at least as
 * large as the (estimated) population yields a margin of 0 instead of NaN;
 * this also covers a population of 1, where the unclamped formula is 0/0.
 *
 * With zero samples the proportion is 0/0, so `proportion`, `marginOfError`
 * and the formatted fields are NaN / "NaN".
 *
 * @param {number} cumulativeSampled - total number of projects sampled so far
 * @param {number} cumulativeBackedUp - how many of those had a backup
 * @param {number} totalPopulation - (estimated) number of projects overall
 * @returns {{
 *   proportion: number,
 *   percentage: string,
 *   marginOfError: number,
 *   errorPercentage: string,
 *   lowerBound: string,
 *   upperBound: string,
 *   sampleSize: number,
 *   populationSize: number
 * }} raw values as fractions; the string fields are percentages formatted
 *   with two decimals, with the interval bounds clamped to 0-100
 */
function calculateStatistics(
  cumulativeSampled,
  cumulativeBackedUp,
  totalPopulation
) {
  const proportion = cumulativeBackedUp / cumulativeSampled

  // Finite population correction, clamped so it never goes negative (sample
  // larger than the population estimate) or divides by zero (population <= 1).
  const fpcSquared =
    totalPopulation > 1
      ? Math.max(
          0,
          (totalPopulation - cumulativeSampled) / (totalPopulation - 1)
        )
      : 0
  const fpc = Math.sqrt(fpcSquared)
  const stdError =
    Math.sqrt((proportion * (1 - proportion)) / cumulativeSampled) * fpc

  // 95% confidence interval is approximately ±1.96 standard errors
  const marginOfError = 1.96 * stdError

  // A proportion cannot lie outside [0, 1], so keep the reported interval
  // within that range (NaN passes through unchanged).
  const lower = Math.max(0, proportion - marginOfError)
  const upper = Math.min(1, proportion + marginOfError)

  return {
    proportion,
    percentage: (proportion * 100).toFixed(2),
    marginOfError,
    errorPercentage: (marginOfError * 100).toFixed(2),
    lowerBound: (lower * 100).toFixed(2),
    upperBound: (upper * 100).toFixed(2),
    sampleSize: cumulativeSampled,
    populationSize: totalPopulation,
  }
}
|
||||
|
||||
/**
 * Repeatedly sample the projects collection until the margin of error of the
 * estimated backed-up percentage is no longer above the configured target,
 * or until a shutdown signal has been received.
 *
 * Prints one table row per iteration, followed by a summary of the cumulative
 * sample.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log('Date:', new Date().toISOString())

  const populationEstimate = await projectsCollection.estimatedDocumentCount({
    readPreference: READ_PREFERENCE_SECONDARY,
  })
  console.log(
    `Total projects in collection (estimated): ${populationEstimate.toLocaleString()}`
  )
  console.log(`Target margin of error: ${TARGET_ERROR_PERCENTAGE}%`)

  // Running totals across all iterations.
  const tally = { sampled: 0, backedUp: 0 }
  let marginPercent = Infinity

  console.log('Iteration | Total Sampled | % Backed Up | Margin of Error')
  console.log('----------|---------------|-------------|----------------')

  for (let round = 1; marginPercent > TARGET_ERROR_PERCENTAGE; round++) {
    if (gracefulShutdownInitiated) {
      console.log('Graceful shutdown initiated. Exiting sampling loop.')
      break
    }

    const batch = await takeSample(SAMPLE_SIZE_PER_ITERATION)
    tally.sampled += batch.totalSampled
    tally.backedUp += batch.backedUp

    const snapshot = calculateStatistics(
      tally.sampled,
      tally.backedUp,
      populationEstimate
    )
    // The loop condition compares the rounded (two-decimal) margin.
    marginPercent = parseFloat(snapshot.errorPercentage)

    const row = [
      String(round).padStart(9),
      String(tally.sampled).padStart(13),
      `${snapshot.percentage.padStart(10)}%`,
      `\u00B1${snapshot.errorPercentage}%`,
    ].join(' | ')
    console.log(row)

    // Small delay between iterations
    await new Promise(resolve => setTimeout(resolve, 100))
  }

  const { percentage, lowerBound, upperBound, errorPercentage } =
    calculateStatistics(tally.sampled, tally.backedUp, populationEstimate)

  console.log(
    `Projects sampled: ${tally.sampled.toLocaleString()} out of ${populationEstimate.toLocaleString()}`
  )
  console.log(`Estimated percentage with lastBackedUpVersion: ${percentage}%`)
  console.log(`95% Confidence Interval: ${lowerBound}% - ${upperBound}%`)
  console.log(`Final Margin of Error: \u00B1${errorPercentage}%`)
}
|
||||
|
||||
// Script entry point: run the sampling, report the outcome, and always close
// the MongoDB client afterwards.
try {
  await main()
  console.log('Done.')
} catch (err) {
  console.error('Error:', err)
  // Signal failure through the exit code rather than exiting immediately.
  process.exitCode = 1
} finally {
  // The close is not awaited; a failure to close is only logged.
  client
    .close()
    .catch(closeErr => console.error('Error closing MongoDB:', closeErr))
}
|
||||
Reference in New Issue
Block a user