Merge pull request #27257 from overleaf/bg-filestore-migration-usability

filestore migration usability changes

GitOrigin-RevId: 47e8c8434c35b1b16c41700dfef11ce4602a3063
This commit is contained in:
Brian Gough
2025-07-23 12:04:01 +01:00
committed by Copybot
parent 1b4cbd4efb
commit b8e2a8bfdb
3 changed files with 399 additions and 71 deletions

View File

@@ -11,7 +11,7 @@ const LoggingManager = {
/**
* @param {string} name - The name of the logger
*/
initialize(name) {
initialize(name, options = {}) {
this.isProduction =
(process.env.NODE_ENV || '').toLowerCase() === 'production'
const isTest = (process.env.NODE_ENV || '').toLowerCase() === 'test'
@@ -27,7 +27,7 @@ const LoggingManager = {
req: Serializers.req,
res: Serializers.res,
},
streams: [this._getOutputStreamConfig()],
streams: options.streams ?? [this._getOutputStreamConfig()],
})
this._setupRingBuffer()
this._setupLogLevelChecker()

View File

@@ -79,16 +79,30 @@ ObjectId.cacheHexString = true
*/
/**
* @return {{PROJECT_IDS_FROM: string, PROCESS_HASHED_FILES: boolean, LOGGING_IDENTIFIER: string, BATCH_RANGE_START: string, PROCESS_BLOBS: boolean, BATCH_RANGE_END: string, PROCESS_NON_DELETED_PROJECTS: boolean, PROCESS_DELETED_PROJECTS: boolean}}
* @return {{PROJECT_IDS_FROM: string, PROCESS_HASHED_FILES: boolean, LOGGING_IDENTIFIER: string, BATCH_RANGE_START: string, BATCH_RANGE_END: string, PROCESS_NON_DELETED_PROJECTS: boolean, PROCESS_DELETED_PROJECTS: boolean, PROCESS_BLOBS: boolean, DRY_RUN: boolean, OUTPUT_FILE: string, DISPLAY_REPORT: boolean}}
*/
function parseArgs() {
const PUBLIC_LAUNCH_DATE = new Date('2012-01-01T00:00:00Z')
const DEFAULT_OUTPUT_FILE = `file-migration-${new Date()
.toISOString()
.replace(/[:.]/g, '_')}.log`
const args = commandLineArgs([
{ name: 'processNonDeletedProjects', type: String, defaultValue: 'false' },
{ name: 'processDeletedProjects', type: String, defaultValue: 'false' },
{ name: 'processHashedFiles', type: String, defaultValue: 'false' },
{ name: 'processBlobs', type: String, defaultValue: 'true' },
{ name: 'projectIdsFrom', type: String, defaultValue: '' },
{ name: 'help', alias: 'h', type: Boolean },
{ name: 'all', alias: 'a', type: Boolean },
{ name: 'projects', type: Boolean },
{ name: 'deleted-projects', type: Boolean },
{ name: 'skip-hashed-files', type: Boolean },
{ name: 'skip-existing-blobs', type: Boolean },
{ name: 'from-file', type: String, defaultValue: '' },
{ name: 'dry-run', alias: 'n', type: Boolean },
{
name: 'output',
alias: 'o',
type: String,
defaultValue: DEFAULT_OUTPUT_FILE,
},
{ name: 'report', type: Boolean },
{
name: 'BATCH_RANGE_START',
type: String,
@@ -99,31 +113,107 @@ function parseArgs() {
type: String,
defaultValue: new Date().toISOString(),
},
{ name: 'LOGGING_IDENTIFIER', type: String, defaultValue: '' },
{ name: 'logging-id', type: String, defaultValue: '' },
])
/**
* commandLineArgs cannot handle --foo=false, so go the long way
* @param {string} name
* @return {boolean}
*/
function boolVal(name) {
const v = args[name]
if (['true', 'false'].includes(v)) return v === 'true'
throw new Error(`expected "true" or "false" for boolean option ${name}`)
// If no arguments are provided, display a usage message
if (process.argv.length <= 2) {
console.error(
'Usage: node back_fill_file_hash.mjs --all | --projects | --deleted-projects'
)
process.exit(1)
}
const BATCH_RANGE_START = objectIdFromInput(
args['BATCH_RANGE_START']
).toString()
const BATCH_RANGE_END = objectIdFromInput(args['BATCH_RANGE_END']).toString()
// If --help is provided, display the help message
if (args.help) {
console.log(`Usage: node back_fill_file_hash.mjs [options]
Project selection options:
--all, -a Process all projects, including deleted ones
--projects Process projects (excluding deleted ones)
--deleted-projects Process deleted projects
--from-file <file> Process selected projects ids from file
File selection options:
--skip-hashed-files Skip processing files that already have a hash
--skip-existing-blobs Skip processing files already in the blob store
Logging options:
--output <file>, -o <file> Output log to the specified file
(default: file-migration-<timestamp>.log)
--logging-id <id> Identifier for logging
(default: BATCH_RANGE_START)
Batch range options:
--BATCH_RANGE_START <date> Start date for processing
(default: ${args.BATCH_RANGE_START})
--BATCH_RANGE_END <date> End date for processing
(default: ${args.BATCH_RANGE_END})
Other options:
--report Display a report of the current status
--dry-run, -n Perform a dry run without making changes
--help, -h Show this help message
Typical usage:
node back_fill_file_hash.mjs --all
is equivalent to
node back_fill_file_hash.mjs --projects --deleted-projects
`)
process.exit(0)
}
// Require at least one of --projects, --deleted-projects and --all or --report
if (
!args.projects &&
!args['deleted-projects'] &&
!args.all &&
!args.report
) {
console.error(
'Must specify at least one of --projects and --deleted-projects, --all or --report'
)
process.exit(1)
}
// Forbid --all with --projects or --deleted-projects
if (args.all && (args.projects || args['deleted-projects'])) {
console.error('Cannot use --all with --projects or --deleted-projects')
process.exit(1)
}
// Forbid --all, --projects, --deleted-projects with --report
if (args.report && (args.all || args.projects || args['deleted-projects'])) {
console.error(
'Cannot use --report with --all, --projects or --deleted-projects'
)
process.exit(1)
}
// The --all option processes all projects, including deleted ones
// and checks existing hashed files are present in the blob store.
if (args.all) {
args.projects = true
args['deleted-projects'] = true
}
const BATCH_RANGE_START = objectIdFromInput(args.BATCH_RANGE_START).toString()
const BATCH_RANGE_END = objectIdFromInput(args.BATCH_RANGE_END).toString()
return {
PROCESS_NON_DELETED_PROJECTS: boolVal('processNonDeletedProjects'),
PROCESS_DELETED_PROJECTS: boolVal('processDeletedProjects'),
PROCESS_BLOBS: boolVal('processBlobs'),
PROCESS_HASHED_FILES: boolVal('processHashedFiles'),
PROCESS_NON_DELETED_PROJECTS: args.projects,
PROCESS_DELETED_PROJECTS: args['deleted-projects'],
PROCESS_HASHED_FILES: !args['skip-hashed-files'],
PROCESS_BLOBS: !args['skip-existing-blobs'],
DRY_RUN: args['dry-run'],
OUTPUT_FILE: args.output,
BATCH_RANGE_START,
BATCH_RANGE_END,
LOGGING_IDENTIFIER: args['LOGGING_IDENTIFIER'] || BATCH_RANGE_START,
PROJECT_IDS_FROM: args['projectIdsFrom'],
LOGGING_IDENTIFIER: args['logging-id'] || BATCH_RANGE_START,
PROJECT_IDS_FROM: args['from-file'],
DISPLAY_REPORT: args.report,
}
}
@@ -132,10 +222,13 @@ const {
PROCESS_DELETED_PROJECTS,
PROCESS_BLOBS,
PROCESS_HASHED_FILES,
DRY_RUN,
OUTPUT_FILE,
BATCH_RANGE_START,
BATCH_RANGE_END,
LOGGING_IDENTIFIER,
PROJECT_IDS_FROM,
DISPLAY_REPORT,
} = parseArgs()
// We need to handle the start and end differently as ids of deleted projects are created at time of deletion.
@@ -162,6 +255,143 @@ const STREAM_HIGH_WATER_MARK = parseInt(
const LOGGING_INTERVAL = parseInt(process.env.LOGGING_INTERVAL || '60000', 10)
const SLEEP_BEFORE_EXIT = parseInt(process.env.SLEEP_BEFORE_EXIT || '1000', 10)
// Send all structured log output to a single stream: stdout when OUTPUT_FILE
// is '-', otherwise OUTPUT_FILE itself, opened with the 'a' (append) flag so
// that an existing log file is added to rather than truncated.
logger.initialize('file-migration', {
streams: [
{
stream:
OUTPUT_FILE === '-'
? process.stdout
: fs.createWriteStream(OUTPUT_FILE, { flags: 'a' }),
},
],
})
// Whole seconds of elapsed time at which displayProgress last wrote a progress
// line; displayProgress compares against it to avoid repeating the same line.
let lastElapsedTime = 0
/**
 * Redraw the one-line progress indicator on stdout.
 *
 * Nothing is drawn when OUTPUT_FILE is '-', because stdout is then the log
 * stream. Otherwise the current line is cleared and rewritten with the number
 * of processed projects and the elapsed whole seconds.
 *
 * @param {Object} [options]
 * @param {boolean} [options.completedAll] - only emit a newline to finish the
 *   progress line, then return
 * @param {boolean} [options.completedBatch] - redraw even if the elapsed
 *   whole-second count has not changed since the last redraw
 * @return {Promise<void>}
 */
async function displayProgress(options = {}) {
  // skip progress tracking when logging to stdout
  if (OUTPUT_FILE === '-') return

  const { completedAll, completedBatch } = options
  if (completedAll) {
    process.stdout.write('\n')
    return
  }

  const elapsedSeconds = Math.floor((performance.now() - processStart) / 1000)
  const unchangedSinceLastDraw = elapsedSeconds === lastElapsedTime
  if (unchangedSinceLastDraw && !completedBatch) {
    // Same second as the previous draw and no batch boundary: the message
    // would be a repeat, so do not spam the console.
    return
  }
  lastElapsedTime = elapsedSeconds

  const progressLine = `Processed ${STATS.projects} projects, elapsed time ${elapsedSeconds}s`
  readline.clearLine(process.stdout, 0)
  readline.cursorTo(process.stdout, 0)
  process.stdout.write(progressLine)
}
/**
 * Display the stats for the projects or deletedProjects collections.
 *
 * Takes a random sample from the collection, counts file entries with and
 * without a hash in each sampled file tree, and prints the sample size and an
 * estimate of the share of projects whose files all have hashes.
 *
 * The `$sample` stage runs before the `$match` stage, so the number of
 * documents actually counted can be lower than N.
 *
 * Percentages with a zero denominator (empty collection or empty sample) are
 * printed as "n/a" rather than "NaN%".
 *
 * @param {number} N - Number of samples to take from the collection.
 * @param {string} name - Name of the collection being sampled.
 * @param {Collection} collection - MongoDB collection to query.
 * @param {Object} query - MongoDB query to filter documents.
 * @param {Object} projection - MongoDB projection to select fields.
 * @param {number} collectionCount - Total number of documents in the collection.
 * @returns {Promise<void>} Resolves when stats have been displayed.
 */
async function getStatsForCollection(
  N,
  name,
  collection,
  query,
  projection,
  collectionCount
) {
  /**
   * Format numerator/denominator as a percentage with one decimal place.
   * A non-positive (or non-numeric) denominator has no meaningful ratio.
   *
   * @param {number} numerator
   * @param {number} denominator
   * @return {string}
   */
  const formatPercentage = (numerator, denominator) =>
    denominator > 0
      ? `${((numerator / denominator) * 100).toFixed(1)}%`
      : 'n/a'

  const stats = {
    projectCount: 0,
    projectsWithAllHashes: 0,
    // File tallies are accumulated but not currently printed.
    fileCount: 0,
    fileWithHashCount: 0,
  }
  // Pick a random sample of projects and estimate the number of files without hashes
  const result = await collection
    .aggregate([
      { $sample: { size: N } },
      { $match: query },
      {
        $project: projection,
      },
    ])
    .toArray()
  for (const project of result) {
    // Serialize only the keys that make up the file tree; everything else
    // (names, docs, timestamps, ...) is dropped by the allow-list.
    const fileTree = JSON.stringify(project, [
      'project',
      'rootFolder',
      'folders',
      'fileRefs',
      'hash',
      '_id',
    ])
    // count the number of files without a hash, these are uniquely identified
    // by entries with {"_id":"...."} since we have filtered the file tree
    const filesWithoutHash = fileTree.match(/\{"_id":"[0-9a-f]{24}"\}/g) || []
    // count the number of files with a hash, these are uniquely identified
    // by the number of "hash" strings due to the filtering
    const filesWithHash = fileTree.match(/"hash"/g) || []
    stats.fileCount += filesWithoutHash.length + filesWithHash.length
    stats.fileWithHashCount += filesWithHash.length
    stats.projectCount++
    stats.projectsWithAllHashes += filesWithoutHash.length === 0 ? 1 : 0
  }
  console.log(`Sampled stats for ${name}:`)
  const percentageSampled = formatPercentage(
    stats.projectCount,
    collectionCount
  )
  const percentageConverted = formatPercentage(
    stats.projectsWithAllHashes,
    stats.projectCount
  )
  console.log(`- Sampled ${name}: ${stats.projectCount} (${percentageSampled})`)
  console.log(
    `- Sampled ${name} with all hashes present: ${stats.projectsWithAllHashes}`
  )
  console.log(
    `- Percentage of ${name} converted: ${percentageConverted} (estimated)`
  )
}
/**
 * Print a status report: the total number of documents in the projects and
 * deleted projects collections, followed by sampled migration estimates for
 * each of the two collections (via getStatsForCollection).
 */
async function displayReport() {
  const sampleSize = 1000
  const projectTotal = await projectsCollection.countDocuments()
  const deletedProjectTotal = await deletedProjectsCollection.countDocuments()

  const header = [
    'Current status:',
    `- Projects: ${projectTotal}`,
    `- Deleted projects: ${deletedProjectTotal}`,
    `Sampling ${sampleSize} projects to estimate progress...`,
  ]
  for (const line of header) {
    console.log(line)
  }

  // In deleted project records the file tree is nested under `project`.
  const targets = [
    {
      label: 'projects',
      collection: projectsCollection,
      treeField: 'rootFolder',
      total: projectTotal,
    },
    {
      label: 'deleted projects',
      collection: deletedProjectsCollection,
      treeField: 'project.rootFolder',
      total: deletedProjectTotal,
    },
  ]
  // Sequential on purpose: each collection's stats are printed as one group.
  for (const { label, collection, treeField, total } of targets) {
    await getStatsForCollection(
      sampleSize,
      label,
      collection,
      { [treeField]: { $exists: true } },
      { [treeField]: 1 },
      total
    )
  }
}
// Filestore endpoint location
const FILESTORE_HOST = process.env.FILESTORE_HOST || '127.0.0.1'
const FILESTORE_PORT = process.env.FILESTORE_PORT || '3009'
@@ -245,8 +475,8 @@ let lastEventLoopStats = performance.eventLoopUtilization()
* @param {number} ms
*/
function toMiBPerSecond(v, ms) {
  // v is presumably a byte count (confirm against callers); 1 MiB = 1024 * 1024.
  const MiB = 1024 * 1024
  // ms is a duration in milliseconds, so ms / 1000 is the duration in seconds.
  return v / MiB / (ms / 1000)
}
/**
@@ -289,7 +519,7 @@ function computeDiff(nextEventLoopStats, now) {
function printStats(isLast = false) {
const now = performance.now()
const nextEventLoopStats = performance.eventLoopUtilization()
const logLine = JSON.stringify({
const logLine = {
time: new Date(),
LOGGING_IDENTIFIER,
...STATS,
@@ -297,11 +527,11 @@ function printStats(isLast = false) {
eventLoop: nextEventLoopStats,
diff: computeDiff(nextEventLoopStats, now),
deferredBatches: Array.from(deferredBatches.keys()),
})
if (isLast) {
console.warn(logLine)
}
if (isLast && OUTPUT_FILE === '-') {
console.warn(JSON.stringify(logLine))
} else {
console.log(logLine)
logger.info(logLine, 'file-migration stats')
}
lastEventLoopStats = nextEventLoopStats
lastLog = Object.assign({}, STATS)
@@ -321,7 +551,7 @@ function handleSignal() {
/**
* @param {QueueEntry} entry
* @return {Promise<string>}
* @return {Promise<string|undefined>}
*/
async function processFileWithCleanup(entry) {
const {
@@ -332,17 +562,16 @@ async function processFileWithCleanup(entry) {
try {
return await processFile(entry, filePath)
} finally {
await Promise.all([
fs.promises.rm(filePath, { force: true }),
fs.promises.rm(filePath + GZ_SUFFIX, { force: true }),
])
if (!DRY_RUN) {
await fs.promises.rm(filePath, { force: true })
}
}
}
/**
* @param {QueueEntry} entry
* @param {string} filePath
* @return {Promise<string>}
* @return {Promise<string|undefined>}
*/
async function processFile(entry, filePath) {
for (let attempt = 0; attempt < RETRIES; attempt++) {
@@ -376,7 +605,7 @@ async function processFile(entry, filePath) {
/**
* @param {QueueEntry} entry
* @param {string} filePath
* @return {Promise<string>}
* @return {Promise<string|undefined>}
*/
async function processFileOnce(entry, filePath) {
const {
@@ -390,6 +619,9 @@ async function processFileOnce(entry, filePath) {
// know the hash of.
return entry.hash
}
if (DRY_RUN) {
return // skip processing in dry-run mode by returning undefined
}
const blobStore = new BlobStore(historyId)
STATS.readFromGCSCount++
// make a fetch request to filestore itself
@@ -458,8 +690,6 @@ async function uploadBlobToGCS(blobStore, entry, blob, hash, filePath) {
entry.ctx.recordHistoryBlob(blob)
}
const GZ_SUFFIX = '.gz'
/**
* @param {Array<QueueEntry>} files
* @return {Promise<void>}
@@ -499,6 +729,7 @@ async function waitForDeferredQueues() {
// Wait for ALL pending batches to finish, especially wait for their mongo
// writes to finish to avoid extra work when resuming the batch.
const all = await Promise.allSettled(deferredBatches.values())
displayProgress({ completedAll: true })
// Now that all batches finished, we can throw if needed.
for (const res of all) {
if (res.status === 'rejected') {
@@ -521,8 +752,10 @@ async function queueNextBatch(batch, prefix = 'rootFolder.0') {
const end = renderObjectId(batch[batch.length - 1]._id)
const deferred = processBatch(batch, prefix)
.then(() => {
console.error(`Actually completed batch ending ${end}`)
logger.info({ end }, 'actually completed batch')
displayProgress({ completedBatch: true })
})
.catch(err => {
logger.error({ err, start, end }, 'fatal error processing batch')
throw err
@@ -600,6 +833,9 @@ async function handleDeletedFileTreeBatch(batch) {
* @return {Promise<boolean>}
*/
async function tryUpdateFileRefInMongo(entry) {
if (DRY_RUN) {
return true // skip mongo updates in dry-run mode
}
if (entry.path.startsWith('project.')) {
return await tryUpdateFileRefInMongoInDeletedProject(entry)
}
@@ -622,6 +858,9 @@ async function tryUpdateFileRefInMongo(entry) {
* @return {Promise<boolean>}
*/
async function tryUpdateFileRefInMongoInDeletedProject(entry) {
if (DRY_RUN) {
return true // skip mongo updates in dry-run mode
}
STATS.mongoUpdates++
const result = await deletedProjectsCollection.updateOne(
{
@@ -922,6 +1161,7 @@ class ProjectContext {
*/
async #tryBatchHashWrites(collection, entries, query) {
if (entries.length === 0) return []
if (DRY_RUN) return [] // skip mongo updates in dry-run mode
const update = {}
for (const entry of entries) {
query[`${entry.path}._id`] = new ObjectId(entry.fileId)
@@ -967,7 +1207,7 @@ class ProjectContext {
}
}
/** @type {Map<string, Promise<string>>} */
/** @type {Map<string, Promise<string|undefined>>} */
#pendingFiles = new Map()
/**
@@ -980,7 +1220,16 @@ class ProjectContext {
this.#pendingFiles.set(entry.cacheKey, processFileWithCleanup(entry))
}
try {
entry.hash = await this.#pendingFiles.get(entry.cacheKey)
const hash = await this.#pendingFiles.get(entry.cacheKey)
if (!hash) {
if (DRY_RUN) {
return // hash is undefined in dry-run mode
} else {
throw new Error('undefined hash outside dry-run mode')
}
} else {
entry.hash = hash
}
} finally {
this.remainingQueueEntries--
}
@@ -1058,6 +1307,7 @@ async function processNonDeletedProjects() {
{
BATCH_RANGE_START,
BATCH_RANGE_END,
trackProgress: async message => {},
}
)
} catch (err) {
@@ -1085,7 +1335,9 @@ async function processDeletedProjects() {
'project.rootFolder': 1,
'project._id': 1,
'project.overleaf.history.id': 1,
}
},
{},
{ trackProgress: async message => {} }
)
} catch (err) {
gracefulShutdownInitiated = true
@@ -1118,6 +1370,12 @@ async function main() {
console.warn('Done.')
}
if (DISPLAY_REPORT) {
console.warn('Displaying report...')
await displayReport()
process.exit(0)
}
try {
try {
await main()
@@ -1134,7 +1392,9 @@ try {
let code = 0
if (STATS.filesFailed > 0) {
console.warn('Some files could not be processed, see logs and try again')
console.warn(
`Some files could not be processed, see logs in ${OUTPUT_FILE} and try again`
)
code++
}
if (STATS.fileHardDeleted > 0) {

View File

@@ -491,8 +491,9 @@ describe('back_fill_file_hash script', function () {
process.argv0,
[
'storage/scripts/back_fill_file_hash.mjs',
'--processNonDeletedProjects=true',
'--processDeletedProjects=true',
'--output=-',
'--projects',
'--deleted-projects',
...args,
],
{
@@ -801,7 +802,7 @@ describe('back_fill_file_hash script', function () {
// Practically, this is slow and moving it to the end of the tests gets us there most of the way.
it('should process nothing on re-run', async function () {
const rerun = await runScript(
processHashedFiles ? ['--processHashedFiles=true'] : [],
!processHashedFiles ? ['--skip-hashed-files'] : [],
{},
false
)
@@ -921,13 +922,13 @@ describe('back_fill_file_hash script', function () {
STATS_UP_FROM_PROJECT1_ONWARD
)
describe('error cases', () => {
describe('error cases', function () {
beforeEach('prepare environment', prepareEnvironment)
it('should gracefully handle fatal errors', async function () {
mockFilestore.deleteObject(projectId0, fileId0)
const t0 = Date.now()
const { stats, result } = await tryRunScript([], {
const { stats, result } = await tryRunScript(['--skip-hashed-files'], {
RETRIES: '10',
RETRY_DELAY_MS: '1000',
})
@@ -962,7 +963,7 @@ describe('back_fill_file_hash script', function () {
value: { stats, result },
},
] = await Promise.allSettled([
tryRunScript([], {
tryRunScript(['--skip-hashed-files'], {
RETRY_DELAY_MS: '100',
RETRIES: '60',
RETRY_FILESTORE_404: 'true', // 404s are the easiest to simulate in tests
@@ -988,7 +989,7 @@ describe('back_fill_file_hash script', function () {
let output
before('prepare environment', prepareEnvironment)
before('run script', async function () {
output = await runScript([], {
output = await runScript(['--skip-hashed-files'], {
CONCURRENCY: '1',
})
})
@@ -1057,10 +1058,10 @@ describe('back_fill_file_hash script', function () {
let output1, output2
before('prepare environment', prepareEnvironment)
before('run script without hashed files', async function () {
output1 = await runScript([], {})
output1 = await runScript(['--skip-hashed-files'], {})
})
before('run script with hashed files', async function () {
output2 = await runScript(['--processHashedFiles=true'], {})
output2 = await runScript([], {})
})
it('should print stats for the first run without hashed files', function () {
expect(output1.stats).deep.equal(STATS_ALL)
@@ -1076,11 +1077,66 @@ describe('back_fill_file_hash script', function () {
commonAssertions(true)
})
describe('full run in dry-run mode', function () {
let output
let projectRecordsBefore
let deletedProjectRecordsBefore
before('prepare environment', prepareEnvironment)
before(async function () {
projectRecordsBefore = await projectsCollection.find({}).toArray()
deletedProjectRecordsBefore = await deletedProjectsCollection
.find({})
.toArray()
})
before('run script', async function () {
output = await runScript(
['--dry-run'],
{
CONCURRENCY: '1',
},
false
)
})
it('should print stats for dry-run mode', function () {
// Compute the stats for running the script without dry-run mode.
const originalStats = sumStats(STATS_ALL, {
...STATS_FILES_HASHED_EXTRA,
readFromGCSCount: 30,
readFromGCSIngress: 72,
mongoUpdates: 0,
filesWithHash: 3,
})
// For a dry-run mode, we expect the stats to be zero except for the
// count of projects, blobs, bad file trees, duplicated files
// and files with/without hash. All the other stats such as mongoUpdates
// and writeToGCSCount, etc should be zero.
const expectedDryRunStats = {
...STATS_ALL_ZERO,
projects: originalStats.projects,
blobs: originalStats.blobs,
badFileTrees: originalStats.badFileTrees,
filesDuplicated: originalStats.filesDuplicated,
filesWithHash: originalStats.filesWithHash,
filesWithoutHash: originalStats.filesWithoutHash,
}
expect(output.stats).deep.equal(expectedDryRunStats)
})
it('should not update mongo', async function () {
expect(await projectsCollection.find({}).toArray()).to.deep.equal(
projectRecordsBefore
)
expect(await deletedProjectsCollection.find({}).toArray()).to.deep.equal(
deletedProjectRecordsBefore
)
})
})
describe('full run CONCURRENCY=10', function () {
let output
before('prepare environment', prepareEnvironment)
before('run script', async function () {
output = await runScript([], {
output = await runScript(['--skip-hashed-files'], {
CONCURRENCY: '10',
})
})
@@ -1094,7 +1150,7 @@ describe('back_fill_file_hash script', function () {
let output
before('prepare environment', prepareEnvironment)
before('run script', async function () {
output = await runScript([], {
output = await runScript(['--skip-hashed-files'], {
STREAM_HIGH_WATER_MARK: (1024 * 1024).toString(),
})
})
@@ -1108,7 +1164,7 @@ describe('back_fill_file_hash script', function () {
let output
before('prepare environment', prepareEnvironment)
before('run script', async function () {
output = await runScript(['--processHashedFiles=true'], {})
output = await runScript([], {})
})
it('should print stats', function () {
expect(output.stats).deep.equal(
@@ -1137,7 +1193,7 @@ describe('back_fill_file_hash script', function () {
})
let output
before('run script', async function () {
output = await runScript([], {
output = await runScript(['--skip-hashed-files'], {
CONCURRENCY: '1',
})
})
@@ -1158,14 +1214,20 @@ describe('back_fill_file_hash script', function () {
let outputPart0, outputPart1
before('prepare environment', prepareEnvironment)
before('run script on part 0', async function () {
outputPart0 = await runScript([`--BATCH_RANGE_END=${edge}`], {
CONCURRENCY: '1',
})
outputPart0 = await runScript(
['--skip-hashed-files', `--BATCH_RANGE_END=${edge}`],
{
CONCURRENCY: '1',
}
)
})
before('run script on part 1', async function () {
outputPart1 = await runScript([`--BATCH_RANGE_START=${edge}`], {
CONCURRENCY: '1',
})
outputPart1 = await runScript(
['--skip-hashed-files', `--BATCH_RANGE_START=${edge}`],
{
CONCURRENCY: '1',
}
)
})
it('should print stats for part 0', function () {
@@ -1177,7 +1239,7 @@ describe('back_fill_file_hash script', function () {
commonAssertions()
})
describe('projectIds from file', () => {
describe('projectIds from file', function () {
const path0 = '/tmp/project-ids-0.txt'
const path1 = '/tmp/project-ids-1.txt'
before('prepare environment', prepareEnvironment)
@@ -1210,10 +1272,16 @@ describe('back_fill_file_hash script', function () {
let outputPart0, outputPart1
before('run script on part 0', async function () {
outputPart0 = await runScript([`--projectIdsFrom=${path0}`])
outputPart0 = await runScript([
'--skip-hashed-files',
`--from-file=${path0}`,
])
})
before('run script on part 1', async function () {
outputPart1 = await runScript([`--projectIdsFrom=${path1}`])
outputPart1 = await runScript([
'--skip-hashed-files',
`--from-file=${path1}`,
])
})
/**