From b8e2a8bfdba0cfc82386362713802fb2ebc36c47 Mon Sep 17 00:00:00 2001 From: Brian Gough Date: Wed, 23 Jul 2025 12:04:01 +0100 Subject: [PATCH] Merge pull request #27257 from overleaf/bg-filestore-migration-usability filestore migration usability changes GitOrigin-RevId: 47e8c8434c35b1b16c41700dfef11ce4602a3063 --- libraries/logger/logging-manager.js | 4 +- .../storage/scripts/back_fill_file_hash.mjs | 354 +++++++++++++++--- .../js/storage/back_fill_file_hash.test.mjs | 112 ++++-- 3 files changed, 399 insertions(+), 71 deletions(-) diff --git a/libraries/logger/logging-manager.js b/libraries/logger/logging-manager.js index edf922be72..9fb4f28405 100644 --- a/libraries/logger/logging-manager.js +++ b/libraries/logger/logging-manager.js @@ -11,7 +11,7 @@ const LoggingManager = { /** * @param {string} name - The name of the logger */ - initialize(name) { + initialize(name, options = {}) { this.isProduction = (process.env.NODE_ENV || '').toLowerCase() === 'production' const isTest = (process.env.NODE_ENV || '').toLowerCase() === 'test' @@ -27,7 +27,7 @@ const LoggingManager = { req: Serializers.req, res: Serializers.res, }, - streams: [this._getOutputStreamConfig()], + streams: options.streams ?? [this._getOutputStreamConfig()], }) this._setupRingBuffer() this._setupLogLevelChecker() diff --git a/services/history-v1/storage/scripts/back_fill_file_hash.mjs b/services/history-v1/storage/scripts/back_fill_file_hash.mjs index 2e12328e5c..c0fdda35d8 100644 --- a/services/history-v1/storage/scripts/back_fill_file_hash.mjs +++ b/services/history-v1/storage/scripts/back_fill_file_hash.mjs @@ -79,16 +79,30 @@ ObjectId.cacheHexString = true */ /** - * @return {{PROJECT_IDS_FROM: string, PROCESS_HASHED_FILES: boolean, LOGGING_IDENTIFIER: string, BATCH_RANGE_START: string, PROCESS_BLOBS: boolean, BATCH_RANGE_END: string, PROCESS_NON_DELETED_PROJECTS: boolean, PROCESS_DELETED_PROJECTS: boolean}} + * @return {{PROJECT_IDS_FROM: string, PROCESS_HASHED_FILES: boolean, LOGGING_IDENTIFIER: string, BATCH_RANGE_START: string, BATCH_RANGE_END: string, PROCESS_NON_DELETED_PROJECTS: boolean, PROCESS_DELETED_PROJECTS: boolean, PROCESS_BLOBS: boolean, DRY_RUN: boolean, OUTPUT_FILE: string, DISPLAY_REPORT: boolean}} */ function parseArgs() { const PUBLIC_LAUNCH_DATE = new Date('2012-01-01T00:00:00Z') + const DEFAULT_OUTPUT_FILE = `file-migration-${new Date() + .toISOString() + .replace(/[:.]/g, '_')}.log` + const args = commandLineArgs([ - { name: 'processNonDeletedProjects', type: String, defaultValue: 'false' }, - { name: 'processDeletedProjects', type: String, defaultValue: 'false' }, - { name: 'processHashedFiles', type: String, defaultValue: 'false' }, - { name: 'processBlobs', type: String, defaultValue: 'true' }, - { name: 'projectIdsFrom', type: String, defaultValue: '' }, + { name: 'help', alias: 'h', type: Boolean }, + { name: 'all', alias: 'a', type: Boolean }, + { name: 'projects', type: Boolean }, + { name: 'deleted-projects', type: Boolean }, + { name: 'skip-hashed-files', type: Boolean }, + { name: 'skip-existing-blobs', type: Boolean }, + { name: 'from-file', type: String, defaultValue: '' }, + { name: 'dry-run', alias: 'n', type: Boolean }, + { + name: 'output', + alias: 'o', + type: String, + defaultValue: DEFAULT_OUTPUT_FILE, + }, + { name: 'report', type: Boolean }, { name: 'BATCH_RANGE_START', type: String, @@ -99,31 +113,107 @@ function parseArgs() { type: String, defaultValue: new Date().toISOString(), }, - { name: 'LOGGING_IDENTIFIER', type: String, defaultValue: '' }, + { name: 'logging-id', type: String, defaultValue: '' }, ]) - /** - * commandLineArgs cannot handle --foo=false, so go the long way - * @param {string} name - * @return {boolean} - */ - function boolVal(name) { - const v = args[name] - if (['true', 'false'].includes(v)) return v === 'true' - throw new Error(`expected "true" or "false" for boolean option ${name}`) + + // If no arguments are provided, display a usage message + if (process.argv.length <= 2) { + console.error( + 'Usage: node back_fill_file_hash.mjs --all | --projects | --deleted-projects' + ) + process.exit(1) } - const BATCH_RANGE_START = objectIdFromInput( - args['BATCH_RANGE_START'] - ).toString() - const BATCH_RANGE_END = objectIdFromInput(args['BATCH_RANGE_END']).toString() + + // If --help is provided, display the help message + if (args.help) { + console.log(`Usage: node back_fill_file_hash.mjs [options] + +Project selection options: + --all, -a Process all projects, including deleted ones + --projects Process projects (excluding deleted ones) + --deleted-projects Process deleted projects + --from-file Process selected projects ids from file + +File selection options: + --skip-hashed-files Skip processing files that already have a hash + --skip-existing-blobs Skip processing files already in the blob store + +Logging options: + --output , -o Output log to the specified file + (default: file-migration-.log) + --logging-id Identifier for logging + (default: BATCH_RANGE_START) + +Batch range options: + --BATCH_RANGE_START Start date for processing + (default: ${args.BATCH_RANGE_START}) + --BATCH_RANGE_END End date for processing + (default: ${args.BATCH_RANGE_END}) + +Other options: + --report Display a report of the current status + --dry-run, -n Perform a dry run without making changes + --help, -h Show this help message + +Typical usage: + + node back_fill_file_hash.mjs --all + +is equivalent to + + node back_fill_file_hash.mjs --projects --deleted-projects +`) + process.exit(0) + } + + // Require at least one of --projects, --deleted-projects and --all or --report + if ( + !args.projects && + !args['deleted-projects'] && + !args.all && + !args.report + ) { + console.error( + 'Must specify at least one of --projects and --deleted-projects, --all or --report' + ) + process.exit(1) + } + + // Forbid --all with --projects or --deleted-projects + if (args.all && (args.projects || args['deleted-projects'])) { + console.error('Cannot use --all with --projects or --deleted-projects') + process.exit(1) + } + + // Forbid --all, --projects, --deleted-projects with --report + if (args.report && (args.all || args.projects || args['deleted-projects'])) { + console.error( + 'Cannot use --report with --all, --projects or --deleted-projects' + ) + process.exit(1) + } + + // The --all option processes all projects, including deleted ones + // and checks existing hashed files are present in the blob store. + if (args.all) { + args.projects = true + args['deleted-projects'] = true + } + + const BATCH_RANGE_START = objectIdFromInput(args.BATCH_RANGE_START).toString() + const BATCH_RANGE_END = objectIdFromInput(args.BATCH_RANGE_END).toString() return { - PROCESS_NON_DELETED_PROJECTS: boolVal('processNonDeletedProjects'), - PROCESS_DELETED_PROJECTS: boolVal('processDeletedProjects'), - PROCESS_BLOBS: boolVal('processBlobs'), - PROCESS_HASHED_FILES: boolVal('processHashedFiles'), + PROCESS_NON_DELETED_PROJECTS: args.projects, + PROCESS_DELETED_PROJECTS: args['deleted-projects'], + PROCESS_HASHED_FILES: !args['skip-hashed-files'], + PROCESS_BLOBS: !args['skip-existing-blobs'], + DRY_RUN: args['dry-run'], + OUTPUT_FILE: args.output, BATCH_RANGE_START, BATCH_RANGE_END, - LOGGING_IDENTIFIER: args['LOGGING_IDENTIFIER'] || BATCH_RANGE_START, - PROJECT_IDS_FROM: args['projectIdsFrom'], + LOGGING_IDENTIFIER: args['logging-id'] || BATCH_RANGE_START, + PROJECT_IDS_FROM: args['from-file'], + DISPLAY_REPORT: args.report, } } @@ -132,10 +222,13 @@ const { PROCESS_DELETED_PROJECTS, PROCESS_BLOBS, PROCESS_HASHED_FILES, + DRY_RUN, + OUTPUT_FILE, BATCH_RANGE_START, BATCH_RANGE_END, LOGGING_IDENTIFIER, PROJECT_IDS_FROM, + DISPLAY_REPORT, } = parseArgs() // We need to handle the start and end differently as ids of deleted projects are created at time of deletion. @@ -162,6 +255,143 @@ const STREAM_HIGH_WATER_MARK = parseInt( const LOGGING_INTERVAL = parseInt(process.env.LOGGING_INTERVAL || '60000', 10) const SLEEP_BEFORE_EXIT = parseInt(process.env.SLEEP_BEFORE_EXIT || '1000', 10) +// Log output to a file +logger.initialize('file-migration', { + streams: [ + { + stream: + OUTPUT_FILE === '-' + ? process.stdout + : fs.createWriteStream(OUTPUT_FILE, { flags: 'a' }), + }, + ], +}) + +let lastElapsedTime = 0 +async function displayProgress(options = {}) { + if (OUTPUT_FILE === '-') { + return // skip progress tracking when logging to stdout + } + if (options.completedAll) { + process.stdout.write('\n') + return + } + const elapsedTime = Math.floor((performance.now() - processStart) / 1000) + if (lastElapsedTime === elapsedTime && !options.completedBatch) { + // Avoid spamming the console with the same progress message + return + } + lastElapsedTime = elapsedTime + readline.clearLine(process.stdout, 0) + readline.cursorTo(process.stdout, 0) + process.stdout.write( + `Processed ${STATS.projects} projects, elapsed time ${elapsedTime}s` + ) +} + +/** + * Display the stats for the projects or deletedProjects collections. + * + * @param {number} N - Number of samples to take from the collection. + * @param {string} name - Name of the collection being sampled. + * @param {Collection} collection - MongoDB collection to query. + * @param {Object} query - MongoDB query to filter documents. + * @param {Object} projection - MongoDB projection to select fields. + * @param {number} collectionCount - Total number of documents in the collection. + * @returns {Promise} Resolves when stats have been displayed. + */ +async function getStatsForCollection( + N, + name, + collection, + query, + projection, + collectionCount +) { + const stats = { + projectCount: 0, + projectsWithAllHashes: 0, + fileCount: 0, + fileWithHashCount: 0, + } + // Pick a random sample of projects and estimate the number of files without hashes + const result = await collection + .aggregate([ + { $sample: { size: N } }, + { $match: query }, + { + $project: projection, + }, + ]) + .toArray() + + for (const project of result) { + const fileTree = JSON.stringify(project, [ + 'project', + 'rootFolder', + 'folders', + 'fileRefs', + 'hash', + '_id', + ]) + // count the number of files without a hash, these are uniquely identified + // by entries with {"_id":"...."} since we have filtered the file tree + const filesWithoutHash = fileTree.match(/\{"_id":"[0-9a-f]{24}"\}/g) || [] + // count the number of files with a hash, these are uniquely identified + // by the number of "hash" strings due to the filtering + const filesWithHash = fileTree.match(/"hash"/g) || [] + stats.fileCount += filesWithoutHash.length + filesWithHash.length + stats.fileWithHashCount += filesWithHash.length + stats.projectCount++ + stats.projectsWithAllHashes += filesWithoutHash.length === 0 ? 1 : 0 + } + console.log(`Sampled stats for ${name}:`) + const fractionSampled = stats.projectCount / collectionCount + const percentageSampled = (fractionSampled * 100).toFixed(1) + const fractionConverted = stats.projectsWithAllHashes / stats.projectCount + const percentageConverted = (fractionConverted * 100).toFixed(1) + console.log( + `- Sampled ${name}: ${stats.projectCount} (${percentageSampled}%)` + ) + console.log( + `- Sampled ${name} with all hashes present: ${stats.projectsWithAllHashes}` + ) + console.log( + `- Percentage of ${name} converted: ${percentageConverted}% (estimated)` + ) +} + +/** + * Displays a report of the current status of projects and deleted projects, + * including counts and estimated progress based on a sample. + */ +async function displayReport() { + const projectsCountResult = await projectsCollection.countDocuments() + const deletedProjectsCountResult = + await deletedProjectsCollection.countDocuments() + const sampleSize = 1000 + console.log('Current status:') + console.log(`- Projects: ${projectsCountResult}`) + console.log(`- Deleted projects: ${deletedProjectsCountResult}`) + console.log(`Sampling ${sampleSize} projects to estimate progress...`) + await getStatsForCollection( + sampleSize, + 'projects', + projectsCollection, + { rootFolder: { $exists: true } }, + { rootFolder: 1 }, + projectsCountResult + ) + await getStatsForCollection( + sampleSize, + 'deleted projects', + deletedProjectsCollection, + { 'project.rootFolder': { $exists: true } }, + { 'project.rootFolder': 1 }, + deletedProjectsCountResult + ) +} + // Filestore endpoint location const FILESTORE_HOST = process.env.FILESTORE_HOST || '127.0.0.1' const FILESTORE_PORT = process.env.FILESTORE_PORT || '3009' @@ -245,8 +475,8 @@ let lastEventLoopStats = performance.eventLoopUtilization() * @param {number} ms */ function toMiBPerSecond(v, ms) { - const ONE_MiB = 1024 * 1024 - return v / ONE_MiB / (ms / 1000) + const MiB = 1024 * 1024 + return v / MiB / (ms / 1000) } /** @@ -289,7 +519,7 @@ function computeDiff(nextEventLoopStats, now) { function printStats(isLast = false) { const now = performance.now() const nextEventLoopStats = performance.eventLoopUtilization() - const logLine = JSON.stringify({ + const logLine = { time: new Date(), LOGGING_IDENTIFIER, ...STATS, @@ -297,11 +527,11 @@ function printStats(isLast = false) { eventLoop: nextEventLoopStats, diff: computeDiff(nextEventLoopStats, now), deferredBatches: Array.from(deferredBatches.keys()), - }) - if (isLast) { - console.warn(logLine) + } + if (isLast && OUTPUT_FILE === '-') { + console.warn(JSON.stringify(logLine)) } else { - console.log(logLine) + logger.info(logLine, 'file-migration stats') } lastEventLoopStats = nextEventLoopStats lastLog = Object.assign({}, STATS) @@ -321,7 +551,7 @@ function handleSignal() { /** * @param {QueueEntry} entry - * @return {Promise} + * @return {Promise} */ async function processFileWithCleanup(entry) { const { @@ -332,17 +562,16 @@ async function processFileWithCleanup(entry) { try { return await processFile(entry, filePath) } finally { - await Promise.all([ - fs.promises.rm(filePath, { force: true }), - fs.promises.rm(filePath + GZ_SUFFIX, { force: true }), - ]) + if (!DRY_RUN) { + await fs.promises.rm(filePath, { force: true }) + } } } /** * @param {QueueEntry} entry * @param {string} filePath - * @return {Promise} + * @return {Promise} */ async function processFile(entry, filePath) { for (let attempt = 0; attempt < RETRIES; attempt++) { @@ -376,7 +605,7 @@ async function processFile(entry, filePath) { /** * @param {QueueEntry} entry * @param {string} filePath - * @return {Promise} + * @return {Promise} */ async function processFileOnce(entry, filePath) { const { @@ -390,6 +619,9 @@ async function processFileOnce(entry, filePath) { // know the hash of. return entry.hash } + if (DRY_RUN) { + return // skip processing in dry-run mode by returning undefined + } const blobStore = new BlobStore(historyId) STATS.readFromGCSCount++ // make a fetch request to filestore itself @@ -458,8 +690,6 @@ async function uploadBlobToGCS(blobStore, entry, blob, hash, filePath) { entry.ctx.recordHistoryBlob(blob) } -const GZ_SUFFIX = '.gz' - /** * @param {Array} files * @return {Promise} @@ -499,6 +729,7 @@ async function waitForDeferredQueues() { // Wait for ALL pending batches to finish, especially wait for their mongo // writes to finish to avoid extra work when resuming the batch. const all = await Promise.allSettled(deferredBatches.values()) + displayProgress({ completedAll: true }) // Now that all batches finished, we can throw if needed. for (const res of all) { if (res.status === 'rejected') { @@ -521,8 +752,10 @@ async function queueNextBatch(batch, prefix = 'rootFolder.0') { const end = renderObjectId(batch[batch.length - 1]._id) const deferred = processBatch(batch, prefix) .then(() => { - console.error(`Actually completed batch ending ${end}`) + logger.info({ end }, 'actually completed batch') + displayProgress({ completedBatch: true }) }) + .catch(err => { logger.error({ err, start, end }, 'fatal error processing batch') throw err @@ -600,6 +833,9 @@ async function handleDeletedFileTreeBatch(batch) { * @return {Promise} */ async function tryUpdateFileRefInMongo(entry) { + if (DRY_RUN) { + return true // skip mongo updates in dry-run mode + } if (entry.path.startsWith('project.')) { return await tryUpdateFileRefInMongoInDeletedProject(entry) } @@ -622,6 +858,9 @@ async function tryUpdateFileRefInMongo(entry) { * @return {Promise} */ async function tryUpdateFileRefInMongoInDeletedProject(entry) { + if (DRY_RUN) { + return true // skip mongo updates in dry-run mode + } STATS.mongoUpdates++ const result = await deletedProjectsCollection.updateOne( { @@ -922,6 +1161,7 @@ class ProjectContext { */ async #tryBatchHashWrites(collection, entries, query) { if (entries.length === 0) return [] + if (DRY_RUN) return [] // skip mongo updates in dry-run mode const update = {} for (const entry of entries) { query[`${entry.path}._id`] = new ObjectId(entry.fileId) @@ -967,7 +1207,7 @@ class ProjectContext { } } - /** @type {Map>} */ + /** @type {Map>} */ #pendingFiles = new Map() /** @@ -980,7 +1220,16 @@ class ProjectContext { this.#pendingFiles.set(entry.cacheKey, processFileWithCleanup(entry)) } try { - entry.hash = await this.#pendingFiles.get(entry.cacheKey) + const hash = await this.#pendingFiles.get(entry.cacheKey) + if (!hash) { + if (DRY_RUN) { + return // hash is undefined in dry-run mode + } else { + throw new Error('undefined hash outside dry-run mode') + } + } else { + entry.hash = hash + } } finally { this.remainingQueueEntries-- } @@ -1058,6 +1307,7 @@ async function processNonDeletedProjects() { { BATCH_RANGE_START, BATCH_RANGE_END, + trackProgress: async message => {}, } ) } catch (err) { @@ -1085,7 +1335,9 @@ async function processDeletedProjects() { 'project.rootFolder': 1, 'project._id': 1, 'project.overleaf.history.id': 1, - } + }, + {}, + { trackProgress: async message => {} } ) } catch (err) { gracefulShutdownInitiated = true @@ -1118,6 +1370,12 @@ async function main() { console.warn('Done.') } +if (DISPLAY_REPORT) { + console.warn('Displaying report...') + await displayReport() + process.exit(0) +} + try { try { await main() @@ -1134,7 +1392,9 @@ try { let code = 0 if (STATS.filesFailed > 0) { - console.warn('Some files could not be processed, see logs and try again') + console.warn( + `Some files could not be processed, see logs in ${OUTPUT_FILE} and try again` + ) code++ } if (STATS.fileHardDeleted > 0) { diff --git a/services/history-v1/test/acceptance/js/storage/back_fill_file_hash.test.mjs b/services/history-v1/test/acceptance/js/storage/back_fill_file_hash.test.mjs index b6cdd4b9bf..f6f4a6fb76 100644 --- a/services/history-v1/test/acceptance/js/storage/back_fill_file_hash.test.mjs +++ b/services/history-v1/test/acceptance/js/storage/back_fill_file_hash.test.mjs @@ -491,8 +491,9 @@ describe('back_fill_file_hash script', function () { process.argv0, [ 'storage/scripts/back_fill_file_hash.mjs', - '--processNonDeletedProjects=true', - '--processDeletedProjects=true', + '--output=-', + '--projects', + '--deleted-projects', ...args, ], { @@ -801,7 +802,7 @@ describe('back_fill_file_hash script', function () { // Practically, this is slow and moving it to the end of the tests gets us there most of the way. it('should process nothing on re-run', async function () { const rerun = await runScript( - processHashedFiles ? ['--processHashedFiles=true'] : [], + !processHashedFiles ? ['--skip-hashed-files'] : [], {}, false ) @@ -921,13 +922,13 @@ describe('back_fill_file_hash script', function () { STATS_UP_FROM_PROJECT1_ONWARD ) - describe('error cases', () => { + describe('error cases', function () { beforeEach('prepare environment', prepareEnvironment) it('should gracefully handle fatal errors', async function () { mockFilestore.deleteObject(projectId0, fileId0) const t0 = Date.now() - const { stats, result } = await tryRunScript([], { + const { stats, result } = await tryRunScript(['--skip-hashed-files'], { RETRIES: '10', RETRY_DELAY_MS: '1000', }) @@ -962,7 +963,7 @@ describe('back_fill_file_hash script', function () { value: { stats, result }, }, ] = await Promise.allSettled([ - tryRunScript([], { + tryRunScript(['--skip-hashed-files'], { RETRY_DELAY_MS: '100', RETRIES: '60', RETRY_FILESTORE_404: 'true', // 404s are the easiest to simulate in tests @@ -988,7 +989,7 @@ describe('back_fill_file_hash script', function () { let output before('prepare environment', prepareEnvironment) before('run script', async function () { - output = await runScript([], { + output = await runScript(['--skip-hashed-files'], { CONCURRENCY: '1', }) }) @@ -1057,10 +1058,10 @@ describe('back_fill_file_hash script', function () { let output1, output2 before('prepare environment', prepareEnvironment) before('run script without hashed files', async function () { - output1 = await runScript([], {}) + output1 = await runScript(['--skip-hashed-files'], {}) }) before('run script with hashed files', async function () { - output2 = await runScript(['--processHashedFiles=true'], {}) + output2 = await runScript([], {}) }) it('should print stats for the first run without hashed files', function () { expect(output1.stats).deep.equal(STATS_ALL) @@ -1076,11 +1077,66 @@ describe('back_fill_file_hash script', function () { commonAssertions(true) }) + describe('full run in dry-run mode', function () { + let output + let projectRecordsBefore + let deletedProjectRecordsBefore + before('prepare environment', prepareEnvironment) + before(async function () { + projectRecordsBefore = await projectsCollection.find({}).toArray() + deletedProjectRecordsBefore = await deletedProjectsCollection + .find({}) + .toArray() + }) + before('run script', async function () { + output = await runScript( + ['--dry-run'], + { + CONCURRENCY: '1', + }, + false + ) + }) + + it('should print stats for dry-run mode', function () { + // Compute the stats for running the script without dry-run mode. + const originalStats = sumStats(STATS_ALL, { + ...STATS_FILES_HASHED_EXTRA, + readFromGCSCount: 30, + readFromGCSIngress: 72, + mongoUpdates: 0, + filesWithHash: 3, + }) + // For a dry-run mode, we expect the stats to be zero except for the + // count of projects, blobs, bad file trees, duplicated files + // and files with/without hash. All the other stats such as mongoUpdates + // and writeToGCSCount, etc should be zero. + const expectedDryRunStats = { + ...STATS_ALL_ZERO, + projects: originalStats.projects, + blobs: originalStats.blobs, + badFileTrees: originalStats.badFileTrees, + filesDuplicated: originalStats.filesDuplicated, + filesWithHash: originalStats.filesWithHash, + filesWithoutHash: originalStats.filesWithoutHash, + } + expect(output.stats).deep.equal(expectedDryRunStats) + }) + it('should not update mongo', async function () { + expect(await projectsCollection.find({}).toArray()).to.deep.equal( + projectRecordsBefore + ) + expect(await deletedProjectsCollection.find({}).toArray()).to.deep.equal( + deletedProjectRecordsBefore + ) + }) + }) + describe('full run CONCURRENCY=10', function () { let output before('prepare environment', prepareEnvironment) before('run script', async function () { - output = await runScript([], { + output = await runScript(['--skip-hashed-files'], { CONCURRENCY: '10', }) }) @@ -1094,7 +1150,7 @@ describe('back_fill_file_hash script', function () { let output before('prepare environment', prepareEnvironment) before('run script', async function () { - output = await runScript([], { + output = await runScript(['--skip-hashed-files'], { STREAM_HIGH_WATER_MARK: (1024 * 1024).toString(), }) }) @@ -1108,7 +1164,7 @@ describe('back_fill_file_hash script', function () { let output before('prepare environment', prepareEnvironment) before('run script', async function () { - output = await runScript(['--processHashedFiles=true'], {}) + output = await runScript([], {}) }) it('should print stats', function () { expect(output.stats).deep.equal( @@ -1137,7 +1193,7 @@ describe('back_fill_file_hash script', function () { }) let output before('run script', async function () { - output = await runScript([], { + output = await runScript(['--skip-hashed-files'], { CONCURRENCY: '1', }) }) @@ -1158,14 +1214,20 @@ describe('back_fill_file_hash script', function () { let outputPart0, outputPart1 before('prepare environment', prepareEnvironment) before('run script on part 0', async function () { - outputPart0 = await runScript([`--BATCH_RANGE_END=${edge}`], { - CONCURRENCY: '1', - }) + outputPart0 = await runScript( + ['--skip-hashed-files', `--BATCH_RANGE_END=${edge}`], + { + CONCURRENCY: '1', + } + ) }) before('run script on part 1', async function () { - outputPart1 = await runScript([`--BATCH_RANGE_START=${edge}`], { - CONCURRENCY: '1', - }) + outputPart1 = await runScript( + ['--skip-hashed-files', `--BATCH_RANGE_START=${edge}`], + { + CONCURRENCY: '1', + } + ) }) it('should print stats for part 0', function () { @@ -1177,7 +1239,7 @@ describe('back_fill_file_hash script', function () { commonAssertions() }) - describe('projectIds from file', () => { + describe('projectIds from file', function () { const path0 = '/tmp/project-ids-0.txt' const path1 = '/tmp/project-ids-1.txt' before('prepare environment', prepareEnvironment) @@ -1210,10 +1272,16 @@ describe('back_fill_file_hash script', function () { let outputPart0, outputPart1 before('run script on part 0', async function () { - outputPart0 = await runScript([`--projectIdsFrom=${path0}`]) + outputPart0 = await runScript([ + '--skip-hashed-files', + `--from-file=${path0}`, + ]) }) before('run script on part 1', async function () { - outputPart1 = await runScript([`--projectIdsFrom=${path1}`]) + outputPart1 = await runScript([ + '--skip-hashed-files', + `--from-file=${path1}`, + ]) }) /**