diff --git a/libraries/object-persistor/src/PerProjectEncryptedS3Persistor.js b/libraries/object-persistor/src/PerProjectEncryptedS3Persistor.js index a5172ac464..9032bf1897 100644 --- a/libraries/object-persistor/src/PerProjectEncryptedS3Persistor.js +++ b/libraries/object-persistor/src/PerProjectEncryptedS3Persistor.js @@ -177,10 +177,13 @@ class PerProjectEncryptedS3Persistor extends S3Persistor { /** * @param {string} bucketName * @param {string} path - * @return {Promise} + * @return {Promise<CachedPerProjectEncryptedS3Persistor>} */ async generateDataEncryptionKey(bucketName, path) { - await this.#generateDataEncryptionKeyOptions(bucketName, path) + return new CachedPerProjectEncryptedS3Persistor( + this, + await this.#generateDataEncryptionKeyOptions(bucketName, path) + ) } /** diff --git a/services/history-v1/storage/scripts/back_fill_file_hash.mjs b/services/history-v1/storage/scripts/back_fill_file_hash.mjs index d6f033109a..b496a8bbc2 100644 --- a/services/history-v1/storage/scripts/back_fill_file_hash.mjs +++ b/services/history-v1/storage/scripts/back_fill_file_hash.mjs @@ -989,8 +989,25 @@ class ProjectContext { * @return {Promise} */ async #getCachedPersistorWithRetries(key) { + // Optimization: Skip the GET on the DEK when no blobs are marked as backed up yet; try generating a new DEK first and fall back to the GET if one already exists (AlreadyWrittenError).
+ let tryGenerateDEKFirst = this.#backedUpBlobs.size === 0 for (let attempt = 0; attempt < RETRIES; attempt++) { try { + if (tryGenerateDEKFirst) { + try { + return await backupPersistor.generateDataEncryptionKey( + projectBlobsBucket, + key + ) + } catch (err) { + if (err instanceof AlreadyWrittenError) { + tryGenerateDEKFirst = false + // fall back to GET below + } else { + throw err + } + } + } return await backupPersistor.forProject(projectBlobsBucket, key) } catch (err) { if (gracefulShutdownInitiated) throw err diff --git a/services/history-v1/test/acceptance/js/storage/back_fill_file_hash.test.mjs b/services/history-v1/test/acceptance/js/storage/back_fill_file_hash.test.mjs index 41ac23e568..04c949b011 100644 --- a/services/history-v1/test/acceptance/js/storage/back_fill_file_hash.test.mjs +++ b/services/history-v1/test/acceptance/js/storage/back_fill_file_hash.test.mjs @@ -1158,6 +1158,43 @@ describe('back_fill_file_hash script', function () { commonAssertions() }) + describe('with something in the bucket and marked as processed', function () { + beforeEach('create a file in s3', async function () { + await backupPersistor.sendStream( + projectBlobsBucket, + makeProjectKey(historyId0, hashTextBlob0), + Stream.Readable.from([contentTextBlob0]), + { contentLength: contentTextBlob0.byteLength } + ) + await backedUpBlobs.insertMany([ + { + _id: projectId0, + blobs: [binaryForGitBlobHash(hashTextBlob0)], + }, + ]) + }) + let output + beforeEach('run script', async function () { + output = await runScript([], { + CONCURRENCY: '1', + }) + }) + + it('should print stats', function () { + expect(output.stats).deep.equal( + sumStats(STATS_ALL, { + ...STATS_ALL_ZERO, + backedUpBlobs: 1, + writeToAWSCount: -1, + writeToAWSEgress: -27, + readFromGCSCount: -1, + readFromGCSIngress: -7, + }) + ) + }) + commonAssertions() + }) + describe('split run CONCURRENCY=1', function () { // part0: project0+project1, part1: project2 onwards const edge = projectId1.toString()