Merge pull request #22115 from overleaf/jpa-skip-get

[history-v1] back_fill_file_hash: optimize obtaining the DEK

GitOrigin-RevId: 6fc5218737551ae4e6152414c32560f545767091
This commit is contained in:
Jakob Ackermann
2024-11-25 13:07:15 +01:00
committed by Copybot
parent ed27af11f8
commit 0bd8729cc1
3 changed files with 59 additions and 2 deletions
@@ -177,10 +177,13 @@ class PerProjectEncryptedS3Persistor extends S3Persistor {
/**
* @param {string} bucketName
* @param {string} path
* @return {Promise<void>}
* @return {Promise<CachedPerProjectEncryptedS3Persistor>}
*/
async generateDataEncryptionKey(bucketName, path) {
await this.#generateDataEncryptionKeyOptions(bucketName, path)
return new CachedPerProjectEncryptedS3Persistor(
this,
await this.#generateDataEncryptionKeyOptions(bucketName, path)
)
}
/**
@@ -989,8 +989,25 @@ class ProjectContext {
* @return {Promise<CachedPerProjectEncryptedS3Persistor>}
*/
async #getCachedPersistorWithRetries(key) {
// Optimization: Skip GET on DEK in case no blobs are marked as backed up yet.
let tryGenerateDEKFirst = this.#backedUpBlobs.size === 0
for (let attempt = 0; attempt < RETRIES; attempt++) {
try {
if (tryGenerateDEKFirst) {
try {
return await backupPersistor.generateDataEncryptionKey(
projectBlobsBucket,
key
)
} catch (err) {
if (err instanceof AlreadyWrittenError) {
tryGenerateDEKFirst = false
// fall back to GET below
} else {
throw err
}
}
}
return await backupPersistor.forProject(projectBlobsBucket, key)
} catch (err) {
if (gracefulShutdownInitiated) throw err
@@ -1158,6 +1158,43 @@ describe('back_fill_file_hash script', function () {
commonAssertions()
})
describe('with something in the bucket and marked as processed', function () {
  // Assigned by the 'run script' hook, read by the stats assertion below.
  let output

  beforeEach('create a file in s3', async function () {
    // Upload the text blob to the project blobs bucket first ...
    const blobKey = makeProjectKey(historyId0, hashTextBlob0)
    const blobStream = Stream.Readable.from([contentTextBlob0])
    const sendOptions = { contentLength: contentTextBlob0.byteLength }
    await backupPersistor.sendStream(
      projectBlobsBucket,
      blobKey,
      blobStream,
      sendOptions
    )

    // ... then record the same blob hash as backed up for project 0.
    const backedUpRecord = {
      _id: projectId0,
      blobs: [binaryForGitBlobHash(hashTextBlob0)],
    }
    await backedUpBlobs.insertMany([backedUpRecord])
  })

  beforeEach('run script', async function () {
    const env = { CONCURRENCY: '1' }
    output = await runScript([], env)
  })

  it('should print stats', function () {
    // Adjustment relative to STATS_ALL (presumably combined field-wise by
    // sumStats): one blob counted as already backed up, and one fewer AWS
    // write / GCS read.
    const statsDelta = {
      ...STATS_ALL_ZERO,
      backedUpBlobs: 1,
      writeToAWSCount: -1,
      writeToAWSEgress: -27,
      readFromGCSCount: -1,
      readFromGCSIngress: -7,
    }
    expect(output.stats).deep.equal(sumStats(STATS_ALL, statsDelta))
  })

  commonAssertions()
})
describe('split run CONCURRENCY=1', function () {
// part0: project0+project1, part1: project2 onwards
const edge = projectId1.toString()