Merge pull request #21947 from overleaf/bg-jpa-back-fill-script-tweaks

[history-v1] back_fill_file_hash: performance tweaks

GitOrigin-RevId: c3d0c7906707fc902addcde64eaf41c24ceeece7
This commit is contained in:
Jakob Ackermann
2024-11-19 10:46:48 +01:00
committed by Copybot
parent 87e7e3017a
commit 8e74d3c58c
4 changed files with 195 additions and 28 deletions

View File

@@ -61,6 +61,9 @@ class SSECOptions {
}
class S3Persistor extends AbstractPersistor {
/** @type {Map<string, S3>} */
#clients = new Map()
constructor(settings = {}) {
super()
@@ -131,19 +134,19 @@ class S3Persistor extends AbstractPersistor {
// if we have an md5 hash, pass this to S3 to verify the upload - otherwise
// we rely on the S3 client's checksum calculation to validate the upload
const clientOptions = {}
let computeChecksums = false
if (opts.sourceMd5) {
uploadOptions.ContentMD5 = PersistorHelper.hexToBase64(opts.sourceMd5)
} else {
clientOptions.computeChecksums = true
computeChecksums = true
}
if (this.settings.disableMultiPartUpload) {
await this._getClientForBucket(bucketName, clientOptions)
await this._getClientForBucket(bucketName, computeChecksums)
.putObject(uploadOptions)
.promise()
} else {
await this._getClientForBucket(bucketName, clientOptions)
await this._getClientForBucket(bucketName, computeChecksums)
.upload(uploadOptions, { partSize: this.settings.partSize })
.promise()
}
@@ -517,23 +520,34 @@ class S3Persistor extends AbstractPersistor {
/**
* @param {string} bucket
* @param {Object} [clientOptions]
* @param {boolean} computeChecksums
* @return {S3}
* @private
*/
_getClientForBucket(bucket, clientOptions) {
return new S3(
this._buildClientOptions(
this.settings.bucketCreds?.[bucket],
clientOptions
_getClientForBucket(bucket, computeChecksums = false) {
/** @type {S3.Types.ClientConfiguration} */
const clientOptions = {}
const cacheKey = `${bucket}:${computeChecksums}`
if (computeChecksums) {
clientOptions.computeChecksums = true
}
let client = this.#clients.get(cacheKey)
if (!client) {
client = new S3(
this._buildClientOptions(
this.settings.bucketCreds?.[bucket],
clientOptions
)
)
)
this.#clients.set(cacheKey, client)
}
return client
}
/**
* @param {Object} bucketCredentials
* @param {Object} clientOptions
* @return {Object}
* @param {S3.Types.ClientConfiguration} clientOptions
* @return {S3.Types.ClientConfiguration}
* @private
*/
_buildClientOptions(bucketCredentials, clientOptions) {

View File

@@ -147,7 +147,7 @@ describe('S3PersistorTests', function () {
deleteObjects: sinon.stub().returns(EmptyPromise),
getSignedUrlPromise: sinon.stub().resolves(redirectUrl),
}
S3 = sinon.stub().returns(S3Client)
S3 = sinon.stub().callsFake(() => Object.assign({}, S3Client))
Hash = {
end: sinon.stub(),
@@ -1027,4 +1027,22 @@ describe('S3PersistorTests', function () {
})
})
})
describe('_getClientForBucket', function () {
it('should return same instance for same bucket', function () {
const a = S3Persistor._getClientForBucket('foo')
const b = S3Persistor._getClientForBucket('foo')
expect(a).to.equal(b)
})
it('should return different instance for different bucket', function () {
const a = S3Persistor._getClientForBucket('foo')
const b = S3Persistor._getClientForBucket('bar')
expect(a).to.not.equal(b)
})
it('should return different instance for same bucket different computeChecksums', function () {
const a = S3Persistor._getClientForBucket('foo', false)
const b = S3Persistor._getClientForBucket('foo', true)
expect(a).to.not.equal(b)
})
})
})