From 3a35b8680ee22f1354fe98691b66157089273c56 Mon Sep 17 00:00:00 2001 From: Miguel Serrano Date: Tue, 7 Oct 2025 12:56:55 +0200 Subject: [PATCH] Merge pull request #28554 from overleaf/msm-force-s3-lib-storage-uploads [object-persistor] Use `@aws-sdk/lib-storage` for all uploads GitOrigin-RevId: ab8e54a7bae843f9e6b05ed9cf936130a36b8c2f --- libraries/object-persistor/README.md | 11 +-- .../src/MigrationPersistor.js | 14 +--- .../src/PerProjectEncryptedS3Persistor.js | 1 - libraries/object-persistor/src/S3Persistor.js | 74 ++++++------------- .../test/unit/MigrationPersistorTests.js | 20 +---- .../test/unit/S3ClientMock.js | 6 -- .../test/unit/S3PersistorTests.js | 49 ++++-------- services/docstore/app/js/DocArchiveManager.js | 46 +++++++----- .../test/unit/js/DocArchiveManagerTests.js | 19 +++++ services/filestore/docker-compose.ci.yml | 21 +++++- services/filestore/docker-compose.yml | 21 +++++- services/history-v1/docker-compose.ci.yml | 21 +++++- services/history-v1/docker-compose.yml | 21 +++++- .../history-v1/storage/lib/backupBlob.mjs | 7 -- .../storage/lib/backupPersistor.mjs | 1 - .../history-v1/storage/scripts/backup.mjs | 2 - .../acceptance/js/api/backupVerifier.test.mjs | 2 - 17 files changed, 159 insertions(+), 177 deletions(-) diff --git a/libraries/object-persistor/README.md b/libraries/object-persistor/README.md index 346672d8c5..23bb05b361 100644 --- a/libraries/object-persistor/README.md +++ b/libraries/object-persistor/README.md @@ -46,7 +46,7 @@ Uploads a stream to the backend. - `key`: The key for the uploaded object - `readStream`: The data stream to upload - `opts` (optional): - - `sourceMd5`: The md5 hash of the source data, if known. The uploaded data will be compared against this and the operation will fail if it does not match. If omitted, the md5 is calculated as the data is uploaded instead, and verified against the backend. + - `sourceMd5`: The md5 hash of the source data, if known. The uploaded data will be compared against this and the operation will fail if it does not match. If omitted, the md5 is calculated as the data is uploaded instead, and verified against the backend. This is not supported in `S3Persistor` as it performs [its own integrity protections](https://aws.amazon.com/blogs/aws/introducing-default-data-integrity-protections-for-new-objects-in-amazon-s3/). Setting `sourceMd5` with `S3Persistor` will result in an error being thrown. - `contentType`: The content type to write in the object metadata - `contentEncoding`: The content encoding to write in the object metadata @@ -266,7 +266,8 @@ For the `FS` persistor, the `bucketName` should be the full path to the folder o - `s3.partSize`: The part size for S3 uploads. Defaults to 100 megabytes. - `s3.httpOptions`: HTTP Options passed to the [`NodeHttpHandler` constructor](https://docs.aws.amazon.com/AWSJavaScriptSDK/v3/latest/Package/-smithy-node-http-handler/Class/NodeHttpHandler/) - For backwards compatibility reasons, the `timeout` property that was passed to the [S3 constructor](https://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/S3.html#constructor-property) before migrating to AWS SDK v3 is now passed to the `NodeHttpHandler` constructor as `connectionTimeout`. -- `s3.maxRetries`: The number of times the S3 client will retry in case of an error +- `s3.maxRetries` (legacy): The number of times the S3 client will retry in case of an error +- `s3.maxAttempts`: The number of times the S3 client will attempt to perform the operation in case there are errors. Default value is 3. - `s3.endpoint`: For testing - overrides the S3 endpoint to use a different service (e.g. a fake S3 server) - `s3.pathStyle`: For testing - use old path-style URLs, for services that do not support subdomain-based access @@ -281,12 +282,6 @@ For the `FS` persistor, the `bucketName` should be the full path to the folder o } ``` -#### Notes - -In order for server-side MD5 generation to work, uploads must be below the `partSize`. Otherwise a multipart upload will be used, and the S3 `eTag` which is used to retrieve the MD5 will not be the MD5 hash of the uploaded object. In these cases, we download the data and calculate the MD5 manually. - -For verification during upload, we use S3's checksum mechanism to verify the integrity of the uploaded data, but when explicitly retrieving the md5 hash this will download the entire object if its size is above the part size. - ### GCS-specific parameters GCS authentication is configured automatically via the local service account, or the `GOOGLE_APPLICATION_CREDENTIALS` environment variable. diff --git a/libraries/object-persistor/src/MigrationPersistor.js b/libraries/object-persistor/src/MigrationPersistor.js index 0cb665fa00..d70f6e550a 100644 --- a/libraries/object-persistor/src/MigrationPersistor.js +++ b/libraries/object-persistor/src/MigrationPersistor.js @@ -171,19 +171,7 @@ module.exports = class MigrationPersistor extends AbstractPersistor { destKey ) { try { - let sourceMd5 - try { - sourceMd5 = await this.fallbackPersistor.getObjectMd5Hash( - sourceBucket, - sourceKey - ) - } catch (err) { - Logger.warn(err, 'error getting md5 hash from fallback persistor') - } - - await this.primaryPersistor.sendStream(destBucket, destKey, stream, { - sourceMd5, - }) + await this.primaryPersistor.sendStream(destBucket, destKey, stream) } catch (err) { const error = new WriteError( 'unable to copy file to destination persistor', diff --git a/libraries/object-persistor/src/PerProjectEncryptedS3Persistor.js b/libraries/object-persistor/src/PerProjectEncryptedS3Persistor.js index bcdedf7979..946ff0d480 100644 --- a/libraries/object-persistor/src/PerProjectEncryptedS3Persistor.js +++ b/libraries/object-persistor/src/PerProjectEncryptedS3Persistor.js @@ -433,7 +433,6 @@ class CachedPerProjectEncryptedS3Persistor { * @param {number} [opts.contentLength] * @param {'*'} [opts.ifNoneMatch] * @param {SSECOptions} [opts.ssecOptions] - * @param {string} [opts.sourceMd5] * @return {Promise} */ async sendStream(bucketName, path, sourceStream, opts = {}) { diff --git a/libraries/object-persistor/src/S3Persistor.js b/libraries/object-persistor/src/S3Persistor.js index ec26ce0823..eac758d760 100644 --- a/libraries/object-persistor/src/S3Persistor.js +++ b/libraries/object-persistor/src/S3Persistor.js @@ -81,9 +81,6 @@ class S3Persistor extends AbstractPersistor { /** @type {Map} */ #clients = new Map() - /** @type {Map} */ - #noRetryClients = new Map() - constructor(settings = {}) { super() @@ -111,7 +108,6 @@ class S3Persistor extends AbstractPersistor { * @param {number} [opts.contentLength] * @param {'*'} [opts.ifNoneMatch] * @param {SSECOptions} [opts.ssecOptions] - * @param {string} [opts.sourceMd5] * @return {Promise} */ async sendStream(bucketName, key, readStream, opts = {}) { @@ -136,6 +132,12 @@ class S3Persistor extends AbstractPersistor { uploadOptions.StorageClass = this.settings.storageClass[bucketName] } + if ('sourceMd5' in opts) { + // we fail straight away to prevent the client from wasting CPU/IO precomputing the hash + throw new Error( + 'sourceMd5 option is not supported, S3 provides its own integrity protection mechanism' + ) + } if (opts.contentType) { uploadOptions.ContentType = opts.contentType } @@ -152,32 +154,12 @@ class S3Persistor extends AbstractPersistor { Object.assign(uploadOptions, opts.ssecOptions.getPutOptions()) } - // if we have an md5 hash, pass this to S3 to verify the upload - otherwise - // we rely on the S3 client's checksum calculation to validate the upload - let computeChecksums = false - if (opts.sourceMd5) { - uploadOptions.ContentMD5 = PersistorHelper.hexToBase64(opts.sourceMd5) - } else { - computeChecksums = true - } - - if (this.settings.disableMultiPartUpload) { - // retries are disabled for `PutObjectCommand` when using streams - // https://github.com/aws/aws-sdk-js-v3/issues/6770 - const noRetry = true - await this._getClientForBucket( - bucketName, - computeChecksums, - noRetry - ).send(new PutObjectCommand(uploadOptions)) - } else { - const upload = new Upload({ - client: this._getClientForBucket(bucketName, computeChecksums), - params: uploadOptions, - partSize: this.settings.partSize, - }) - await upload.done() - } + const upload = new Upload({ + client: this._getClientForBucket(bucketName), + params: uploadOptions, + partSize: this.settings.partSize, + }) + await upload.done() } catch (err) { throw PersistorHelper.wrapError( err, @@ -555,34 +537,31 @@ class S3Persistor extends AbstractPersistor { /** * @param {string} bucket - * @param {boolean} computeChecksums - * @param {boolean} noRetries * @return {S3Client} * @private */ - _getClientForBucket(bucket, computeChecksums = false, noRetries = false) { + _getClientForBucket(bucket) { /** @type {import('@aws-sdk/client-s3').S3ClientConfig} */ const clientOptions = {} - const cacheKey = `${bucket}:${computeChecksums}` - if (computeChecksums) { - clientOptions.requestChecksumCalculation = 'WHEN_SUPPORTED' - clientOptions.responseChecksumValidation = 'WHEN_SUPPORTED' - } + const cacheKey = bucket - const clientMap = noRetries ? this.#noRetryClients : this.#clients - let client = clientMap.get(cacheKey) + let client = this.#clients.get(cacheKey) if (!client) { client = new S3Client( this._buildClientOptions( this.settings.bucketCreds?.[bucket], - clientOptions, - noRetries + clientOptions ) ) - clientMap.set(cacheKey, client) + this.#clients.set(cacheKey, client) + // Some third-party S3-compatible services (as MinIO) do not support the default checksums + // for object integrity in `DeleteObjectsCommand`. We add a fallback for those cases. // https://github.com/aws/aws-sdk-js-v3/blob/main/supplemental-docs/MD5_FALLBACK.md - addMd5Middleware(client) + const useMd5Fallback = process.env.DELETE_OBJECTS_MD5_FALLBACK === 'true' + if (useMd5Fallback) { + addMd5Middleware(client) + } } return client @@ -591,11 +570,10 @@ class S3Persistor extends AbstractPersistor { /** * @param {Object} bucketCredentials * @param {import('@aws-sdk/client-s3').S3ClientConfig} clientOptions - * @param {boolean} noRetries * @return {import('@aws-sdk/client-s3').S3ClientConfig} * @private */ - _buildClientOptions(bucketCredentials, clientOptions, noRetries = false) { + _buildClientOptions(bucketCredentials, clientOptions) { const options = clientOptions || {} if (bucketCredentials) { @@ -635,10 +613,6 @@ class S3Persistor extends AbstractPersistor { options.maxAttempts = this.settings.maxRetries + 1 } - if (noRetries) { - options.maxAttempts = 1 - } - const requestHandlerParams = this.settings.httpOptions || {} if (sslEnabled && this.settings.ca) { diff --git a/libraries/object-persistor/test/unit/MigrationPersistorTests.js b/libraries/object-persistor/test/unit/MigrationPersistorTests.js index a37aa537af..7af8f6db6d 100644 --- a/libraries/object-persistor/test/unit/MigrationPersistorTests.js +++ b/libraries/object-persistor/test/unit/MigrationPersistorTests.js @@ -197,19 +197,11 @@ describe('MigrationPersistorTests', function () { expect(fallbackPersistor.getObjectStream).to.have.been.calledOnce }) - it('should get the md5 hash from the source', function () { - expect(fallbackPersistor.getObjectMd5Hash).to.have.been.calledWith( - fallbackBucket, - key - ) - }) - it('should send a stream to the primary', function () { expect(primaryPersistor.sendStream).to.have.been.calledWithExactly( bucket, key, - sinon.match.instanceOf(Stream.PassThrough), - { sourceMd5: md5 } + sinon.match.instanceOf(Stream.PassThrough) ) }) @@ -476,19 +468,11 @@ describe('MigrationPersistorTests', function () { ).not.to.have.been.calledWithExactly(fallbackBucket, key) }) - it('should get the md5 hash from the source', function () { - expect(fallbackPersistor.getObjectMd5Hash).to.have.been.calledWith( - fallbackBucket, - key - ) - }) - it('should send the file to the primary', function () { expect(primaryPersistor.sendStream).to.have.been.calledWithExactly( bucket, destKey, - sinon.match.instanceOf(Stream.PassThrough), - { sourceMd5: md5 } + sinon.match.instanceOf(Stream.PassThrough) ) }) }) diff --git a/libraries/object-persistor/test/unit/S3ClientMock.js b/libraries/object-persistor/test/unit/S3ClientMock.js index b7ed048622..25529da396 100644 --- a/libraries/object-persistor/test/unit/S3ClientMock.js +++ b/libraries/object-persistor/test/unit/S3ClientMock.js @@ -108,11 +108,5 @@ module.exports = function () { this.payload = payload } }, - PutObjectCommand: class PutObjectCommand { - constructor(payload) { - this.name = 'PutObjectCommand' - this.payload = payload - } - }, } } diff --git a/libraries/object-persistor/test/unit/S3PersistorTests.js b/libraries/object-persistor/test/unit/S3PersistorTests.js index 3255eb6435..944ac4504d 100644 --- a/libraries/object-persistor/test/unit/S3PersistorTests.js +++ b/libraries/object-persistor/test/unit/S3PersistorTests.js @@ -549,27 +549,6 @@ describe('S3PersistorTests', function () { }) }) - describe('when a hash is supplied', function () { - beforeEach(async function () { - await S3Persistor.sendStream(bucket, key, ReadStream, { - sourceMd5: 'aaaaaaaabbbbbbbbaaaaaaaabbbbbbbb', - }) - }) - - it('sends the hash in base64', function () { - expect(awsLibStorageUpload).to.have.been.calledWith({ - client: S3.s3ClientStub, - params: { - Bucket: bucket, - Key: key, - Body: sinon.match.instanceOf(Transform), - ContentMD5: 'qqqqqru7u7uqqqqqu7u7uw==', - }, - partSize: 100 * 1024 * 1024, - }) - }) - }) - describe('when metadata is supplied', function () { const contentType = 'text/csv' const contentEncoding = 'gzip' @@ -596,23 +575,23 @@ describe('S3PersistorTests', function () { }) }) - describe('when multipart upload is disabled', function () { - const contentType = 'text/csv' - const contentEncoding = 'gzip' - + describe('with sourceMd5 option', function () { + let error beforeEach(async function () { - S3.mockSend(S3.PutObjectCommand, {}) - settings.disableMultiPartUpload = true - await S3Persistor.sendStream(bucket, key, ReadStream, { - contentType, - contentEncoding, - }) + try { + await S3Persistor.sendStream(bucket, key, ReadStream, { + sourceMd5: 'ffffffff', + }) + } catch (err) { + error = err + } }) - it('configures the options to not to retry requests', function () { - expect(S3.S3Client).to.have.been.calledWithMatch({ - maxAttempts: 1, - }) + it('should throw an error', function () { + expect(error.message).to.equal('upload to S3 failed') + expect(error.cause.message).to.equal( + 'sourceMd5 option is not supported, S3 provides its own integrity protection mechanism' + ) }) }) diff --git a/services/docstore/app/js/DocArchiveManager.js b/services/docstore/app/js/DocArchiveManager.js index d03ee161a8..58f600eb37 100644 --- a/services/docstore/app/js/DocArchiveManager.js +++ b/services/docstore/app/js/DocArchiveManager.js @@ -74,11 +74,15 @@ async function archiveDoc(projectId, docId) { throw error } - const md5 = crypto.createHash('md5').update(json).digest('hex') const stream = new ReadableString(json) - await PersistorManager.sendStream(Settings.docstore.bucket, key, stream, { - sourceMd5: md5, - }) + if (Settings.docstore.backend === 's3') { + await PersistorManager.sendStream(Settings.docstore.bucket, key, stream) + } else { + await PersistorManager.sendStream(Settings.docstore.bucket, key, stream, { + sourceMd5: crypto.createHash('md5').update(json).digest('hex'), + }) + } + await MongoManager.markDocAsArchived(projectId, docId, doc.rev) } @@ -112,25 +116,31 @@ async function unArchiveAllDocs(projectId) { // get the doc from the PersistorManager without storing it in mongo async function getDoc(projectId, docId) { const key = `${projectId}/${docId}` - const sourceMd5 = await PersistorManager.getObjectMd5Hash( - Settings.docstore.bucket, - key - ) const stream = await PersistorManager.getObjectStream( Settings.docstore.bucket, key ) - stream.resume() - const buffer = await streamToBuffer(projectId, docId, stream) - const md5 = crypto.createHash('md5').update(buffer).digest('hex') - if (sourceMd5 !== md5) { - throw new Errors.Md5MismatchError('md5 mismatch when downloading doc', { - key, - sourceMd5, - md5, - }) - } + let buffer + if (Settings.docstore.backend === 's3') { + stream.resume() + buffer = await streamToBuffer(projectId, docId, stream) + } else { + const sourceMd5 = await PersistorManager.getObjectMd5Hash( + Settings.docstore.bucket, + key + ) + stream.resume() + buffer = await streamToBuffer(projectId, docId, stream) + const md5 = crypto.createHash('md5').update(buffer).digest('hex') + if (sourceMd5 !== md5) { + throw new Errors.Md5MismatchError('md5 mismatch when downloading doc', { + key, + sourceMd5, + md5, + }) + } + } return _deserializeArchivedDoc(buffer) } diff --git a/services/docstore/test/unit/js/DocArchiveManagerTests.js b/services/docstore/test/unit/js/DocArchiveManagerTests.js index 2ec1cb2016..e7ac4f6fe8 100644 --- a/services/docstore/test/unit/js/DocArchiveManagerTests.js +++ b/services/docstore/test/unit/js/DocArchiveManagerTests.js @@ -232,6 +232,25 @@ describe('DocArchiveManager', function () { ) }) + describe('with S3 persistor', function () { + beforeEach(async function () { + Settings.docstore.backend = 's3' + await DocArchiveManager.archiveDoc(projectId, mongoDocs[0]._id) + }) + + it('should not calculate the hex md5 sum of the content', function () { + expect(Crypto.createHash).not.to.have.been.called + expect(HashUpdate).not.to.have.been.called + expect(HashDigest).not.to.have.been.called + }) + + it('should not pass an md5 hash to the object persistor for verification', function () { + expect(PersistorManager.sendStream).not.to.have.been.calledWithMatch({ + sourceMd5: sinon.match.any, + }) + }) + }) + it('should pass the correct bucket and key to the persistor', async function () { await DocArchiveManager.archiveDoc(projectId, mongoDocs[0]._id) diff --git a/services/filestore/docker-compose.ci.yml b/services/filestore/docker-compose.ci.yml index ca1e86c17d..5dfcd5f115 100644 --- a/services/filestore/docker-compose.ci.yml +++ b/services/filestore/docker-compose.ci.yml @@ -25,6 +25,7 @@ services: POSTGRES_HOST: postgres AWS_S3_ENDPOINT: https://minio:9000 AWS_S3_PATH_STYLE: 'true' + DELETE_OBJECTS_MD5_FALLBACK: true AWS_ACCESS_KEY_ID: OVERLEAF_FILESTORE_S3_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY: OVERLEAF_FILESTORE_S3_SECRET_ACCESS_KEY MINIO_ROOT_USER: MINIO_ROOT_USER @@ -138,7 +139,10 @@ services: "Action": [ "s3:PutObject", "s3:GetObject", - "s3:DeleteObject" + "s3:DeleteObject", + "s3:AbortMultipartUpload", + "s3:ListMultipartUploadParts", + "s3:ListBucketMultipartUploads" ], "Resource": "arn:aws:s3:::fake-user-files/*" }, @@ -154,7 +158,10 @@ services: "Action": [ "s3:PutObject", "s3:GetObject", - "s3:DeleteObject" + "s3:DeleteObject", + "s3:AbortMultipartUpload", + "s3:ListMultipartUploadParts", + "s3:ListBucketMultipartUploads" ], "Resource": "arn:aws:s3:::fake-user-files-dek/*" }, @@ -170,7 +177,10 @@ services: "Action": [ "s3:PutObject", "s3:GetObject", - "s3:DeleteObject" + "s3:DeleteObject", + "s3:AbortMultipartUpload", + "s3:ListMultipartUploadParts", + "s3:ListBucketMultipartUploads" ], "Resource": "arn:aws:s3:::fake-template-files/*" }, @@ -186,7 +196,10 @@ services: "Action": [ "s3:PutObject", "s3:GetObject", - "s3:DeleteObject" + "s3:DeleteObject", + "s3:AbortMultipartUpload", + "s3:ListMultipartUploadParts", + "s3:ListBucketMultipartUploads" ], "Resource": "arn:aws:s3:::random-bucket-*" } diff --git a/services/filestore/docker-compose.yml b/services/filestore/docker-compose.yml index 0eef4aecbc..b866903ad6 100644 --- a/services/filestore/docker-compose.yml +++ b/services/filestore/docker-compose.yml @@ -42,6 +42,7 @@ services: POSTGRES_HOST: postgres AWS_S3_ENDPOINT: https://minio:9000 AWS_S3_PATH_STYLE: 'true' + DELETE_OBJECTS_MD5_FALLBACK: true AWS_ACCESS_KEY_ID: OVERLEAF_FILESTORE_S3_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY: OVERLEAF_FILESTORE_S3_SECRET_ACCESS_KEY MINIO_ROOT_USER: MINIO_ROOT_USER @@ -146,7 +147,10 @@ services: "Action": [ "s3:PutObject", "s3:GetObject", - "s3:DeleteObject" + "s3:DeleteObject", + "s3:AbortMultipartUpload", + "s3:ListMultipartUploadParts", + "s3:ListBucketMultipartUploads" ], "Resource": "arn:aws:s3:::fake-user-files/*" }, @@ -162,7 +166,10 @@ services: "Action": [ "s3:PutObject", "s3:GetObject", - "s3:DeleteObject" + "s3:DeleteObject", + "s3:AbortMultipartUpload", + "s3:ListMultipartUploadParts", + "s3:ListBucketMultipartUploads" ], "Resource": "arn:aws:s3:::fake-user-files-dek/*" }, @@ -178,7 +185,10 @@ services: "Action": [ "s3:PutObject", "s3:GetObject", - "s3:DeleteObject" + "s3:DeleteObject", + "s3:AbortMultipartUpload", + "s3:ListMultipartUploadParts", + "s3:ListBucketMultipartUploads" ], "Resource": "arn:aws:s3:::fake-template-files/*" }, @@ -194,7 +204,10 @@ services: "Action": [ "s3:PutObject", "s3:GetObject", - "s3:DeleteObject" + "s3:DeleteObject", + "s3:AbortMultipartUpload", + "s3:ListMultipartUploadParts", + "s3:ListBucketMultipartUploads" ], "Resource": "arn:aws:s3:::random-bucket-*" } diff --git a/services/history-v1/docker-compose.ci.yml b/services/history-v1/docker-compose.ci.yml index 3d1314663c..99b67c60d7 100644 --- a/services/history-v1/docker-compose.ci.yml +++ b/services/history-v1/docker-compose.ci.yml @@ -41,6 +41,7 @@ services: POSTGRES_HOST: postgres AWS_S3_ENDPOINT: https://minio:9000 AWS_S3_PATH_STYLE: 'true' + DELETE_OBJECTS_MD5_FALLBACK: true AWS_ACCESS_KEY_ID: OVERLEAF_HISTORY_S3_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY: OVERLEAF_HISTORY_S3_SECRET_ACCESS_KEY MINIO_ROOT_USER: MINIO_ROOT_USER @@ -188,7 +189,10 @@ services: "Action": [ "s3:PutObject", "s3:GetObject", - "s3:DeleteObject" + "s3:DeleteObject", + "s3:AbortMultipartUpload", + "s3:ListMultipartUploadParts", + "s3:ListBucketMultipartUploads" ], "Resource": "arn:aws:s3:::overleaf-test-history-chunks/*" }, @@ -204,7 +208,10 @@ services: "Action": [ "s3:PutObject", "s3:GetObject", - "s3:DeleteObject" + "s3:DeleteObject", + "s3:AbortMultipartUpload", + "s3:ListMultipartUploadParts", + "s3:ListBucketMultipartUploads" ], "Resource": "arn:aws:s3:::overleaf-test-history-deks/*" }, @@ -220,7 +227,10 @@ services: "Action": [ "s3:PutObject", "s3:GetObject", - "s3:DeleteObject" + "s3:DeleteObject", + "s3:AbortMultipartUpload", + "s3:ListMultipartUploadParts", + "s3:ListBucketMultipartUploads" ], "Resource": "arn:aws:s3:::overleaf-test-history-global-blobs/*" }, @@ -236,7 +246,10 @@ services: "Action": [ "s3:PutObject", "s3:GetObject", - "s3:DeleteObject" + "s3:DeleteObject", + "s3:AbortMultipartUpload", + "s3:ListMultipartUploadParts", + "s3:ListBucketMultipartUploads" ], "Resource": "arn:aws:s3:::overleaf-test-history-project-blobs/*" } diff --git a/services/history-v1/docker-compose.yml b/services/history-v1/docker-compose.yml index a4d79d39d2..3b81bda8c5 100644 --- a/services/history-v1/docker-compose.yml +++ b/services/history-v1/docker-compose.yml @@ -58,6 +58,7 @@ services: POSTGRES_HOST: postgres AWS_S3_ENDPOINT: https://minio:9000 AWS_S3_PATH_STYLE: 'true' + DELETE_OBJECTS_MD5_FALLBACK: true AWS_ACCESS_KEY_ID: OVERLEAF_HISTORY_S3_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY: OVERLEAF_HISTORY_S3_SECRET_ACCESS_KEY MINIO_ROOT_USER: MINIO_ROOT_USER @@ -196,7 +197,10 @@ services: "Action": [ "s3:PutObject", "s3:GetObject", - "s3:DeleteObject" + "s3:DeleteObject", + "s3:AbortMultipartUpload", + "s3:ListMultipartUploadParts", + "s3:ListBucketMultipartUploads" ], "Resource": "arn:aws:s3:::overleaf-test-history-chunks/*" }, @@ -212,7 +216,10 @@ services: "Action": [ "s3:PutObject", "s3:GetObject", - "s3:DeleteObject" + "s3:DeleteObject", + "s3:AbortMultipartUpload", + "s3:ListMultipartUploadParts", + "s3:ListBucketMultipartUploads" ], "Resource": "arn:aws:s3:::overleaf-test-history-deks/*" }, @@ -228,7 +235,10 @@ services: "Action": [ "s3:PutObject", "s3:GetObject", - "s3:DeleteObject" + "s3:DeleteObject", + "s3:AbortMultipartUpload", + "s3:ListMultipartUploadParts", + "s3:ListBucketMultipartUploads" ], "Resource": "arn:aws:s3:::overleaf-test-history-global-blobs/*" }, @@ -244,7 +254,10 @@ services: "Action": [ "s3:PutObject", "s3:GetObject", - "s3:DeleteObject" + "s3:DeleteObject", + "s3:AbortMultipartUpload", + "s3:ListMultipartUploadParts", + "s3:ListBucketMultipartUploads" ], "Resource": "arn:aws:s3:::overleaf-test-history-project-blobs/*" } diff --git a/services/history-v1/storage/lib/backupBlob.mjs b/services/history-v1/storage/lib/backupBlob.mjs index 8ae1a6a901..89625c6ed6 100644 --- a/services/history-v1/storage/lib/backupBlob.mjs +++ b/services/history-v1/storage/lib/backupBlob.mjs @@ -70,7 +70,6 @@ export async function downloadBlobToDir(historyId, blob, tmpDir) { * @return {Promise} */ export async function uploadBlobToBackup(historyId, blob, path, persistor) { - const md5 = Crypto.createHash('md5') const filePathCompressed = path + '.gz' let backupSource let contentEncoding @@ -86,7 +85,6 @@ export async function uploadBlobToBackup(historyId, blob, path, persistor) { async function* (source) { for await (const chunk of source) { size += chunk.byteLength - md5.update(chunk) yield chunk } }, @@ -97,10 +95,6 @@ export async function uploadBlobToBackup(historyId, blob, path, persistor) { } else { backupSource = path size = blob.getByteLength() - await Stream.promises.pipeline( - fs.createReadStream(path, { highWaterMark: HIGHWATER_MARK }), - md5 - ) } const key = makeProjectKey(historyId, blob.getHash()) await persistor.sendStream( @@ -111,7 +105,6 @@ export async function uploadBlobToBackup(historyId, blob, path, persistor) { contentEncoding, contentType: 'application/octet-stream', contentLength: size, - sourceMd5: md5.digest('hex'), ifNoneMatch: '*', } ) diff --git a/services/history-v1/storage/lib/backupPersistor.mjs b/services/history-v1/storage/lib/backupPersistor.mjs index 8f80e5faaf..d13d49ff4e 100644 --- a/services/history-v1/storage/lib/backupPersistor.mjs +++ b/services/history-v1/storage/lib/backupPersistor.mjs @@ -104,7 +104,6 @@ async function getRootKeyEncryptionKeys() { export const backupPersistor = new PerProjectEncryptedS3Persistor({ ...persistorConfig.s3SSEC, - disableMultiPartUpload: true, dataEncryptionKeyBucketName: deksBucket, pathToProjectFolder, getRootKeyEncryptionKeys, diff --git a/services/history-v1/storage/scripts/backup.mjs b/services/history-v1/storage/scripts/backup.mjs index 8cbbadfe12..f1019f5142 100644 --- a/services/history-v1/storage/scripts/backup.mjs +++ b/services/history-v1/storage/scripts/backup.mjs @@ -221,7 +221,6 @@ async function backupChunk( } const key = makeChunkKey(historyId, chunkToBackup.startVersion) logger.debug({ chunkRecord, historyId, projectId, key }, 'backing up chunk') - const md5 = Crypto.createHash('md5').update(chunkBuffer) await chunkBackupPersistorForProject.sendStream( chunksBucket, makeChunkKey(historyId, chunkToBackup.startVersion), @@ -230,7 +229,6 @@ async function backupChunk( contentType: 'application/json', contentEncoding: 'gzip', contentLength: chunkBuffer.byteLength, - sourceMd5: md5.digest('hex'), } ) } diff --git a/services/history-v1/test/acceptance/js/api/backupVerifier.test.mjs b/services/history-v1/test/acceptance/js/api/backupVerifier.test.mjs index 906eac2cb9..c025a6dd10 100644 --- a/services/history-v1/test/acceptance/js/api/backupVerifier.test.mjs +++ b/services/history-v1/test/acceptance/js/api/backupVerifier.test.mjs @@ -131,7 +131,6 @@ async function backupChunk(historyId) { historyId, newChunkMetadata.id ) - const md5 = Crypto.createHash('md5').update(chunkBuffer) await backupPersistor.sendStream( chunksBucket, path.join( @@ -143,7 +142,6 @@ async function backupChunk(historyId) { contentType: 'application/json', contentEncoding: 'gzip', contentLength: chunkBuffer.byteLength, - sourceMd5: md5.digest('hex'), } ) }