From 3d7254b419fd0fb5379e749506d76c9cd17b3f7a Mon Sep 17 00:00:00 2001 From: Jakob Ackermann Date: Tue, 26 Nov 2024 16:06:11 +0100 Subject: [PATCH] Merge pull request #22153 from overleaf/jpa-backup-verifier-minimal [history-v1] backup-verifier-app: initial revision GitOrigin-RevId: 922c9f94cb7ca7c129e38fd6961d42bdff819cd8 --- .../src/PerProjectEncryptedS3Persistor.js | 12 + package-lock.json | 2 + package.json | 2 + services/history-v1/backup-verifier-app.mjs | 83 +++++++ services/history-v1/buildscript.txt | 2 +- .../config/custom-environment-variables.json | 1 + services/history-v1/config/test.json | 1 + .../history-v1/storage/lib/backupVerifier.mjs | 98 ++++++++ .../storage/scripts/verify_backup_blob.mjs | 21 ++ .../acceptance/js/api/backupDeletion.test.mjs | 2 +- .../acceptance/js/api/backupVerifier.test.mjs | 209 ++++++++++++++++++ ...er.mjs => test_backup_deletion_server.mjs} | 0 .../support/test_backup_verifier_server.mjs | 43 ++++ services/history-v1/tsconfig.json | 1 + 14 files changed, 475 insertions(+), 2 deletions(-) create mode 100644 services/history-v1/backup-verifier-app.mjs create mode 100644 services/history-v1/storage/lib/backupVerifier.mjs create mode 100644 services/history-v1/storage/scripts/verify_backup_blob.mjs create mode 100644 services/history-v1/test/acceptance/js/api/backupVerifier.test.mjs rename services/history-v1/test/acceptance/js/api/support/{test_backup_server.mjs => test_backup_deletion_server.mjs} (100%) create mode 100644 services/history-v1/test/acceptance/js/api/support/test_backup_verifier_server.mjs diff --git a/libraries/object-persistor/src/PerProjectEncryptedS3Persistor.js b/libraries/object-persistor/src/PerProjectEncryptedS3Persistor.js index 9032bf1897..86ee336b93 100644 --- a/libraries/object-persistor/src/PerProjectEncryptedS3Persistor.js +++ b/libraries/object-persistor/src/PerProjectEncryptedS3Persistor.js @@ -174,6 +174,18 @@ class PerProjectEncryptedS3Persistor extends S3Persistor { ) } + /** + * @param 
{string} bucketName + * @param {string} path + * @return {Promise} + */ + async forProjectRO(bucketName, path) { + return new CachedPerProjectEncryptedS3Persistor( + this, + await this.#getExistingDataEncryptionKeyOptions(bucketName, path) + ) + } + /** * @param {string} bucketName * @param {string} path diff --git a/package-lock.json b/package-lock.json index 492472de69..1ea4167c8b 100644 --- a/package-lock.json +++ b/package-lock.json @@ -36,6 +36,8 @@ "patch-package": "^8.0.0" }, "devDependencies": { + "@types/chai": "^4.3.0", + "@types/chai-as-promised": "^7.1.8", "@types/mocha": "^10.0.6", "@typescript-eslint/eslint-plugin": "^8.0.0", "@typescript-eslint/parser": "^8.0.0", diff --git a/package.json b/package.json index 4877aa0223..6860755f03 100644 --- a/package.json +++ b/package.json @@ -5,6 +5,8 @@ "patch-package": "^8.0.0" }, "devDependencies": { + "@types/chai": "^4.3.0", + "@types/chai-as-promised": "^7.1.8", "@types/mocha": "^10.0.6", "@typescript-eslint/eslint-plugin": "^8.0.0", "@typescript-eslint/parser": "^8.0.0", diff --git a/services/history-v1/backup-verifier-app.mjs b/services/history-v1/backup-verifier-app.mjs new file mode 100644 index 0000000000..de427a3765 --- /dev/null +++ b/services/history-v1/backup-verifier-app.mjs @@ -0,0 +1,83 @@ +// @ts-check +// Metrics must be initialized before importing anything else +import '@overleaf/metrics/initialize.js' +import http from 'node:http' +import { fileURLToPath } from 'node:url' +import { promisify } from 'node:util' +import express from 'express' +import logger from '@overleaf/logger' +import Metrics from '@overleaf/metrics' +import { + BackupCorruptedError, + healthCheck, + verifyBlob, +} from './storage/lib/backupVerifier.mjs' +import { mongodb } from './storage/index.js' +import { expressify } from '@overleaf/promise-utils' +import { Blob } from 'overleaf-editor-core' + +const app = express() + +logger.initialize('history-v1-backup-verifier') +Metrics.open_sockets.monitor() 
+Metrics.injectMetricsRoute(app) +app.use(Metrics.http.monitor(logger)) +Metrics.leaked_sockets.monitor(logger) +Metrics.event_loop.monitor(logger) +Metrics.memory.monitor(logger) + +app.get( + '/history/:historyId/blob/:hash/verify', + expressify(async (req, res) => { + const { historyId, hash } = req.params + try { + await verifyBlob(historyId, hash) + res.sendStatus(200) + } catch (err) { + logger.warn({ err, historyId, hash }, 'manual verify blob failed') + if (err instanceof Blob.NotFoundError) { + res.status(404).send(err.message) + } else if (err instanceof BackupCorruptedError) { + res.status(422).send(err.message) + } else { + throw err + } + } + }) +) + +app.get('/status', (req, res) => { + res.send('history-v1-backup-verifier is up') +}) + +app.get( + '/health_check', + expressify(async (req, res) => { + await healthCheck() + res.sendStatus(200) + }) +) + +app.use((err, req, res, next) => { + req.logger.addFields({ err }) + req.logger.setLevel('error') + next(err) +}) + +/** + * @param {number} port + * @return {Promise<http.Server>} + */ +export async function startApp(port) { + await mongodb.client.connect() + await healthCheck() + const server = http.createServer(app) + await promisify(server.listen.bind(server, port))() + return server +} + +// Run this if we're called directly +if (process.argv[1] === fileURLToPath(import.meta.url)) { + const PORT = parseInt(process.env.PORT || '3102', 10) + await startApp(PORT) +} diff --git a/services/history-v1/buildscript.txt b/services/history-v1/buildscript.txt index 32019c60c8..9ab6fff24a 100644 --- a/services/history-v1/buildscript.txt +++ b/services/history-v1/buildscript.txt @@ -7,4 +7,4 @@ history-v1 --node-version=20.18.0 --public-repo=False --script-version=4.5.0 ---tsconfig-extra-includes=backup-deletion-app.mjs,api/**/*,migrations/**/*,storage/**/* +--tsconfig-extra-includes=backup-deletion-app.mjs,backup-verifier-app.mjs,api/**/*,migrations/**/*,storage/**/* diff --git 
a/services/history-v1/config/custom-environment-variables.json b/services/history-v1/config/custom-environment-variables.json index d12d82d9f5..15c0a9dc01 100644 --- a/services/history-v1/config/custom-environment-variables.json +++ b/services/history-v1/config/custom-environment-variables.json @@ -63,6 +63,7 @@ "globalBlobsBucket":"BACKUP_OVERLEAF_EDITOR_GLOBAL_BLOBS_BUCKET", "projectBlobsBucket":"BACKUP_OVERLEAF_EDITOR_PROJECT_BLOBS_BUCKET" }, + "healthCheckBlobs": "HEALTH_CHECK_BLOBS", "healthCheckProjects": "HEALTH_CHECK_PROJECTS", "minSoftDeletionPeriodDays": "MIN_SOFT_DELETION_PERIOD_DAYS", "mongo": { diff --git a/services/history-v1/config/test.json b/services/history-v1/config/test.json index 89ede8bbb8..ab192b0a92 100644 --- a/services/history-v1/config/test.json +++ b/services/history-v1/config/test.json @@ -33,6 +33,7 @@ }, "tieringStorageClass": "REDUCED_REDUNDANCY" }, + "healthCheckBlobs": "[\"42/f70d7bba4ae1f07682e0358bd7a2068094fc023b\",\"000000000000000000000042/98d5521fe746bc2d11761edab5d0829bee286009\"]", "healthCheckProjects": "[\"42\",\"000000000000000000000042\"]", "maxDeleteKeys": "3", "useDeleteObjects": "false", diff --git a/services/history-v1/storage/lib/backupVerifier.mjs b/services/history-v1/storage/lib/backupVerifier.mjs new file mode 100644 index 0000000000..55247b91d7 --- /dev/null +++ b/services/history-v1/storage/lib/backupVerifier.mjs @@ -0,0 +1,98 @@ +// @ts-check +import config from 'config' +import OError from '@overleaf/o-error' +import { backupPersistor, projectBlobsBucket } from './backupPersistor.mjs' +import { Blob } from 'overleaf-editor-core' +import { BlobStore, makeProjectKey } from './blob_store/index.js' +import blobHash from './blob_hash.js' +import { NotFoundError } from '@overleaf/object-persistor/src/Errors.js' + +/** + * @typedef {import("@overleaf/object-persistor/src/PerProjectEncryptedS3Persistor").CachedPerProjectEncryptedS3Persistor} CachedPerProjectEncryptedS3Persistor + */ + +/** + * @param {string} 
historyId + * @param {string} hash + */ +export async function verifyBlob(historyId, hash) { + return await verifyBlobs(historyId, [hash]) +} + +/** + * @param {string} historyId + * @param {Array<string>} hashes + */ +export async function verifyBlobs(historyId, hashes) { + let projectCache + try { + projectCache = await backupPersistor.forProjectRO( + projectBlobsBucket, + makeProjectKey(historyId, '') + ) + } catch (err) { + if (err instanceof NotFoundError) { + throw new BackupCorruptedError('dek does not exist', {}, err) + } + throw err + } + await verifyBlobsWithCache(historyId, projectCache, hashes) +} + +/** + * @param {string} historyId + * @param {CachedPerProjectEncryptedS3Persistor} projectCache + * @param {Array<string>} hashes + */ +export async function verifyBlobsWithCache(historyId, projectCache, hashes) { + if (hashes.length === 0) throw new Error('bug: empty hashes') + const blobStore = new BlobStore(historyId) + for (const hash of hashes) { + const path = makeProjectKey(historyId, hash) + const blob = await blobStore.getBlob(hash) + if (!blob) throw new Blob.NotFoundError(hash) + let stream + try { + stream = await projectCache.getObjectStream(projectBlobsBucket, path, { + autoGunzip: true, + }) + } catch (err) { + if (err instanceof NotFoundError) { + throw new BackupCorruptedError('missing blob', { path, hash }, err) + } + throw err + } + const backupHash = await blobHash.fromStream(blob.getByteLength(), stream) + if (backupHash !== hash) { + throw new BackupCorruptedError('hash mismatch for backed up blob', { + path, + hash, + backupHash, + }) + } + } +} + +export class BackupCorruptedError extends OError {} + +export async function healthCheck() { + /** @type {Array<string>} */ + const HEALTH_CHECK_BLOBS = JSON.parse(config.get('healthCheckBlobs')) + if (HEALTH_CHECK_BLOBS.length !== 2) { + throw new Error('expected 2 healthCheckBlobs') + } + if (!HEALTH_CHECK_BLOBS.some(path => path.split('/')[0].length === 24)) { + throw new Error('expected mongo id in healthCheckBlobs') + } + if 
(!HEALTH_CHECK_BLOBS.some(path => path.split('/')[0].length < 24)) { + throw new Error('expected postgres id in healthCheckBlobs') + } + if (HEALTH_CHECK_BLOBS.some(path => path.split('/')[1]?.length !== 40)) { + throw new Error('expected hash in healthCheckBlobs') + } + + for (const path of HEALTH_CHECK_BLOBS) { + const [historyId, hash] = path.split('/') + await verifyBlob(historyId, hash) + } +} diff --git a/services/history-v1/storage/scripts/verify_backup_blob.mjs b/services/history-v1/storage/scripts/verify_backup_blob.mjs new file mode 100644 index 0000000000..504f9072a2 --- /dev/null +++ b/services/history-v1/storage/scripts/verify_backup_blob.mjs @@ -0,0 +1,21 @@ +import logger from '@overleaf/logger' +import commandLineArgs from 'command-line-args' +import { verifyBlobs } from '../lib/backupVerifier.mjs' + +const { historyId, hashes } = commandLineArgs([ + { name: 'historyId', type: String }, + { name: 'hashes', type: String, multiple: true, defaultOption: true }, +]) + +if (!hashes || hashes.length === 0) { + throw new Error('missing --hashes flag') +} + +try { + await verifyBlobs(historyId, hashes) + console.log('OK') + process.exit(0) +} catch (err) { + logger.err({ err }, 'failed to verify blob') + process.exit(1) +} diff --git a/services/history-v1/test/acceptance/js/api/backupDeletion.test.mjs b/services/history-v1/test/acceptance/js/api/backupDeletion.test.mjs index d6d4ae0db9..8493f4d143 100644 --- a/services/history-v1/test/acceptance/js/api/backupDeletion.test.mjs +++ b/services/history-v1/test/acceptance/js/api/backupDeletion.test.mjs @@ -1,7 +1,7 @@ // @ts-check import cleanup from '../storage/support/cleanup.js' import fetch from 'node-fetch' -import testServer from './support/test_backup_server.mjs' +import testServer from './support/test_backup_deletion_server.mjs' import { expect } from 'chai' import testProjects from './support/test_projects.js' import { db } from '../../../../storage/lib/mongodb.js' diff --git 
a/services/history-v1/test/acceptance/js/api/backupVerifier.test.mjs b/services/history-v1/test/acceptance/js/api/backupVerifier.test.mjs new file mode 100644 index 0000000000..af041b0d7a --- /dev/null +++ b/services/history-v1/test/acceptance/js/api/backupVerifier.test.mjs @@ -0,0 +1,209 @@ +// @ts-check +import cleanup from '../storage/support/cleanup.js' +import fetch from 'node-fetch' +import testServer from './support/test_backup_verifier_server.mjs' +import { expect } from 'chai' +import testProjects from './support/test_projects.js' +import { + backupPersistor, + projectBlobsBucket, +} from '../../../../storage/lib/backupPersistor.mjs' +import { + BlobStore, + makeProjectKey, +} from '../../../../storage/lib/blob_store/index.js' +import Stream from 'stream' +import * as zlib from 'node:zlib' +import { promisify } from 'node:util' +import { execFile } from 'node:child_process' +import { NotFoundError } from '@overleaf/object-persistor/src/Errors.js' + +/** + * @typedef {import("node-fetch").Response} Response + * @typedef {import("overleaf-editor-core").Blob} Blob + */ + +/** + * @param {string} historyId + * @param {string} hash + * @return {Promise<{stdout: string, status:number }>} + */ +async function verifyBlobScript(historyId, hash) { + try { + const result = await promisify(execFile)( + process.argv0, + [ + 'storage/scripts/verify_backup_blob.mjs', + `--historyId=${historyId}`, + hash, + ], + { + encoding: 'utf-8', + timeout: 5_000, + env: process.env, + } + ) + return { status: 0, stdout: result.stdout } + } catch (err) { + if (err && typeof err === 'object' && 'stdout' in err && 'code' in err) { + return { + stdout: typeof err.stdout === 'string' ? err.stdout : '', + status: typeof err.code === 'number' ? 
err.code : -1, + } + } + throw err + } +} +/** + * @param {string} historyId + * @param {string} hash + * @return {Promise<Response>} + */ +async function verifyBlobHTTP(historyId, hash) { + return await fetch( + testServer.testUrl(`/history/${historyId}/blob/${hash}/verify`), + { method: 'GET' } + ) +} + +/** + * @param {string} historyId + * @return {Promise<string>} + */ +async function prepareProjectAndBlob(historyId) { + await testProjects.createEmptyProject(historyId) + const blobStore = new BlobStore(historyId) + const blob = await blobStore.putString(historyId) + const gzipped = zlib.gzipSync(Buffer.from(historyId)) + await backupPersistor.sendStream( + projectBlobsBucket, + makeProjectKey(historyId, blob.getHash()), + Stream.Readable.from([gzipped]), + { contentLength: gzipped.byteLength, contentEncoding: 'gzip' } + ) + await checkDEKExists(historyId) + return blob.getHash() +} + +/** + * @param {string} historyId + * @return {Promise<void>} + */ +async function checkDEKExists(historyId) { + await backupPersistor.forProjectRO( + projectBlobsBucket, + makeProjectKey(historyId, '') + ) +} + +describe('backupVerifier', function () { + const historyIdPostgres = '42' + const historyIdMongo = '000000000000000000000042' + let blobHashPG, blobHashMongo, blobPathPG + + beforeEach(cleanup.everything) + beforeEach('create health check projects', async function () { + ;[blobHashPG, blobHashMongo] = await Promise.all([ + prepareProjectAndBlob('42'), + prepareProjectAndBlob('000000000000000000000042'), + ]) + blobPathPG = makeProjectKey(historyIdPostgres, blobHashPG) + }) + beforeEach(testServer.listenOnRandomPort) + + it('renders 200 on /status', async function () { + const response = await fetch(testServer.testUrl('/status')) + expect(response.status).to.equal(200) + }) + + it('renders 200 on /health_check', async function () { + const response = await fetch(testServer.testUrl('/health_check')) + expect(response.status).to.equal(200) + }) + describe('storage/scripts/verify_backup_blob.mjs', 
function () { + it('throws and does not create DEK if missing', async function () { + const historyId = '404' + const hash = 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa' + const response = await verifyBlobScript(historyId, hash) + expect(response.status).to.equal(1) + expect(response.stdout).to.include('dek does not exist') + await expect(checkDEKExists(historyId)).to.be.rejectedWith(NotFoundError) + }) + it('throws when deleted in db', async function () { + const blobStore = new BlobStore(historyIdPostgres) + await blobStore.deleteBlobs() + const response = await verifyBlobScript(historyIdPostgres, blobHashPG) + expect(response.status).to.equal(1) + expect(response.stdout).to.include(`blob ${blobHashPG} not found`) + }) + it('throws when not existing', async function () { + await backupPersistor.deleteObject(projectBlobsBucket, blobPathPG) + const result = await verifyBlobScript(historyIdPostgres, blobHashPG) + expect(result.status).to.equal(1) + expect(result.stdout).to.include('missing blob') + }) + it('throws when corrupted', async function () { + await backupPersistor.sendStream( + projectBlobsBucket, + blobPathPG, + Stream.Readable.from(['something else']), + { contentLength: 14 } + ) + const result = await verifyBlobScript(historyIdPostgres, blobHashPG) + expect(result.status).to.equal(1) + expect(result.stdout).to.include('hash mismatch for backed up blob') + }) + it('should successfully verify from postgres', async function () { + const result = await verifyBlobScript(historyIdPostgres, blobHashPG) + expect(result.status).to.equal(0) + expect(result.stdout.split('\n')).to.include('OK') + }) + it('should successfully verify from mongo', async function () { + const result = await verifyBlobScript(historyIdMongo, blobHashMongo) + expect(result.status).to.equal(0) + expect(result.stdout.split('\n')).to.include('OK') + }) + }) + describe('GET /history/:historyId/blob/:hash/verify', function () { + it('returns 404 when deleted in db', async function () { + const 
blobStore = new BlobStore(historyIdPostgres) + await blobStore.deleteBlobs() + const response = await verifyBlobHTTP(historyIdPostgres, blobHashPG) + expect(response.status).to.equal(404) + expect(await response.text()).to.equal(`blob ${blobHashPG} not found`) + }) + it('returns 422 and does not create DEK if missing', async function () { + const historyId = '404' + const hash = 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa' + const response = await verifyBlobHTTP(historyId, hash) + expect(response.status).to.equal(422) + expect(await response.text()).to.equal('dek does not exist') + await expect(checkDEKExists(historyId)).to.be.rejectedWith(NotFoundError) + }) + it('returns 422 when not existing', async function () { + await backupPersistor.deleteObject(projectBlobsBucket, blobPathPG) + const response = await verifyBlobHTTP(historyIdPostgres, blobHashPG) + expect(response.status).to.equal(422) + expect(await response.text()).to.equal('missing blob') + }) + it('returns 422 when corrupted', async function () { + await backupPersistor.sendStream( + projectBlobsBucket, + blobPathPG, + Stream.Readable.from(['something else']), + { contentLength: 14 } + ) + const response = await verifyBlobHTTP(historyIdPostgres, blobHashPG) + expect(response.status).to.equal(422) + expect(await response.text()).to.equal('hash mismatch for backed up blob') + }) + it('should successfully verify from postgres', async function () { + const response = await verifyBlobHTTP(historyIdPostgres, blobHashPG) + expect(response.status).to.equal(200) + }) + it('should successfully verify from mongo', async function () { + const response = await verifyBlobHTTP(historyIdMongo, blobHashMongo) + expect(response.status).to.equal(200) + }) + }) +}) diff --git a/services/history-v1/test/acceptance/js/api/support/test_backup_server.mjs b/services/history-v1/test/acceptance/js/api/support/test_backup_deletion_server.mjs similarity index 100% rename from 
services/history-v1/test/acceptance/js/api/support/test_backup_server.mjs rename to services/history-v1/test/acceptance/js/api/support/test_backup_deletion_server.mjs diff --git a/services/history-v1/test/acceptance/js/api/support/test_backup_verifier_server.mjs b/services/history-v1/test/acceptance/js/api/support/test_backup_verifier_server.mjs new file mode 100644 index 0000000000..10d6dbc6c1 --- /dev/null +++ b/services/history-v1/test/acceptance/js/api/support/test_backup_verifier_server.mjs @@ -0,0 +1,43 @@ +// @ts-check +import { startApp } from '../../../../../backup-verifier-app.mjs' + +/** @type {import("http").Server} */ +let server + +/** + * @param {string} pathname + * @return {string} + */ +function testUrl(pathname) { + const url = new URL('http://127.0.0.1') + const addr = server.address() + if (addr && typeof addr === 'object') { + url.port = addr.port.toString() + } + url.pathname = pathname + return url.toString() +} + +async function listenOnRandomPort() { + if (server) return // already running + for (let i = 0; i < 10; i++) { + try { + server = await startApp(0) + return + } catch {} + } + server = await startApp(0) +} + +after('close server', function (done) { + if (server) { + server.close(done) + } else { + done() + } +}) + +export default { + testUrl, + listenOnRandomPort, +} diff --git a/services/history-v1/tsconfig.json b/services/history-v1/tsconfig.json index a0d23b3aab..0688609f41 100644 --- a/services/history-v1/tsconfig.json +++ b/services/history-v1/tsconfig.json @@ -5,6 +5,7 @@ "app.js", "app/js/**/*", "backup-deletion-app.mjs", + "backup-verifier-app.mjs", "benchmarks/**/*", "config/**/*", "migrations/**/*",