diff --git a/services/history-v1/storage/scripts/redact.mjs b/services/history-v1/storage/scripts/redact.mjs
new file mode 100644
index 0000000000..3b63af8eb1
--- /dev/null
+++ b/services/history-v1/storage/scripts/redact.mjs
@@ -0,0 +1,192 @@
+import fs from 'node:fs'
+import { Readable } from 'node:stream'
+import { createRequire } from 'node:module'
+import * as readline from 'node:readline/promises'
+import commandLineArgs from 'command-line-args'
+import { makeProjectKey } from '../lib/blob_store/index.js'
+import { client } from '../lib/mongodb.js'
+import knex from '../lib/knex.js'
+import redis from '../lib/redis.js'
+
+const require = createRequire(import.meta.url)
+const config = require('config')
+const persistor = require('../lib/persistor.js')
+const { Errors } = require('@overleaf/object-persistor')
+
+// --delete, --empty, --file and --message each select a redaction mode and are
+// mutually exclusive; with none of them the blob is overwritten with a
+// timestamped "REDACTED" message. --yes skips the interactive confirmation.
+const optionDefinitions = [
+  { name: 'historyId', alias: 'p', type: String },
+  { name: 'blob', alias: 'b', type: String },
+  { name: 'file', alias: 'f', type: String },
+  { name: 'empty', alias: 'e', type: Boolean },
+  { name: 'delete', alias: 'd', type: Boolean },
+  { name: 'yes', alias: 'y', type: Boolean },
+  { name: 'message', alias: 'm', type: String },
+]
+
+/**
+ * Work out what will be written in place of the blob, without opening any
+ * file yet: the stream is only created once the operator has confirmed.
+ *
+ * @param {object} options - parsed command-line options
+ * @return {{size: number, description: string, createStream: function}}
+ */
+function prepareReplacement(options) {
+  if (options.empty) {
+    return {
+      size: 0,
+      description: 'empty file',
+      createStream: () => Readable.from([]),
+    }
+  }
+  if (options.file) {
+    const { file } = options
+    const stat = fs.statSync(file)
+    if (!stat.isFile()) {
+      throw new Error(`${file} is not a regular file`)
+    }
+    return {
+      size: stat.size,
+      description: `file ${file}`,
+      createStream: () => fs.createReadStream(file),
+    }
+  }
+  const baseMessage = options.message || 'REDACTED'
+  const msg = `${baseMessage} ${new Date().toISOString()}`
+  const buf = Buffer.from(msg, 'utf8')
+  return {
+    size: buf.length,
+    description: `message "${msg}"`,
+    createStream: () => Readable.from([buf]),
+  }
+}
+
+/**
+ * Ask a yes/no question on the terminal.
+ *
+ * @param {string} prompt
+ * @return {Promise<boolean>} true only for "y" or "yes" (any case, trimmed)
+ */
+async function askConfirmation(prompt) {
+  const rl = readline.createInterface({
+    input: process.stdin,
+    output: process.stdout,
+  })
+  try {
+    const answer = await rl.question(prompt)
+    return ['y', 'yes'].includes(answer.trim().toLowerCase())
+  } finally {
+    rl.close()
+  }
+}
+
+/**
+ * Delete a blob from the project bucket, or overwrite it in place with
+ * replacement content, after confirmation.
+ *
+ * @param {string} historyId
+ * @param {string} blobHash
+ * @param {object} options - parsed command-line options
+ * @throws {Error} if the blob does not exist or the store operation fails
+ */
+async function replaceBlob(historyId, blobHash, options) {
+  const bucket = config.get('blobStore.projectBucket')
+  const key = makeProjectKey(historyId, blobHash)
+
+  // 1. Check existence
+  try {
+    const originalSize = await persistor.getObjectSize(bucket, key)
+    console.log(`Found blob ${blobHash} of size ${originalSize} bytes`)
+  } catch (err) {
+    if (
+      err instanceof Errors.NotFoundError ||
+      err.code === 'NoSuchKey' ||
+      err.name === 'NoSuchKey'
+    ) {
+      throw new Error(`Blob ${blobHash} not found in project ${historyId}`)
+    }
+    throw err
+  }
+
+  // 2. Prepare action
+  const replacement = options.delete ? null : prepareReplacement(options)
+  console.log(
+    replacement
+      ? `Replacing blob ${blobHash} in ${historyId} with ${replacement.description} (${replacement.size} bytes)`
+      : `Deleting blob ${blobHash} in ${historyId}`
+  )
+
+  if (!options.yes && !(await askConfirmation('Proceed (Y/N)? '))) {
+    console.log('Aborted.')
+    return
+  }
+
+  // 3. Execute action
+  if (!replacement) {
+    await persistor.deleteObject(bucket, key)
+    console.log('Blob deleted successfully.')
+    return
+  }
+  await persistor.sendStream(bucket, key, replacement.createStream(), {
+    contentType: 'application/octet-stream',
+    contentLength: replacement.size,
+  })
+  console.log('Blob replaced successfully.')
+}
+
+/**
+ * Parse and validate the command line, then redact the requested blob.
+ * Exits with status 1 on invalid arguments.
+ */
+async function main() {
+  const options = commandLineArgs(optionDefinitions)
+  if (!options.historyId) {
+    console.error('Error: --historyId is required.')
+    process.exit(1)
+  }
+  if (!options.blob) {
+    console.error('Error: --blob is required.')
+    process.exit(1)
+  }
+  // --file passed without a path is expected to parse as null rather than
+  // undefined; it must not silently fall through to the default message mode.
+  if (options.file !== undefined && !options.file) {
+    console.error('Error: --file requires a path.')
+    process.exit(1)
+  }
+
+  const activeModes = [
+    options.delete ? '--delete' : null,
+    options.empty ? '--empty' : null,
+    options.file ? '--file' : null,
+    options.message !== undefined ? '--message' : null,
+  ].filter(Boolean)
+
+  if (activeModes.length > 1) {
+    console.error(
+      `Error: Conflicting options provided (${activeModes.join(
+        ', '
+      )}). Please select at most one redaction mode.`
+    )
+    process.exit(1)
+  }
+
+  await replaceBlob(options.historyId, options.blob, options)
+}
+
+main()
+  .then(() => console.log('Done.'))
+  .catch(err => {
+    console.error('Error:', err.message)
+    process.exit(1)
+  })
+  .finally(() => {
+    // Best-effort shutdown of the Postgres, MongoDB and Redis clients.
+    knex.destroy().catch(err => console.error('Error closing Postgres:', err))
+    client.close().catch(err => console.error('Error closing MongoDB:', err))
+    redis
+      .disconnect()
+      .catch(err => console.error('Error disconnecting Redis:', err))
+  })
diff --git a/services/history-v1/test/acceptance/js/storage/redact_blob.test.mjs b/services/history-v1/test/acceptance/js/storage/redact_blob.test.mjs
new file mode 100644
index 0000000000..10696e261a
--- /dev/null
+++ b/services/history-v1/test/acceptance/js/storage/redact_blob.test.mjs
@@ -0,0 +1,219 @@
+import { expect } from 'chai'
+import { promisify } from 'node:util'
+import { execFile } from 'node:child_process'
+import { ObjectId } from 'mongodb'
+
+import { BlobStore } from '../../../../storage/lib/blob_store/index.js'
+import cleanup from './support/cleanup.js'
+
+describe('redact.mjs script', function () {
+  const TIMEOUT = 20 * 1000
+
+  beforeEach(cleanup.everything)
+
+  async function runScript(args = []) {
+    let result
+    try {
+      result = await promisify(execFile)(
+        process.argv0,
+        ['storage/scripts/redact.mjs', ...args],
+        {
+          encoding: 'utf-8',
+          timeout: TIMEOUT,
+          env: {
+            ...process.env,
+            LOG_LEVEL: 'warn',
+          },
+        }
+      )
+      result.status = 0
+    } catch (err) {
+      const { stdout, stderr, code } = err
+      if (typeof code !== 'number') {
+        console.log(err)
+      }
+      result = { stdout, stderr, status: code }
+    }
+    return result
+  }
+
+  it('should redact one blob completely (via delete) and leave other unmodified', async function () {
+    const historyId = new ObjectId().toString()
+    const blobStore = new BlobStore(historyId)
+
+    const blob1 = await blobStore.putString('Confidential data')
+    const blob2 = await blobStore.putString('Public data')
+
+    const hash1 = blob1.getHash()
+    const hash2 = blob2.getHash()
+
+    // Redact blob1 completely
+    const result = await runScript([
+      '--historyId',
+      historyId,
+      '--blob',
+      hash1,
+      '--delete',
+      '--yes',
+    ])
+
+    expect(result.status).to.equal(0)
+    expect(result.stdout).to.include(`Deleting blob ${hash1}`)
+
+    // Check blob1 is absent using getStream (as getString can mask specific NotFoundError)
+    let fetchError
+    try {
+      await blobStore.getStream(hash1)
+    } catch (err) {
+      fetchError = err
+    }
+    expect(fetchError).to.exist
+    expect(fetchError.message).to.match(/not found/i)
+
+    // Check blob2 is unmodified
+    const publicContent = await blobStore.getString(hash2)
+    expect(publicContent).to.equal('Public data')
+  })
+
+  it('should redact a blob with a default message if no flag is provided', async function () {
+    const historyId = new ObjectId().toString()
+    const blobStore = new BlobStore(historyId)
+
+    const blob1 = await blobStore.putString('Confidential data')
+    const hash1 = blob1.getHash()
+
+    // Redact blob1
+    const result = await runScript([
+      '--historyId',
+      historyId,
+      '--blob',
+      hash1,
+      '--yes',
+    ])
+
+    expect(result.status).to.equal(0)
+    expect(result.stdout).to.include(`Replacing blob ${hash1}`)
+
+    // Check blob1 is redacted
+    const redactedContent = await blobStore.getString(hash1)
+    expect(redactedContent).to.match(
+      /^REDACTED \d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z$/
+    )
+  })
+
+  it('should redact a blob with a custom message', async function () {
+    const historyId = new ObjectId().toString()
+    const blobStore = new BlobStore(historyId)
+
+    const blob1 = await blobStore.putString('Confidential data')
+    const hash1 = blob1.getHash()
+
+    // Redact blob1
+    const result = await runScript([
+      '--historyId',
+      historyId,
+      '--blob',
+      hash1,
+      '--message',
+      'MY_CUSTOM_MSG',
+      '--yes',
+    ])
+
+    expect(result.status).to.equal(0)
+    expect(result.stdout).to.include(`Replacing blob ${hash1}`)
+
+    // Check blob1 is redacted
+    const redactedContent = await blobStore.getString(hash1)
+    expect(redactedContent).to.match(
+      /^MY_CUSTOM_MSG \d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z$/
+    )
+  })
+
+  it('should redact a blob with an empty file if --empty is used', async function () {
+    const historyId = new ObjectId().toString()
+    const blobStore = new BlobStore(historyId)
+
+    const blob1 = await blobStore.putString('Confidential data')
+    const hash1 = blob1.getHash()
+
+    // Redact blob1
+    const result = await runScript([
+      '--historyId',
+      historyId,
+      '--blob',
+      hash1,
+      '--empty',
+      '--yes',
+    ])
+
+    expect(result.status).to.equal(0)
+    expect(result.stdout).to.include(`Replacing blob ${hash1}`)
+
+    // Check blob1 is empty
+    const redactedContent = await blobStore.getString(hash1)
+    expect(redactedContent).to.equal('')
+  })
+
+  it('should redact a blob with a specific file if --file is used', async function () {
+    const historyId = new ObjectId().toString()
+    const blobStore = new BlobStore(historyId)
+
+    const blob1 = await blobStore.putString('Confidential data')
+    const hash1 = blob1.getHash()
+
+    // Create a temporary file
+    const fs = await import('node:fs/promises')
+    const os = await import('node:os')
+    const path = await import('node:path')
+
+    const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), 'redact-test-'))
+    const tmpFile = path.join(tmpDir, 'replacement.txt')
+    await fs.writeFile(tmpFile, 'Replacement file content')
+
+    try {
+      // Redact blob1
+      const result = await runScript([
+        '--historyId',
+        historyId,
+        '--blob',
+        hash1,
+        '--file',
+        tmpFile,
+        '--yes',
+      ])
+
+      expect(result.status).to.equal(0)
+      expect(result.stdout).to.include(`Replacing blob ${hash1}`)
+
+      // Check blob1 has replacement content
+      const redactedContent = await blobStore.getString(hash1)
+      expect(redactedContent).to.equal('Replacement file content')
+    } finally {
+      await fs.rm(tmpDir, { recursive: true, force: true })
+    }
+  })
+
+  it('should error when conflicting options are provided', async function () {
+    const historyId = new ObjectId().toString()
+    const blobStore = new BlobStore(historyId)
+
+    const blob1 = await blobStore.putString('Confidential data')
+    const hash1 = blob1.getHash()
+
+    // Redact blob1 with conflicting flags
+    const result = await runScript([
+      '--historyId',
+      historyId,
+      '--blob',
+      hash1,
+      '--delete',
+      '--file',
+      'dummy.txt',
+    ])
+
+    expect(result.status).to.equal(1)
+    expect(result.stderr).to.include('Error: Conflicting options provided')
+    expect(result.stderr).to.include('--delete')
+    expect(result.stderr).to.include('--file')
+  })
+})