mirror of
https://github.com/yu-i-i/overleaf-cep.git
synced 2026-06-04 22:59:01 +02:00
Merge pull request #32702 from overleaf/bg-allow-redacting-blobs
add script for redacting unwanted blobs GitOrigin-RevId: cddbeb4d27546b7cb98634ab364cc8dad0ada76c
This commit is contained in:
@@ -0,0 +1,144 @@
|
||||
import fs from 'node:fs'
|
||||
import { Readable } from 'node:stream'
|
||||
import { createRequire } from 'node:module'
|
||||
import * as readline from 'node:readline/promises'
|
||||
import commandLineArgs from 'command-line-args'
|
||||
import { makeProjectKey } from '../lib/blob_store/index.js'
|
||||
import { client } from '../lib/mongodb.js'
|
||||
import knex from '../lib/knex.js'
|
||||
import redis from '../lib/redis.js'
|
||||
|
||||
const require = createRequire(import.meta.url)
|
||||
const config = require('config')
|
||||
const persistor = require('../lib/persistor.js')
|
||||
const { Errors } = require('@overleaf/object-persistor')
|
||||
|
||||
// Command-line options accepted by this script, in the format expected by
// command-line-args.
const optionDefinitions = [
  // Identify the blob: both are required by main().
  { name: 'historyId', alias: 'p', type: String },
  { name: 'blob', alias: 'b', type: String },
  // Redaction modes (at most one may be given; with none, the blob is
  // replaced by a timestamped "REDACTED" message).
  { name: 'file', alias: 'f', type: String }, // replace with this file's content
  { name: 'empty', alias: 'e', type: Boolean }, // replace with zero bytes
  { name: 'delete', alias: 'd', type: Boolean }, // delete the object
  // Skip the interactive confirmation prompt.
  { name: 'yes', alias: 'y', type: Boolean },
  // Replace with this text (a timestamp is appended) instead of "REDACTED".
  { name: 'message', alias: 'm', type: String },
]
|
||||
|
||||
/**
 * Replace or delete a single blob object in the project blob bucket.
 *
 * The blob must already exist. The planned action is printed and, unless
 * `options.yes` is set, the operator must confirm it on stdin before anything
 * is modified.
 *
 * @param {string} historyId - history id used to build the blob's object key
 * @param {string} blobHash - hash of the blob to redact
 * @param {Object} options - parsed command-line options
 * @param {boolean} [options.delete] - delete the object instead of replacing it
 * @param {boolean} [options.empty] - replace the content with zero bytes
 * @param {string} [options.file] - path of a local file used as replacement
 * @param {string} [options.message] - text used instead of "REDACTED"; an ISO
 *   timestamp is appended to it
 * @param {boolean} [options.yes] - skip the confirmation prompt
 * @returns {Promise<void>} resolves once the action completed or was aborted
 * @throws {Error} if the blob does not exist; other persistor and file system
 *   errors are propagated unchanged
 */
async function replaceBlob(historyId, blobHash, options) {
  const bucket = config.get('blobStore.projectBucket')
  const key = makeProjectKey(historyId, blobHash)

  // 1. Check existence
  let originalSize
  try {
    originalSize = await persistor.getObjectSize(bucket, key)
    console.log(`Found blob ${blobHash} of size ${originalSize} bytes`)
  } catch (err) {
    if (
      err instanceof Errors.NotFoundError ||
      err.code === 'NoSuchKey' ||
      err.name === 'NoSuchKey'
    ) {
      throw new Error(`Blob ${blobHash} not found in project ${historyId}`, {
        cause: err,
      })
    }
    throw err
  }

  // 2. Prepare action
  // Only a factory is prepared here: the stream itself is created after the
  // operator has confirmed, so aborting never leaves an open file stream.
  let createStream
  let streamSize
  let actionDesc
  if (!options.delete) {
    if (options.empty) {
      createStream = () => Readable.from([])
      streamSize = 0
      actionDesc = 'empty file'
    } else if (options.file) {
      const filePath = options.file
      // Fails here, before the prompt, if the file is missing or unreadable.
      streamSize = fs.statSync(filePath).size
      createStream = () => fs.createReadStream(filePath)
      actionDesc = `file ${filePath}`
    } else {
      const baseMessage = options.message || 'REDACTED'
      const msg = `${baseMessage} ${new Date().toISOString()}`
      const buf = Buffer.from(msg, 'utf8')
      createStream = () => Readable.from([buf])
      streamSize = buf.length
      actionDesc = `message "${msg}"`
    }
  }

  const actionLog = options.delete
    ? `Deleting blob ${blobHash} in ${historyId}`
    : `Replacing blob ${blobHash} in ${historyId} with ${actionDesc} (${streamSize} bytes)`

  console.log(actionLog)

  if (!options.yes) {
    const rl = readline.createInterface({
      input: process.stdin,
      output: process.stdout,
    })
    let answer
    try {
      answer = await rl.question('Proceed (Y/N)? ')
    } finally {
      // Release stdin even if the prompt is interrupted.
      rl.close()
    }
    if (answer.trim().toLowerCase() !== 'y') {
      console.log('Aborted.')
      return
    }
  }

  // 3. Execute action
  if (options.delete) {
    await persistor.deleteObject(bucket, key)
    console.log('Blob deleted successfully.')
  } else {
    await persistor.sendStream(bucket, key, createStream(), {
      contentType: 'application/octet-stream',
      contentLength: streamSize,
    })
    console.log('Blob replaced successfully.')
  }
}
|
||||
|
||||
/**
 * Parse and validate the command line, then run the redaction.
 *
 * Prints an error and exits the process with status 1 when --historyId or
 * --blob is missing, when --file is given without a path, or when more than
 * one redaction mode (--delete, --empty, --file, --message) is selected.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const options = commandLineArgs(optionDefinitions)
  if (!options.historyId) {
    console.error('Error: --historyId is required.')
    process.exit(1)
  }
  if (!options.blob) {
    console.error('Error: --blob is required.')
    process.exit(1)
  }
  // A --file flag without a usable value (command-line-args is expected to
  // report it as null) must not be mistaken for "no mode selected", which
  // would overwrite the blob with the default message instead.
  if (options.file !== undefined && !options.file) {
    console.error('Error: --file requires a path.')
    process.exit(1)
  }

  const activeModes = [
    options.delete ? '--delete' : null,
    options.empty ? '--empty' : null,
    options.file ? '--file' : null,
    options.message !== undefined ? '--message' : null,
  ].filter(Boolean)

  if (activeModes.length > 1) {
    console.error(
      `Error: Conflicting options provided (${activeModes.join(
        ', '
      )}). Please select exactly one redaction mode.`
    )
    process.exit(1)
  }

  await replaceBlob(options.historyId, options.blob, options)
}
|
||||
|
||||
// Script entry point: run main(), report the outcome, then close the
// connections opened by the imported Postgres, MongoDB and Redis modules.
main()
  .then(() => console.log('Done.'))
  .catch(err => {
    console.error('Error:', err.message)
    // Exits immediately, so the cleanup in finally() below only runs after
    // a successful run.
    process.exit(1)
  })
  .finally(() => {
    // Each close is best-effort: a failure is logged and does not prevent
    // the other connections from being closed.
    knex.destroy().catch(err => console.error('Error closing Postgres:', err))
    client.close().catch(err => console.error('Error closing MongoDB:', err))
    redis
      .disconnect()
      .catch(err => console.error('Error disconnecting Redis:', err))
  })
|
||||
@@ -0,0 +1,219 @@
|
||||
import { expect } from 'chai'
|
||||
import { promisify } from 'node:util'
|
||||
import { execFile } from 'node:child_process'
|
||||
import { ObjectId } from 'mongodb'
|
||||
|
||||
import { BlobStore } from '../../../../storage/lib/blob_store/index.js'
|
||||
import cleanup from './support/cleanup.js'
|
||||
|
||||
// Acceptance tests for storage/scripts/redact.mjs: each test stores a blob,
// runs the script in a child process and inspects the blob store afterwards.
describe('redact.mjs script', function () {
  const TIMEOUT = 20 * 1000
  const ORIGINAL_CONTENT = 'Confidential data'
  const ISO_TIMESTAMP = String.raw`\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z`
  const execFileAsync = promisify(execFile)

  beforeEach(cleanup.everything)

  /**
   * Run the redact script in a child process.
   *
   * @param {string[]} args - command-line arguments passed to the script
   * @returns {Promise<{stdout: string, stderr: string, status: number}>}
   *   captured output and exit status (0 on success)
   */
  async function runScript(args = []) {
    try {
      const { stdout, stderr } = await execFileAsync(
        process.argv0,
        ['storage/scripts/redact.mjs', ...args],
        {
          encoding: 'utf-8',
          timeout: TIMEOUT,
          env: {
            ...process.env,
            LOG_LEVEL: 'warn',
          },
        }
      )
      return { stdout, stderr, status: 0 }
    } catch (err) {
      const { stdout, stderr, code } = err
      if (typeof code !== 'number') {
        // Not a plain non-zero exit (e.g. timeout or spawn failure).
        console.log(err)
      }
      return { stdout, stderr, status: code }
    }
  }

  /**
   * Create a fresh project containing one blob with confidential content.
   *
   * @returns {Promise<{historyId: string, blobStore: BlobStore, hash: string}>}
   */
  async function createConfidentialBlob() {
    const historyId = new ObjectId().toString()
    const blobStore = new BlobStore(historyId)
    const blob = await blobStore.putString(ORIGINAL_CONTENT)
    return { historyId, blobStore, hash: blob.getHash() }
  }

  /**
   * Run the script against one blob with the given extra flags.
   *
   * @param {string} historyId
   * @param {string} hash
   * @param {...string} extraArgs - mode flags and/or --yes
   */
  function redact(historyId, hash, ...extraArgs) {
    return runScript(['--historyId', historyId, '--blob', hash, ...extraArgs])
  }

  it('should redact one blob completely (via delete) and leave other unmodified', async function () {
    const { historyId, blobStore, hash: hash1 } = await createConfidentialBlob()
    const blob2 = await blobStore.putString('Public data')
    const hash2 = blob2.getHash()

    // Redact blob1 completely
    const result = await redact(historyId, hash1, '--delete', '--yes')

    expect(result.status).to.equal(0)
    expect(result.stdout).to.include(`Deleting blob ${hash1}`)

    // Check blob1 is absent using getStream (as getString can mask specific NotFoundError)
    let fetchError
    try {
      await blobStore.getStream(hash1)
    } catch (err) {
      fetchError = err
    }
    expect(fetchError).to.exist
    expect(fetchError.message).to.match(/not found/i)

    // Check blob2 is unmodified
    const publicContent = await blobStore.getString(hash2)
    expect(publicContent).to.equal('Public data')
  })

  it('should redact a blob with a default message if no flag is provided', async function () {
    const { historyId, blobStore, hash: hash1 } = await createConfidentialBlob()

    // Redact blob1
    const result = await redact(historyId, hash1, '--yes')

    expect(result.status).to.equal(0)
    expect(result.stdout).to.include(`Replacing blob ${hash1}`)

    // Check blob1 is redacted
    const redactedContent = await blobStore.getString(hash1)
    expect(redactedContent).to.match(new RegExp(`^REDACTED ${ISO_TIMESTAMP}$`))
  })

  it('should redact a blob with a custom message', async function () {
    const { historyId, blobStore, hash: hash1 } = await createConfidentialBlob()

    // Redact blob1
    const result = await redact(
      historyId,
      hash1,
      '--message',
      'MY_CUSTOM_MSG',
      '--yes'
    )

    expect(result.status).to.equal(0)
    expect(result.stdout).to.include(`Replacing blob ${hash1}`)

    // Check blob1 is redacted
    const redactedContent = await blobStore.getString(hash1)
    expect(redactedContent).to.match(
      new RegExp(`^MY_CUSTOM_MSG ${ISO_TIMESTAMP}$`)
    )
  })

  it('should redact a blob with an empty file if --empty is used', async function () {
    const { historyId, blobStore, hash: hash1 } = await createConfidentialBlob()

    // Redact blob1
    const result = await redact(historyId, hash1, '--empty', '--yes')

    expect(result.status).to.equal(0)
    expect(result.stdout).to.include(`Replacing blob ${hash1}`)

    // Check blob1 is empty
    const redactedContent = await blobStore.getString(hash1)
    expect(redactedContent).to.equal('')
  })

  it('should redact a blob with a specific file if --file is used', async function () {
    const { historyId, blobStore, hash: hash1 } = await createConfidentialBlob()

    // Create a temporary file
    const fs = await import('node:fs/promises')
    const os = await import('node:os')
    const path = await import('node:path')

    const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), 'redact-test-'))
    const tmpFile = path.join(tmpDir, 'replacement.txt')
    await fs.writeFile(tmpFile, 'Replacement file content')

    try {
      // Redact blob1
      const result = await redact(historyId, hash1, '--file', tmpFile, '--yes')

      expect(result.status).to.equal(0)
      expect(result.stdout).to.include(`Replacing blob ${hash1}`)

      // Check blob1 has replacement content
      const redactedContent = await blobStore.getString(hash1)
      expect(redactedContent).to.equal('Replacement file content')
    } finally {
      await fs.rm(tmpDir, { recursive: true, force: true })
    }
  })

  it('should error when conflicting options are provided', async function () {
    const { historyId, blobStore, hash: hash1 } = await createConfidentialBlob()

    // Redact blob1 with conflicting flags
    const result = await redact(
      historyId,
      hash1,
      '--delete',
      '--file',
      'dummy.txt'
    )

    expect(result.status).to.equal(1)
    expect(result.stderr).to.include('Error: Conflicting options provided')
    expect(result.stderr).to.include('--delete')
    expect(result.stderr).to.include('--file')

    // A rejected invocation must not have touched the blob
    const untouchedContent = await blobStore.getString(hash1)
    expect(untouchedContent).to.equal(ORIGINAL_CONTENT)
  })
})
|
||||
Reference in New Issue
Block a user