From ff492d572ce843a621d679fda4df33ce877b7cf2 Mon Sep 17 00:00:00 2001
From: Jakob Ackermann
Date: Thu, 23 Mar 2023 14:22:55 +0000
Subject: [PATCH] Merge pull request #12367 from overleaf/jlm-jpa-log-large-unarchive-job

[docstore] log a warning when a potentially large doc is unarchived

GitOrigin-RevId: 665b6cf700e76e7ea433788c09413823f95e7829
---
 services/docstore/app/js/DocArchiveManager.js | 40 ++++++++++++++++---
 1 file changed, 36 insertions(+), 4 deletions(-)

diff --git a/services/docstore/app/js/DocArchiveManager.js b/services/docstore/app/js/DocArchiveManager.js
index 91f5dafd20..dcddef41f4 100644
--- a/services/docstore/app/js/DocArchiveManager.js
+++ b/services/docstore/app/js/DocArchiveManager.js
@@ -126,7 +126,7 @@ async function getDoc(projectId, docId) {
     key
   )
   stream.resume()
-  const buffer = await _streamToBuffer(stream)
+  const buffer = await _streamToBuffer(projectId, docId, stream)
   const md5 = crypto.createHash('md5').update(buffer).digest('hex')
   if (sourceMd5 !== md5) {
     throw new Errors.Md5MismatchError('md5 mismatch when downloading doc', {
@@ -171,12 +171,44 @@ async function destroyProject(projectId) {
   await Promise.all(tasks)
 }
 
-async function _streamToBuffer(stream) {
+/**
+ * Collect a readable stream into a single Buffer.
+ *
+ * Warns via logger.warn when the bytes read so far exceed
+ * Settings.max_doc_length: once while reading and once more at the end.
+ *
+ * @param projectId - only used as context in the warning
+ * @param docId - only used as context in the warning
+ * @param stream - emitter of 'data' (chunks with byteLength), 'error', 'end'
+ * @return {Promise<Buffer>} concatenated chunks; rejects on stream 'error'
+ */
+async function _streamToBuffer(projectId, docId, stream) {
   const chunks = []
+  // Must start at 0: `undefined + n` is NaN, and `NaN <= limit` is false,
+  // which would make every download log a warning.
+  let size = 0
+  let logged = false
+  const logIfTooLarge = finishedReading => {
+    if (size <= Settings.max_doc_length) return
+    // Log progress once and then again at the end.
+    if (logged && !finishedReading) return
+    logger.warn(
+      { projectId, docId, size, finishedReading },
+      'potentially large doc pulled down from gcs'
+    )
+    logged = true
+  }
   return new Promise((resolve, reject) => {
-    stream.on('data', chunk => chunks.push(chunk))
+    stream.on('data', chunk => {
+      size += chunk.byteLength
+      logIfTooLarge(false)
+      chunks.push(chunk)
+    })
     stream.on('error', reject)
-    stream.on('end', () => resolve(Buffer.concat(chunks)))
+    stream.on('end', () => {
+      logIfTooLarge(true)
+      resolve(Buffer.concat(chunks))
+    })
   })
 }
 