From 865a973426258a9bf646d3e44d52beded2bd33ad Mon Sep 17 00:00:00 2001 From: Brian Gough Date: Mon, 20 Sep 2021 09:23:37 +0100 Subject: [PATCH] Merge pull request #5132 from overleaf/bg-fix-size-check [docupdater] use more accurate doc size check GitOrigin-RevId: f66d68a7f7fdc127cc31539abdcab65549823d02 --- .../document-updater/app/js/HttpController.js | 11 +-- services/document-updater/app/js/Limits.js | 31 +++++++ .../document-updater/app/js/RedisManager.js | 14 +++- .../test/acceptance/js/SizeCheckTests.js | 69 ++++++++++++++- .../test/unit/js/Limits/LimitsTests.js | 84 +++++++++++++++++++ 5 files changed, 194 insertions(+), 15 deletions(-) create mode 100644 services/document-updater/app/js/Limits.js create mode 100644 services/document-updater/test/unit/js/Limits/LimitsTests.js diff --git a/services/document-updater/app/js/HttpController.js b/services/document-updater/app/js/HttpController.js index 4ea7a00d4c..fd5792ebc2 100644 --- a/services/document-updater/app/js/HttpController.js +++ b/services/document-updater/app/js/HttpController.js @@ -8,6 +8,7 @@ const Settings = require('@overleaf/settings') const Metrics = require('./Metrics') const ProjectFlusher = require('./ProjectFlusher') const DeleteQueueManager = require('./DeleteQueueManager') +const { getTotalSizeOfLines } = require('./Limits') const async = require('async') module.exports = { @@ -83,14 +84,6 @@ function peekDoc(req, res, next) { }) } -function _getTotalSizeOfLines(lines) { - let size = 0 - for (const line of lines) { - size += line.length + 1 - } - return size -} - function getProjectDocsAndFlushIfOld(req, res, next) { const projectId = req.params.project_id const projectStateHash = req.query.state @@ -150,7 +143,7 @@ function setDoc(req, res, next) { const docId = req.params.doc_id const projectId = req.params.project_id const { lines, source, user_id: userId, undoing } = req.body - const lineSize = _getTotalSizeOfLines(lines) + const lineSize = getTotalSizeOfLines(lines) if (lineSize > Settings.max_doc_length) { logger.log( { projectId, docId, source, lineSize, userId }, diff --git a/services/document-updater/app/js/Limits.js b/services/document-updater/app/js/Limits.js new file mode 100644 index 0000000000..268ccd3f9b --- /dev/null +++ b/services/document-updater/app/js/Limits.js @@ -0,0 +1,31 @@ +module.exports = { + // compute the total size of the document in chararacters, including newlines + getTotalSizeOfLines(lines) { + let size = 0 + for (const line of lines) { + size += line.length + 1 // include the newline + } + return size + }, + + // check whether the total size of the document in characters exceeds the + // maxDocLength. + // + // The estimated size should be an upper bound on the true size, typically + // it will be the size of the JSON.stringified array of lines. If the + // estimated size is less than the maxDocLength then we know that the total + // size of lines will also be less than maxDocLength. + docIsTooLarge(estimatedSize, lines, maxDocLength) { + if (estimatedSize <= maxDocLength) { + return false // definitely under the limit, no need to calculate the total size + } + // calculate the total size, bailing out early if the size limit is reached + let size = 0 + for (const line of lines) { + size += line.length + 1 // include the newline + if (size > maxDocLength) return true + } + // since we didn't hit the limit in the loop, the document is within the allowed length + return false + }, +} diff --git a/services/document-updater/app/js/RedisManager.js b/services/document-updater/app/js/RedisManager.js index 11ff1f8fcc..c7e89d0f79 100644 --- a/services/document-updater/app/js/RedisManager.js +++ b/services/document-updater/app/js/RedisManager.js @@ -24,6 +24,7 @@ const Errors = require('./Errors') const crypto = require('crypto') const async = require('async') const ProjectHistoryRedisManager = require('./ProjectHistoryRedisManager') +const { docIsTooLarge } = require('./Limits') // Sometimes Redis calls take an unexpectedly long time. We have to be // quick with Redis calls because we're holding a lock that expires @@ -64,6 +65,7 @@ module.exports = RedisManager = { timer.done() return _callback(error) } + const docLinesArray = docLines docLines = JSON.stringify(docLines) if (docLines.indexOf('\u0000') !== -1) { const error = new Error('null bytes found in doc lines') @@ -72,8 +74,10 @@ module.exports = RedisManager = { logger.error({ err: error, doc_id, docLines }, error.message) return callback(error) } - // Do a cheap size check on the serialized blob. - if (docLines.length > Settings.max_doc_length) { + // Do an optimised size check on the docLines using the serialised + // length as an upper bound + const sizeBound = docLines.length + if (docIsTooLarge(sizeBound, docLinesArray, Settings.max_doc_length)) { const docSize = docLines.length const err = new Error('blocking doc insert into redis: doc is too large') logger.error({ project_id, doc_id, err, docSize }, err.message) @@ -468,8 +472,10 @@ module.exports = RedisManager = { logger.error({ err: error, doc_id, newDocLines }, error.message) return callback(error) } - // Do a cheap size check on the serialized blob. - if (newDocLines.length > Settings.max_doc_length) { + // Do an optimised size check on the docLines using the serialised + // length as an upper bound + const sizeBound = newDocLines.length + if (docIsTooLarge(sizeBound, docLines, Settings.max_doc_length)) { const err = new Error('blocking doc update: doc is too large') const docSize = newDocLines.length logger.error({ project_id, doc_id, err, docSize }, err.message) diff --git a/services/document-updater/test/acceptance/js/SizeCheckTests.js b/services/document-updater/test/acceptance/js/SizeCheckTests.js index 6267df3b1a..dd48461b8b 100644 --- a/services/document-updater/test/acceptance/js/SizeCheckTests.js +++ b/services/document-updater/test/acceptance/js/SizeCheckTests.js @@ -27,7 +27,7 @@ describe('SizeChecks', function () { describe('when a doc is above the doc size limit already', function () { beforeEach(function () { - this.lines = ['0123456789'.repeat(Settings.max_doc_length / 10 + 1)] + this.lines = ['x'.repeat(Settings.max_doc_length)] // including the extra newline, this will be over the limit MockWebApi.insertDoc(this.project_id, this.doc_id, { lines: this.lines, v: this.version, @@ -72,9 +72,74 @@ describe('SizeChecks', function () { }) }) + describe('when the stringified JSON is above the doc size limit but the doc character count is not', function () { + beforeEach(function () { + let charsRemaining = Settings.max_doc_length + this.lines = [] + // Take the maximum allowed doc length and split it into N lines of 63 characters + a newline. + // The character count will be exactly max_doc_length + // The JSON stringified size will exceed max_doc_length, due to the JSON formatting of the array. + // This document should be allowed, because we use the character count as the limit, not the JSON size. + while (charsRemaining > 0) { + const charstoAdd = Math.min(charsRemaining - 1, 63) // allow for additional newline + this.lines.push('x'.repeat(charstoAdd)) + charsRemaining -= charstoAdd + 1 + } + MockWebApi.insertDoc(this.project_id, this.doc_id, { + lines: this.lines, + v: this.version, + }) + }) + + it('should be able to fetch the doc', function (done) { + DocUpdaterClient.getDoc( + this.project_id, + this.doc_id, + (error, res, doc) => { + if (error) return done(error) + expect(doc.lines).to.deep.equal(this.lines) + done() + } + ) + }) + + describe('when trying to update', function () { + beforeEach(function (done) { + const update = { + doc: this.doc_id, + op: this.update.op, + v: this.version, + } + DocUpdaterClient.sendUpdate( + this.project_id, + this.doc_id, + update, + error => { + if (error != null) { + throw error + } + setTimeout(done, 200) + } + ) + }) + + it('should not update the doc', function (done) { + DocUpdaterClient.getDoc( + this.project_id, + this.doc_id, + (error, res, doc) => { + if (error) return done(error) + expect(doc.lines).to.deep.equal(this.lines) + done() + } + ) + }) + }) + }) + describe('when a doc is just below the doc size limit', function () { beforeEach(function () { - this.lines = ['0123456789'.repeat(Settings.max_doc_length / 10 - 1)] + this.lines = ['x'.repeat(Settings.max_doc_length - 1)] // character count is exactly max_doc_length after including the newline MockWebApi.insertDoc(this.project_id, this.doc_id, { lines: this.lines, v: this.version, diff --git a/services/document-updater/test/unit/js/Limits/LimitsTests.js b/services/document-updater/test/unit/js/Limits/LimitsTests.js new file mode 100644 index 0000000000..34a5c13c26 --- /dev/null +++ b/services/document-updater/test/unit/js/Limits/LimitsTests.js @@ -0,0 +1,84 @@ +const { expect } = require('chai') +const modulePath = '../../../../app/js/Limits.js' +const SandboxedModule = require('sandboxed-module') + +describe('Limits', function () { + beforeEach(function () { + return (this.Limits = SandboxedModule.require(modulePath)) + }) + + describe('getTotalSizeOfLines', function () { + it('should compute the character count for a document with multiple lines', function () { + const count = this.Limits.getTotalSizeOfLines(['123', '4567']) + expect(count).to.equal(9) + }) + + it('should compute the character count for a document with a single line', function () { + const count = this.Limits.getTotalSizeOfLines(['123']) + expect(count).to.equal(4) + }) + + it('should compute the character count for an empty document', function () { + const count = this.Limits.getTotalSizeOfLines([]) + expect(count).to.equal(0) + }) + }) + + describe('docIsTooLarge', function () { + describe('when the estimated size is below the limit', function () { + it('should return false when the estimated size is below the limit', function () { + const result = this.Limits.docIsTooLarge(128, ['hello', 'world'], 1024) + expect(result).to.be.false + }) + }) + + describe('when the estimated size is at the limit', function () { + it('should return false when the estimated size is at the limit', function () { + const result = this.Limits.docIsTooLarge(1024, ['hello', 'world'], 1024) + expect(result).to.be.false + }) + }) + + describe('when the estimated size is above the limit', function () { + it('should return false when the actual character count is below the limit', function () { + const result = this.Limits.docIsTooLarge(2048, ['hello', 'world'], 1024) + expect(result).to.be.false + }) + + it('should return false when the actual character count is at the limit', function () { + const result = this.Limits.docIsTooLarge(2048, ['x'.repeat(1023)], 1024) + expect(result).to.be.false + }) + + it('should return true when the actual character count is above the limit by 1', function () { + const count = this.Limits.docIsTooLarge(2048, ['x'.repeat(1024)], 1024) + expect(count).to.be.true + }) + + it('should return true when the actual character count is above the limit', function () { + const count = this.Limits.docIsTooLarge(2048, ['x'.repeat(2000)], 1024) + expect(count).to.be.true + }) + }) + + describe('when the document has many lines', function () { + it('should return false when the actual character count is below the limit ', function () { + const count = this.Limits.docIsTooLarge( + 2048, + '1234567890'.repeat(100).split('0'), + 1024 + ) + expect(count).to.be.false + }) + + it('should return true when the actual character count is above the limit', function () { + const count = this.Limits.docIsTooLarge( + 2048, + '1234567890'.repeat(2000).split('0'), + 1024 + ) + expect(count).to.be.true + }) + }) + }) +})