From 5499a67d78f7467382d4620f6d4d9646effb9b03 Mon Sep 17 00:00:00 2001 From: James Allen Date: Wed, 15 Mar 2017 14:12:06 +0000 Subject: [PATCH] Add in a consistency check after applying updates that ranges still match --- .../app/coffee/RangesManager.coffee | 10 +- .../app/coffee/RangesTracker.coffee | 93 ++++++--- .../app/coffee/UpdateManager.coffee | 2 +- .../RangesManager/RangesManagerTests.coffee | 179 ++++++++++++++++++ .../UpdateManager/UpdateManagerTests.coffee | 2 +- 5 files changed, 254 insertions(+), 32 deletions(-) create mode 100644 services/document-updater/test/unit/coffee/RangesManager/RangesManagerTests.coffee diff --git a/services/document-updater/app/coffee/RangesManager.coffee b/services/document-updater/app/coffee/RangesManager.coffee index 7f430f476b..ebef566424 100644 --- a/services/document-updater/app/coffee/RangesManager.coffee +++ b/services/document-updater/app/coffee/RangesManager.coffee @@ -5,7 +5,7 @@ module.exports = RangesManager = MAX_COMMENTS: 500 MAX_CHANGES: 2000 - applyUpdate: (project_id, doc_id, entries = {}, updates = [], callback = (error, new_entries) ->) -> + applyUpdate: (project_id, doc_id, entries = {}, updates = [], newDocLines, callback = (error, new_entries) ->) -> {changes, comments} = entries rangesTracker = new RangesTracker(changes, comments) for update in updates @@ -21,6 +21,14 @@ module.exports = RangesManager = if rangesTracker.changes?.length > RangesManager.MAX_CHANGES or rangesTracker.comments?.length > RangesManager.MAX_COMMENTS return callback new Error("too many comments or tracked changes") + try + # This is a consistency check that all of our ranges and + # comments still match the corresponding text + rangesTracker.validate(newDocLines.join("\n")) + catch error + logger.error {err: error, project_id, doc_id, newDocLines, updates}, "error validating ranges" + return callback(error) + response = RangesManager._getRanges rangesTracker logger.log {project_id, doc_id, changesCount: response.changes?.length, commentsCount: response.comments?.length}, "applied updates to ranges" callback null, response diff --git a/services/document-updater/app/coffee/RangesTracker.coffee b/services/document-updater/app/coffee/RangesTracker.coffee index 865ecf4ef6..5f67a9561b 100644 --- a/services/document-updater/app/coffee/RangesTracker.coffee +++ b/services/document-updater/app/coffee/RangesTracker.coffee @@ -1,5 +1,8 @@ -load = (EventEmitter) -> - class RangesTracker extends EventEmitter +# This file is shared between document-updater and web, so that the server and client share +# an identical track changes implementation. Do not edit it directly in web or document-updater, +# instead edit it at https://github.com/sharelatex/ranges-tracker, where it has a suite of tests +load = () -> + class RangesTracker # The purpose of this class is to track a set of inserts and deletes to a document, like # track changes in Word. We store these as a set of ShareJs style ranges: # {i: "foo", p: 42} # Insert 'foo' at offset 42 @@ -36,6 +39,7 @@ load = (EventEmitter) -> # middle of a previous insert by the first user, the original insert will be split into two. constructor: (@changes = [], @comments = []) -> @setIdSeed(RangesTracker.generateIdSeed()) + @resetDirtyState() getIdSeed: () -> return @id_seed @@ -75,7 +79,7 @@ load = (EventEmitter) -> comment = @getComment(comment_id) return if !comment? @comments = @comments.filter (c) -> c.id != comment_id - @emit "comment:removed", comment + @_markAsDirty comment, "comment", "removed" getChange: (change_id) -> change = null @@ -89,6 +93,18 @@ load = (EventEmitter) -> change = @getChange(change_id) return if !change? @_removeChange(change) + + validate: (text) -> + for change in @changes + if change.op.i? + content = text.slice(change.op.p, change.op.p + change.op.i.length) + if content != change.op.i + throw new Error("Change (#{JSON.stringify(change)}) doesn't match text (#{JSON.stringify(content)})") + for comment in @comments + content = text.slice(comment.op.p, comment.op.p + comment.op.c.length) + if content != comment.op.c + throw new Error("Comment (#{JSON.stringify(comment)}) doesn't match text (#{JSON.stringify(content)})") + return true applyOp: (op, metadata = {}) -> metadata.ts ?= new Date() @@ -103,7 +119,11 @@ load = (EventEmitter) -> @addComment(op, metadata) else throw new Error("unknown op type") - + + applyOps: (ops, metadata = {}) -> + for op in ops + @applyOp(op, metadata) + addComment: (op, metadata) -> # TODO: Don't allow overlapping comments? @comments.push comment = { @@ -114,18 +134,18 @@ load = (EventEmitter) -> t: op.t metadata } - @emit "comment:added", comment + @_markAsDirty comment, "comment", "added" return comment applyInsertToComments: (op) -> for comment in @comments if op.p <= comment.op.p comment.op.p += op.i.length - @emit "comment:moved", comment + @_markAsDirty comment, "comment", "moved" else if op.p < comment.op.p + comment.op.c.length offset = op.p - comment.op.p comment.op.c = comment.op.c[0..(offset-1)] + op.i + comment.op.c[offset...] - @emit "comment:moved", comment + @_markAsDirty comment, "comment", "moved" applyDeleteToComments: (op) -> op_start = op.p @@ -138,7 +158,7 @@ load = (EventEmitter) -> if op_end <= comment_start # delete is fully before comment comment.op.p -= op_length - @emit "comment:moved", comment + @_markAsDirty comment, "comment", "moved" else if op_start >= comment_end # delete is fully after comment, nothing to do else @@ -161,12 +181,13 @@ load = (EventEmitter) -> comment.op.p = Math.min(comment_start, op_start) comment.op.c = remaining_before + remaining_after - @emit "comment:moved", comment + @_markAsDirty comment, "comment", "moved" applyInsertToChanges: (op, metadata) -> op_start = op.p op_length = op.i.length op_end = op.p + op_length + undoing = !!op.u already_merged = false @@ -184,8 +205,9 @@ load = (EventEmitter) -> change.op.p += op_length moved_changes.push change else if op_start == change_start - # If the insert matches the start of the delete, just remove it from the delete instead - if change.op.d.length >= op.i.length and change.op.d.slice(0, op.i.length) == op.i + # If we are undoing, then we want to cancel any existing delete ranges if we can. + # Check if the insert matches the start of the delete, and just remove it from the delete instead if so. + if undoing and change.op.d.length >= op.i.length and change.op.d.slice(0, op.i.length) == op.i change.op.d = change.op.d.slice(op.i.length) change.op.p += op.i.length if change.op.d == "" @@ -203,15 +225,15 @@ load = (EventEmitter) -> # Only merge inserts if they are from the same user is_same_user = metadata.user_id == change.metadata.user_id - # If this is an insert op at the end of an existing insert with a delete following, and it cancels out the following - # delete then we shouldn't append it to this insert, but instead only cancel the following delete. + # If we are undoing, then our changes will be removed from any delete ops just after. In that case, if there is also + # an insert op just before, then we shouldn't append it to this insert, but instead only cancel the following delete. # E.g. # foo|<--- about to insert 'b' here # inserted 'foo' --^ ^-- deleted 'bar' # should become just 'foo' not 'foob' (with the delete marker becoming just 'ar'), . next_change = @changes[i+1] is_op_adjacent_to_next_delete = next_change? and next_change.op.d? and op.p == change_end and next_change.op.p == op.p - will_op_cancel_next_delete = is_op_adjacent_to_next_delete and next_change.op.d.slice(0, op.i.length) == op.i + will_op_cancel_next_delete = undoing and is_op_adjacent_to_next_delete and next_change.op.d.slice(0, op.i.length) == op.i # If there is a delete at the start of the insert, and we're inserting # at the start, we SHOULDN'T merge since the delete acts as a partition. @@ -281,8 +303,8 @@ load = (EventEmitter) -> for change in remove_changes @_removeChange change - if moved_changes.length > 0 - @emit "changes:moved", moved_changes + for change in moved_changes + @_markAsDirty change, "change", "moved" applyDeleteToChanges: (op, metadata) -> op_start = op.p @@ -406,8 +428,8 @@ load = (EventEmitter) -> @_removeChange change moved_changes = moved_changes.filter (c) -> c != change - if moved_changes.length > 0 - @emit "changes:moved", moved_changes + for change in moved_changes + @_markAsDirty change, "change", "moved" _addOp: (op, metadata) -> change = { @@ -427,17 +449,11 @@ load = (EventEmitter) -> else return -1 - if op.d? - @emit "delete:added", change - else if op.i? - @emit "insert:added", change + @_markAsDirty(change, "change", "added") _removeChange: (change) -> @changes = @changes.filter (c) -> c.id != change.id - if change.op.d? - @emit "delete:removed", change - else if change.op.i? - @emit "insert:removed", change + @_markAsDirty change, "change", "removed" _applyOpModifications: (content, op_modifications) -> # Put in descending position order, with deleting first if at the same offset @@ -486,13 +502,32 @@ load = (EventEmitter) -> previous_change = change return { moved_changes, remove_changes } + resetDirtyState: () -> + @_dirtyState = { + comment: { + moved: {} + removed: {} + added: {} + } + change: { + moved: {} + removed: {} + added: {} + } + } + + getDirtyState: () -> + return @_dirtyState + + _markAsDirty: (object, type, action) -> + @_dirtyState[type][action][object.id] = object + _clone: (object) -> clone = {} (clone[k] = v for k,v of object) return clone if define? - define ["utils/EventEmitter"], load + define [], load else - EventEmitter = require("events").EventEmitter - module.exports = load(EventEmitter) \ No newline at end of file + module.exports = load() diff --git a/services/document-updater/app/coffee/UpdateManager.coffee b/services/document-updater/app/coffee/UpdateManager.coffee index 2ad3281bfe..b6a5f98c4c 100644 --- a/services/document-updater/app/coffee/UpdateManager.coffee +++ b/services/document-updater/app/coffee/UpdateManager.coffee @@ -59,7 +59,7 @@ module.exports = UpdateManager = return callback(new Errors.NotFoundError("document not found: #{doc_id}")) ShareJsUpdateManager.applyUpdate project_id, doc_id, update, lines, version, (error, updatedDocLines, version, appliedOps) -> return callback(error) if error? - RangesManager.applyUpdate project_id, doc_id, ranges, appliedOps, (error, new_ranges) -> + RangesManager.applyUpdate project_id, doc_id, ranges, appliedOps, updatedDocLines, (error, new_ranges) -> return callback(error) if error? RedisManager.updateDocument doc_id, updatedDocLines, version, appliedOps, new_ranges, (error) -> return callback(error) if error? diff --git a/services/document-updater/test/unit/coffee/RangesManager/RangesManagerTests.coffee b/services/document-updater/test/unit/coffee/RangesManager/RangesManagerTests.coffee new file mode 100644 index 0000000000..0c0c556a38 --- /dev/null +++ b/services/document-updater/test/unit/coffee/RangesManager/RangesManagerTests.coffee @@ -0,0 +1,179 @@ +sinon = require('sinon') +chai = require('chai') +should = chai.should() +expect = chai.expect +modulePath = "../../../../app/js/RangesManager.js" +SandboxedModule = require('sandboxed-module') + +describe "RangesManager", -> + beforeEach -> + @RangesManager = SandboxedModule.require modulePath, + requires: + "logger-sharelatex": @logger = { error: sinon.stub(), log: sinon.stub(), warn: sinon.stub() } + + @doc_id = "doc-id-123" + @project_id = "project-id-123" + @user_id = "user-id-123" + @callback = sinon.stub() + + describe "applyUpdate", -> + beforeEach -> + @updates = [{ + meta: + user_id: @user_id + op: [{ + i: "two " + p: 4 + }] + }] + @entries = { + comments: [{ + op: + c: "three " + p: 4 + metadata: + user_id: @user_id + }] + changes: [{ + op: + i: "five" + p: 15 + metadata: + user_id: @user_id + }] + } + @newDocLines = ["one two three four five"] # old is "one three four five" + + describe "successfully", -> + beforeEach -> + @RangesManager.applyUpdate @project_id, @doc_id, @entries, @updates, @newDocLines, @callback + + it "should return the modified the comments and changes", -> + @callback.called.should.equal true + [error, entries] = @callback.args[0] + expect(error).to.be.null + entries.comments[0].op.should.deep.equal { + c: "three " + p: 8 + } + entries.changes[0].op.should.deep.equal { + i: "five" + p: 19 + } + + describe "with empty comments", -> + beforeEach -> + @entries.comments = [] + @RangesManager.applyUpdate @project_id, @doc_id, @entries, @updates, @newDocLines, @callback + + it "should return an object with no comments", -> + # Save space in redis and don't store just {} + @callback.called.should.equal true + [error, entries] = @callback.args[0] + expect(error).to.be.null + expect(entries.comments).to.be.undefined + + describe "with empty changes", -> + beforeEach -> + @entries.changes = [] + @RangesManager.applyUpdate @project_id, @doc_id, @entries, @updates, @newDocLines, @callback + + it "should return an object with no changes", -> + # Save space in redis and don't store just {} + @callback.called.should.equal true + [error, entries] = @callback.args[0] + expect(error).to.be.null + expect(entries.changes).to.be.undefined + + describe "with too many comments", -> + beforeEach -> + @RangesManager.MAX_COMMENTS = 2 + @updates = [{ + meta: + user_id: @user_id + op: [{ + c: "one" + p: 0 + }] + }] + @entries = { + comments: [{ + op: + c: "three " + p: 4 + metadata: + user_id: @user_id + }, { + op: + c: "four " + p: 10 + metadata: + user_id: @user_id + }] + changes: [] + } + @RangesManager.applyUpdate @project_id, @doc_id, @entries, @updates, @newDocLines, @callback + + it "should return an error", -> + # Save space in redis and don't store just {} + @callback.called.should.equal true + [error, entries] = @callback.args[0] + expect(error).to.not.be.null + expect(error.message).to.equal("too many comments or tracked changes") + + describe "with too many changes", -> + beforeEach -> + @RangesManager.MAX_CHANGES = 2 + @updates = [{ + meta: + user_id: @user_id + tc: "track-changes-id-yes" + op: [{ + i: "one " + p: 0 + }] + }] + @entries = { + changes: [{ + op: + i: "three" + p: 4 + metadata: + user_id: @user_id + }, { + op: + i: "four" + p: 10 + metadata: + user_id: @user_id + }] + comments: [] + } + @newDocLines = ["one two three four"] + @RangesManager.applyUpdate @project_id, @doc_id, @entries, @updates, @newDocLines, @callback + + it "should return an error", -> + # Save space in redis and don't store just {} + @callback.called.should.equal true + [error, entries] = @callback.args[0] + expect(error).to.not.be.null + expect(error.message).to.equal("too many comments or tracked changes") + + describe "inconsistent changes", -> + beforeEach -> + @updates = [{ + meta: + user_id: @user_id + op: [{ + c: "doesn't match" + p: 0 + }] + }] + @RangesManager.applyUpdate @project_id, @doc_id, @entries, @updates, @newDocLines, @callback + + it "should return an error", -> + # Save space in redis and don't store just {} + @callback.called.should.equal true + [error, entries] = @callback.args[0] + expect(error).to.not.be.null + expect(error.message).to.equal("Change ({\"op\":{\"i\":\"five\",\"p\":15},\"metadata\":{\"user_id\":\"user-id-123\"}}) doesn't match text (\"our \")") \ No newline at end of file diff --git a/services/document-updater/test/unit/coffee/UpdateManager/UpdateManagerTests.coffee b/services/document-updater/test/unit/coffee/UpdateManager/UpdateManagerTests.coffee index 33578cb6f0..fbf9b21ddc 100644 --- a/services/document-updater/test/unit/coffee/UpdateManager/UpdateManagerTests.coffee +++ b/services/document-updater/test/unit/coffee/UpdateManager/UpdateManagerTests.coffee @@ -179,7 +179,7 @@ describe "UpdateManager", -> it "should update the ranges", -> @RangesManager.applyUpdate - .calledWith(@project_id, @doc_id, @ranges, @appliedOps) + .calledWith(@project_id, @doc_id, @ranges, @appliedOps, @updatedDocLines) .should.equal true it "should save the document", ->