diff --git a/services/document-updater/Jenkinsfile b/services/document-updater/Jenkinsfile index 9ec298487b..9abbdc917b 100644 --- a/services/document-updater/Jenkinsfile +++ b/services/document-updater/Jenkinsfile @@ -4,10 +4,10 @@ pipeline { agent any environment { - GIT_PROJECT = "document-updater-sharelatex" - JENKINS_WORKFLOW = "document-updater-sharelatex" + GIT_PROJECT = "document-updater" + JENKINS_WORKFLOW = "document-updater-sharelatex-internal" TARGET_URL = "${env.JENKINS_URL}blue/organizations/jenkins/${JENKINS_WORKFLOW}/detail/$BRANCH_NAME/$BUILD_NUMBER/pipeline" - GIT_API_URL = "https://api.github.com/repos/sharelatex/${GIT_PROJECT}/statuses/$GIT_COMMIT" + GIT_API_URL = "https://api.github.com/repos/overleaf/${GIT_PROJECT}/statuses/$GIT_COMMIT" } triggers { diff --git a/services/document-updater/README.md b/services/document-updater/README.md index a4ba8aa55e..2e6f0003a5 100644 --- a/services/document-updater/README.md +++ b/services/document-updater/README.md @@ -1,4 +1,12 @@ -THIS WILL BE DELETED SOON -DO NOT UPDATE -USE https://github.com/overleaf/document-updater INSTEAD +overleaf/document-updater +=========================== + +An API for applying incoming updates to documents in real-time. + +License +------- + +The code in this repository is released under the GNU AFFERO GENERAL PUBLIC LICENSE, version 3. A copy can be found in the `LICENSE` file. + +Copyright (c) Overleaf, 2014-2019. 
diff --git a/services/document-updater/app.coffee b/services/document-updater/app.coffee
index 70c3fc875d..66d941b832 100644
--- a/services/document-updater/app.coffee
+++ b/services/document-updater/app.coffee
@@ -59,6 +59,8 @@ app.post '/project/:project_id/doc/:doc_id/change/:change_id/accept', HttpCont
 app.post '/project/:project_id/doc/:doc_id/change/accept', HttpController.acceptChanges
 app.del '/project/:project_id/doc/:doc_id/comment/:comment_id', HttpController.deleteComment
 
+app.get '/flush_all_projects', HttpController.flushAllProjects
+
 app.get '/total', (req, res)->
 	timer = new Metrics.Timer("http.allDocList")
 	RedisManager.getCountOfDocsInMemory (err, count)->
diff --git a/services/document-updater/app/coffee/HttpController.coffee b/services/document-updater/app/coffee/HttpController.coffee
index d0e8e1994b..54a69deae9 100644
--- a/services/document-updater/app/coffee/HttpController.coffee
+++ b/services/document-updater/app/coffee/HttpController.coffee
@@ -4,6 +4,8 @@ ProjectManager = require "./ProjectManager"
 Errors = require "./Errors"
 logger = require "logger-sharelatex"
 Metrics = require "./Metrics"
+ProjectFlusher = require("./ProjectFlusher")
+
 
 TWO_MEGABYTES = 2 * 1024 * 1024
 
@@ -179,3 +181,23 @@ module.exports = HttpController =
 			return next(error) if error?
 			logger.log {project_id}, "queued project history resync via http"
 			res.send 204
+
+	# bulk flush: hands the parsed options to ProjectFlusher.flushAllProjects
+	# query params: limit, concurrency (integers), dryRun (flag)
+	# responds with the list of project ids, or 500 if the bulk flush failed
+	flushAllProjects: (req, res, next = (error)-> )->
+		res.setTimeout(5 * 60 * 1000)
+		# query string values arrive as strings, so parse the numbers explicitly
+		# and treat "", "0" and "false" as dryRun being switched off
+		options =
+			limit : parseInt(req.query.limit, 10) || 1000
+			concurrency : parseInt(req.query.concurrency, 10) || 5
+			dryRun : req.query.dryRun? and req.query.dryRun not in ["", "0", "false"]
+		ProjectFlusher.flushAllProjects options, (err, project_ids)->
+			if err?
+				logger.err err:err, "error bulk flushing projects"
+				res.send 500
+			else
+				res.send project_ids
+
+
diff --git a/services/document-updater/app/coffee/ProjectFlusher.coffee b/services/document-updater/app/coffee/ProjectFlusher.coffee
new file mode 100644
index 0000000000..fabc334930
--- /dev/null
+++ b/services/document-updater/app/coffee/ProjectFlusher.coffee
@@ -0,0 +1,75 @@
+request = require("request")
+Settings = require('settings-sharelatex')
+RedisManager = require("./RedisManager")
+rclient = RedisManager.rclient
+docUpdaterKeys = Settings.redis.documentupdater.key_schema
+async = require("async")
+ProjectManager = require("./ProjectManager")
+_ = require("lodash")
+logger = require("logger-sharelatex")
+
+ProjectFlusher =
+
+	# iterate over keys asynchronously using redis scan (non-blocking)
+	# handle all the cluster nodes or single redis server
+	# note: `limit` is applied to each node separately, so the combined
+	# list passed to callback(error, keys) can be longer than `limit`
+	_getKeys: (pattern, limit, callback) ->
+		nodes = rclient.nodes?('master') || [ rclient ];
+		doKeyLookupForNode = (node, cb) ->
+			ProjectFlusher._getKeysFromNode node, pattern, limit, cb
+		async.concatSeries nodes, doKeyLookupForNode, callback
+
+	# scan a single redis node for keys matching `pattern`, stopping once the
+	# cursor comes back as "0" or at least `limit` distinct keys have been seen
+	# calls callback(error, keys)
+	_getKeysFromNode: (node, pattern, limit = 1000, callback) ->
+		cursor = 0 # redis iterator
+		keySet = {} # use hash to avoid duplicate results
+		batchSize = if limit? then Math.min(limit, 1000) else 1000
+		# scan over all keys looking for pattern
+		doIteration = (cb) ->
+			node.scan cursor, "MATCH", pattern, "COUNT", batchSize, (error, reply) ->
+				return callback(error) if error?
+				[cursor, keys] = reply
+				for key in keys
+					keySet[key] = true
+				keys = Object.keys(keySet)
+				noResults = cursor == "0" # redis returns string results not numeric
+				limitReached = (limit? && keys.length >= limit)
+				if noResults || limitReached
+					return callback(null, keys)
+				else
+					setTimeout doIteration, 10 # avoid hitting redis too hard
+		doIteration()
+
+	# extract ids from keys like DocsWithHistoryOps:57fd0b1f53a8396d22b2c24b
+	# or docsInProject:{57fd0b1f53a8396d22b2c24b} (for redis cluster)
+	# keys that contain no object id are skipped rather than throwing
+	_extractIds: (keyList) ->
+		ids = []
+		for key in keyList
+			m = key.match(/:\{?([0-9a-f]{24})\}?/) # extract object id
+			ids.push(m[1]) if m?
+		return ids
+
+	# flush and delete every project that has a docsInProject key in redis
+	# options: limit (passed to the key scan), concurrency (number of
+	# parallel flushes), dryRun (only report the ids, do not flush)
+	# calls callback(error, project_ids)
+	flushAllProjects: (options, callback)->
+		ProjectFlusher._getKeys docUpdaterKeys.docsInProject({project_id:"*"}), options.limit, (error, project_keys) ->
+			if error?
+				logger.err err:error, "error getting keys for flushing"
+				return callback(error)
+			project_ids = ProjectFlusher._extractIds(project_keys)
+			if options.dryRun
+				return callback(null, project_ids)
+			jobs = _.map project_ids, (project_id)->
+				return (cb)->
+					ProjectManager.flushAndDeleteProjectWithLocks project_id, cb
+			async.parallelLimit jobs, options.concurrency, (error)->
+				return callback(error, project_ids)
+
+
+module.exports = ProjectFlusher
\ No newline at end of file
diff --git a/services/document-updater/test/unit/coffee/HttpController/HttpControllerTests.coffee b/services/document-updater/test/unit/coffee/HttpController/HttpControllerTests.coffee
index 15b9142647..d7a27db854 100644
--- a/services/document-updater/test/unit/coffee/HttpController/HttpControllerTests.coffee
+++ b/services/document-updater/test/unit/coffee/HttpController/HttpControllerTests.coffee
@@ -13,6 +13,7 @@ describe "HttpController", ->
 				flushProjectChangesAsync: sinon.stub()
 			"./ProjectManager": @ProjectManager = {}
 			"logger-sharelatex" : @logger = { log: sinon.stub() }
+			"./ProjectFlusher": {flushAllProjects:->}
 			"./Metrics": @Metrics = {}
 			"./Errors" : Errors
 		@Metrics.Timer = class Timer