From d5d1736a5efd78145c9ca27215083b766701b25e Mon Sep 17 00:00:00 2001 From: Henry Oswald Date: Thu, 2 May 2019 16:30:36 +0100 Subject: [PATCH 1/6] adds /flush_all_projects project --- services/document-updater/app.coffee | 2 + .../app/coffee/HttpController.coffee | 15 +++++ .../app/coffee/ProjectFlusher.coffee | 61 +++++++++++++++++++ 3 files changed, 78 insertions(+) create mode 100644 services/document-updater/app/coffee/ProjectFlusher.coffee diff --git a/services/document-updater/app.coffee b/services/document-updater/app.coffee index 70c3fc875d..66d941b832 100644 --- a/services/document-updater/app.coffee +++ b/services/document-updater/app.coffee @@ -59,6 +59,8 @@ app.post '/project/:project_id/doc/:doc_id/change/:change_id/accept', HttpCont app.post '/project/:project_id/doc/:doc_id/change/accept', HttpController.acceptChanges app.del '/project/:project_id/doc/:doc_id/comment/:comment_id', HttpController.deleteComment +app.get '/flush_all_projects', HttpController.flushAllProjects + app.get '/total', (req, res)-> timer = new Metrics.Timer("http.allDocList") RedisManager.getCountOfDocsInMemory (err, count)-> diff --git a/services/document-updater/app/coffee/HttpController.coffee b/services/document-updater/app/coffee/HttpController.coffee index d0e8e1994b..5a5c248ee9 100644 --- a/services/document-updater/app/coffee/HttpController.coffee +++ b/services/document-updater/app/coffee/HttpController.coffee @@ -4,6 +4,8 @@ ProjectManager = require "./ProjectManager" Errors = require "./Errors" logger = require "logger-sharelatex" Metrics = require "./Metrics" +ProjectFlusher = require("./ProjectFlusher") + TWO_MEGABYTES = 2 * 1024 * 1024 @@ -179,3 +181,16 @@ module.exports = HttpController = return next(error) if error? logger.log {project_id}, "queued project history resync via http" res.send 204 + + flushAllProjects: (req, res, next = (error)-> )-> + res.setTimeout(5 * 60 * 1000) + limit = req.query.limit || 1000 + concurrency = req.query.concurrency || 5 + ProjectFlusher.flushAllProjects limit, concurrency, (err, project_ids)-> + if err? + logger.err err:err, "error bulk flushing projects" + res.send 500 + else + res.send project_ids + + diff --git a/services/document-updater/app/coffee/ProjectFlusher.coffee b/services/document-updater/app/coffee/ProjectFlusher.coffee new file mode 100644 index 0000000000..b6ef3d77ca --- /dev/null +++ b/services/document-updater/app/coffee/ProjectFlusher.coffee @@ -0,0 +1,61 @@ +request = require("request") +Settings = require('settings-sharelatex') +RedisManager = require("./RedisManager") +rclient = RedisManager.rclient +docUpdaterKeys = Settings.redis.documentupdater.key_schema +async = require("async") +ProjectManager = require("./ProjectManager") +_ = require("lodash") + +ProjectFlusher = + + # iterate over keys asynchronously using redis scan (non-blocking) + # handle all the cluster nodes or single redis server + _getKeys: (pattern, limit, callback) -> + nodes = rclient.nodes?('master') || [ rclient ]; + doKeyLookupForNode = (node, cb) -> + ProjectFlusher._getKeysFromNode node, pattern, limit, cb + async.concatSeries nodes, doKeyLookupForNode, callback + + _getKeysFromNode: (node, pattern, limit = 1000, callback) -> + cursor = 0 # redis iterator + keySet = {} # use hash to avoid duplicate results + batchSize = if limit? then Math.min(limit, 1000) else 1000 + # scan over all keys looking for pattern + doIteration = (cb) -> + node.scan cursor, "MATCH", pattern, "COUNT", batchSize, (error, reply) -> + return callback(error) if error? + [cursor, keys] = reply + for key in keys + keySet[key] = true + keys = Object.keys(keySet) + noResults = cursor == "0" # redis returns string results not numeric + limitReached = (limit? && keys.length >= limit) + if noResults || limitReached + return callback(null, keys) + else + setTimeout doIteration, 10 # avoid hitting redis too hard + doIteration() + + # extract ids from keys like DocsWithHistoryOps:57fd0b1f53a8396d22b2c24b + # or docsInProject:{57fd0b1f53a8396d22b2c24b} (for redis cluster) + _extractIds: (keyList) -> + ids = for key in keyList + m = key.match(/:\{?([0-9a-f]{24})\}?/) # extract object id + m[1] + return ids + + flushAllProjects: (limit, concurrency = 5, callback)-> + ProjectFlusher._getKeys docUpdaterKeys.docsInProject({project_id:"*"}), limit, (error, project_keys) -> + if error? + logger.err err:error, "error getting keys for flushing" + return callback(error) + project_ids = ProjectFlusher._extractIds(project_keys) + jobs = _.map project_ids, (project_id)-> + return (cb)-> + ProjectManager.flushAndDeleteProjectWithLocks project_id, cb + async.parallelLimit jobs, concurrency, (error)-> + return callback(error, project_ids) + + +module.exports = ProjectFlusher \ No newline at end of file From daca83a057c583d2abbc8d7c2b8c750c30a40bd6 Mon Sep 17 00:00:00 2001 From: Henry Oswald Date: Thu, 2 May 2019 16:54:22 +0100 Subject: [PATCH 2/6] add dryRun option to flush all projects --- .../document-updater/app/coffee/HttpController.coffee | 8 +++++--- .../document-updater/app/coffee/ProjectFlusher.coffee | 8 +++++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/services/document-updater/app/coffee/HttpController.coffee b/services/document-updater/app/coffee/HttpController.coffee index 5a5c248ee9..54a69deae9 100644 --- a/services/document-updater/app/coffee/HttpController.coffee +++ b/services/document-updater/app/coffee/HttpController.coffee @@ -184,9 +184,11 @@ module.exports = HttpController = flushAllProjects: (req, res, next = (error)-> )-> res.setTimeout(5 * 60 * 1000) - limit = req.query.limit || 1000 - concurrency = req.query.concurrency || 5 - ProjectFlusher.flushAllProjects limit, concurrency, (err, project_ids)-> + options = + limit : req.query.limit || 1000 + concurrency : req.query.concurrency || 5 + dryRun : req.query.dryRun || false + ProjectFlusher.flushAllProjects options, (err, project_ids)-> if err? logger.err err:err, "error bulk flushing projects" res.send 500 diff --git a/services/document-updater/app/coffee/ProjectFlusher.coffee b/services/document-updater/app/coffee/ProjectFlusher.coffee index b6ef3d77ca..fabc334930 100644 --- a/services/document-updater/app/coffee/ProjectFlusher.coffee +++ b/services/document-updater/app/coffee/ProjectFlusher.coffee @@ -45,16 +45,18 @@ ProjectFlusher = m[1] return ids - flushAllProjects: (limit, concurrency = 5, callback)-> - ProjectFlusher._getKeys docUpdaterKeys.docsInProject({project_id:"*"}), limit, (error, project_keys) -> + flushAllProjects: (options, callback)-> + ProjectFlusher._getKeys docUpdaterKeys.docsInProject({project_id:"*"}), options.limit, (error, project_keys) -> if error? logger.err err:error, "error getting keys for flushing" return callback(error) project_ids = ProjectFlusher._extractIds(project_keys) + if options.dryRun + return callback(null, project_ids) jobs = _.map project_ids, (project_id)-> return (cb)-> ProjectManager.flushAndDeleteProjectWithLocks project_id, cb - async.parallelLimit jobs, concurrency, (error)-> + async.parallelLimit jobs, options.concurrency, (error)-> return callback(error, project_ids) From 4b8a27a2207c23a4b1b723822ce438480b789559 Mon Sep 17 00:00:00 2001 From: Henry Oswald Date: Tue, 7 May 2019 14:00:45 +0100 Subject: [PATCH 3/6] change github url --- services/document-updater/Jenkinsfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/services/document-updater/Jenkinsfile b/services/document-updater/Jenkinsfile index 9ec298487b..c8e5e33a50 100644 --- a/services/document-updater/Jenkinsfile +++ b/services/document-updater/Jenkinsfile @@ -4,10 +4,10 @@ pipeline { agent any environment { - GIT_PROJECT = "document-updater-sharelatex" - JENKINS_WORKFLOW = "document-updater-sharelatex" + GIT_PROJECT = "document-updater" + JENKINS_WORKFLOW = "document-updater" TARGET_URL = "${env.JENKINS_URL}blue/organizations/jenkins/${JENKINS_WORKFLOW}/detail/$BRANCH_NAME/$BUILD_NUMBER/pipeline" - GIT_API_URL = "https://api.github.com/repos/sharelatex/${GIT_PROJECT}/statuses/$GIT_COMMIT" + GIT_API_URL = "https://api.github.com/repos/overleaf/${GIT_PROJECT}/statuses/$GIT_COMMIT" } triggers { From e57741cb80185c2b5a2bcdfc5055fe5bb126e41b Mon Sep 17 00:00:00 2001 From: Henry Oswald Date: Tue, 7 May 2019 15:46:30 +0100 Subject: [PATCH 4/6] stub out project flusher for unit tests --- .../test/unit/coffee/HttpController/HttpControllerTests.coffee | 1 + 1 file changed, 1 insertion(+) diff --git a/services/document-updater/test/unit/coffee/HttpController/HttpControllerTests.coffee b/services/document-updater/test/unit/coffee/HttpController/HttpControllerTests.coffee index 15b9142647..d7a27db854 100644 --- a/services/document-updater/test/unit/coffee/HttpController/HttpControllerTests.coffee +++ b/services/document-updater/test/unit/coffee/HttpController/HttpControllerTests.coffee @@ -13,6 +13,7 @@ describe "HttpController", -> flushProjectChangesAsync: sinon.stub() "./ProjectManager": @ProjectManager = {} "logger-sharelatex" : @logger = { log: sinon.stub() } + "./ProjectFlusher": {flushAllProjects:->} "./Metrics": @Metrics = {} "./Errors" : Errors @Metrics.Timer = class Timer From d316f172bf55d7c9691ef3e5e308af5787c50ff0 Mon Sep 17 00:00:00 2001 From: Tim Alby Date: Tue, 7 May 2019 16:44:56 +0200 Subject: [PATCH 5/6] update repo URL for Jenkins --- services/document-updater/Jenkinsfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/services/document-updater/Jenkinsfile b/services/document-updater/Jenkinsfile index 9ec298487b..9abbdc917b 100644 --- a/services/document-updater/Jenkinsfile +++ b/services/document-updater/Jenkinsfile @@ -4,10 +4,10 @@ pipeline { agent any environment { - GIT_PROJECT = "document-updater-sharelatex" - JENKINS_WORKFLOW = "document-updater-sharelatex" + GIT_PROJECT = "document-updater" + JENKINS_WORKFLOW = "document-updater-sharelatex-internal" TARGET_URL = "${env.JENKINS_URL}blue/organizations/jenkins/${JENKINS_WORKFLOW}/detail/$BRANCH_NAME/$BUILD_NUMBER/pipeline" - GIT_API_URL = "https://api.github.com/repos/sharelatex/${GIT_PROJECT}/statuses/$GIT_COMMIT" + GIT_API_URL = "https://api.github.com/repos/overleaf/${GIT_PROJECT}/statuses/$GIT_COMMIT" } triggers { From 8b40da701ee49d3ca4e30a45f97d7718ba287270 Mon Sep 17 00:00:00 2001 From: Tim Alby Date: Tue, 7 May 2019 16:44:48 +0200 Subject: [PATCH 6/6] update README - remove build status badge - change app name - update copyright notice - update links --- services/document-updater/README.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/services/document-updater/README.md b/services/document-updater/README.md index f86dcda412..f9cf7c76bb 100644 --- a/services/document-updater/README.md +++ b/services/document-updater/README.md @@ -1,13 +1,11 @@ -document-updater-sharelatex +overleaf/document-updater =========================== An API for applying incoming updates to documents in real-time. -[![Build Status](https://travis-ci.org/sharelatex/document-updater-sharelatex.png?branch=master)](https://travis-ci.org/sharelatex/document-updater-sharelatex) - License ------- The code in this repository is released under the GNU AFFERO GENERAL PUBLIC LICENSE, version 3. A copy can be found in the `LICENSE` file. -Copyright (c) ShareLaTeX, 2014. +Copyright (c) Overleaf, 2014-2019.