From 3a4c5a0d0f3799d554ef9403b2c5f8d69a552de7 Mon Sep 17 00:00:00 2001 From: Jakob Ackermann Date: Mon, 3 Feb 2025 11:23:59 +0000 Subject: [PATCH] [history-v1] add readOnly lookup for raw chunks (#23318) * [history-v1] add readOnly lookup for raw chunks Co-authored-by: Eric Mc Sween * [history-v1] reduce min poolsize for readOnly pool to 0 Co-authored-by: Brian Gough --------- Co-authored-by: Eric Mc Sween Co-authored-by: Brian Gough GitOrigin-RevId: a711c4ee4f3ea3775bd090e620d1ef52689fa1f4 --- .../config/custom-environment-variables.json | 1 + .../history-v1/api/controllers/projects.js | 3 ++- services/history-v1/api/swagger/projects.js | 7 +++++++ .../config/custom-environment-variables.json | 1 + services/history-v1/config/test.json | 1 + services/history-v1/docker-compose.ci.yml | 2 ++ services/history-v1/docker-compose.yml | 2 ++ .../storage/lib/chunk_store/index.js | 6 ++++-- .../storage/lib/chunk_store/mongo.js | 15 ++++++++++++--- .../storage/lib/chunk_store/postgres.js | 9 +++++++-- .../history-v1/storage/lib/knex_read_only.js | 19 +++++++++++++++++++ .../test/acceptance/js/api/projects.test.js | 17 +++++++++++++++++ .../acceptance/js/storage/chunk_store.test.js | 13 ++++++++++++- .../pg-init/set-up-readOnly-user.sql | 2 ++ .../app/js/HistoryStoreManager.js | 7 +++++-- .../scripts/bulk_resync_file_fix_up.mjs | 3 ++- 16 files changed, 96 insertions(+), 12 deletions(-) create mode 100644 services/history-v1/storage/lib/knex_read_only.js create mode 100644 services/history-v1/test/acceptance/pg-init/set-up-readOnly-user.sql diff --git a/server-ce/config/custom-environment-variables.json b/server-ce/config/custom-environment-variables.json index f666065b4c..450e92fed9 100644 --- a/server-ce/config/custom-environment-variables.json +++ b/server-ce/config/custom-environment-variables.json @@ -1,5 +1,6 @@ { "databaseUrl": "HISTORY_CONNECTION_STRING", + "databaseUrlReadOnly": "HISTORY_FOLLOWER_CONNECTION_STRING", "herokuDatabaseUrl": "DATABASE_URL", "databasePoolMin": "DATABASE_POOL_MIN", "databasePoolMax": "DATABASE_POOL_MAX", diff --git a/services/history-v1/api/controllers/projects.js b/services/history-v1/api/controllers/projects.js index 80e4926aa8..7dbeb53fae 100644 --- a/services/history-v1/api/controllers/projects.js +++ b/services/history-v1/api/controllers/projects.js @@ -88,9 +88,10 @@ async function getLatestHistory(req, res, next) { async function getLatestHistoryRaw(req, res, next) { const projectId = req.swagger.params.project_id.value + const readOnly = req.swagger.params.readOnly.value try { const { startVersion, endVersion, endTimestamp } = - await chunkStore.loadLatestRaw(projectId) + await chunkStore.loadLatestRaw(projectId, { readOnly }) res.json({ startVersion, endVersion, diff --git a/services/history-v1/api/swagger/projects.js b/services/history-v1/api/swagger/projects.js index 1cea9a2ec3..cd4d2338fa 100644 --- a/services/history-v1/api/swagger/projects.js +++ b/services/history-v1/api/swagger/projects.js @@ -335,6 +335,13 @@ exports.paths = { required: true, type: 'string', }, + { + name: 'readOnly', + in: 'query', + description: 'use read only database connection', + required: false, + type: 'boolean', + }, ], responses: { 200: { diff --git a/services/history-v1/config/custom-environment-variables.json b/services/history-v1/config/custom-environment-variables.json index 15c0a9dc01..fc79743354 100644 --- a/services/history-v1/config/custom-environment-variables.json +++ b/services/history-v1/config/custom-environment-variables.json @@ -1,5 +1,6 @@ { "databaseUrl": "HISTORY_CONNECTION_STRING", + "databaseUrlReadOnly": "HISTORY_FOLLOWER_CONNECTION_STRING", "herokuDatabaseUrl": "DATABASE_URL", "databasePoolMin": "DATABASE_POOL_MIN", "databasePoolMax": "DATABASE_POOL_MAX", diff --git a/services/history-v1/config/test.json b/services/history-v1/config/test.json index ab192b0a92..d9b91ffbc0 100644 --- a/services/history-v1/config/test.json +++ b/services/history-v1/config/test.json @@ -1,5 +1,6 @@ { "databaseUrl": "postgres://overleaf:overleaf@postgres/overleaf-history-v1-test", + "databaseUrlReadOnly": "postgres://read_only:password@postgres/overleaf-history-v1-test", "persistor": { "backend": "gcs", "gcs": { diff --git a/services/history-v1/docker-compose.ci.yml b/services/history-v1/docker-compose.ci.yml index 0e410e99c4..576f598b11 100644 --- a/services/history-v1/docker-compose.ci.yml +++ b/services/history-v1/docker-compose.ci.yml @@ -72,6 +72,8 @@ services: POSTGRES_USER: overleaf POSTGRES_PASSWORD: overleaf POSTGRES_DB: overleaf-history-v1-test + volumes: + - ./test/acceptance/pg-init/:/docker-entrypoint-initdb.d/ healthcheck: test: pg_isready --quiet interval: 1s diff --git a/services/history-v1/docker-compose.yml b/services/history-v1/docker-compose.yml index 8aa8594cb5..26a99e8be1 100644 --- a/services/history-v1/docker-compose.yml +++ b/services/history-v1/docker-compose.yml @@ -80,6 +80,8 @@ services: POSTGRES_USER: overleaf POSTGRES_PASSWORD: overleaf POSTGRES_DB: overleaf-history-v1-test + volumes: + - ./test/acceptance/pg-init/:/docker-entrypoint-initdb.d/ healthcheck: test: pg_isready --host=localhost --quiet interval: 1s diff --git a/services/history-v1/storage/lib/chunk_store/index.js b/services/history-v1/storage/lib/chunk_store/index.js index 381854b45f..f449979595 100644 --- a/services/history-v1/storage/lib/chunk_store/index.js +++ b/services/history-v1/storage/lib/chunk_store/index.js @@ -82,13 +82,15 @@ async function lazyLoadHistoryFiles(history, batchBlobStore) { * Load the latest Chunk stored for a project, including blob metadata. * * @param {string} projectId + * @param {Object} [opts] + * @param {boolean} [opts.readOnly] * @return {Promise<{id: string, startVersion: number, endVersion: number, endTimestamp: Date}>} */ -async function loadLatestRaw(projectId) { +async function loadLatestRaw(projectId, opts) { assert.projectId(projectId, 'bad projectId') const backend = getBackend(projectId) - const chunkRecord = await backend.getLatestChunk(projectId) + const chunkRecord = await backend.getLatestChunk(projectId, opts) if (chunkRecord == null) { throw new Chunk.NotFoundError(projectId) } diff --git a/services/history-v1/storage/lib/chunk_store/mongo.js b/services/history-v1/storage/lib/chunk_store/mongo.js index 375bb9df08..a49da744ed 100644 --- a/services/history-v1/storage/lib/chunk_store/mongo.js +++ b/services/history-v1/storage/lib/chunk_store/mongo.js @@ -1,4 +1,4 @@ -const { ObjectId } = require('mongodb') +const { ObjectId, ReadPreference } = require('mongodb') const { Chunk } = require('overleaf-editor-core') const OError = require('@overleaf/o-error') const assert = require('../assert') @@ -9,13 +9,22 @@ const DUPLICATE_KEY_ERROR_CODE = 11000 /** * Get the latest chunk's metadata from the database + * @param {string} projectId + * @param {Object} [opts] + * @param {boolean} [opts.readOnly] */ -async function getLatestChunk(projectId) { +async function getLatestChunk(projectId, opts = {}) { assert.mongoId(projectId, 'bad projectId') + const { readOnly = false } = opts const record = await mongodb.chunks.findOne( { projectId: new ObjectId(projectId), state: 'active' }, - { sort: { startVersion: -1 } } + { + sort: { startVersion: -1 }, + readPreference: readOnly + ? ReadPreference.secondaryPreferred + : ReadPreference.primary, + } ) if (record == null) { return null diff --git a/services/history-v1/storage/lib/chunk_store/postgres.js b/services/history-v1/storage/lib/chunk_store/postgres.js index 78b0326ee0..0f2b569260 100644 --- a/services/history-v1/storage/lib/chunk_store/postgres.js +++ b/services/history-v1/storage/lib/chunk_store/postgres.js @@ -1,18 +1,23 @@ const { Chunk } = require('overleaf-editor-core') const assert = require('../assert') const knex = require('../knex') +const knexReadOnly = require('../knex_read_only') const { ChunkVersionConflictError } = require('./errors') const DUPLICATE_KEY_ERROR_CODE = '23505' /** * Get the latest chunk's metadata from the database + * @param {string} projectId + * @param {Object} [opts] + * @param {boolean} [opts.readOnly] */ -async function getLatestChunk(projectId) { +async function getLatestChunk(projectId, opts = {}) { projectId = parseInt(projectId, 10) assert.integer(projectId, 'bad projectId') + const { readOnly = false } = opts - const record = await knex('chunks') + const record = await (readOnly ? knexReadOnly : knex)('chunks') .where('doc_id', projectId) .orderBy('end_version', 'desc') .first() diff --git a/services/history-v1/storage/lib/knex_read_only.js b/services/history-v1/storage/lib/knex_read_only.js new file mode 100644 index 0000000000..a78c4689a4 --- /dev/null +++ b/services/history-v1/storage/lib/knex_read_only.js @@ -0,0 +1,19 @@ +'use strict' + +const config = require('config') +const knexfile = require('../../knexfile') + +const env = process.env.NODE_ENV || 'development' + +if (config.databaseUrlReadOnly) { + module.exports = require('knex')({ + ...knexfile[env], + pool: { + ...knexfile[env].pool, + min: 0, + }, + connection: config.databaseUrlReadOnly, + }) +} else { + module.exports = require('./knex') +} diff --git a/services/history-v1/test/acceptance/js/api/projects.test.js b/services/history-v1/test/acceptance/js/api/projects.test.js index 6358f8505d..7af8b0172f 100644 --- a/services/history-v1/test/acceptance/js/api/projects.test.js +++ b/services/history-v1/test/acceptance/js/api/projects.test.js @@ -138,6 +138,23 @@ describe('project controller', function () { testFiles.STRING_AB_HASH ) }) + describe('getLatestHistoryRaw', function () { + it('should handles read', async function () { + const projectId = fixtures.docs.initializedProject.id + const response = + await testServer.pseudoJwtBasicAuthClient.apis.Project.getLatestHistoryRaw( + { + project_id: projectId, + readOnly: 'true', + } + ) + expect(response.body).to.deep.equal({ + startVersion: 0, + endVersion: 1, + endTimestamp: '2032-01-01T00:00:00.000Z', + }) + }) + }) }) describe('deleteProject', function () { diff --git a/services/history-v1/test/acceptance/js/storage/chunk_store.test.js b/services/history-v1/test/acceptance/js/storage/chunk_store.test.js index c9c7aa44ed..68b721e1f7 100644 --- a/services/history-v1/test/acceptance/js/storage/chunk_store.test.js +++ b/services/history-v1/test/acceptance/js/storage/chunk_store.test.js @@ -69,7 +69,7 @@ describe('chunkStore', function () { await chunkStore.update(projectId, oldEndVersion, chunk) }) - it('records the correct metadata in db', async function () { + it('records the correct metadata in db readOnly=false', async function () { const raw = await chunkStore.loadLatestRaw(projectId) expect(raw).to.deep.include({ startVersion: 0, @@ -78,6 +78,17 @@ describe('chunkStore', function () { }) }) + it('records the correct metadata in db readOnly=true', async function () { + const raw = await chunkStore.loadLatestRaw(projectId, { + readOnly: true, + }) + expect(raw).to.deep.include({ + startVersion: 0, + endVersion: 2, + endTimestamp: lastChangeTimestamp, + }) + }) + it('records the correct timestamp', async function () { const chunk = await chunkStore.loadLatest(projectId) expect(chunk.getEndTimestamp()).to.deep.equal(lastChangeTimestamp) diff --git a/services/history-v1/test/acceptance/pg-init/set-up-readOnly-user.sql b/services/history-v1/test/acceptance/pg-init/set-up-readOnly-user.sql new file mode 100644 index 0000000000..3a1abf3b60 --- /dev/null +++ b/services/history-v1/test/acceptance/pg-init/set-up-readOnly-user.sql @@ -0,0 +1,2 @@ +CREATE USER read_only PASSWORD 'password'; +ALTER DEFAULT PRIVILEGES FOR USER overleaf IN SCHEMA public GRANT SELECT ON TABLES TO read_only; diff --git a/services/project-history/app/js/HistoryStoreManager.js b/services/project-history/app/js/HistoryStoreManager.js index 4ef67860a1..e8fd51d452 100644 --- a/services/project-history/app/js/HistoryStoreManager.js +++ b/services/project-history/app/js/HistoryStoreManager.js @@ -90,15 +90,18 @@ export function getMostRecentVersion(projectId, historyId, callback) { /** * @param {string} projectId * @param {string} historyId + * @param {Object} opts + * @param {boolean} [opts.readOnly] * @param {(error: Error, rawChunk?: { startVersion: number, endVersion: number, endTimestamp: Date}) => void} callback */ -export function getMostRecentVersionRaw(projectId, historyId, callback) { +export function getMostRecentVersionRaw(projectId, historyId, opts, callback) { const path = `projects/${historyId}/latest/history/raw` logger.debug( { projectId, historyId }, 'getting raw chunk from history service' ) - _requestHistoryService({ path, json: true }, (err, body) => { + const qs = opts.readOnly ? { readOnly: true } : {} + _requestHistoryService({ path, json: true, qs }, (err, body) => { if (err) return callback(OError.tag(err)) const { startVersion, endVersion, endTimestamp } = body callback(null, { diff --git a/services/project-history/scripts/bulk_resync_file_fix_up.mjs b/services/project-history/scripts/bulk_resync_file_fix_up.mjs index c4a590fb62..d48e25daef 100644 --- a/services/project-history/scripts/bulk_resync_file_fix_up.mjs +++ b/services/project-history/scripts/bulk_resync_file_fix_up.mjs @@ -117,7 +117,8 @@ function checkFileTreeNeedsResync(folder) { async function getLastEndTimestamp(projectId, historyId) { const raw = await HistoryStoreManager.promises.getMostRecentVersionRaw( projectId, - historyId + historyId, + { readOnly: true } ) if (!raw) throw new Error('bug: history not initialized') return raw.endTimestamp