[history-v1] add readOnly lookup for raw chunks (#23318)

* [history-v1] add readOnly lookup for raw chunks

Co-authored-by: Eric Mc Sween <eric.mcsween@overleaf.com>

* [history-v1] reduce min poolsize for readOnly pool to 0

Co-authored-by: Brian Gough <brian.gough@overleaf.com>

---------

Co-authored-by: Eric Mc Sween <eric.mcsween@overleaf.com>
Co-authored-by: Brian Gough <brian.gough@overleaf.com>
GitOrigin-RevId: a711c4ee4f3ea3775bd090e620d1ef52689fa1f4
This commit is contained in:
Jakob Ackermann
2025-02-03 11:23:59 +00:00
committed by Copybot
parent 01ed0c10a0
commit 3a4c5a0d0f
16 changed files with 96 additions and 12 deletions

View File

@@ -1,5 +1,6 @@
{
"databaseUrl": "HISTORY_CONNECTION_STRING",
"databaseUrlReadOnly": "HISTORY_FOLLOWER_CONNECTION_STRING",
"herokuDatabaseUrl": "DATABASE_URL",
"databasePoolMin": "DATABASE_POOL_MIN",
"databasePoolMax": "DATABASE_POOL_MAX",

View File

@@ -88,9 +88,10 @@ async function getLatestHistory(req, res, next) {
async function getLatestHistoryRaw(req, res, next) {
const projectId = req.swagger.params.project_id.value
const readOnly = req.swagger.params.readOnly.value
try {
const { startVersion, endVersion, endTimestamp } =
await chunkStore.loadLatestRaw(projectId)
await chunkStore.loadLatestRaw(projectId, { readOnly })
res.json({
startVersion,
endVersion,

View File

@@ -335,6 +335,13 @@ exports.paths = {
required: true,
type: 'string',
},
{
name: 'readOnly',
in: 'query',
description: 'use read only database connection',
required: false,
type: 'boolean',
},
],
responses: {
200: {

View File

@@ -1,5 +1,6 @@
{
"databaseUrl": "HISTORY_CONNECTION_STRING",
"databaseUrlReadOnly": "HISTORY_FOLLOWER_CONNECTION_STRING",
"herokuDatabaseUrl": "DATABASE_URL",
"databasePoolMin": "DATABASE_POOL_MIN",
"databasePoolMax": "DATABASE_POOL_MAX",

View File

@@ -1,5 +1,6 @@
{
"databaseUrl": "postgres://overleaf:overleaf@postgres/overleaf-history-v1-test",
"databaseUrlReadOnly": "postgres://read_only:password@postgres/overleaf-history-v1-test",
"persistor": {
"backend": "gcs",
"gcs": {

View File

@@ -72,6 +72,8 @@ services:
POSTGRES_USER: overleaf
POSTGRES_PASSWORD: overleaf
POSTGRES_DB: overleaf-history-v1-test
volumes:
- ./test/acceptance/pg-init/:/docker-entrypoint-initdb.d/
healthcheck:
test: pg_isready --quiet
interval: 1s

View File

@@ -80,6 +80,8 @@ services:
POSTGRES_USER: overleaf
POSTGRES_PASSWORD: overleaf
POSTGRES_DB: overleaf-history-v1-test
volumes:
- ./test/acceptance/pg-init/:/docker-entrypoint-initdb.d/
healthcheck:
test: pg_isready --host=localhost --quiet
interval: 1s

View File

@@ -82,13 +82,15 @@ async function lazyLoadHistoryFiles(history, batchBlobStore) {
* Load the latest Chunk stored for a project, including blob metadata.
*
* @param {string} projectId
* @param {Object} [opts]
* @param {boolean} [opts.readOnly]
* @return {Promise<{id: string, startVersion: number, endVersion: number, endTimestamp: Date}>}
*/
async function loadLatestRaw(projectId) {
async function loadLatestRaw(projectId, opts) {
assert.projectId(projectId, 'bad projectId')
const backend = getBackend(projectId)
const chunkRecord = await backend.getLatestChunk(projectId)
const chunkRecord = await backend.getLatestChunk(projectId, opts)
if (chunkRecord == null) {
throw new Chunk.NotFoundError(projectId)
}

View File

@@ -1,4 +1,4 @@
const { ObjectId } = require('mongodb')
const { ObjectId, ReadPreference } = require('mongodb')
const { Chunk } = require('overleaf-editor-core')
const OError = require('@overleaf/o-error')
const assert = require('../assert')
@@ -9,13 +9,22 @@ const DUPLICATE_KEY_ERROR_CODE = 11000
/**
* Get the latest chunk's metadata from the database
* @param {string} projectId
* @param {Object} [opts]
* @param {boolean} [opts.readOnly]
*/
async function getLatestChunk(projectId) {
async function getLatestChunk(projectId, opts = {}) {
assert.mongoId(projectId, 'bad projectId')
const { readOnly = false } = opts
const record = await mongodb.chunks.findOne(
{ projectId: new ObjectId(projectId), state: 'active' },
{ sort: { startVersion: -1 } }
{
sort: { startVersion: -1 },
readPreference: readOnly
? ReadPreference.secondaryPreferred
: ReadPreference.primary,
}
)
if (record == null) {
return null

View File

@@ -1,18 +1,23 @@
const { Chunk } = require('overleaf-editor-core')
const assert = require('../assert')
const knex = require('../knex')
const knexReadOnly = require('../knex_read_only')
const { ChunkVersionConflictError } = require('./errors')
const DUPLICATE_KEY_ERROR_CODE = '23505'
/**
* Get the latest chunk's metadata from the database
* @param {string} projectId
* @param {Object} [opts]
* @param {boolean} [opts.readOnly]
*/
async function getLatestChunk(projectId) {
async function getLatestChunk(projectId, opts = {}) {
projectId = parseInt(projectId, 10)
assert.integer(projectId, 'bad projectId')
const { readOnly = false } = opts
const record = await knex('chunks')
const record = await (readOnly ? knexReadOnly : knex)('chunks')
.where('doc_id', projectId)
.orderBy('end_version', 'desc')
.first()

View File

@@ -0,0 +1,19 @@
'use strict'
const config = require('config')
const knexfile = require('../../knexfile')
const env = process.env.NODE_ENV || 'development'
if (config.databaseUrlReadOnly) {
module.exports = require('knex')({
...knexfile[env],
pool: {
...knexfile[env].pool,
min: 0,
},
connection: config.databaseUrlReadOnly,
})
} else {
module.exports = require('./knex')
}

View File

@@ -138,6 +138,23 @@ describe('project controller', function () {
testFiles.STRING_AB_HASH
)
})
describe('getLatestHistoryRaw', function () {
  // Calls the raw latest-history endpoint with the optional `readOnly` query
  // parameter set and checks the chunk metadata returned in the body.
  it('should handles read', async function () {
    const { id: projectId } = fixtures.docs.initializedProject
    const requestParams = {
      project_id: projectId,
      readOnly: 'true',
    }
    const projectApi = testServer.pseudoJwtBasicAuthClient.apis.Project
    const response = await projectApi.getLatestHistoryRaw(requestParams)
    const expectedBody = {
      startVersion: 0,
      endVersion: 1,
      endTimestamp: '2032-01-01T00:00:00.000Z',
    }
    expect(response.body).to.deep.equal(expectedBody)
  })
})
})
describe('deleteProject', function () {

View File

@@ -69,7 +69,7 @@ describe('chunkStore', function () {
await chunkStore.update(projectId, oldEndVersion, chunk)
})
it('records the correct metadata in db', async function () {
it('records the correct metadata in db readOnly=false', async function () {
const raw = await chunkStore.loadLatestRaw(projectId)
expect(raw).to.deep.include({
startVersion: 0,
@@ -78,6 +78,17 @@ describe('chunkStore', function () {
})
})
// Loads the latest raw chunk with `readOnly: true` and checks its version
// range and end timestamp.
it('records the correct metadata in db readOnly=true', async function () {
  const lookupOptions = { readOnly: true }
  const raw = await chunkStore.loadLatestRaw(projectId, lookupOptions)
  const expectedMetadata = {
    startVersion: 0,
    endVersion: 2,
    endTimestamp: lastChangeTimestamp,
  }
  expect(raw).to.deep.include(expectedMetadata)
})
it('records the correct timestamp', async function () {
const chunk = await chunkStore.loadLatest(projectId)
expect(chunk.getEndTimestamp()).to.deep.equal(lastChangeTimestamp)

View File

@@ -0,0 +1,2 @@
-- Init script for the acceptance-test Postgres container.
-- Creates the `read_only` login whose credentials appear in the test
-- config's read-only connection string.
CREATE USER read_only PASSWORD 'password';
-- Default privileges apply to tables created after this statement runs:
-- tables that the `overleaf` user creates in schema `public` from now on are
-- readable (SELECT only) by `read_only`. Tables that already exist are not
-- affected.
ALTER DEFAULT PRIVILEGES FOR USER overleaf IN SCHEMA public GRANT SELECT ON TABLES TO read_only;

View File

@@ -90,15 +90,18 @@ export function getMostRecentVersion(projectId, historyId, callback) {
/**
* @param {string} projectId
* @param {string} historyId
* @param {Object} opts
* @param {boolean} [opts.readOnly]
* @param {(error: Error, rawChunk?: { startVersion: number, endVersion: number, endTimestamp: Date}) => void} callback
*/
export function getMostRecentVersionRaw(projectId, historyId, callback) {
export function getMostRecentVersionRaw(projectId, historyId, opts, callback) {
const path = `projects/${historyId}/latest/history/raw`
logger.debug(
{ projectId, historyId },
'getting raw chunk from history service'
)
_requestHistoryService({ path, json: true }, (err, body) => {
const qs = opts.readOnly ? { readOnly: true } : {}
_requestHistoryService({ path, json: true, qs }, (err, body) => {
if (err) return callback(OError.tag(err))
const { startVersion, endVersion, endTimestamp } = body
callback(null, {

View File

@@ -117,7 +117,8 @@ function checkFileTreeNeedsResync(folder) {
async function getLastEndTimestamp(projectId, historyId) {
const raw = await HistoryStoreManager.promises.getMostRecentVersionRaw(
projectId,
historyId
historyId,
{ readOnly: true }
)
if (!raw) throw new Error('bug: history not initialized')
return raw.endTimestamp