From 27bd613580025d90e5d335bdc90f50ce79e3fdb5 Mon Sep 17 00:00:00 2001 From: Brian Gough Date: Mon, 14 Apr 2025 09:06:43 +0100 Subject: [PATCH] Merge pull request #24768 from overleaf/bg-history-redis-buffer test redis caching when loading latest chunk in history-v1 GitOrigin-RevId: f0ee09e5e9e1d7605e228913cb8539be4134e1f7 --- services/history-v1/storage/index.js | 1 + services/history-v1/storage/lib/assert.js | 31 +- .../storage/lib/blob_store/postgres.js | 10 +- .../storage/lib/chunk_store/index.js | 13 +- .../storage/lib/chunk_store/postgres.js | 20 +- .../storage/lib/chunk_store/redis.js | 188 ++++++ services/history-v1/storage/lib/redis.js | 19 + .../history-v1/storage/scripts/backup.mjs | 9 + .../storage/scripts/backup_blob.mjs | 2 + services/history-v1/storage/scripts/show.mjs | 4 + .../scripts/verify_backed_up_blobs.mjs | 4 + .../storage/scripts/verify_project.mjs | 2 + .../scripts/verify_sampled_projects.mjs | 2 + .../test/acceptance/js/storage/assert.test.js | 248 +++++++ .../js/storage/blob_store_postgres.test.js | 10 +- .../chunk_store_postgres_backend.test.js | 18 +- .../storage/chunk_store_redis_backend.test.js | 606 ++++++++++++++++++ .../acceptance/js/storage/support/cleanup.js | 9 +- services/history-v1/test/setup.js | 3 +- 19 files changed, 1161 insertions(+), 38 deletions(-) create mode 100644 services/history-v1/storage/lib/chunk_store/redis.js create mode 100644 services/history-v1/storage/lib/redis.js create mode 100644 services/history-v1/test/acceptance/js/storage/assert.test.js create mode 100644 services/history-v1/test/acceptance/js/storage/chunk_store_redis_backend.test.js diff --git a/services/history-v1/storage/index.js b/services/history-v1/storage/index.js index 238cd12a38..9e2f7a61dc 100644 --- a/services/history-v1/storage/index.js +++ b/services/history-v1/storage/index.js @@ -5,6 +5,7 @@ exports.chunkStore = require('./lib/chunk_store') exports.historyStore = require('./lib/history_store').historyStore exports.knex = require('./lib/knex') exports.mongodb = require('./lib/mongodb') +exports.redis = require('./lib/redis') exports.persistChanges = require('./lib/persist_changes') exports.persistor = require('./lib/persistor') exports.ProjectArchive = require('./lib/project_archive') diff --git a/services/history-v1/storage/lib/assert.js b/services/history-v1/storage/lib/assert.js index d0ce318b4d..676b1e51ac 100644 --- a/services/history-v1/storage/lib/assert.js +++ b/services/history-v1/storage/lib/assert.js @@ -1,5 +1,7 @@ 'use strict' +const OError = require('@overleaf/o-error') + const check = require('check-types') const { Blob } = require('overleaf-editor-core') @@ -14,15 +16,23 @@ function transaction(transaction, message) { } function blobHash(arg, message) { - assert.match(arg, Blob.HEX_HASH_RX, message) + try { + assert.match(arg, Blob.HEX_HASH_RX, message) + } catch (error) { + throw OError.tag(error, message, { arg }) + } } /** - * A chunk id is a string that contains either an integer (for projects stored in Postgres) or 24 + * A project id is a string that contains either an integer (for projects stored in Postgres) or 24 * hex digits (for projects stored in Mongo) */ function projectId(arg, message) { - assert.match(arg, PROJECT_ID_REGEXP, message) + try { + assert.match(arg, PROJECT_ID_REGEXP, message) + } catch (error) { + throw OError.tag(error, message, { arg }) + } } /** @@ -32,16 +42,25 @@ function projectId(arg, message) { function chunkId(arg, message) { const valid = check.integer(arg) || check.match(arg, MONGO_ID_REGEXP) if (!valid) { - throw new TypeError(message) + const error = new TypeError(message) + throw OError.tag(error, message, { arg }) } } function mongoId(arg, message) { - assert.match(arg, MONGO_ID_REGEXP) + try { + assert.match(arg, MONGO_ID_REGEXP, message) + } catch (error) { + throw OError.tag(error, message, { arg }) + } } function postgresId(arg, message) { - assert.match(arg, POSTGRES_ID_REGEXP, message) + try { + assert.match(arg, POSTGRES_ID_REGEXP, message) + } catch (error) { + throw OError.tag(error, message, { arg }) + } } module.exports = { diff --git a/services/history-v1/storage/lib/blob_store/postgres.js b/services/history-v1/storage/lib/blob_store/postgres.js index 7f66d2d24d..1cedeec5d7 100644 --- a/services/history-v1/storage/lib/blob_store/postgres.js +++ b/services/history-v1/storage/lib/blob_store/postgres.js @@ -13,7 +13,7 @@ async function initialize(projectId) { * Return blob metadata for the given project and hash */ async function findBlob(projectId, hash) { - assert.postgresId(projectId, `bad projectId ${projectId}`) + assert.postgresId(projectId, 'bad projectId') projectId = parseInt(projectId, 10) assert.blobHash(hash, 'bad hash') @@ -35,7 +35,7 @@ async function findBlob(projectId, hash) { * @return {Promise.>} no guarantee on order */ async function findBlobs(projectId, hashes) { - assert.postgresId(projectId, `bad projectId ${projectId}`) + assert.postgresId(projectId, 'bad projectId') projectId = parseInt(projectId, 10) assert.array(hashes, 'bad hashes: not array') hashes.forEach(function (hash) { @@ -57,7 +57,7 @@ async function findBlobs(projectId, hashes) { * Return metadata for all blobs in the given project */ async function getProjectBlobs(projectId) { - assert.postgresId(projectId, `bad projectId ${projectId}`) + assert.postgresId(projectId, 'bad projectId') projectId = parseInt(projectId, 10) const records = await knex('project_blobs') @@ -103,7 +103,7 @@ async function getProjectBlobsBatch(projectIds) { * Add a blob's metadata to the blobs table after it has been uploaded. */ async function insertBlob(projectId, blob) { - assert.postgresId(projectId, `bad projectId ${projectId}`) + assert.postgresId(projectId, 'bad projectId') projectId = parseInt(projectId, 10) await knex('project_blobs') @@ -116,7 +116,7 @@ async function insertBlob(projectId, blob) { * Deletes all blobs for a given project */ async function deleteBlobs(projectId) { - assert.postgresId(projectId, `bad projectId ${projectId}`) + assert.postgresId(projectId, 'bad projectId') projectId = parseInt(projectId, 10) await knex('project_blobs').where('project_id', projectId).delete() diff --git a/services/history-v1/storage/lib/chunk_store/index.js b/services/history-v1/storage/lib/chunk_store/index.js index c1fbb9d607..bae99f020b 100644 --- a/services/history-v1/storage/lib/chunk_store/index.js +++ b/services/history-v1/storage/lib/chunk_store/index.js @@ -30,6 +30,7 @@ const { BlobStore } = require('../blob_store') const { historyStore } = require('../history_store') const mongoBackend = require('./mongo') const postgresBackend = require('./postgres') +const redisBackend = require('./redis') const { ChunkVersionConflictError } = require('./errors') const DEFAULT_DELETE_BATCH_SIZE = parseInt(config.get('maxDeleteKeys'), 10) @@ -104,13 +105,23 @@ async function loadLatestRaw(projectId, opts) { * @return {Promise.} */ async function loadLatest(projectId) { + // Test out the redis caching backend - not in use yet + const cachedChunk = await redisBackend.getCurrentChunk(projectId) const chunkRecord = await loadLatestRaw(projectId) const rawHistory = await historyStore.loadRaw(projectId, chunkRecord.id) const history = History.fromRaw(rawHistory) const blobStore = new BlobStore(projectId) const batchBlobStore = new BatchBlobStore(blobStore) await lazyLoadHistoryFiles(history, batchBlobStore) - return new Chunk(history, chunkRecord.startVersion) + const chunk = new Chunk(history, chunkRecord.startVersion) + // if the cached chunk is no longer valid, update it + const cachedChunkIsValid = redisBackend.checkCacheValidity(cachedChunk, chunk) + if (!cachedChunkIsValid) { + await redisBackend.setCurrentChunk(projectId, chunk) + } else { + await redisBackend.compareChunks(projectId, cachedChunk, chunk) + } + return chunk } /** diff --git a/services/history-v1/storage/lib/chunk_store/postgres.js b/services/history-v1/storage/lib/chunk_store/postgres.js index 0964b0ecca..b89b18d87a 100644 --- a/services/history-v1/storage/lib/chunk_store/postgres.js +++ b/services/history-v1/storage/lib/chunk_store/postgres.js @@ -14,7 +14,7 @@ const DUPLICATE_KEY_ERROR_CODE = '23505' * @param {boolean} [opts.readOnly] */ async function getLatestChunk(projectId, opts = {}) { - assert.postgresId(projectId, `bad projectId ${projectId}`) + assert.postgresId(projectId, 'bad projectId') projectId = parseInt(projectId, 10) const { readOnly = false } = opts @@ -32,7 +32,7 @@ async function getLatestChunk(projectId, opts = {}) { * Get the metadata for the chunk that contains the given version. */ async function getChunkForVersion(projectId, version) { - assert.postgresId(projectId, `bad projectId ${projectId}`) + assert.postgresId(projectId, 'bad projectId') projectId = parseInt(projectId, 10) const record = await knex('chunks') @@ -104,7 +104,7 @@ async function getLastActiveChunkBeforeTimestamp(projectId, timestamp) { * the given timestamp. */ async function getChunkForTimestamp(projectId, timestamp) { - assert.postgresId(projectId, `bad projectId ${projectId}`) + assert.postgresId(projectId, 'bad projectId') projectId = parseInt(projectId, 10) // This query will find the latest chunk after the timestamp (query orders @@ -148,7 +148,7 @@ function chunkFromRecord(record) { * Get all of a project's chunk ids */ async function getProjectChunkIds(projectId) { - assert.postgresId(projectId, `bad projectId ${projectId}`) + assert.postgresId(projectId, 'bad projectId') projectId = parseInt(projectId, 10) const records = await knex('chunks').select('id').where('doc_id', projectId) @@ -159,7 +159,7 @@ async function getProjectChunkIds(projectId) { * Get all of a projects chunks directly */ async function getProjectChunks(projectId) { - assert.postgresId(projectId, `bad projectId ${projectId}`) + assert.postgresId(projectId, 'bad projectId') projectId = parseInt(projectId, 10) const records = await knex('chunks') @@ -173,7 +173,7 @@ async function getProjectChunks(projectId) { * Insert a pending chunk before sending it to object storage. */ async function insertPendingChunk(projectId, chunk) { - assert.postgresId(projectId, `bad projectId ${projectId}`) + assert.postgresId(projectId, 'bad projectId') projectId = parseInt(projectId, 10) const result = await knex.first( @@ -199,7 +199,7 @@ async function confirmCreate( chunkId, earliestChangeTimestamp ) { - assert.postgresId(projectId, `bad projectId ${projectId}`) + assert.postgresId(projectId, 'bad projectId') projectId = parseInt(projectId, 10) await knex.transaction(async tx => { @@ -221,7 +221,7 @@ async function confirmUpdate( newChunkId, earliestChangeTimestamp ) { - assert.postgresId(projectId, `bad projectId ${projectId}`) + assert.postgresId(projectId, 'bad projectId') projectId = parseInt(projectId, 10) await knex.transaction(async tx => { @@ -273,7 +273,7 @@ async function _insertChunk(tx, projectId, chunk, chunkId) { * @return {Promise} */ async function deleteChunk(projectId, chunkId) { - assert.postgresId(projectId, `bad projectId ${projectId}`) + assert.postgresId(projectId, 'bad projectId') projectId = parseInt(projectId, 10) assert.integer(chunkId, 'bad chunkId') @@ -284,7 +284,7 @@ async function deleteChunk(projectId, chunkId) { * Delete all of a project's chunks */ async function deleteProjectChunks(projectId) { - assert.postgresId(projectId, `bad projectId ${projectId}`) + assert.postgresId(projectId, 'bad projectId') projectId = parseInt(projectId, 10) await knex.transaction(async tx => { diff --git a/services/history-v1/storage/lib/chunk_store/redis.js b/services/history-v1/storage/lib/chunk_store/redis.js new file mode 100644 index 0000000000..0dd146f89b --- /dev/null +++ b/services/history-v1/storage/lib/chunk_store/redis.js @@ -0,0 +1,188 @@ +const metrics = require('@overleaf/metrics') +const logger = require('@overleaf/logger') +const redis = require('../redis') +const rclient = redis.rclientHistory // +const { Snapshot, Change, History, Chunk } = require('overleaf-editor-core') + +const TEMPORARY_CACHE_LIFETIME = 300 // 5 minutes + +const keySchema = { + snapshot({ projectId }) { + return `snapshot:{${projectId}}` + }, + startVersion({ projectId }) { + return `snapshot-version:{${projectId}}` + }, + changes({ projectId }) { + return `changes:{${projectId}}` + }, +} + +rclient.defineCommand('get_current_chunk', { + numberOfKeys: 3, + lua: ` + local startVersionValue = redis.call('GET', KEYS[2]) + if not startVersionValue then + return nil -- this is a cache-miss + end + local snapshotValue = redis.call('GET', KEYS[1]) + local changesValues = redis.call('LRANGE', KEYS[3], 0, -1) + return {snapshotValue, startVersionValue, changesValues} + `, +}) + +/** + * Retrieves the current chunk of project history from Redis storage + * @param {string} projectId - The unique identifier of the project + * @returns {Promise} A Promise that resolves to a Chunk object containing project history, + * or null if retrieval fails + * @throws {Error} If Redis operations fail + */ +async function getCurrentChunk(projectId) { + try { + const result = await rclient.get_current_chunk( + keySchema.snapshot({ projectId }), + keySchema.startVersion({ projectId }), + keySchema.changes({ projectId }) + ) + if (!result) { + return null // cache-miss + } + const snapshot = Snapshot.fromRaw(JSON.parse(result[0])) + const startVersion = JSON.parse(result[1]) + const changes = result[2].map(c => Change.fromRaw(JSON.parse(c))) + const history = new History(snapshot, changes) + const chunk = new Chunk(history, startVersion) + metrics.inc('chunk_store.redis.get_current_chunk', 1, { status: 'success' }) + return chunk + } catch (err) { + logger.error({ err, projectId }, 'error getting current chunk from redis') + metrics.inc('chunk_store.redis.get_current_chunk', 1, { status: 'error' }) + return null + } +} + +rclient.defineCommand('get_current_chunk_metadata', { + numberOfKeys: 2, + lua: ` + local startVersionValue = redis.call('GET', KEYS[1]) + local changesCount = redis.call('LLEN', KEYS[2]) + return {startVersionValue, changesCount} + `, +}) + +/** + * Retrieves the current chunk metadata for a given project from Redis + * @param {string} projectId - The ID of the project to get metadata for + * @returns {Promise} Object containing startVersion and changesCount if found, null on error or cache miss + * @property {number} startVersion - The starting version information + * @property {number} changesCount - The number of changes in the chunk + */ +async function getCurrentChunkMetadata(projectId) { + try { + const result = await rclient.get_current_chunk_metadata( + keySchema.startVersion({ projectId }), + keySchema.changes({ projectId }) + ) + if (!result) { + return null // cache-miss + } + const startVersion = JSON.parse(result[0]) + const changesCount = parseInt(result[1], 10) + return { startVersion, changesCount } + } catch (err) { + return null + } +} + +rclient.defineCommand('set_current_chunk', { + numberOfKeys: 3, + lua: ` + local snapshotValue = ARGV[1] + local startVersionValue = ARGV[2] + redis.call('SETEX', KEYS[1], ${TEMPORARY_CACHE_LIFETIME}, snapshotValue) + redis.call('SETEX', KEYS[2], ${TEMPORARY_CACHE_LIFETIME}, startVersionValue) + redis.call('DEL', KEYS[3]) -- clear the old changes list + if #ARGV >= 3 then + redis.call('RPUSH', KEYS[3], unpack(ARGV, 3)) + redis.call('EXPIRE', KEYS[3], ${TEMPORARY_CACHE_LIFETIME}) + end + `, +}) + +/** + * Stores the current chunk of project history in Redis + * @param {string} projectId - The ID of the project + * @param {Chunk} chunk - The chunk object containing history data + * @returns {Promise<*>} Returns the result of the Redis operation, or null if an error occurs + * @throws {Error} May throw Redis-related errors which are caught internally + */ +async function setCurrentChunk(projectId, chunk) { + try { + const snapshotKey = keySchema.snapshot({ projectId }) + const startVersionKey = keySchema.startVersion({ projectId }) + const changesKey = keySchema.changes({ projectId }) + + const snapshot = chunk.history.snapshot + const startVersion = chunk.startVersion + const changes = chunk.history.changes + + await rclient.set_current_chunk( + snapshotKey, + startVersionKey, + changesKey, + JSON.stringify(snapshot.toRaw()), + startVersion, + ...changes.map(c => JSON.stringify(c.toRaw())) + ) + metrics.inc('chunk_store.redis.set_current_chunk', 1, { status: 'success' }) + } catch (err) { + logger.error( + { err, projectId, chunk }, + 'error setting current chunk inredis' + ) + metrics.inc('chunk_store.redis.set_current_chunk', 1, { status: 'error' }) + return null // while testing we will suppress any errors + } +} + +/** + * Checks whether a cached chunk's version metadata matches the current chunk's metadata + * @param {Chunk}} cachedChunk - The chunk retrieved from cache + * @param {Chunk} currentChunk - The current chunk to compare against + * @returns {boolean} - Returns true if the chunks have matching start and end versions, false otherwise + */ +function checkCacheValidity(cachedChunk, currentChunk) { + return Boolean( + cachedChunk && + cachedChunk.getStartVersion() === currentChunk.getStartVersion() && + cachedChunk.getEndVersion() === currentChunk.getEndVersion() + ) +} + +/** + * Compares two chunks for equality using stringified JSON comparison + * @param {string} projectId - The ID of the project + * @param {Chunk} cachedChunk - The cached chunk to compare + * @param {Chunk} currentChunk - The current chunk to compare against + * @returns {boolean} - Returns false if either chunk is null/undefined, otherwise returns the comparison result + */ +function compareChunks(projectId, cachedChunk, currentChunk) { + if (!cachedChunk || !currentChunk) { + return false + } + const identical = JSON.stringify(cachedChunk) === JSON.stringify(currentChunk) + logger.error({ projectId }, 'chunk cache mismatch') + metrics.inc('chunk_store.redis.compare_chunks', 1, { + status: identical ? 'success' : 'fail', + }) + return identical +} + +module.exports = { + getCurrentChunk, + setCurrentChunk, + getCurrentChunkMetadata, + checkCacheValidity, + compareChunks, +} diff --git a/services/history-v1/storage/lib/redis.js b/services/history-v1/storage/lib/redis.js new file mode 100644 index 0000000000..9b00cc0a26 --- /dev/null +++ b/services/history-v1/storage/lib/redis.js @@ -0,0 +1,19 @@ +const config = require('config') +const redis = require('@overleaf/redis-wrapper') + +const historyRedisOptions = config.get('redis.history') +const rclientHistory = redis.createClient(historyRedisOptions) + +const lockRedisOptions = config.get('redis.history') +const rclientLock = redis.createClient(lockRedisOptions) + +async function disconnect() { + await Promise.all([rclientHistory.disconnect(), rclientLock.disconnect()]) +} + +module.exports = { + rclientHistory, + rclientLock, + redis, + disconnect, +} diff --git a/services/history-v1/storage/scripts/backup.mjs b/services/history-v1/storage/scripts/backup.mjs index 474192dc74..9ae6101105 100644 --- a/services/history-v1/storage/scripts/backup.mjs +++ b/services/history-v1/storage/scripts/backup.mjs @@ -9,6 +9,7 @@ import { create, } from '../lib/chunk_store/index.js' import { client } from '../lib/mongodb.js' +import redis from '../lib/redis.js' import knex from '../lib/knex.js' import { historyStore } from '../lib/history_store.js' import pLimit from 'p-limit' @@ -1091,5 +1092,13 @@ if (import.meta.url === `file://${process.argv[1]}`) { .catch(err => { console.error('Error closing MongoDB connection:', err) }) + redis + .disconnect() + .then(() => { + console.log('Redis connection closed') + }) + .catch(err => { + console.error('Error closing Redis connection:', err) + }) }) } diff --git a/services/history-v1/storage/scripts/backup_blob.mjs b/services/history-v1/storage/scripts/backup_blob.mjs index 2a777d0074..314b05313e 100644 --- a/services/history-v1/storage/scripts/backup_blob.mjs +++ b/services/history-v1/storage/scripts/backup_blob.mjs @@ -10,6 +10,7 @@ import { import assert from '../lib/assert.js' import knex from '../lib/knex.js' import { client } from '../lib/mongodb.js' +import redis from '../lib/redis.js' import { setTimeout } from 'node:timers/promises' import fs from 'node:fs' @@ -23,6 +24,7 @@ async function gracefulShutdown() { console.log('Gracefully shutting down') await knex.destroy() await client.close() + await redis.disconnect() await setTimeout(100) process.exit() } diff --git a/services/history-v1/storage/scripts/show.mjs b/services/history-v1/storage/scripts/show.mjs index 04ec6c61a8..b4ae1664e3 100644 --- a/services/history-v1/storage/scripts/show.mjs +++ b/services/history-v1/storage/scripts/show.mjs @@ -6,6 +6,7 @@ import { } from '../lib/chunk_store/index.js' import { client } from '../lib/mongodb.js' import knex from '../lib/knex.js' +import redis from '../lib/redis.js' import { loadGlobalBlobs, BlobStore, @@ -247,4 +248,7 @@ main() .finally(() => { knex.destroy().catch(err => console.error('Error closing Postgres:', err)) client.close().catch(err => console.error('Error closing MongoDB:', err)) + redis + .disconnect() + .catch(err => console.error('Error disconnecting Redis:', err)) }) diff --git a/services/history-v1/storage/scripts/verify_backed_up_blobs.mjs b/services/history-v1/storage/scripts/verify_backed_up_blobs.mjs index f4778c382f..257238aad4 100644 --- a/services/history-v1/storage/scripts/verify_backed_up_blobs.mjs +++ b/services/history-v1/storage/scripts/verify_backed_up_blobs.mjs @@ -16,6 +16,7 @@ import { db, client, } from '../lib/mongodb.js' +import redis from '../lib/redis.js' import commandLineArgs from 'command-line-args' import fs from 'node:fs' @@ -146,4 +147,7 @@ main() console.error('Error closing Postgres connection:', err) }) client.close().catch(err => console.error('Error closing MongoDB:', err)) + redis.disconnect().catch(err => { + console.error('Error disconnecting Redis:', err) + }) }) diff --git a/services/history-v1/storage/scripts/verify_project.mjs b/services/history-v1/storage/scripts/verify_project.mjs index 6e1cb9de89..3c26f9b5da 100644 --- a/services/history-v1/storage/scripts/verify_project.mjs +++ b/services/history-v1/storage/scripts/verify_project.mjs @@ -2,6 +2,7 @@ import commandLineArgs from 'command-line-args' import { verifyProjectWithErrorContext } from '../lib/backupVerifier.mjs' import knex from '../lib/knex.js' import { client } from '../lib/mongodb.js' +import redis from '../lib/redis.js' import { setTimeout } from 'node:timers/promises' import { loadGlobalBlobs } from '../lib/blob_store/index.js' @@ -10,6 +11,7 @@ const { historyId } = commandLineArgs([{ name: 'historyId', type: String }]) async function gracefulShutdown(code = process.exitCode) { await knex.destroy() await client.close() + await redis.disconnect() await setTimeout(1_000) process.exit(code) } diff --git a/services/history-v1/storage/scripts/verify_sampled_projects.mjs b/services/history-v1/storage/scripts/verify_sampled_projects.mjs index e5b2d0c347..a74a8b9798 100644 --- a/services/history-v1/storage/scripts/verify_sampled_projects.mjs +++ b/services/history-v1/storage/scripts/verify_sampled_projects.mjs @@ -14,6 +14,7 @@ import { loadGlobalBlobs } from '../lib/blob_store/index.js' import { getDatesBeforeRPO } from '../../backupVerifier/utils.mjs' import { EventEmitter } from 'node:events' import { mongodb } from '../index.js' +import redis from '../lib/redis.js' logger.logger.level('fatal') @@ -30,6 +31,7 @@ const usageMessage = [ async function gracefulShutdown(code = process.exitCode) { await knex.destroy() await client.close() + await redis.disconnect() await setTimeout(1_000) process.exit(code) } diff --git a/services/history-v1/test/acceptance/js/storage/assert.test.js b/services/history-v1/test/acceptance/js/storage/assert.test.js new file mode 100644 index 0000000000..29a38f7765 --- /dev/null +++ b/services/history-v1/test/acceptance/js/storage/assert.test.js @@ -0,0 +1,248 @@ +'use strict' + +const OError = require('@overleaf/o-error') +const { expect } = require('chai') +const assert = require('../../../../storage/lib/assert') + +describe('assert', function () { + describe('blobHash', function () { + it('should not throw for valid blob hashes', function () { + expect(() => + assert.blobHash( + 'aad321caf77ca6c5ab09e6c638c237705f93b001', + 'should be a blob hash' + ) + ).to.not.throw() + }) + + it('should throw for invalid blob hashes', function () { + try { + assert.blobHash('invalid-hash', 'should be a blob hash') + expect.fail() + } catch (error) { + expect(error).to.be.instanceOf(TypeError) + expect(error.message).to.equal('should be a blob hash') + expect(OError.getFullInfo(error)).to.deep.equal({ arg: 'invalid-hash' }) + } + }) + + it('should throw for string integer blob hashes', function () { + try { + assert.blobHash('123', 'should be a blob hash') + expect.fail() + } catch (error) { + expect(error).to.be.instanceOf(TypeError) + expect(error.message).to.equal('should be a blob hash') + expect(OError.getFullInfo(error)).to.deep.equal({ arg: '123' }) + } + }) + }) + + describe('projectId', function () { + it('should not throw for valid mongo project ids', function () { + expect(() => + assert.projectId('507f1f77bcf86cd799439011', 'should be a project id') + ).to.not.throw() + }) + + it('should not throw for valid postgres project ids', function () { + expect(() => + assert.projectId('123456789', 'should be a project id') + ).to.not.throw() + }) + + it('should throw for invalid project ids', function () { + try { + assert.projectId('invalid-id', 'should be a project id') + expect.fail() + } catch (error) { + expect(error).to.be.instanceOf(TypeError) + expect(error.message).to.equal('should be a project id') + expect(OError.getFullInfo(error)).to.deep.equal({ arg: 'invalid-id' }) + } + }) + + it('should throw for non-numeric project ids', function () { + try { + assert.projectId('12345x', 'should be a project id') + expect.fail() + } catch (error) { + expect(error).to.be.instanceOf(TypeError) + expect(error.message).to.equal('should be a project id') + expect(OError.getFullInfo(error)).to.deep.equal({ arg: '12345x' }) + } + }) + + it('should throw for postgres ids starting with 0', function () { + try { + assert.projectId('0123456', 'should be a project id') + expect.fail() + } catch (error) { + expect(error).to.be.instanceOf(TypeError) + expect(error.message).to.equal('should be a project id') + expect(OError.getFullInfo(error)).to.deep.equal({ arg: '0123456' }) + } + }) + }) + + describe('chunkId', function () { + it('should not throw for valid mongo chunk ids', function () { + expect(() => + assert.chunkId('507f1f77bcf86cd799439011', 'should be a chunk id') + ).to.not.throw() + }) + + it('should not throw for valid integer chunk ids', function () { + expect(() => + assert.chunkId(123456789, 'should be a chunk id') + ).to.not.throw() + }) + + it('should throw for invalid chunk ids', function () { + try { + assert.chunkId('invalid-id', 'should be a chunk id') + expect.fail() + } catch (error) { + expect(error).to.be.instanceOf(TypeError) + expect(error.message).to.equal('should be a chunk id') + expect(OError.getFullInfo(error)).to.deep.equal({ arg: 'invalid-id' }) + } + }) + + it('should throw for string integer chunk ids', function () { + try { + assert.chunkId('12345', 'should be a chunk id') + expect.fail() + } catch (error) { + expect(error).to.be.instanceOf(TypeError) + expect(error.message).to.equal('should be a chunk id') + expect(OError.getFullInfo(error)).to.deep.equal({ arg: '12345' }) + } + }) + }) + + describe('mongoId', function () { + it('should not throw for valid mongo ids', function () { + expect(() => + assert.mongoId('507f1f77bcf86cd799439011', 'should be a mongo id') + ).to.not.throw() + }) + + it('should throw for invalid mongo ids', function () { + try { + assert.mongoId('invalid-id', 'should be a mongo id') + expect.fail() + } catch (error) { + expect(error).to.be.instanceOf(TypeError) + expect(error.message).to.equal('should be a mongo id') + expect(OError.getFullInfo(error)).to.deep.equal({ arg: 'invalid-id' }) + } + }) + + it('should throw for numeric mongo ids', function () { + try { + assert.mongoId('12345', 'should be a mongo id') + expect.fail() + } catch (error) { + expect(error).to.be.instanceOf(TypeError) + expect(error.message).to.equal('should be a mongo id') + expect(OError.getFullInfo(error)).to.deep.equal({ arg: '12345' }) + } + }) + + it('should throw for mongo ids that are too short', function () { + try { + assert.mongoId('507f1f77bcf86cd79943901', 'should be a mongo id') + expect.fail() + } catch (error) { + expect(error).to.be.instanceOf(TypeError) + expect(error.message).to.equal('should be a mongo id') + expect(OError.getFullInfo(error)).to.deep.equal({ + arg: '507f1f77bcf86cd79943901', + }) + } + }) + + it('should throw for mongo ids that are too long', function () { + try { + assert.mongoId('507f1f77bcf86cd7994390111', 'should be a mongo id') + expect.fail() + } catch (error) { + expect(error).to.be.instanceOf(TypeError) + expect(error.message).to.equal('should be a mongo id') + expect(OError.getFullInfo(error)).to.deep.equal({ + arg: '507f1f77bcf86cd7994390111', + }) + } + }) + }) + + describe('postgresId', function () { + it('should not throw for valid postgres ids', function () { + expect(() => + assert.postgresId('123456789', 'should be a postgres id') + ).to.not.throw() + expect(() => + assert.postgresId('1', 'should be a postgres id') + ).to.not.throw() + }) + + it('should throw for invalid postgres ids', function () { + try { + assert.postgresId('invalid-id', 'should be a postgres id') + expect.fail() + } catch (error) { + expect(error).to.be.instanceOf(TypeError) + expect(error.message).to.equal('should be a postgres id') + expect(OError.getFullInfo(error)).to.deep.equal({ arg: 'invalid-id' }) + } + }) + + it('should throw for postgres ids starting with 0', function () { + try { + assert.postgresId('0123456', 'should be a postgres id') + expect.fail() + } catch (error) { + expect(error).to.be.instanceOf(TypeError) + expect(error.message).to.equal('should be a postgres id') + expect(OError.getFullInfo(error)).to.deep.equal({ arg: '0123456' }) + } + }) + + it('should throw for postgres ids that are too long', function () { + try { + assert.postgresId('12345678901', 'should be a postgres id') + expect.fail() + } catch (error) { + expect(error).to.be.instanceOf(TypeError) + expect(error.message).to.equal('should be a postgres id') + expect(OError.getFullInfo(error)).to.deep.equal({ arg: '12345678901' }) + } + }) + }) + + describe('regex constants', function () { + it('MONGO_ID_REGEXP should match valid mongo ids', function () { + expect('507f1f77bcf86cd799439011').to.match(assert.MONGO_ID_REGEXP) + expect('abcdef0123456789abcdef01').to.match(assert.MONGO_ID_REGEXP) + }) + + it('MONGO_ID_REGEXP should not match invalid mongo ids', function () { + expect('invalid-id').to.not.match(assert.MONGO_ID_REGEXP) + expect('507f1f77bcf86cd79943901').to.not.match(assert.MONGO_ID_REGEXP) // too short + expect('507f1f77bcf86cd7994390111').to.not.match(assert.MONGO_ID_REGEXP) // too long + expect('507F1F77BCF86CD799439011').to.not.match(assert.MONGO_ID_REGEXP) // uppercase + }) + + it('POSTGRES_ID_REGEXP should match valid postgres ids', function () { + expect('123456789').to.match(assert.POSTGRES_ID_REGEXP) + expect('1').to.match(assert.POSTGRES_ID_REGEXP) + }) + + it('POSTGRES_ID_REGEXP should not match invalid postgres ids', function () { + expect('invalid-id').to.not.match(assert.POSTGRES_ID_REGEXP) + expect('0123456').to.not.match(assert.POSTGRES_ID_REGEXP) // starts with 0 + expect('12345678901').to.not.match(assert.POSTGRES_ID_REGEXP) // too long (> 10 digits) + }) + }) +}) diff --git a/services/history-v1/test/acceptance/js/storage/blob_store_postgres.test.js b/services/history-v1/test/acceptance/js/storage/blob_store_postgres.test.js index 0add4fa901..e762c33569 100644 --- a/services/history-v1/test/acceptance/js/storage/blob_store_postgres.test.js +++ b/services/history-v1/test/acceptance/js/storage/blob_store_postgres.test.js @@ -8,20 +8,20 @@ describe('BlobStore postgres backend', function () { const projectId = new ObjectId().toString() await expect( postgresBackend.insertBlob(projectId, 'hash', 123, 99) - ).to.be.rejectedWith(`bad projectId ${projectId}`) + ).to.be.rejectedWith('bad projectId') }) it('deleteBlobs rejects when called with bad projectId', async function () { const projectId = new ObjectId().toString() await expect(postgresBackend.deleteBlobs(projectId)).to.be.rejectedWith( - `bad projectId ${projectId}` + 'bad projectId' ) }) it('findBlobs rejects when called with bad projectId', async function () { const projectId = new ObjectId().toString() await expect(postgresBackend.findBlobs(projectId)).to.be.rejectedWith( - `bad projectId ${projectId}` + 'bad projectId' ) }) @@ -29,14 +29,14 @@ describe('BlobStore postgres backend', function () { const projectId = new ObjectId().toString() await expect( postgresBackend.findBlob(projectId, 'hash') - ).to.be.rejectedWith(`bad projectId ${projectId}`) + ).to.be.rejectedWith('bad projectId') }) it('getProjectBlobs rejects when called with bad projectId', async function () { const projectId = new ObjectId().toString() await expect( postgresBackend.getProjectBlobs(projectId) - ).to.be.rejectedWith(`bad projectId ${projectId}`) + ).to.be.rejectedWith('bad projectId') }) }) }) diff --git a/services/history-v1/test/acceptance/js/storage/chunk_store_postgres_backend.test.js b/services/history-v1/test/acceptance/js/storage/chunk_store_postgres_backend.test.js index 20a24de3eb..9b7ca4d763 100644 --- a/services/history-v1/test/acceptance/js/storage/chunk_store_postgres_backend.test.js +++ b/services/history-v1/test/acceptance/js/storage/chunk_store_postgres_backend.test.js @@ -11,32 +11,32 @@ describe('chunk store Postgres backend', function () { const invalidProjectId = new ObjectId().toString() await expect(backend.getLatestChunk(invalidProjectId)).to.be.rejectedWith( - `bad projectId ${invalidProjectId}` + 'bad projectId' ) await expect( backend.getChunkForVersion(invalidProjectId, 1) - ).to.be.rejectedWith(`bad projectId ${invalidProjectId}`) + ).to.be.rejectedWith('bad projectId') await expect( backend.getChunkForTimestamp(invalidProjectId, new Date()) - ).to.be.rejectedWith(`bad projectId ${invalidProjectId}`) + ).to.be.rejectedWith('bad projectId') await expect( backend.getProjectChunkIds(invalidProjectId) - ).to.be.rejectedWith(`bad projectId ${invalidProjectId}`) + ).to.be.rejectedWith('bad projectId') await expect( backend.insertPendingChunk(invalidProjectId, makeChunk([], 0)) - ).to.be.rejectedWith(`bad projectId ${invalidProjectId}`) + ).to.be.rejectedWith('bad projectId') await expect( backend.confirmCreate(invalidProjectId, makeChunk([], 0), 1) - ).to.be.rejectedWith(`bad projectId ${invalidProjectId}`) + ).to.be.rejectedWith('bad projectId') await expect( backend.confirmUpdate(invalidProjectId, 1, makeChunk([], 0), 2) - ).to.be.rejectedWith(`bad projectId ${invalidProjectId}`) + ).to.be.rejectedWith('bad projectId') await expect(backend.deleteChunk(invalidProjectId, 1)).to.be.rejectedWith( - `bad projectId ${invalidProjectId}` + 'bad projectId' ) await expect( backend.deleteProjectChunks(invalidProjectId) - ).to.be.rejectedWith(`bad projectId ${invalidProjectId}`) + ).to.be.rejectedWith('bad projectId') }) }) diff --git a/services/history-v1/test/acceptance/js/storage/chunk_store_redis_backend.test.js b/services/history-v1/test/acceptance/js/storage/chunk_store_redis_backend.test.js new file mode 100644 index 0000000000..a4ce4f9653 --- /dev/null +++ b/services/history-v1/test/acceptance/js/storage/chunk_store_redis_backend.test.js @@ -0,0 +1,606 @@ +'use strict' + +const { expect } = require('chai') +const { + Chunk, + Snapshot, + History, + File, + AddFileOperation, + Origin, + Change, + V2DocVersions, +} = require('overleaf-editor-core') +const cleanup = require('./support/cleanup') +const redisBackend = require('../../../../storage/lib/chunk_store/redis') + +describe('chunk store Redis backend', function () { + beforeEach(cleanup.everything) + const projectId = '123456' + + describe('getCurrentChunk', function () { + it('should return null on cache miss', async function () { + const chunk = await redisBackend.getCurrentChunk(projectId) + expect(chunk).to.be.null + }) + + it('should return the cached chunk', async function () { + // Create a sample chunk + const snapshot = new Snapshot() + const changes = [ + new Change( + [new AddFileOperation('test.tex', File.fromString('Hello World'))], + new Date(), + [] + ), + ] + const history = new History(snapshot, changes) + const chunk = new Chunk(history, 5) // startVersion 5 + + // Cache the chunk + await redisBackend.setCurrentChunk(projectId, chunk) + + // Retrieve the cached chunk + const cachedChunk = await redisBackend.getCurrentChunk(projectId) + + expect(cachedChunk).to.not.be.null + expect(cachedChunk.getStartVersion()).to.equal(5) + expect(cachedChunk.getEndVersion()).to.equal(6) + expect(cachedChunk).to.deep.equal(chunk) + }) + }) + + describe('setCurrentChunk', function () { + it('should successfully cache a chunk', async function () { + // Create a sample chunk + const snapshot = new Snapshot() + const changes = [ + new Change( + [new AddFileOperation('test.tex', File.fromString('Hello World'))], + new Date(), + [] + ), + ] + const history = new History(snapshot, changes) + const chunk = new Chunk(history, 5) // startVersion 5 + + // Cache the chunk + await redisBackend.setCurrentChunk(projectId, chunk) + + // Verify the chunk was cached correctly by retrieving it + const cachedChunk = await redisBackend.getCurrentChunk(projectId) + expect(cachedChunk).to.not.be.null + expect(cachedChunk.getStartVersion()).to.equal(5) + expect(cachedChunk.getEndVersion()).to.equal(6) + expect(cachedChunk).to.deep.equal(chunk) + + // Verify that the chunk was stored correctly using the chunk metadata + const chunkMetadata = + await redisBackend.getCurrentChunkMetadata(projectId) + expect(chunkMetadata).to.not.be.null + expect(chunkMetadata.startVersion).to.equal(5) + expect(chunkMetadata.changesCount).to.equal(1) + }) + + it('should correctly handle a chunk with zero changes', async function () { + // Create a sample chunk with no changes + const snapshot = new Snapshot() + const changes = [] + const history = new History(snapshot, changes) + const chunk = new Chunk(history, 10) // startVersion 10 + + // Cache the chunk + await redisBackend.setCurrentChunk(projectId, chunk) + + // Retrieve the cached chunk + const cachedChunk = await redisBackend.getCurrentChunk(projectId) + + expect(cachedChunk).to.not.be.null + expect(cachedChunk.getStartVersion()).to.equal(10) + expect(cachedChunk.getEndVersion()).to.equal(10) // End version should equal start version with no changes + expect(cachedChunk.history.changes.length).to.equal(0) + expect(cachedChunk).to.deep.equal(chunk) + }) + }) + + describe('updating already cached chunks', function () { + it('should replace a chunk with a longer chunk', async function () { + // Set initial chunk with one change + const snapshotA = new Snapshot() + const changesA = [ + new Change( + [ + new AddFileOperation( + 'test.tex', + File.fromString('Initial content') + ), + ], + new Date(), + [] + ), + ] + const historyA = new History(snapshotA, changesA) + const chunkA = new Chunk(historyA, 10) + + await redisBackend.setCurrentChunk(projectId, chunkA) + + // Verify the initial chunk was cached + const cachedChunkA = await redisBackend.getCurrentChunk(projectId) + expect(cachedChunkA.getStartVersion()).to.equal(10) + expect(cachedChunkA.getEndVersion()).to.equal(11) + expect(cachedChunkA.history.changes.length).to.equal(1) + + // Create a longer chunk (with more changes) + const snapshotB = new Snapshot() + const changesB = [ + new Change( + [new AddFileOperation('test1.tex', File.fromString('Content 1'))], + new Date(), + [] + ), + new Change( + [new AddFileOperation('test2.tex', File.fromString('Content 2'))], + new Date(), + [] + ), + new Change( + [new AddFileOperation('test3.tex', File.fromString('Content 3'))], + new Date(), + [] + ), + ] + const historyB = new History(snapshotB, changesB) + const chunkB = new Chunk(historyB, 15) + + // Replace the cached chunk + await redisBackend.setCurrentChunk(projectId, chunkB) + + // Verify the new chunk replaced the old one + const cachedChunkB = await redisBackend.getCurrentChunk(projectId) + expect(cachedChunkB).to.not.be.null + expect(cachedChunkB.getStartVersion()).to.equal(15) + expect(cachedChunkB.getEndVersion()).to.equal(18) + expect(cachedChunkB.history.changes.length).to.equal(3) + expect(cachedChunkB).to.deep.equal(chunkB) + + // Verify the metadata was updated + const updatedMetadata = + await redisBackend.getCurrentChunkMetadata(projectId) + expect(updatedMetadata.startVersion).to.equal(15) + expect(updatedMetadata.changesCount).to.equal(3) + }) + + it('should replace a chunk with a shorter chunk', async function () { + // Set initial chunk with three changes + const snapshotA = new Snapshot() + const changesA = [ + new Change( + [new AddFileOperation('file1.tex', File.fromString('Content 1'))], + new Date(), + [] + ), + new Change( + [new AddFileOperation('file2.tex', File.fromString('Content 2'))], + new Date(), + [] + ), + new Change( + [new AddFileOperation('file3.tex', File.fromString('Content 3'))], + new Date(), + [] + ), + ] + const historyA = new History(snapshotA, changesA) + const chunkA = new Chunk(historyA, 20) + + await redisBackend.setCurrentChunk(projectId, chunkA) + + // Verify the initial chunk was cached + const cachedChunkA = await redisBackend.getCurrentChunk(projectId) + expect(cachedChunkA.getStartVersion()).to.equal(20) + expect(cachedChunkA.getEndVersion()).to.equal(23) + expect(cachedChunkA.history.changes.length).to.equal(3) + + // Create a shorter chunk (with fewer changes) + const snapshotB = new Snapshot() + const changesB = [ + new Change( + [new AddFileOperation('new.tex', File.fromString('New content'))], + new Date(), + [] + ), + ] + const historyB = new History(snapshotB, changesB) + const chunkB = new Chunk(historyB, 30) + + // Replace the cached chunk + await redisBackend.setCurrentChunk(projectId, chunkB) + + // Verify the new chunk replaced the old one + const cachedChunkB = await redisBackend.getCurrentChunk(projectId) + expect(cachedChunkB).to.not.be.null + expect(cachedChunkB.getStartVersion()).to.equal(30) + expect(cachedChunkB.getEndVersion()).to.equal(31) + expect(cachedChunkB.history.changes.length).to.equal(1) + expect(cachedChunkB).to.deep.equal(chunkB) + + // Verify the metadata was updated + const updatedMetadata = + await redisBackend.getCurrentChunkMetadata(projectId) + expect(updatedMetadata.startVersion).to.equal(30) + expect(updatedMetadata.changesCount).to.equal(1) + }) + + it('should replace a chunk with a zero-length chunk', async function () { + // Set initial chunk with changes + const snapshotA = new Snapshot() + const changesA = [ + new Change( + [new AddFileOperation('file1.tex', File.fromString('Content 1'))], + new Date(), + [] + ), + new Change( + [new AddFileOperation('file2.tex', File.fromString('Content 2'))], + new Date(), + [] + ), + ] + const historyA = new History(snapshotA, changesA) + const chunkA = new Chunk(historyA, 25) + + await redisBackend.setCurrentChunk(projectId, chunkA) + + // Verify the initial chunk was cached + const cachedChunkA = await redisBackend.getCurrentChunk(projectId) + expect(cachedChunkA.getStartVersion()).to.equal(25) + expect(cachedChunkA.getEndVersion()).to.equal(27) + expect(cachedChunkA.history.changes.length).to.equal(2) + + // Create a zero-length chunk (with no changes) + const snapshotB = new Snapshot() + const changesB = [] + const historyB = new History(snapshotB, changesB) + const chunkB = new Chunk(historyB, 40) + + // Replace the cached chunk + await redisBackend.setCurrentChunk(projectId, chunkB) + + // Verify the new chunk replaced the old one + const cachedChunkB = await redisBackend.getCurrentChunk(projectId) + expect(cachedChunkB).to.not.be.null + expect(cachedChunkB.getStartVersion()).to.equal(40) + expect(cachedChunkB.getEndVersion()).to.equal(40) // Start version equals end version with no changes + expect(cachedChunkB.history.changes.length).to.equal(0) + expect(cachedChunkB).to.deep.equal(chunkB) + + // Verify the metadata was updated + const updatedMetadata = + await redisBackend.getCurrentChunkMetadata(projectId) + expect(updatedMetadata.startVersion).to.equal(40) + expect(updatedMetadata.changesCount).to.equal(0) + }) + + it('should replace a zero-length chunk with a non-empty chunk', async function () { + // Set initial empty chunk + const snapshotA = new Snapshot() + const changesA = [] + const historyA = new History(snapshotA, changesA) + const chunkA = new Chunk(historyA, 50) + + await redisBackend.setCurrentChunk(projectId, chunkA) + + // Verify the initial chunk was cached + const cachedChunkA = await redisBackend.getCurrentChunk(projectId) + expect(cachedChunkA.getStartVersion()).to.equal(50) + expect(cachedChunkA.getEndVersion()).to.equal(50) + expect(cachedChunkA.history.changes.length).to.equal(0) + + // Create a non-empty chunk + const snapshotB = new Snapshot() + const changesB = [ + new Change( + [new AddFileOperation('newfile.tex', File.fromString('New content'))], + new Date(), + [] + ), + new Change( + [ + new AddFileOperation( + 'another.tex', + File.fromString('Another file') + ), + ], + new Date(), + [] + ), + ] + const historyB = new History(snapshotB, changesB) + const chunkB = new Chunk(historyB, 60) + + // Replace the cached chunk + await redisBackend.setCurrentChunk(projectId, chunkB) + + // Verify the new chunk replaced the old one + const cachedChunkB = await redisBackend.getCurrentChunk(projectId) + expect(cachedChunkB).to.not.be.null + expect(cachedChunkB.getStartVersion()).to.equal(60) + expect(cachedChunkB.getEndVersion()).to.equal(62) + expect(cachedChunkB.history.changes.length).to.equal(2) + expect(cachedChunkB).to.deep.equal(chunkB) + + // Verify the metadata was updated + const updatedMetadata = + await redisBackend.getCurrentChunkMetadata(projectId) + expect(updatedMetadata.startVersion).to.equal(60) + expect(updatedMetadata.changesCount).to.equal(2) + }) + }) + + describe('checkCacheValidity', function () { + it('should return true when versions match', function () { + const snapshotA = new Snapshot() + const historyA = new History(snapshotA, []) + const chunkA = new Chunk(historyA, 10) + chunkA.pushChanges([ + new Change( + [new AddFileOperation('test.tex', File.fromString('Hello'))], + new Date(), + [] + ), + ]) + + const snapshotB = new Snapshot() + const historyB = new History(snapshotB, []) + const chunkB = new Chunk(historyB, 10) + chunkB.pushChanges([ + new Change( + [new AddFileOperation('test.tex', File.fromString('Hello'))], + new Date(), + [] + ), + ]) + + const isValid = redisBackend.checkCacheValidity(chunkA, chunkB) + expect(isValid).to.be.true + }) + + it('should return false when start versions differ', function () { + const snapshotA = new Snapshot() + const historyA = new History(snapshotA, []) + const chunkA = new Chunk(historyA, 10) + + const snapshotB = new Snapshot() + const historyB = new History(snapshotB, []) + const chunkB = new Chunk(historyB, 11) + + const isValid = redisBackend.checkCacheValidity(chunkA, chunkB) + expect(isValid).to.be.false + }) + + it('should return false when end versions differ', function () { + const snapshotA = new Snapshot() + const historyA = new History(snapshotA, []) + const chunkA = new Chunk(historyA, 10) + chunkA.pushChanges([ + new Change( + [new AddFileOperation('test.tex', File.fromString('Hello'))], + new Date(), + [] + ), + ]) + + const snapshotB = new Snapshot() + const historyB = new History(snapshotB, []) + const chunkB = new Chunk(historyB, 10) + chunkB.pushChanges([ + new Change( + [new AddFileOperation('test.tex', File.fromString('Hello'))], + new Date(), + [] + ), + new Change( + [new AddFileOperation('other.tex', File.fromString('World'))], + new Date(), + [] + ), + ]) + + const isValid = redisBackend.checkCacheValidity(chunkA, chunkB) + expect(isValid).to.be.false + }) + + it('should return false when cached chunk is null', function () { + const snapshotB = new Snapshot() + const historyB = new History(snapshotB, []) + const chunkB = new Chunk(historyB, 10) + + const isValid = redisBackend.checkCacheValidity(null, chunkB) + expect(isValid).to.be.false + }) + }) + + describe('compareChunks', function () { + it('should return true when chunks are identical', function () { + // Create two identical chunks + const snapshot = new Snapshot() + const changes = [ + new Change( + [new AddFileOperation('test.tex', File.fromString('Hello World'))], + new Date('2025-04-10T12:00:00Z'), // Using fixed date for consistent comparison + [] + ), + ] + const history1 = new History(snapshot, changes) + const chunk1 = new Chunk(history1, 5) + + // Create a separate but identical chunk + const snapshot2 = new Snapshot() + const changes2 = [ + new Change( + [new AddFileOperation('test.tex', File.fromString('Hello World'))], + new Date('2025-04-10T12:00:00Z'), // Using same fixed date + [] + ), + ] + const history2 = new History(snapshot2, changes2) + const chunk2 = new Chunk(history2, 5) + + const result = redisBackend.compareChunks(projectId, chunk1, chunk2) + expect(result).to.be.true + }) + + it('should return false when chunks differ', function () { + // Create first chunk + const snapshot1 = new Snapshot() + const changes1 = [ + new Change( + [new AddFileOperation('test.tex', File.fromString('Hello World'))], + new Date('2025-04-10T12:00:00Z'), + [] + ), + ] + const history1 = new History(snapshot1, changes1) + const chunk1 = new Chunk(history1, 5) + + // Create a different chunk (different content) + const snapshot2 = new Snapshot() + const changes2 = [ + new Change( + [ + new AddFileOperation( + 'test.tex', + File.fromString('Different content') + ), + ], + new Date('2025-04-10T12:00:00Z'), + [] + ), + ] + const history2 = new History(snapshot2, changes2) + const chunk2 = new Chunk(history2, 5) + + const result = redisBackend.compareChunks(projectId, chunk1, chunk2) + expect(result).to.be.false + }) + + it('should return false when one chunk is null', function () { + // Create a chunk + const snapshot = new Snapshot() + const changes = [ + new Change( + [new AddFileOperation('test.tex', File.fromString('Hello World'))], + new Date('2025-04-10T12:00:00Z'), + [] + ), + ] + const history = new History(snapshot, changes) + const chunk = new Chunk(history, 5) + + const resultWithNullCached = redisBackend.compareChunks( + projectId, + null, + chunk + ) + expect(resultWithNullCached).to.be.false + + const resultWithNullCurrent = redisBackend.compareChunks( + projectId, + chunk, + null + ) + expect(resultWithNullCurrent).to.be.false + }) + + it('should return false when chunks have different start versions', function () { + // Create first chunk with start version 5 + const snapshot1 = new Snapshot() + const changes1 = [ + new Change( + [new AddFileOperation('test.tex', File.fromString('Hello World'))], + new Date('2025-04-10T12:00:00Z'), + [] + ), + ] + const history1 = new History(snapshot1, changes1) + const chunk1 = new Chunk(history1, 5) + + // Create second chunk with identical content but different start version (10) + const snapshot2 = new Snapshot() + const changes2 = [ + new Change( + [new AddFileOperation('test.tex', File.fromString('Hello World'))], + new Date('2025-04-10T12:00:00Z'), + [] + ), + ] + const history2 = new History(snapshot2, changes2) + const chunk2 = new Chunk(history2, 10) + + const result = redisBackend.compareChunks(projectId, chunk1, chunk2) + expect(result).to.be.false + }) + }) + + describe('integration with redis', function () { + it('should store and retrieve complex chunks correctly', async function () { + // Create a more complex chunk + const snapshot = new Snapshot() + const changes = [ + new Change( + [new AddFileOperation('file1.tex', File.fromString('Content 1'))], + new Date(), + [1234] + ), + new Change( + [new AddFileOperation('file2.tex', File.fromString('Content 2'))], + new Date(), + null, + new Origin('test-origin'), + ['5a296963ad5e82432674c839', null], + '123.4', + new V2DocVersions({ + 'random-doc-id': { pathname: 'file2.tex', v: 123 }, + }) + ), + new Change( + [new AddFileOperation('file3.tex', File.fromString('Content 3'))], + new Date(), + [] + ), + ] + const history = new History(snapshot, changes) + const chunk = new Chunk(history, 20) + + // Cache the chunk + await redisBackend.setCurrentChunk(projectId, chunk) + + // Retrieve the cached chunk + const cachedChunk = await redisBackend.getCurrentChunk(projectId) + + expect(cachedChunk.getStartVersion()).to.equal(20) + expect(cachedChunk.getEndVersion()).to.equal(23) + expect(cachedChunk).to.deep.equal(chunk) + expect(cachedChunk.history.changes.length).to.equal(3) + + // Check that the operations were preserved correctly + const retrievedChanges = cachedChunk.history.changes + expect(retrievedChanges[0].getOperations()[0].getPathname()).to.equal( + 'file1.tex' + ) + expect(retrievedChanges[1].getOperations()[0].getPathname()).to.equal( + 'file2.tex' + ) + expect(retrievedChanges[2].getOperations()[0].getPathname()).to.equal( + 'file3.tex' + ) + + // Check that the chunk was stored correctly using the chunk metadata + const chunkMetadata = + await redisBackend.getCurrentChunkMetadata(projectId) + expect(chunkMetadata).to.not.be.null + expect(chunkMetadata.startVersion).to.equal(20) + expect(chunkMetadata.changesCount).to.equal(3) + }) + }) +}) diff --git a/services/history-v1/test/acceptance/js/storage/support/cleanup.js b/services/history-v1/test/acceptance/js/storage/support/cleanup.js index fd0fcbbcb6..55829bef13 100644 --- a/services/history-v1/test/acceptance/js/storage/support/cleanup.js +++ b/services/history-v1/test/acceptance/js/storage/support/cleanup.js @@ -1,6 +1,6 @@ const config = require('config') -const { knex, persistor, mongodb } = require('../../../../../storage') +const { knex, persistor, mongodb, redis } = require('../../../../../storage') const { S3Persistor } = require('@overleaf/object-persistor/src/S3Persistor') const POSTGRES_TABLES = [ @@ -43,6 +43,11 @@ async function cleanupMongo() { } } +async function cleanupRedis() { + await redis.rclientHistory.flushdb() + await redis.rclientLock.flushdb() +} + async function cleanupPersistor() { await Promise.all([ clearBucket(config.get('blobStore.globalBucket')), @@ -82,6 +87,7 @@ async function cleanupEverything() { cleanupMongo(), cleanupPersistor(), cleanupBackup(), + cleanupRedis(), ]) } @@ -90,5 +96,6 @@ module.exports = { mongo: cleanupMongo, persistor: cleanupPersistor, backup: cleanupBackup, + redis: cleanupRedis, everything: cleanupEverything, } diff --git a/services/history-v1/test/setup.js b/services/history-v1/test/setup.js index b38753b280..5b4947affc 100644 --- a/services/history-v1/test/setup.js +++ b/services/history-v1/test/setup.js @@ -2,7 +2,7 @@ const chai = require('chai') const chaiAsPromised = require('chai-as-promised') const config = require('config') const fetch = require('node-fetch') -const { knex, mongodb } = require('../storage') +const { knex, mongodb, redis } = require('../storage') // ensure every ObjectId has the id string as a property for correct comparisons require('mongodb').ObjectId.cacheHexString = true @@ -53,6 +53,7 @@ async function createGcsBuckets() { // can exit. async function tearDownConnectionPool() { await knex.destroy() + await redis.disconnect() } module.exports = {