From 56f41c77ff1ad05485f0892b325e36a2e0107bdd Mon Sep 17 00:00:00 2001 From: Jakob Ackermann Date: Mon, 18 Aug 2025 16:28:21 +0200 Subject: [PATCH] [history-v1] add migrations record after full binary file migration (#27932) * [history-v1] add migrations record after full binary file migration * [server-pro] add hotfix 5.5.5 * [server-ce] test: build hotfix 5.5.5 and use it in tests GitOrigin-RevId: fb84e5710c59f466a3305de5f32f78e0ac9ce15d --- server-ce/hotfix/5.5.5/Dockerfile | 7 ++ server-ce/hotfix/5.5.5/pr_27932.patch | 85 +++++++++++++++++ server-ce/test/Makefile | 6 ++ server-ce/test/filestore-migration.spec.ts | 20 +++- server-ce/test/upgrading.spec.ts | 25 ++++- .../storage/scripts/back_fill_file_hash.mjs | 63 ++++++++++++- .../js/storage/back_fill_file_hash.test.mjs | 94 +++++++++++++++---- .../acceptance/js/storage/support/cleanup.js | 1 + ...519101128_binary_files_migration_check.mjs | 41 ++++++++ services/web/migrations/lib/helpers.mjs | 7 +- 10 files changed, 323 insertions(+), 26 deletions(-) create mode 100644 server-ce/hotfix/5.5.5/Dockerfile create mode 100644 server-ce/hotfix/5.5.5/pr_27932.patch create mode 100644 services/web/migrations/20250519101128_binary_files_migration_check.mjs diff --git a/server-ce/hotfix/5.5.5/Dockerfile b/server-ce/hotfix/5.5.5/Dockerfile new file mode 100644 index 0000000000..9fe1c28064 --- /dev/null +++ b/server-ce/hotfix/5.5.5/Dockerfile @@ -0,0 +1,7 @@ +FROM sharelatex/sharelatex:5.5.4 + +# ../../bin/import_pr_patch.sh 27932 +# Remove web migrations changes +# Remove test changes +COPY *.patch . +RUN bash -ec 'for p in *.patch; do echo "=== Applying $p ==="; patch -p1 < "$p" && rm $p; done' diff --git a/server-ce/hotfix/5.5.5/pr_27932.patch b/server-ce/hotfix/5.5.5/pr_27932.patch new file mode 100644 index 0000000000..f2eabfe706 --- /dev/null +++ b/server-ce/hotfix/5.5.5/pr_27932.patch @@ -0,0 +1,85 @@ +diff --git a/services/history-v1/storage/scripts/back_fill_file_hash.mjs b/services/history-v1/storage/scripts/back_fill_file_hash.mjs +index 33962c5da7d4..8b25fb8bd603 100644 +--- a/services/history-v1/storage/scripts/back_fill_file_hash.mjs ++++ b/services/history-v1/storage/scripts/back_fill_file_hash.mjs +@@ -78,11 +78,29 @@ ObjectId.cacheHexString = true + * @property {Blob} [blob] + */ + ++/** ++ * Start and end for range. ++ * @type {Date} ++ */ ++const PUBLIC_LAUNCH_DATE = new Date('2012-01-01T00:00:00Z') ++const DEFAULT_BATCH_RANGE_START_DATE = PUBLIC_LAUNCH_DATE ++const DEFAULT_BATCH_RANGE_END_DATE = new Date() ++ ++function usesDefaultBatchRange() { ++ return ( ++ BATCH_RANGE_START === ++ objectIdFromInput( ++ DEFAULT_BATCH_RANGE_START_DATE.toISOString() ++ ).toString() && ++ BATCH_RANGE_END === ++ objectIdFromInput(DEFAULT_BATCH_RANGE_END_DATE.toISOString()).toString() ++ ) ++} ++ + /** + * @return {{PROJECT_IDS_FROM: string, PROCESS_HASHED_FILES: boolean, LOGGING_IDENTIFIER: string, BATCH_RANGE_START: string, BATCH_RANGE_END: string, PROCESS_NON_DELETED_PROJECTS: boolean, PROCESS_DELETED_PROJECTS: boolean, PROCESS_BLOBS: boolean, DRY_RUN: boolean, OUTPUT_FILE: string, DISPLAY_REPORT: boolean, CONCURRENCY: number, CONCURRENT_BATCHES: number, RETRIES: number, RETRY_DELAY_MS: number, RETRY_FILESTORE_404: boolean, BUFFER_DIR_PREFIX: string, STREAM_HIGH_WATER_MARK: number, LOGGING_INTERVAL: number, SLEEP_BEFORE_EXIT: number }} + */ + function parseArgs() { +- const PUBLIC_LAUNCH_DATE = new Date('2012-01-01T00:00:00Z') + const DEFAULT_OUTPUT_FILE = `/var/log/overleaf/file-migration-${new Date() + .toISOString() + .replace(/[:.]/g, '_')}.log` +@@ -1475,6 +1493,49 @@ try { + ) + code++ + } ++ console.warn('-'.repeat(79)) ++ if (code === 0) { ++ const allProcessed = ++ !DRY_RUN && ++ PROCESS_NON_DELETED_PROJECTS && ++ PROCESS_DELETED_PROJECTS && ++ PROCESS_HASHED_FILES && ++ !PROJECT_IDS_FROM && ++ usesDefaultBatchRange() ++ if (allProcessed) { ++ await db ++ .collection('migrations') ++ .updateOne( ++ { name: '20250519101128_binary_files_migration' }, ++ { $set: { migratedAt: new Date(DEFAULT_BATCH_RANGE_END_DATE) } }, ++ { upsert: true } ++ ) ++ console.warn('The binary files migration succeeded.') ++ console.warn( ++ 'You can now proceed to OVERLEAF_FILESTORE_MIGRATION_LEVEL=2.' ++ ) ++ } else { ++ console.warn( ++ 'The binary files migration succeeded on a subset of files (at least one of --dry-run, --skip-hashed-files, --from-file, --BATCH_RANGE_START or --BATCH_RANGE_END is set and --all is not set).' ++ ) ++ console.warn( ++ 'Once you are done with all the partial runs, you need to run the migration again on all projects/files to ensure that all files are migrated into the full project history system.' ++ ) ++ console.warn('The full run will unlock the upgrade to Server Pro 6.0.') ++ } ++ } else { ++ console.warn('The binary files migration failed, see above.') ++ console.warn( ++ 'Please review the failures and check the docs on remediating the failures.' ++ ) ++ console.warn( ++ 'Docs: https://docs.overleaf.com/on-premises/release-notes/release-notes-5.x.x/binary-files-migration#troubleshooting' ++ ) ++ console.warn( ++ 'In case there is not solution available, please reach out to support as detailed in the docs.' ++ ) ++ } ++ console.warn('-'.repeat(79)) + await setTimeout(SLEEP_BEFORE_EXIT) + process.exit(code) + } catch (err) { diff --git a/server-ce/test/Makefile b/server-ce/test/Makefile index 6b77986185..8a821845c0 100644 --- a/server-ce/test/Makefile +++ b/server-ce/test/Makefile @@ -82,6 +82,12 @@ prefetch_old_5_0: docker pull $(IMAGE_TAG_PRO:main=5.0.1-RC1) docker pull $(IMAGE_TAG_PRO:main=5.0) +prefetch_custom: build_hotfix_5_5_5 +build_hotfix_5_5_5: + docker pull $(IMAGE_TAG_PRO:main=5.5.4) + docker tag $(IMAGE_TAG_PRO:main=5.5.4) quay.io/sharelatex/sharelatex-pro:5.5.4 + cd ../../server-pro/hotfix/5.5.5 && docker build -t $(IMAGE_TAG_PRO:main=5.5.5) . + # Google Cloud Build runs on a very ancient Docker version that does not support the subdir flag. # Use services -> mailtrap -> build -> context = https://github.com/dbck/docker-mailtrap.git#v1.5.0:build in docker-compose.yml eventually. prefetch_default_compose_build: build_mailtrap diff --git a/server-ce/test/filestore-migration.spec.ts b/server-ce/test/filestore-migration.spec.ts index 65170230b6..68dbb9b72a 100644 --- a/server-ce/test/filestore-migration.spec.ts +++ b/server-ce/test/filestore-migration.spec.ts @@ -232,11 +232,13 @@ describe('filestore migration', function () { // ------------------------------------------ // Server Pro 5.x + mongo upgrade 6 -> 7 -> 8 startWith({ + version: '5.5.5', pro: true, withDataDir: true, mongoVersion: '6.0', }) startWith({ + version: '5.5.5', pro: true, withDataDir: true, mongoVersion: '7.0', @@ -245,6 +247,7 @@ describe('filestore migration', function () { await setMongoFeatureCompatibilityVersion('7.0') }) startWith({ + version: '5.5.5', pro: true, withDataDir: true, // implicit mongo upgrade to 8.0 @@ -254,7 +257,7 @@ describe('filestore migration', function () { }) } else { // 5.x - startWith({ pro: true, withDataDir: true }) + startWith({ version: '5.5.5', pro: true, withDataDir: true }) defaultImage = 'frog.jpg' ensureUserExists({ email }) before(function () { @@ -337,12 +340,13 @@ describe('filestore migration', function () { } describe('OVERLEAF_FILESTORE_MIGRATION_LEVEL not set', function () { - startWith({ pro: true, withDataDir: true, vars: {} }) + startWith({ version: '5.5.5', pro: true, withDataDir: true, vars: {} }) checkFilesAreAccessible() }) describe('OVERLEAF_FILESTORE_MIGRATION_LEVEL=0', function () { startWith({ + version: '5.5.5', pro: true, withDataDir: true, vars: { OVERLEAF_FILESTORE_MIGRATION_LEVEL: '0' }, @@ -351,6 +355,7 @@ describe('filestore migration', function () { describe('OVERLEAF_FILESTORE_MIGRATION_LEVEL=1', function () { startWith({ + version: '5.5.5', pro: true, withDataDir: true, vars: { OVERLEAF_FILESTORE_MIGRATION_LEVEL: '1' }, @@ -359,6 +364,7 @@ describe('filestore migration', function () { describe('OVERLEAF_FILESTORE_MIGRATION_LEVEL=2', function () { startWith({ + version: '5.5.5', pro: true, withDataDir: true, vars: { OVERLEAF_FILESTORE_MIGRATION_LEVEL: '1' }, @@ -371,6 +377,7 @@ describe('filestore migration', function () { }) }) startWith({ + version: '5.5.5', pro: true, withDataDir: true, vars: { OVERLEAF_FILESTORE_MIGRATION_LEVEL: '2' }, @@ -382,6 +389,15 @@ describe('filestore migration', function () { await purgeFilestoreData() }) checkFilesAreAccessible() + + describe('latest', function () { + startWith({ + pro: true, + withDataDir: true, + vars: { OVERLEAF_FILESTORE_MIGRATION_LEVEL: '2' }, + }) + checkFilesAreAccessible() + }) }) }) }) diff --git a/server-ce/test/upgrading.spec.ts b/server-ce/test/upgrading.spec.ts index 00390cd80e..5edc260d5f 100644 --- a/server-ce/test/upgrading.spec.ts +++ b/server-ce/test/upgrading.spec.ts @@ -155,11 +155,31 @@ describe('Upgrading', function () { }, newProjectButtonMatcher: /create first project/i, } + const optionsBinaryFilesMigration = { + version: '5.5.5', + hook() { + before(async function () { + await runScript({ + cwd: 'services/history-v1', + script: 'storage/scripts/back_fill_file_hash.mjs', + args: ['--all'], + }) + }) + }, + } describe('from 4.2 to latest', () => { - testUpgrade([optionsFourDotTwo, { version: 'latest' }]) + testUpgrade([ + optionsFourDotTwo, + optionsBinaryFilesMigration, + { version: 'latest' }, + ]) }) describe('from 5.0 to latest', () => { - testUpgrade([{ version: '5.0' }, { version: 'latest' }]) + testUpgrade([ + { version: '5.0' }, + optionsBinaryFilesMigration, + { version: 'latest' }, + ]) }) describe('doc version recovery', () => { testUpgrade([ @@ -190,6 +210,7 @@ describe('Upgrading', function () { }) }, }, + optionsBinaryFilesMigration, { version: 'latest', hook() { diff --git a/services/history-v1/storage/scripts/back_fill_file_hash.mjs b/services/history-v1/storage/scripts/back_fill_file_hash.mjs index 33962c5da7..8b25fb8bd6 100644 --- a/services/history-v1/storage/scripts/back_fill_file_hash.mjs +++ b/services/history-v1/storage/scripts/back_fill_file_hash.mjs @@ -78,11 +78,29 @@ ObjectId.cacheHexString = true * @property {Blob} [blob] */ +/** + * Start and end for range. + * @type {Date} + */ +const PUBLIC_LAUNCH_DATE = new Date('2012-01-01T00:00:00Z') +const DEFAULT_BATCH_RANGE_START_DATE = PUBLIC_LAUNCH_DATE +const DEFAULT_BATCH_RANGE_END_DATE = new Date() + +function usesDefaultBatchRange() { + return ( + BATCH_RANGE_START === + objectIdFromInput( + DEFAULT_BATCH_RANGE_START_DATE.toISOString() + ).toString() && + BATCH_RANGE_END === + objectIdFromInput(DEFAULT_BATCH_RANGE_END_DATE.toISOString()).toString() + ) +} + /** * @return {{PROJECT_IDS_FROM: string, PROCESS_HASHED_FILES: boolean, LOGGING_IDENTIFIER: string, BATCH_RANGE_START: string, BATCH_RANGE_END: string, PROCESS_NON_DELETED_PROJECTS: boolean, PROCESS_DELETED_PROJECTS: boolean, PROCESS_BLOBS: boolean, DRY_RUN: boolean, OUTPUT_FILE: string, DISPLAY_REPORT: boolean, CONCURRENCY: number, CONCURRENT_BATCHES: number, RETRIES: number, RETRY_DELAY_MS: number, RETRY_FILESTORE_404: boolean, BUFFER_DIR_PREFIX: string, STREAM_HIGH_WATER_MARK: number, LOGGING_INTERVAL: number, SLEEP_BEFORE_EXIT: number }} */ function parseArgs() { - const PUBLIC_LAUNCH_DATE = new Date('2012-01-01T00:00:00Z') const DEFAULT_OUTPUT_FILE = `/var/log/overleaf/file-migration-${new Date() .toISOString() .replace(/[:.]/g, '_')}.log` @@ -1475,6 +1493,49 @@ try { ) code++ } + console.warn('-'.repeat(79)) + if (code === 0) { + const allProcessed = + !DRY_RUN && + PROCESS_NON_DELETED_PROJECTS && + PROCESS_DELETED_PROJECTS && + PROCESS_HASHED_FILES && + !PROJECT_IDS_FROM && + usesDefaultBatchRange() + if (allProcessed) { + await db + .collection('migrations') + .updateOne( + { name: '20250519101128_binary_files_migration' }, + { $set: { migratedAt: new Date(DEFAULT_BATCH_RANGE_END_DATE) } }, + { upsert: true } + ) + console.warn('The binary files migration succeeded.') + console.warn( + 'You can now proceed to OVERLEAF_FILESTORE_MIGRATION_LEVEL=2.' + ) + } else { + console.warn( + 'The binary files migration succeeded on a subset of files (at least one of --dry-run, --skip-hashed-files, --from-file, --BATCH_RANGE_START or --BATCH_RANGE_END is set and --all is not set).' + ) + console.warn( + 'Once you are done with all the partial runs, you need to run the migration again on all projects/files to ensure that all files are migrated into the full project history system.' + ) + console.warn('The full run will unlock the upgrade to Server Pro 6.0.') + } + } else { + console.warn('The binary files migration failed, see above.') + console.warn( + 'Please review the failures and check the docs on remediating the failures.' + ) + console.warn( + 'Docs: https://docs.overleaf.com/on-premises/release-notes/release-notes-5.x.x/binary-files-migration#troubleshooting' + ) + console.warn( + 'In case there is not solution available, please reach out to support as detailed in the docs.' + ) + } + console.warn('-'.repeat(79)) await setTimeout(SLEEP_BEFORE_EXIT) process.exit(code) } catch (err) { diff --git a/services/history-v1/test/acceptance/js/storage/back_fill_file_hash.test.mjs b/services/history-v1/test/acceptance/js/storage/back_fill_file_hash.test.mjs index bfcf93228d..9f9168dddb 100644 --- a/services/history-v1/test/acceptance/js/storage/back_fill_file_hash.test.mjs +++ b/services/history-v1/test/acceptance/js/storage/back_fill_file_hash.test.mjs @@ -519,7 +519,7 @@ describe('back_fill_file_hash script', function () { /** * @param {Array} args * @param {boolean} shouldHaveWritten - * @return {Promise<{result, stats: any}>} + * @return {Promise<{result, stats: any, migrationCreated: boolean}>} */ async function tryRunScript(args = [], shouldHaveWritten) { const result = await rawRunScript([ @@ -553,7 +553,10 @@ describe('back_fill_file_hash script', function () { 'should not have any remaining deferred batches' ) delete stats.deferredBatches - return { stats, result } + const migrationCreated = !!(await db + .collection('migrations') + .findOne({ name: '20250519101128_binary_files_migration' })) + return { stats, result, migrationCreated } } /** @@ -562,12 +565,15 @@ describe('back_fill_file_hash script', function () { * @return {Promise<{result, stats: any}>} */ async function runScript(args = [], shouldHaveWritten = true) { - const { stats, result } = await tryRunScript(args, shouldHaveWritten) + const { stats, result, migrationCreated } = await tryRunScript( + args, + shouldHaveWritten + ) if (result.status !== 0) { console.log(result) expect(result).to.have.property('status', 0) } - return { stats, result } + return { stats, result, migrationCreated } } /** @@ -813,21 +819,7 @@ describe('back_fill_file_hash script', function () { !processHashedFiles ? ['--skip-hashed-files'] : [], false ) - let stats = { - ...STATS_ALL_ZERO, - // We still need to iterate over all the projects and blobs. - projects: 10, - blobs: 10, - - badFileTrees: 4, - } - if (processHashedFiles) { - stats = sumStats(stats, { - ...STATS_ALL_ZERO, - blobs: 2, - }) - } - expect(rerun.stats).deep.equal(stats) + expect(rerun.stats).deep.equal(statsForRerun(processHashedFiles)) }) } @@ -929,6 +921,24 @@ describe('back_fill_file_hash script', function () { STATS_UP_FROM_PROJECT1_ONWARD ) + function statsForRerun(processHashedFiles = true) { + let stats = { + ...STATS_ALL_ZERO, + // We still need to iterate over all the projects and blobs. + projects: 10, + blobs: 10, + + badFileTrees: 4, + } + if (processHashedFiles) { + stats = sumStats(stats, { + ...STATS_ALL_ZERO, + blobs: 2, + }) + } + return stats + } + describe('error cases', function () { beforeEach('prepare environment', prepareEnvironment) @@ -957,6 +967,9 @@ describe('back_fill_file_hash script', function () { 'failed to process file, trying again' ) expect(t1 - t0).to.be.below(10_000) + expect(result.stderr).to.include( + 'The binary files migration failed, see above.' + ) }) it('should retry on error', async function () { @@ -1021,6 +1034,14 @@ describe('back_fill_file_hash script', function () { it('should print stats', function () { expect(output.stats).deep.equal(STATS_ALL) }) + it('should print a warning message', () => { + expect(output.result.stderr).to.include( + 'The binary files migration succeeded on a subset of files' + ) + }) + it('should not create the migration', () => { + expect(output.migrationCreated).to.equal(false) + }) it('should have logged the bad file-tree', function () { expectBadFileTreeMessage( projectIdBadFileTree0, @@ -1081,6 +1102,14 @@ describe('back_fill_file_hash script', function () { badFileTrees: 4, }) }) + it('should print a success message', () => { + expect(output2.result.stderr).to.include( + 'The binary files migration succeeded.' + ) + }) + it('should create the migration', () => { + expect(output2.migrationCreated).to.equal(true) + }) commonAssertions(true) }) describe('report mode', function () { @@ -1260,7 +1289,30 @@ Sampled stats for deleted projects: it('should print stats for part 1', function () { expect(outputPart1.stats).to.deep.equal(STATS_UP_FROM_PROJECT1_ONWARD) }) + it('should warn about split run', () => { + expect(outputPart0.result.stderr).to.include( + 'The binary files migration succeeded on a subset of files' + ) + expect(outputPart1.result.stderr).to.include( + 'The binary files migration succeeded on a subset of files' + ) + }) commonAssertions() + + describe('with a full run afterwards', () => { + let output + before('run script', async function () { + output = await runScript([]) + }) + it('should print stats', function () { + expect(output.stats).to.deep.equal( + sumStats(statsForRerun(false), STATS_FILES_HASHED_EXTRA) + ) + }) + it('should create the migration', () => { + expect(output.migrationCreated).to.equal(true) + }) + }) }) describe('projectIds from file', function () { @@ -1337,6 +1389,10 @@ Sampled stats for deleted projects: expect(outputPart0.stats).to.deep.equal(STATS_UP_TO_PROJECT1) expect(outputPart1.stats).to.deep.equal(STATS_UP_FROM_PROJECT1_ONWARD) }) + it('should not create the migration ', () => { + expect(outputPart0.migrationCreated).to.equal(false) + expect(outputPart1.migrationCreated).to.equal(false) + }) commonAssertions() }) }) diff --git a/services/history-v1/test/acceptance/js/storage/support/cleanup.js b/services/history-v1/test/acceptance/js/storage/support/cleanup.js index 4df985d613..7612dc71c4 100644 --- a/services/history-v1/test/acceptance/js/storage/support/cleanup.js +++ b/services/history-v1/test/acceptance/js/storage/support/cleanup.js @@ -20,6 +20,7 @@ const MONGO_COLLECTIONS = [ 'deletedProjects', 'projects', 'projectHistoryBackedUpBlobs', + 'migrations', ] // make sure we don't delete the wrong data by accident diff --git a/services/web/migrations/20250519101128_binary_files_migration_check.mjs b/services/web/migrations/20250519101128_binary_files_migration_check.mjs new file mode 100644 index 0000000000..9720448b16 --- /dev/null +++ b/services/web/migrations/20250519101128_binary_files_migration_check.mjs @@ -0,0 +1,41 @@ +import { setTimeout } from 'node:timers/promises' +import { db } from '../app/src/infrastructure/mongodb.js' +import Helpers from './lib/helpers.mjs' + +const tags = ['server-ce', 'server-pro'] + +const migrate = async () => { + const nActiveProjects = await db.projects.estimatedDocumentCount() + const nDeletedProjects = await db.deletedProjects.estimatedDocumentCount() + if (nActiveProjects === 0 && nDeletedProjects === 0) { + // Empty database. Skip binary files migration check. + return + } + try { + await Helpers.assertDependency('20250519101128_binary_files_migration') + } catch (err) { + if (err instanceof Helpers.BadMigrationOrder) { + console.warn('-'.repeat(79)) + console.warn( + 'Please follow the binary files migration before upgrading to Server Pro/CE 6.0.' + ) + console.warn() + console.warn( + ' Docs: https://docs.overleaf.com/on-premises/release-notes/release-notes-5.x.x/binary-files-migration' + ) + console.warn() + console.warn('-'.repeat(79)) + await setTimeout(5_000) + process.exit(1) + } + throw err + } +} + +const rollback = async () => {} + +export default { + tags, + migrate, + rollback, +} diff --git a/services/web/migrations/lib/helpers.mjs b/services/web/migrations/lib/helpers.mjs index bcca1e2d26..f342ee333e 100644 --- a/services/web/migrations/lib/helpers.mjs +++ b/services/web/migrations/lib/helpers.mjs @@ -61,6 +61,8 @@ async function dropCollection(collectionName) { await collection.drop() } +class BadMigrationOrder extends Error {} + /** * Asserts that a dependent migration has run. Throws an error otherwise. * @@ -70,13 +72,14 @@ async function assertDependency(migrationName) { const migrations = await getCollectionInternal('migrations') const migration = await migrations.findOne({ name: migrationName }) if (migration == null) { - throw new Error( - `Bad migration order: ${migrationName} should run before this migration` + throw new BadMigrationOrder( + `${migrationName} should run before this migration` ) } } export default { + BadMigrationOrder, addIndexesToCollection, dropIndexesFromCollection, dropCollection,