/**
 * Convert every doc of a project that exceeds `Settings.max_doc_length` into
 * a file, one doc at a time.
 *
 * @param {*} projectId - passed to `getAllDocs` and `convertDocToFile`
 * @param {*} userId - passed to `convertDocToFile` as the acting user
 * @returns {Promise<number>} how many docs were converted
 */
async function convertLargeDocsToFile(projectId, userId) {
  const docs = await ProjectEntityHandler.promises.getAllDocs(projectId)
  let convertedDocCount = 0
  for (const doc of Object.values(docs)) {
    // The length of the serialised lines is a cheap upper bound on the doc
    // size (quotes, commas and brackets only add characters). It must be a
    // number: passing the string itself makes the `<=` check in
    // docIsTooLarge compare against NaN, which disables the fast path.
    const sizeBound = JSON.stringify(doc.lines).length
    if (docIsTooLarge(sizeBound, doc.lines, Settings.max_doc_length)) {
      await ProjectEntityUpdateHandler.promises.convertDocToFile(
        projectId,
        doc._id,
        userId
      )
      convertedDocCount++
    }
  }
  return convertedDocCount
}

// check whether the total size of the document in characters exceeds the
// maxDocLength.
//
// Copied from document-updater:
// https://github.com/overleaf/internal/blob/74adfbebda5f3c2c37d9937f0db5c4106ecde492/services/document-updater/app/js/Limits.js#L18
/**
 * Report whether a doc is longer than `maxDocLength`, counting each line's
 * length plus one character for its newline.
 *
 * @param {number} estimatedSize - upper bound on the doc size; when it is
 *   already within the limit the exact count is skipped
 * @param {string[]} lines - doc content, one entry per line (only each
 *   entry's `.length` is read)
 * @param {number} maxDocLength - largest allowed size
 * @returns {boolean} true when the counted size exceeds `maxDocLength`
 */
function docIsTooLarge(estimatedSize, lines, maxDocLength) {
  if (estimatedSize <= maxDocLength) {
    return false // definitely under the limit, no need to calculate the total size
  }
  // calculate the total size, bailing out early if the size limit is reached
  let size = 0
  for (const line of lines) {
    size += line.length + 1 // include the newline
    if (size > maxDocLength) return true
  }
  // since we didn't hit the limit in the loop, the document is within the allowed length
  return false
}

// services/web/scripts/history/upgrade_none_with_conversion_if_sl_history.js

// User id is required to move large documents to filestore
// (it must come from its own variable, not from PROJECT_ID)
const USER_ID = process.env.USER_ID
// Opt-in switch for converting over-long docs to files before the history
// migration; deliberately independent of DRY_RUN.
const CONVERT_LARGE_DOCS_TO_FILE =
  process.env.CONVERT_LARGE_DOCS_TO_FILE === 'true'
require('../../modules/admin-panel/app/src/ProjectHistoryController') +const HistoryUpgradeHelper = require('./HistoryUpgradeHelper') console.log({ DRY_RUN, @@ -110,6 +115,13 @@ async function doUpgradeForNoneWithConversion(project) { const projectIdString = project._id.toString() if (!DRY_RUN) { try { + if (CONVERT_LARGE_DOCS_TO_FILE) { + const convertedDocCount = + await HistoryUpgradeHelper.convertLargeDocsToFile(projectId, USER_ID) + console.log( + `converted ${convertedDocCount} large docs to binary files for project ${projectId}` + ) + } await ProjectHistoryController.migrateProjectHistory(projectIdString) } catch (err) { // if migrateProjectHistory fails, it cleans up by deleting