From 4d265f862b30c80c1682ffd3bd8e01fafda858a1 Mon Sep 17 00:00:00 2001 From: Antoine Clausse Date: Mon, 7 Apr 2025 11:08:29 +0200 Subject: [PATCH] [web] Add another partial fix for `fix_malformed_filetree`: use `_id` instead of `path` to locate data (#24101) * Fix `fix_malformed_filetree`'s `fixName` * Fix findUniqueName with missing names in siblings * Add test showcasing another bug: shifted arrays in filetree folder * Update `removeNulls` to use `_id` * Update services/web/app/src/Features/Project/ProjectLocator.js Co-authored-by: Jakob Ackermann * Add FIXME about file names uniqueness * Rename `obj` to `project` --------- Co-authored-by: Jakob Ackermann GitOrigin-RevId: 3ed795ae0621800603395f7b50626ac89c39199d --- .../src/Features/Project/ProjectLocator.js | 23 +++ .../web/scripts/fix_malformed_filetree.mjs | 51 ++++-- .../src/MalformedFiletreesTests.mjs | 148 ++++++++++++++++-- 3 files changed, 193 insertions(+), 29 deletions(-) diff --git a/services/web/app/src/Features/Project/ProjectLocator.js b/services/web/app/src/Features/Project/ProjectLocator.js index c78dac1dbf..2feaa0cebf 100644 --- a/services/web/app/src/Features/Project/ProjectLocator.js +++ b/services/web/app/src/Features/Project/ProjectLocator.js @@ -7,6 +7,28 @@ const Errors = require('../Errors/Errors') const { promisifyMultiResult } = require('@overleaf/promise-utils') const { iterablePaths } = require('./IterablePath') +/** + * @param project + * @param predicate + * @returns {{path: string, value: *}} + */ +function findDeep(project, predicate) { + function find(value, path) { + if (predicate(value)) { + return { value, path: path.join('.') } + } + if (typeof value === 'object' && value !== null) { + for (const [childKey, childVal] of Object.entries(value)) { + const found = find(childVal, [...path, childKey]) + if (found) { + return found + } + } + } + } + return find(project.rootFolder, ['rootFolder']) +} + function findElement(options, _callback) { // The search algorithm below potentially invokes the callback multiple // times. @@ -308,6 +330,7 @@ module.exports = { findElementByPath, findRootDoc, findElementByMongoPath, + findDeep, promises: { findElement: promisifyMultiResult(findElement, [ 'element', diff --git a/services/web/scripts/fix_malformed_filetree.mjs b/services/web/scripts/fix_malformed_filetree.mjs index 491da4762d..2c2971ecfd 100644 --- a/services/web/scripts/fix_malformed_filetree.mjs +++ b/services/web/scripts/fix_malformed_filetree.mjs @@ -8,7 +8,9 @@ */ import mongodb from 'mongodb-legacy' import { db } from '../app/src/infrastructure/mongodb.js' -import ProjectLocator from '../app/src/Features/Project/ProjectLocator.js' +import ProjectLocator, { + findDeep, +} from '../app/src/Features/Project/ProjectLocator.js' import minimist from 'minimist' import readline from 'node:readline' import fs from 'node:fs' @@ -72,7 +74,7 @@ async function processBadPath(projectId, mongoPath, _id) { if (isRootFolder(mongoPath)) { modifiedCount = await fixRootFolder(projectId) } else if (isArrayElement(mongoPath)) { - modifiedCount = await removeNulls(projectId, parentPath(mongoPath)) + modifiedCount = await removeNulls(projectId, _id) } else if (isArray(mongoPath)) { modifiedCount = await fixArray(projectId, mongoPath) } else if (isFolderId(mongoPath)) { @@ -83,7 +85,7 @@ async function processBadPath(projectId, mongoPath, _id) { parentPath(parentPath(mongoPath)) ) } else if (isName(mongoPath)) { - modifiedCount = await fixName(projectId, mongoPath) + modifiedCount = await fixName(projectId, _id) } else if (isHash(mongoPath)) { console.error(`Missing file hash: ${projectId}/${_id} (${mongoPath})`) console.error('SaaS: likely needs filestore restore') @@ -164,10 +166,26 @@ async function fixRootFolder(projectId) { /** * Remove all nulls from the given docs/files/folders array */ -async function removeNulls(projectId, path) { +async function removeNulls(projectId, _id) { + if (!_id) { + throw new Error('missing _id') + } + const project = await db.projects.findOne( + { _id: new ObjectId(projectId) }, + { projection: { rootFolder: 1 } } + ) + const foundResult = findDeep(project, obj => obj?._id?.toString() === _id) + if (!foundResult) return + const { path } = foundResult const result = await db.projects.updateOne( - { _id: new ObjectId(projectId), [path]: { $type: 'array' } }, - { $pull: { [path]: null } } + { _id: new ObjectId(projectId) }, + { + $pull: { + [`${path}.folders`]: null, + [`${path}.docs`]: null, + [`${path}.fileRefs`]: null, + }, + } ) return result.modifiedCount } @@ -208,19 +226,26 @@ async function removeElementsWithoutIds(projectId, path) { /** * Give a name to a file/doc/folder that doesn't have one */ -async function fixName(projectId, path) { +async function fixName(projectId, _id) { + if (!_id) { + throw new Error('missing _id') + } const project = await db.projects.findOne( { _id: new ObjectId(projectId) }, { projection: { rootFolder: 1 } } ) - const arrayPath = parentPath(parentPath(path)) - const array = ProjectLocator.findElementByMongoPath(project, arrayPath) - const existingNames = new Set(array.map(x => x.name)) + const foundResult = findDeep(project, obj => obj?._id?.toString() === _id) + if (!foundResult) return + const { path } = foundResult + const array = ProjectLocator.findElementByMongoPath(project, parentPath(path)) const name = - path === 'rootFolder.0.name' ? 'rootFolder' : findUniqueName(existingNames) + path === 'rootFolder.0' + ? 'rootFolder' + : findUniqueName(new Set(array.map(x => x?.name))) + const pathToName = `${path}.name` const result = await db.projects.updateOne( - { _id: new ObjectId(projectId), [path]: { $in: [null, ''] } }, - { $set: { [path]: name } } + { _id: new ObjectId(projectId), [pathToName]: { $in: [null, ''] } }, + { $set: { [pathToName]: name } } ) return result.modifiedCount } diff --git a/services/web/test/acceptance/src/MalformedFiletreesTests.mjs b/services/web/test/acceptance/src/MalformedFiletreesTests.mjs index d1dd8ea937..744781f8c8 100644 --- a/services/web/test/acceptance/src/MalformedFiletreesTests.mjs +++ b/services/web/test/acceptance/src/MalformedFiletreesTests.mjs @@ -222,7 +222,7 @@ const testCases = [ msg: 'bad file-tree path', })), expectFixStdout: - '"gracefulShutdownInitiated":false,"processedLines":4,"success":3,"alreadyProcessed":1,"hash":0,"failed":0,"unmatched":0', + '"gracefulShutdownInitiated":false,"processedLines":4,"success":1,"alreadyProcessed":3,"hash":0,"failed":0,"unmatched":0', expectProject: updatedProject => { expect(updatedProject).to.deep.equal({ _id: projectId, @@ -495,7 +495,109 @@ const testCases = [ msg: 'bad file-tree path', })), expectFixStdout: - '"gracefulShutdownInitiated":false,"processedLines":9,"success":6,"alreadyProcessed":3,"hash":0,"failed":0,"unmatched":0', + '"gracefulShutdownInitiated":false,"processedLines":9,"success":4,"alreadyProcessed":5,"hash":0,"failed":0,"unmatched":0', + expectProject: updatedProject => { + expect(updatedProject).to.deep.equal({ + _id: projectId, + rootFolder: [ + { + _id: rootFolderId, + name: 'rootFolder', + folders: [{ ...wellFormedFolder('f02'), name: 'untitled' }], + docs: [{ ...wellFormedDoc('d02'), name: 'untitled' }], + fileRefs: [{ ...wellFormedFileRef('fr02'), name: 'untitled' }], + }, + ], + }) + }, + }, + { + name: 'bug: shifted arrays in filetree folder', + project: { + _id: projectId, + rootFolder: [ + { + _id: rootFolderId, + name: 'rootFolder', + folders: [ + null, + null, + { + ...wellFormedFolder('f02'), + name: 'folder 1', + folders: [null, null, { ...wellFormedFolder('f022') }], + docs: [null, null, { ...wellFormedDoc('d022'), name: null }], + fileRefs: [ + null, + null, + { ...wellFormedFileRef('fr022'), name: null }, + ], + }, + ], + + docs: [], + fileRefs: [], + }, + ], + }, + expectFind: [ + { + _id: rootFolderId.toString(), + path: 'rootFolder.0.folders.0', + reason: 'bad folder', + }, + { + _id: rootFolderId.toString(), + path: 'rootFolder.0.folders.1', + reason: 'bad folder', + }, + { + _id: strId('f02'), + path: 'rootFolder.0.folders.2.folders.0', + reason: 'bad folder', + }, + { + _id: strId('f02'), + path: 'rootFolder.0.folders.2.folders.1', + reason: 'bad folder', + }, + { + _id: strId('f02'), + path: 'rootFolder.0.folders.2.docs.0', + reason: 'bad doc', + }, + { + _id: strId('f02'), + path: 'rootFolder.0.folders.2.docs.1', + reason: 'bad doc', + }, + { + _id: strId('d022'), + path: 'rootFolder.0.folders.2.docs.2.name', + reason: 'bad doc name', + }, + { + _id: strId('f02'), + path: 'rootFolder.0.folders.2.fileRefs.0', + reason: 'bad file', + }, + { + _id: strId('f02'), + path: 'rootFolder.0.folders.2.fileRefs.1', + reason: 'bad file', + }, + { + _id: strId('fr022'), + path: 'rootFolder.0.folders.2.fileRefs.2.name', + reason: 'bad file name', + }, + ].map(entry => ({ + ...entry, + projectId: projectId.toString(), + msg: 'bad file-tree path', + })), + expectFixStdout: + '"gracefulShutdownInitiated":false,"processedLines":10,"success":4,"alreadyProcessed":6,"hash":0,"failed":0,"unmatched":0', expectProject: updatedProject => { expect(updatedProject).to.deep.equal({ _id: projectId, @@ -503,22 +605,36 @@ const testCases = [ { _id: rootFolderId, name: 'rootFolder', - // FIXME: The 3 arrays should only contain 1 item: the well-formed item with the name 'untitled'. folders: [ - { ...wellFormedFolder('f02'), name: null }, - null, - { name: 'untitled' }, - ], - docs: [ - { ...wellFormedDoc('d02'), name: null }, - null, - { name: 'untitled' }, - ], - fileRefs: [ - { ...wellFormedFileRef('fr02'), name: null }, - null, - { name: 'untitled' }, + { + ...wellFormedFolder('f02'), + name: 'folder 1', + docs: [ + { + ...wellFormedDoc('d022'), + name: 'untitled', + }, + ], + fileRefs: [ + { + ...wellFormedFileRef('fr022'), + // FIXME: Make the names unique across different file types + name: 'untitled', + }, + ], + folders: [ + { + ...wellFormedFolder('f022'), + name: 'f022', + folders: [], + docs: [], + fileRefs: [], + }, + ], + }, ], + docs: [], + fileRefs: [], }, ], })