From ec0ff484f1f3964393c641b2dbebb3ab2516d93c Mon Sep 17 00:00:00 2001 From: Brian Gough Date: Thu, 11 Nov 2021 15:10:46 +0000 Subject: [PATCH] Fix up duplicate filenames during resync (#5742) * Fix up duplicate filenames during resync * Unit test for project history resync with duplicate paths * Decaf cleanup: unnecessary returns * Decaf cleanup: remove initClass() * Decaf cleanup: unused variables * Decaf cleanup: camel case variables * Support duplicate folder names when traversing project folders We have some broken projects that contain duplicate folder names. In order to fix them, we need to be able to find them in the project. The use of an object to store folders by path meant that only one of the duplicate folders would be found. * Keep docupdater up to date when renaming during a resync Co-authored-by: Eric Mc Sween GitOrigin-RevId: bf3dcdc4c1d4f5418ff9f5e68b255438620349ca --- .../Features/Project/ProjectEntityHandler.js | 28 +--- .../Project/ProjectEntityUpdateHandler.js | 128 +++++++++++++++--- .../src/Project/ProjectEntityHandlerTests.js | 100 +++++--------- .../ProjectEntityUpdateHandlerTests.js | 107 ++++++++++++++- 4 files changed, 252 insertions(+), 111 deletions(-) diff --git a/services/web/app/src/Features/Project/ProjectEntityHandler.js b/services/web/app/src/Features/Project/ProjectEntityHandler.js index fb219382ee..0b8a2e3b1a 100644 --- a/services/web/app/src/Features/Project/ProjectEntityHandler.js +++ b/services/web/app/src/Features/Project/ProjectEntityHandler.js @@ -20,15 +20,11 @@ const ProjectEntityHandler = { } ProjectEntityHandler._getAllFolders(projectId, (error, folders) => { - if (folders == null) { - folders = {} - } if (error != null) { return callback(error) } const docs = {} - for (const folderPath in folders) { - const folder = folders[folderPath] + for (const { path: folderPath, folder } of folders) { for (const doc of folder.docs || []) { const content = docContents[doc._id.toString()] if (content != null) { @@ -49,15 +45,11 @@ const ProjectEntityHandler = { getAllFiles(projectId, callback) { ProjectEntityHandler._getAllFolders(projectId, (err, folders) => { - if (folders == null) { - folders = {} - } if (err != null) { return callback(err) } const files = {} - for (const folderPath in folders) { - const folder = folders[folderPath] + for (const { path: folderPath, folder } of folders) { for (const file of folder.fileRefs || []) { if (file != null) { files[path.join(folderPath, file.name)] = file @@ -83,16 +75,12 @@ const ProjectEntityHandler = { getAllEntitiesFromProject(project, callback) { ProjectEntityHandler._getAllFoldersFromProject(project, (err, folders) => { - if (folders == null) { - folders = {} - } if (err != null) { return callback(err) } const docs = [] const files = [] - for (const folderPath in folders) { - const folder = folders[folderPath] + for (const { path: folderPath, folder } of folders) { for (const doc of folder.docs || []) { if (doc != null) { docs.push({ path: path.join(folderPath, doc.name), doc }) @@ -122,15 +110,11 @@ const ProjectEntityHandler = { getAllDocPathsFromProject(project, callback) { ProjectEntityHandler._getAllFoldersFromProject(project, (err, folders) => { - if (folders == null) { - folders = {} - } if (err != null) { return callback(err) } const docPath = {} - for (const folderPath in folders) { - const folder = folders[folderPath] + for (const { path: folderPath, folder } of folders) { for (const doc of folder.docs || []) { docPath[doc._id] = path.join(folderPath, doc.name) } @@ -226,9 +210,9 @@ const ProjectEntityHandler = { }, _getAllFoldersFromProject(project, callback) { - const folders = {} + const folders = [] function processFolder(basePath, folder) { - folders[basePath] = folder + folders.push({ path: basePath, folder }) for (const childFolder of folder.folders || []) { if (childFolder.name != null) { processFolder(path.join(basePath, childFolder.name), childFolder) diff --git a/services/web/app/src/Features/Project/ProjectEntityUpdateHandler.js b/services/web/app/src/Features/Project/ProjectEntityUpdateHandler.js index 369f2ca492..50e97b6a2d 100644 --- a/services/web/app/src/Features/Project/ProjectEntityUpdateHandler.js +++ b/services/web/app/src/Features/Project/ProjectEntityUpdateHandler.js @@ -1355,25 +1355,34 @@ const ProjectEntityUpdateHandler = { if (error != null) { return callback(error) } - - docs = _.map(docs, doc => ({ - doc: doc.doc._id, - path: doc.path, - })) - - files = _.map(files, file => ({ - file: file.file._id, - path: file.path, - url: FileStoreHandler._buildUrl(projectId, file.file._id), - _hash: file.file.hash, - })) - - DocumentUpdaterHandler.resyncProjectHistory( + ProjectEntityUpdateHandler._checkFiletree( projectId, projectHistoryId, - docs, - files, - callback + [...docs, ...files], + error => { + if (error) { + return callback(error) + } + docs = _.map(docs, doc => ({ + doc: doc.doc._id, + path: doc.path, + })) + + files = _.map(files, file => ({ + file: file.file._id, + path: file.path, + url: FileStoreHandler._buildUrl(projectId, file.file._id), + _hash: file.file.hash, + })) + + DocumentUpdaterHandler.resyncProjectHistory( + projectId, + projectHistoryId, + docs, + files, + callback + ) + } ) } ) @@ -1381,6 +1390,91 @@ const ProjectEntityUpdateHandler = { ) ), + _checkFiletree(projectId, projectHistoryId, entities, callback) { + const renames = [] + const paths = new Map() + for (const entity of entities) { + // if the path is invalid (i.e. not going to be accepted in + // project-history due to filename rules) we could also flag it for a + // rename here. + if (paths.has(entity.path)) { + renames.push(entity) + } else { + paths.set(entity.path, entity) + } + } + if (renames.length === 0) { + return callback() + } + logger.warn({ projectId, renames }, 'found conflict in filetree') + // find new names for duplicate files + for (const entity of renames) { + const { + newPath, + newName, + } = ProjectEntityUpdateHandler.findNextAvailableName(paths, entity) + logger.debug({ projectId, newName, newPath }, 'found new name') + entity.newPath = newPath + entity.newName = newName + } + // rename the duplicate files + const doRename = (entity, cb) => { + const entityId = entity.doc ? entity.doc._id : entity.file._id + const entityType = entity.doc ? 'doc' : 'file' + ProjectEntityMongoUpdateHandler.renameEntity( + projectId, + entityId, + entityType, + entity.newName, + (err, project, startPath, endPath, rev, changes) => { + if (err) { + return cb(err) + } + // update the renamed entity for the resync + entity.path = entity.newPath + if (entityType === 'doc') { + entity.doc.name = entity.newName + } else { + entity.file.name = entity.newName + } + delete entity.newPath + delete entity.newname + DocumentUpdaterHandler.updateProjectStructure( + projectId, + projectHistoryId, + null, + changes, + cb + ) + } + ) + } + async.eachSeries(renames, doRename, callback) + }, + + findNextAvailableName(allPaths, entity) { + const incrementReplacer = (match, p1) => { + return ' (' + (parseInt(p1, 10) + 1) + ')' + } + let candidatePath = entity.path + // if the filename was invalid we should normalise it here too. Currently + // this only handles renames in the same folder, so we will be out of luck + // if it is the folder name which in invalid. We could handle folder + // renames by returning the folders list from getAllEntitiesFromProject + do { + // does the filename look like "foo (1)" if so, increment the number in parentheses + if (/ \(\d+\)$/.test(candidatePath)) { + candidatePath = candidatePath.replace(/ \((\d+)\)$/, incrementReplacer) + } else { + // otherwise, add a ' (1)' suffix to the name + candidatePath = candidatePath + ' (1)' + } + } while (allPaths.has(candidatePath)) // keep going until the name is unique + // add the new name to the set + allPaths.set(candidatePath, entity) + return { newPath: candidatePath, newName: candidatePath.split('/').pop() } + }, + isPathValidForRootDoc(docPath) { const docExtension = Path.extname(docPath) return VALID_ROOT_DOC_REGEXP.test(docExtension) diff --git a/services/web/test/unit/src/Project/ProjectEntityHandlerTests.js b/services/web/test/unit/src/Project/ProjectEntityHandlerTests.js index ecc4ff48bc..186a76f838 100644 --- a/services/web/test/unit/src/Project/ProjectEntityHandlerTests.js +++ b/services/web/test/unit/src/Project/ProjectEntityHandlerTests.js @@ -1,53 +1,26 @@ -/* eslint-disable - camelcase, - max-len, - no-return-assign, - no-unused-vars, -*/ -// TODO: This file was created by bulk-decaffeinate. -// Fix any style issues and re-enable lint. -/* - * decaffeinate suggestions: - * DS102: Remove unnecessary code created because of implicit returns - * DS205: Consider reworking code to avoid use of IIFEs - * DS206: Consider reworking classes to avoid initClass - * Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md - */ -const { assert, expect } = require('chai') +const { expect } = require('chai') const sinon = require('sinon') const modulePath = '../../../../app/src/Features/Project/ProjectEntityHandler' const SandboxedModule = require('sandboxed-module') const Errors = require('../../../../app/src/Features/Errors/Errors') describe('ProjectEntityHandler', function () { - const project_id = '4eecb1c1bffa66588e0000a1' - const doc_id = '4eecb1c1bffa66588e0000a2' - const folder_id = '4eecaffcbffa66588e000008' - const rootFolderId = '4eecaffcbffa66588e000007' - const userId = 1234 + const projectId = '4eecb1c1bffa66588e0000a1' + const docId = '4eecb1c1bffa66588e0000a2' beforeEach(function () { - let Project this.TpdsUpdateSender = { addDoc: sinon.stub().callsArg(1), addFile: sinon.stub().callsArg(1), } - this.ProjectModel = Project = (function () { - Project = class Project { - static initClass() { - this.prototype.rootFolder = [this.rootFolder] - } - - constructor(options) { - this._id = project_id - this.name = 'project_name_here' - this.rev = 0 - } + this.ProjectModel = class Project { + constructor(options) { + this._id = projectId + this.name = 'project_name_here' + this.rev = 0 + this.rootFolder = [this.rootFolder] } - Project.initClass() - return Project - })() - + } this.project = new this.ProjectModel() this.ProjectLocator = { findElement: sinon.stub() } @@ -56,7 +29,7 @@ describe('ProjectEntityHandler', function () { } this.callback = sinon.stub() - return (this.ProjectEntityHandler = SandboxedModule.require(modulePath, { + this.ProjectEntityHandler = SandboxedModule.require(modulePath, { requires: { '../Docstore/DocstoreManager': (this.DocstoreManager = {}), '../../Features/DocumentUpdater/DocumentUpdaterHandler': this @@ -68,7 +41,7 @@ describe('ProjectEntityHandler', function () { './ProjectGetter': (this.ProjectGetter = {}), '../ThirdPartyDataStore/TpdsUpdateSender': this.TpdsUpdateSender, }, - })) + }) }) describe('getting folders, docs and files', function () { @@ -131,13 +104,11 @@ describe('ProjectEntityHandler', function () { this.DocstoreManager.getAllDocs = sinon .stub() .callsArgWith(1, null, this.docs) - this.ProjectEntityHandler.getAllDocs(project_id, this.callback) + this.ProjectEntityHandler.getAllDocs(projectId, this.callback) }) it('should get the doc lines and rev from the docstore', function () { - this.DocstoreManager.getAllDocs - .calledWith(project_id) - .should.equal(true) + this.DocstoreManager.getAllDocs.calledWith(projectId).should.equal(true) }) it('should call the callback with the docs with the lines and rev included', function () { @@ -163,7 +134,7 @@ describe('ProjectEntityHandler', function () { describe('getAllFiles', function () { beforeEach(function () { this.callback = sinon.stub() - this.ProjectEntityHandler.getAllFiles(project_id, this.callback) + this.ProjectEntityHandler.getAllFiles(projectId, this.callback) }) it('should call the callback with the files', function () { @@ -197,7 +168,7 @@ describe('ProjectEntityHandler', function () { ) }) - it('should call the callback with the path for each doc_id', function () { + it('should call the callback with the path for each docId', function () { this.expected = {} this.expected[this.doc1._id] = `/${this.doc1.name}` this.expected[this.doc2._id] = `/folder1/${this.doc2.name}` @@ -211,7 +182,7 @@ describe('ProjectEntityHandler', function () { }) it('should call the callback with the path for an existing doc id at the root level', function () { this.ProjectEntityHandler.getDocPathByProjectIdAndDocId( - project_id, + projectId, this.doc1._id, this.callback ) @@ -220,7 +191,7 @@ describe('ProjectEntityHandler', function () { it('should call the callback with the path for an existing doc id nested within a folder', function () { this.ProjectEntityHandler.getDocPathByProjectIdAndDocId( - project_id, + projectId, this.doc2._id, this.callback ) @@ -231,7 +202,7 @@ describe('ProjectEntityHandler', function () { it('should call the callback with a NotFoundError for a non-existing doc', function () { this.ProjectEntityHandler.getDocPathByProjectIdAndDocId( - project_id, + projectId, 'non-existing-id', this.callback ) @@ -242,7 +213,7 @@ describe('ProjectEntityHandler', function () { it('should call the callback with a NotFoundError for an existing file', function () { this.ProjectEntityHandler.getDocPathByProjectIdAndDocId( - project_id, + projectId, this.file1._id, this.callback ) @@ -255,21 +226,21 @@ describe('ProjectEntityHandler', function () { describe('_getAllFolders', function () { beforeEach(function () { this.callback = sinon.stub() - this.ProjectEntityHandler._getAllFolders(project_id, this.callback) + this.ProjectEntityHandler._getAllFolders(projectId, this.callback) }) it('should get the project without the docs lines', function () { this.ProjectGetter.getProjectWithoutDocLines - .calledWith(project_id) + .calledWith(projectId) .should.equal(true) }) it('should call the callback with the folders', function () { this.callback - .calledWith(null, { - '/': this.project.rootFolder[0], - '/folder1': this.folder1, - }) + .calledWith(null, [ + { path: '/', folder: this.project.rootFolder[0] }, + { path: '/folder1', folder: this.folder1 }, + ]) .should.equal(true) }) }) @@ -285,10 +256,10 @@ describe('ProjectEntityHandler', function () { it('should call the callback with the folders', function () { this.callback - .calledWith(null, { - '/': this.project.rootFolder[0], - '/folder1': this.folder1, - }) + .calledWith(null, [ + { path: '/', folder: this.project.rootFolder[0] }, + { path: '/folder1', folder: this.folder1 }, + ]) .should.equal(true) }) }) @@ -304,12 +275,12 @@ describe('ProjectEntityHandler', function () { this.DocstoreManager.getDoc = sinon .stub() .callsArgWith(3, null, this.lines, this.rev, this.version, this.ranges) - this.ProjectEntityHandler.getDoc(project_id, doc_id, this.callback) + this.ProjectEntityHandler.getDoc(projectId, docId, this.callback) }) it('should call the docstore', function () { this.DocstoreManager.getDoc - .calledWith(project_id, doc_id) + .calledWith(projectId, docId) .should.equal(true) }) @@ -332,15 +303,12 @@ describe('ProjectEntityHandler', function () { this.DocstoreManager.getDoc = sinon .stub() .callsArgWith(3, null, this.lines, this.rev, this.version, this.ranges) - result = await this.ProjectEntityHandler.promises.getDoc( - project_id, - doc_id - ) + result = await this.ProjectEntityHandler.promises.getDoc(projectId, docId) }) it('should call the docstore', function () { this.DocstoreManager.getDoc - .calledWith(project_id, doc_id) + .calledWith(projectId, docId) .should.equal(true) }) diff --git a/services/web/test/unit/src/Project/ProjectEntityUpdateHandlerTests.js b/services/web/test/unit/src/Project/ProjectEntityUpdateHandlerTests.js index 01e602d553..2a29b280d0 100644 --- a/services/web/test/unit/src/Project/ProjectEntityUpdateHandlerTests.js +++ b/services/web/test/unit/src/Project/ProjectEntityUpdateHandlerTests.js @@ -78,7 +78,7 @@ describe('ProjectEntityUpdateHandler', function () { flushDocToMongo: sinon.stub().yields(), updateProjectStructure: sinon.stub().yields(), setDocument: sinon.stub(), - resyncProjectHistory: sinon.stub(), + resyncProjectHistory: sinon.stub().yields(), deleteDoc: sinon.stub().yields(), } this.fs = { @@ -118,7 +118,7 @@ describe('ProjectEntityUpdateHandler', function () { replaceFileWithNew: sinon.stub(), mkdirp: sinon.stub(), moveEntity: sinon.stub(), - renameEntity: sinon.stub(), + renameEntity: sinon.stub().yields(), deleteEntity: sinon.stub(), replaceDocWithFile: sinon.stub(), replaceFileWithDoc: sinon.stub(), @@ -136,6 +136,11 @@ describe('ProjectEntityUpdateHandler', function () { copyFile: sinon.stub(), uploadFileFromDisk: sinon.stub(), deleteFile: sinon.stub(), + _buildUrl: sinon + .stub() + .callsFake( + (projectId, fileId) => `www.filestore.test/${projectId}/${fileId}` + ), } this.FileWriter = { writeLinesToDisk: sinon.stub(), @@ -1940,10 +1945,6 @@ describe('ProjectEntityUpdateHandler', function () { docs, files ) - this.FileStoreHandler._buildUrl = (projectId, fileId) => - `www.filestore.test/${projectId}/${fileId}` - this.DocumentUpdaterHandler.resyncProjectHistory.yields() - this.ProjectEntityUpdateHandler.resyncProjectHistory( projectId, this.callback @@ -1984,6 +1985,100 @@ describe('ProjectEntityUpdateHandler', function () { this.callback.called.should.equal(true) }) }) + + describe('a project with duplicate filenames', function () { + beforeEach(function (done) { + this.ProjectGetter.getProject.yields(null, this.project) + this.docs = [ + { doc: { _id: 'doc1' }, path: 'main.tex' }, + { doc: { _id: 'doc2' }, path: 'a/b/c/duplicate.tex' }, + { doc: { _id: 'doc3' }, path: 'a/b/c/duplicate.tex' }, + { doc: { _id: 'doc4' }, path: 'another dupe (22)' }, + { doc: { _id: 'doc5' }, path: 'a/b/c/duplicate.tex' }, + ] + this.files = [ + { file: { _id: 'file1', hash: 'hash1' }, path: 'image.jpg' }, + { file: { _id: 'file2', hash: 'hash2' }, path: 'duplicate.jpg' }, + { file: { _id: 'file3', hash: 'hash3' }, path: 'duplicate.jpg' }, + { file: { _id: 'file4', hash: 'hash4' }, path: 'another dupe (22)' }, + ] + this.ProjectEntityHandler.getAllEntitiesFromProject.yields( + null, + this.docs, + this.files + ) + this.ProjectEntityUpdateHandler.resyncProjectHistory(projectId, done) + }) + + it('renames the duplicate files', function () { + const renameEntity = this.ProjectEntityMongoUpdateHandler.renameEntity + expect(renameEntity).to.have.callCount(4) + expect(renameEntity).to.have.been.calledWith( + projectId, + 'doc3', + 'doc', + 'duplicate.tex (1)' + ) + expect(renameEntity).to.have.been.calledWith( + projectId, + 'doc5', + 'doc', + 'duplicate.tex (2)' + ) + expect(renameEntity).to.have.been.calledWith( + projectId, + 'file3', + 'file', + 'duplicate.jpg (1)' + ) + expect(renameEntity).to.have.been.calledWith( + projectId, + 'file4', + 'file', + 'another dupe (23)' + ) + }) + + it('tells the doc updater to resync the project', function () { + const docs = [ + { doc: 'doc1', path: 'main.tex' }, + { doc: 'doc2', path: 'a/b/c/duplicate.tex' }, + { doc: 'doc3', path: 'a/b/c/duplicate.tex (1)' }, + { doc: 'doc4', path: 'another dupe (22)' }, + { doc: 'doc5', path: 'a/b/c/duplicate.tex (2)' }, + ] + const urlPrefix = `www.filestore.test/${projectId}` + const files = [ + { + file: 'file1', + path: 'image.jpg', + url: `${urlPrefix}/file1`, + _hash: 'hash1', + }, + { + file: 'file2', + path: 'duplicate.jpg', + url: `${urlPrefix}/file2`, + _hash: 'hash2', + }, + { + file: 'file3', + path: 'duplicate.jpg (1)', + url: `${urlPrefix}/file3`, + _hash: 'hash3', + }, + { + file: 'file4', + path: 'another dupe (23)', + url: `${urlPrefix}/file4`, + _hash: 'hash4', + }, + ] + expect( + this.DocumentUpdaterHandler.resyncProjectHistory + ).to.have.been.calledWith(projectId, projectHistoryId, docs, files) + }) + }) }) describe('_cleanUpEntity', function () {