Merge pull request #5770 from overleaf/em-resync-clean-filename

Clean filenames before resync

GitOrigin-RevId: 46a408a8b7eb067b431db04951d92c029b833054
This commit is contained in:
Eric Mc Sween
2021-11-15 09:47:35 -05:00
committed by Copybot
parent f351a7af0a
commit 642764fccf
2 changed files with 139 additions and 48 deletions
@@ -1392,53 +1392,72 @@ const ProjectEntityUpdateHandler = {
_checkFiletree(projectId, projectHistoryId, entities, callback) {
const renames = []
const paths = new Map()
const paths = new Set()
for (const entity of entities) {
// if the path is invalid (i.e. not going to be accepted in
// project-history due to filename rules) we could also flag it for a
// rename here.
if (paths.has(entity.path)) {
renames.push(entity)
const filename = entity.doc ? entity.doc.name : entity.file.name
const cleanFilename = SafePath.clean(filename)
if (cleanFilename !== filename) {
// File name needs to be cleaned
let newPath = Path.join(
entity.path.slice(0, -filename.length),
cleanFilename
)
if (paths.has(newPath)) {
newPath = ProjectEntityUpdateHandler.findNextAvailablePath(
paths,
newPath
)
}
renames.push({ entity, newName: cleanFilename, newPath })
} else if (paths.has(entity.path)) {
// Duplicate filename
const newPath = ProjectEntityUpdateHandler.findNextAvailablePath(
paths,
entity.path
)
const newName = newPath.split('/').pop()
renames.push({ entity, newName, newPath })
paths.add(newPath)
} else {
paths.set(entity.path, entity)
paths.add(entity.path)
}
}
if (renames.length === 0) {
return callback()
}
logger.warn({ projectId, renames }, 'found conflict in filetree')
// find new names for duplicate files
for (const entity of renames) {
const {
newPath,
newName,
} = ProjectEntityUpdateHandler.findNextAvailableName(paths, entity)
logger.debug({ projectId, newName, newPath }, 'found new name')
entity.newPath = newPath
entity.newName = newName
}
logger.warn(
{
projectId,
renames: renames.map(rename => ({
oldPath: rename.entity.path,
newPath: rename.newPath,
})),
},
'found conflicts or bad filenames in filetree'
)
// rename the duplicate files
const doRename = (entity, cb) => {
const doRename = (rename, cb) => {
const entity = rename.entity
const entityId = entity.doc ? entity.doc._id : entity.file._id
const entityType = entity.doc ? 'doc' : 'file'
ProjectEntityMongoUpdateHandler.renameEntity(
projectId,
entityId,
entityType,
entity.newName,
rename.newName,
(err, project, startPath, endPath, rev, changes) => {
if (err) {
return cb(err)
}
// update the renamed entity for the resync
entity.path = entity.newPath
entity.path = rename.newPath
if (entityType === 'doc') {
entity.doc.name = entity.newName
entity.doc.name = rename.newName
} else {
entity.file.name = entity.newName
entity.file.name = rename.newName
}
delete entity.newPath
delete entity.newname
DocumentUpdaterHandler.updateProjectStructure(
projectId,
projectHistoryId,
@@ -1452,11 +1471,10 @@ const ProjectEntityUpdateHandler = {
async.eachSeries(renames, doRename, callback)
},
findNextAvailableName(allPaths, entity) {
findNextAvailablePath(allPaths, candidatePath) {
const incrementReplacer = (match, p1) => {
return ' (' + (parseInt(p1, 10) + 1) + ')'
}
let candidatePath = entity.path
// if the filename was invalid we should normalise it here too. Currently
// this only handles renames in the same folder, so we will be out of luck
// if it is the folder name which in invalid. We could handle folder
@@ -1471,8 +1489,8 @@ const ProjectEntityUpdateHandler = {
}
} while (allPaths.has(candidatePath)) // keep going until the name is unique
// add the new name to the set
allPaths.set(candidatePath, entity)
return { newPath: candidatePath, newName: candidatePath.split('/').pop() }
allPaths.add(candidatePath)
return candidatePath
},
isPathValidForRootDoc(docPath) {
@@ -1924,19 +1924,11 @@ describe('ProjectEntityUpdateHandler', function () {
beforeEach(function () {
this.ProjectGetter.getProject.yields(null, this.project)
const docs = [
{
doc: {
_id: docId,
},
path: 'main.tex',
},
{ doc: { _id: docId, name: 'main.tex' }, path: 'main.tex' },
]
const files = [
{
file: {
_id: fileId,
hash: '123456',
},
file: { _id: fileId, name: 'universe.png', hash: '123456' },
path: 'universe.png',
},
]
@@ -1990,17 +1982,41 @@ describe('ProjectEntityUpdateHandler', function () {
beforeEach(function (done) {
this.ProjectGetter.getProject.yields(null, this.project)
this.docs = [
{ doc: { _id: 'doc1' }, path: 'main.tex' },
{ doc: { _id: 'doc2' }, path: 'a/b/c/duplicate.tex' },
{ doc: { _id: 'doc3' }, path: 'a/b/c/duplicate.tex' },
{ doc: { _id: 'doc4' }, path: 'another dupe (22)' },
{ doc: { _id: 'doc5' }, path: 'a/b/c/duplicate.tex' },
{ doc: { _id: 'doc1', name: 'main.tex' }, path: 'main.tex' },
{
doc: { _id: 'doc2', name: 'duplicate.tex' },
path: 'a/b/c/duplicate.tex',
},
{
doc: { _id: 'doc3', name: 'duplicate.tex' },
path: 'a/b/c/duplicate.tex',
},
{
doc: { _id: 'doc4', name: 'another dupe (22)' },
path: 'another dupe (22)',
},
{
doc: { _id: 'doc5', name: 'duplicate.tex' },
path: 'a/b/c/duplicate.tex',
},
]
this.files = [
{ file: { _id: 'file1', hash: 'hash1' }, path: 'image.jpg' },
{ file: { _id: 'file2', hash: 'hash2' }, path: 'duplicate.jpg' },
{ file: { _id: 'file3', hash: 'hash3' }, path: 'duplicate.jpg' },
{ file: { _id: 'file4', hash: 'hash4' }, path: 'another dupe (22)' },
{
file: { _id: 'file1', name: 'image.jpg', hash: 'hash1' },
path: 'image.jpg',
},
{
file: { _id: 'file2', name: 'duplicate.jpg', hash: 'hash2' },
path: 'duplicate.jpg',
},
{
file: { _id: 'file3', name: 'duplicate.jpg', hash: 'hash3' },
path: 'duplicate.jpg',
},
{
file: { _id: 'file4', name: 'another dupe (22)', hash: 'hash4' },
path: 'another dupe (22)',
},
]
this.ProjectEntityHandler.getAllEntitiesFromProject.yields(
null,
@@ -2079,6 +2095,63 @@ describe('ProjectEntityUpdateHandler', function () {
).to.have.been.calledWith(projectId, projectHistoryId, docs, files)
})
})
describe('a project with bad filenames', function () {
beforeEach(function (done) {
this.ProjectGetter.getProject.yields(null, this.project)
this.docs = [
{
doc: { _id: 'doc1', name: '/d/e/f/test.tex' },
path: 'a/b/c/d/e/f/test.tex',
},
]
this.files = [
{
file: { _id: 'file1', name: 'A*.png', hash: 'hash1' },
path: 'A*.png',
},
]
this.ProjectEntityHandler.getAllEntitiesFromProject.yields(
null,
this.docs,
this.files
)
this.ProjectEntityUpdateHandler.resyncProjectHistory(projectId, done)
})
it('renames the files', function () {
const renameEntity = this.ProjectEntityMongoUpdateHandler.renameEntity
expect(renameEntity).to.have.callCount(2)
expect(renameEntity).to.have.been.calledWith(
projectId,
'doc1',
'doc',
'_d_e_f_test.tex'
)
expect(renameEntity).to.have.been.calledWith(
projectId,
'file1',
'file',
'A_.png'
)
})
it('tells the doc updater to resync the project', function () {
const docs = [{ doc: 'doc1', path: 'a/b/c/_d_e_f_test.tex' }]
const urlPrefix = `www.filestore.test/${projectId}`
const files = [
{
file: 'file1',
path: 'A_.png',
url: `${urlPrefix}/file1`,
_hash: 'hash1',
},
]
expect(
this.DocumentUpdaterHandler.resyncProjectHistory
).to.have.been.calledWith(projectId, projectHistoryId, docs, files)
})
})
})
describe('_cleanUpEntity', function () {