From 971afb734208cd3da9b4e702259726681bdddbdc Mon Sep 17 00:00:00 2001 From: James Allen Date: Thu, 4 Sep 2014 13:00:41 +0100 Subject: [PATCH] Treat large text files as binary --- .../ThirdPartyDataStore/UpdateMerger.coffee | 2 +- .../Uploads/FileSystemImportManager.coffee | 2 +- .../Features/Uploads/FileTypeManager.coffee | 18 +++++++++-- .../UpdateMergerTests.coffee | 8 ++--- .../FileSystemImportManagerTests.coffee | 4 +-- .../Uploads/FileTypeManagerTests.coffee | 32 ++++++++++++------- 6 files changed, 43 insertions(+), 23 deletions(-) diff --git a/services/web/app/coffee/Features/ThirdPartyDataStore/UpdateMerger.coffee b/services/web/app/coffee/Features/ThirdPartyDataStore/UpdateMerger.coffee index 16bc6508ad..823b4ba7e0 100644 --- a/services/web/app/coffee/Features/ThirdPartyDataStore/UpdateMerger.coffee +++ b/services/web/app/coffee/Features/ThirdPartyDataStore/UpdateMerger.coffee @@ -22,7 +22,7 @@ module.exports = FileTypeManager.shouldIgnore path, (err, shouldIgnore)-> if shouldIgnore return callback() - FileTypeManager.isBinary path, (err, isFile)-> + FileTypeManager.isBinary path, fsPath, (err, isFile)-> if isFile self.p.processFile project_id, elementId, fsPath, path, callback #TODO clean up the stream written to disk here else diff --git a/services/web/app/coffee/Features/Uploads/FileSystemImportManager.coffee b/services/web/app/coffee/Features/Uploads/FileSystemImportManager.coffee index 561653f645..0562255fa2 100644 --- a/services/web/app/coffee/Features/Uploads/FileSystemImportManager.coffee +++ b/services/web/app/coffee/Features/Uploads/FileSystemImportManager.coffee @@ -56,7 +56,7 @@ module.exports = FileSystemImportManager = if isDirectory @addFolder project_id, folder_id, name, path, replace, callback else - FileTypeManager.isBinary name, (error, isBinary) => + FileTypeManager.isBinary name, path, (error, isBinary) => return callback(error) if error? if isBinary @addFile project_id, folder_id, name, path, replace, callback diff --git a/services/web/app/coffee/Features/Uploads/FileTypeManager.coffee b/services/web/app/coffee/Features/Uploads/FileTypeManager.coffee index 54096d29f8..44eed12db5 100644 --- a/services/web/app/coffee/Features/Uploads/FileTypeManager.coffee +++ b/services/web/app/coffee/Features/Uploads/FileTypeManager.coffee @@ -19,17 +19,29 @@ module.exports = FileTypeManager = IGNORE_FILENAMES : [ "__MACOSX" ] + + MAX_TEXT_FILE_SIZE: 1 * 1024 * 1024 # 1 MB isDirectory: (path, callback = (error, result) ->) -> fs.stat path, (error, stats) -> callback(error, stats.isDirectory()) - isBinary: (path, callback = (error, result) ->) -> - parts = path.split(".") + isBinary: (name, fsPath, callback = (error, result) ->) -> + parts = name.split(".") extension = parts.slice(-1)[0] if extension? extension = extension.toLowerCase() - callback null, @TEXT_EXTENSIONS.indexOf(extension) == -1 or parts.length <= 1 + binaryFile = (@TEXT_EXTENSIONS.indexOf(extension) == -1 or parts.length <= 1) + + if binaryFile + return callback null, true + + fs.stat fsPath, (error, stat) -> + return callback(error) if error? + if stat.size > FileTypeManager.MAX_TEXT_FILE_SIZE + return callback null, true # Treat large text file as binary + else + return callback null, false shouldIgnore: (path, callback = (error, result) ->) -> name = Path.basename(path) diff --git a/services/web/test/UnitTests/coffee/ThirdPartyDataStore/UpdateMergerTests.coffee b/services/web/test/UnitTests/coffee/ThirdPartyDataStore/UpdateMergerTests.coffee index 309fa3b7df..4088c0791e 100644 --- a/services/web/test/UnitTests/coffee/ThirdPartyDataStore/UpdateMergerTests.coffee +++ b/services/web/test/UnitTests/coffee/ThirdPartyDataStore/UpdateMergerTests.coffee @@ -56,28 +56,28 @@ describe 'UpdateMerger :', -> it 'should process update as doc when it is a doc', (done)-> doc_id = "231312s" - @FileTypeManager.isBinary.callsArgWith(1, null, false) + @FileTypeManager.isBinary.callsArgWith(2, null, false) @projectLocator.findElementByPath = (_, __, cb)->cb(null, {_id:doc_id}) @FileTypeManager.shouldIgnore.callsArgWith(1, null, false) @updateMerger.p.processDoc = sinon.stub().callsArgWith(5) filePath = "/folder/doc.tex" @updateMerger.mergeUpdate @project_id, filePath, @update, "", => - @FileTypeManager.isBinary.calledWith(filePath).should.equal true + @FileTypeManager.isBinary.calledWith(filePath, @fsPath).should.equal true @updateMerger.p.processDoc.calledWith(@project_id, doc_id, @fsPath).should.equal true done() it 'should process update as file when it is not a doc', (done)-> file_id = "1231" @projectLocator.findElementByPath = (_, __, cb)->cb(null, {_id:file_id}) - @FileTypeManager.isBinary.callsArgWith(1, null, true) + @FileTypeManager.isBinary.callsArgWith(2, null, true) @FileTypeManager.shouldIgnore.callsArgWith(1, null, false) @updateMerger.p.processFile = sinon.stub().callsArgWith(4) filePath = "/folder/file1.png" @updateMerger.mergeUpdate @project_id, filePath, @update, "", => @updateMerger.p.processFile.calledWith(@project_id, file_id, @fsPath).should.equal true - @FileTypeManager.isBinary.calledWith(filePath).should.equal true + @FileTypeManager.isBinary.calledWith(filePath, @fsPath).should.equal true done() diff --git a/services/web/test/UnitTests/coffee/Uploads/FileSystemImportManagerTests.coffee b/services/web/test/UnitTests/coffee/Uploads/FileSystemImportManagerTests.coffee index 2803ff47ca..e0a38c974b 100644 --- a/services/web/test/UnitTests/coffee/Uploads/FileSystemImportManagerTests.coffee +++ b/services/web/test/UnitTests/coffee/Uploads/FileSystemImportManagerTests.coffee @@ -151,7 +151,7 @@ describe "FileSystemImportManager", -> describe "with binary file", -> beforeEach -> @FileTypeManager.isDirectory = sinon.stub().callsArgWith(1, null, false) - @FileTypeManager.isBinary = sinon.stub().callsArgWith(1, null, true) + @FileTypeManager.isBinary = sinon.stub().callsArgWith(2, null, true) @FileSystemImportManager.addFile = sinon.stub().callsArg(5) @FileSystemImportManager.addEntity @project_id, @folder_id, @name, @path_on_disk, @replace, @callback @@ -162,7 +162,7 @@ describe "FileSystemImportManager", -> describe "with text file", -> beforeEach -> @FileTypeManager.isDirectory = sinon.stub().callsArgWith(1, null, false) - @FileTypeManager.isBinary = sinon.stub().callsArgWith(1, null, false) + @FileTypeManager.isBinary = sinon.stub().callsArgWith(2, null, false) @FileSystemImportManager.addDoc = sinon.stub().callsArg(5) @FileSystemImportManager.addEntity @project_id, @folder_id, @name, @path_on_disk, @replace, @callback diff --git a/services/web/test/UnitTests/coffee/Uploads/FileTypeManagerTests.coffee b/services/web/test/UnitTests/coffee/Uploads/FileTypeManagerTests.coffee index 746d224d0d..c6fdf64829 100644 --- a/services/web/test/UnitTests/coffee/Uploads/FileTypeManagerTests.coffee +++ b/services/web/test/UnitTests/coffee/Uploads/FileTypeManagerTests.coffee @@ -36,50 +36,58 @@ describe "FileTypeManager", -> @callback.calledWith(null, false).should.equal true describe "isBinary", -> + beforeEach -> + @stat = { size: 100 } + @fs.stat = sinon.stub().callsArgWith(1, null, @stat) + it "should return .tex files as not binary", -> - @FileTypeManager.isBinary "file.tex", (error, binary) -> + @FileTypeManager.isBinary "file.tex", "/path/on/disk", (error, binary) -> binary.should.equal false it "should return .bib files as not binary", -> - @FileTypeManager.isBinary "file.bib", (error, binary) -> + @FileTypeManager.isBinary "file.bib", "/path/on/disk", (error, binary) -> binary.should.equal false it "should return .bibtex files as not binary", -> - @FileTypeManager.isBinary "file.bibtex", (error, binary) -> + @FileTypeManager.isBinary "file.bibtex", "/path/on/disk", (error, binary) -> binary.should.equal false it "should return .cls files as not binary", -> - @FileTypeManager.isBinary "file.cls", (error, binary) -> + @FileTypeManager.isBinary "file.cls", "/path/on/disk", (error, binary) -> binary.should.equal false it "should return .sty files as not binary", -> - @FileTypeManager.isBinary "file.sty", (error, binary) -> + @FileTypeManager.isBinary "file.sty", "/path/on/disk", (error, binary) -> binary.should.equal false it "should return .bst files as not binary", -> - @FileTypeManager.isBinary "file.bst", (error, binary) -> + @FileTypeManager.isBinary "file.bst", "/path/on/disk", (error, binary) -> binary.should.equal false it "should return .eps files as binary", -> - @FileTypeManager.isBinary "file.eps", (error, binary) -> + @FileTypeManager.isBinary "file.eps", "/path/on/disk", (error, binary) -> binary.should.equal true it "should return .dvi files as binary", -> - @FileTypeManager.isBinary "file.dvi", (error, binary) -> + @FileTypeManager.isBinary "file.dvi", "/path/on/disk", (error, binary) -> binary.should.equal true it "should return .png files as binary", -> - @FileTypeManager.isBinary "file.png", (error, binary) -> + @FileTypeManager.isBinary "file.png", "/path/on/disk", (error, binary) -> binary.should.equal true it "should return files without extensions as binary", -> - @FileTypeManager.isBinary "tex", (error, binary) -> + @FileTypeManager.isBinary "tex", "/path/on/disk", (error, binary) -> binary.should.equal true it "should ignore the case of an extension", -> - @FileTypeManager.isBinary "file.TEX", (error, binary) -> + @FileTypeManager.isBinary "file.TEX", "/path/on/disk", (error, binary) -> binary.should.equal false - + + it "should return large text files as binary", -> + @stat.size = 2 * 1024 * 1024 # 2Mb + @FileTypeManager.isBinary "file.tex", "/path/on/disk", (error, binary) -> + binary.should.equal true describe "shouldIgnore", -> it "should ignore tex auxiliary files", ->