Merge pull request #16438 from overleaf/jpa-em-replace-find-subprocess

[clsi] replace find subprocess for listing compile dir contents

GitOrigin-RevId: 36c8230ea6d787b1d948407d6473c14af8d6b5f6
This commit is contained in:
Jakob Ackermann
2024-01-10 12:02:06 +00:00
committed by Copybot
parent 1e86897556
commit 5aeb1f1459
6 changed files with 148 additions and 227 deletions

2
package-lock.json generated
View File

@@ -43503,6 +43503,7 @@
"@overleaf/logger": "*", "@overleaf/logger": "*",
"@overleaf/metrics": "*", "@overleaf/metrics": "*",
"@overleaf/o-error": "*", "@overleaf/o-error": "*",
"@overleaf/promise-utils": "*",
"@overleaf/settings": "*", "@overleaf/settings": "*",
"async": "3.2.2", "async": "3.2.2",
"body-parser": "^1.19.0", "body-parser": "^1.19.0",
@@ -53802,6 +53803,7 @@
"@overleaf/logger": "*", "@overleaf/logger": "*",
"@overleaf/metrics": "*", "@overleaf/metrics": "*",
"@overleaf/o-error": "*", "@overleaf/o-error": "*",
"@overleaf/promise-utils": "*",
"@overleaf/settings": "*", "@overleaf/settings": "*",
"async": "3.2.2", "async": "3.2.2",
"body-parser": "^1.19.0", "body-parser": "^1.19.0",

View File

@@ -1,8 +1,7 @@
const childProcess = require('child_process')
const fsPromises = require('fs/promises') const fsPromises = require('fs/promises')
const os = require('os') const os = require('os')
const Path = require('path') const Path = require('path')
const { callbackify, promisify } = require('util') const { callbackify } = require('util')
const Settings = require('@overleaf/settings') const Settings = require('@overleaf/settings')
const logger = require('@overleaf/logger') const logger = require('@overleaf/logger')
@@ -21,8 +20,6 @@ const CommandRunner = require('./CommandRunner')
const { emitPdfStats } = require('./ContentCacheMetrics') const { emitPdfStats } = require('./ContentCacheMetrics')
const SynctexOutputParser = require('./SynctexOutputParser') const SynctexOutputParser = require('./SynctexOutputParser')
const execFile = promisify(childProcess.execFile)
const COMPILE_TIME_BUCKETS = [ const COMPILE_TIME_BUCKETS = [
// NOTE: These buckets are locked in per metric name. // NOTE: These buckets are locked in per metric name.
// If you want to change them, you will need to rename metrics. // If you want to change them, you will need to rename metrics.
@@ -211,7 +208,7 @@ async function doCompile(request) {
Metrics.inc('compiles-timeout', 1, request.metricsOpts) Metrics.inc('compiles-timeout', 1, request.metricsOpts)
} }
const outputFiles = await _saveOutputFiles({ const { outputFiles, allEntries } = await _saveOutputFiles({
request, request,
compileDir, compileDir,
resourceList, resourceList,
@@ -222,7 +219,11 @@ async function doCompile(request) {
// Clear project if this compile was abruptly terminated // Clear project if this compile was abruptly terminated
if (error.terminated || error.timedout) { if (error.terminated || error.timedout) {
await clearProject(request.project_id, request.user_id) await clearProjectWithListing(
request.project_id,
request.user_id,
allEntries
)
} }
throw error throw error
@@ -279,7 +280,7 @@ async function doCompile(request) {
// Emit compile time. // Emit compile time.
timings.compile = ts timings.compile = ts
const outputFiles = await _saveOutputFiles({ const { outputFiles } = await _saveOutputFiles({
request, request,
compileDir, compileDir,
resourceList, resourceList,
@@ -312,10 +313,8 @@ async function _saveOutputFiles({
) )
const outputDir = getOutputDir(request.project_id, request.user_id) const outputDir = getOutputDir(request.project_id, request.user_id)
let { outputFiles } = await OutputFileFinder.promises.findOutputFiles( let { outputFiles, allEntries } =
resourceList, await OutputFileFinder.promises.findOutputFiles(resourceList, compileDir)
compileDir
)
try { try {
outputFiles = await OutputCacheManager.promises.saveOutputFiles( outputFiles = await OutputCacheManager.promises.saveOutputFiles(
@@ -330,7 +329,7 @@ async function _saveOutputFiles({
} }
timings.output = timer.done() timings.output = timer.done()
return outputFiles return { outputFiles, allEntries }
} }
async function stopCompile(projectId, userId) { async function stopCompile(projectId, userId) {
@@ -340,6 +339,11 @@ async function stopCompile(projectId, userId) {
async function clearProject(projectId, userId) { async function clearProject(projectId, userId) {
const compileDir = getCompileDir(projectId, userId) const compileDir = getCompileDir(projectId, userId)
await fsPromises.rm(compileDir, { force: true, recursive: true })
}
async function clearProjectWithListing(projectId, userId, allEntries) {
const compileDir = getCompileDir(projectId, userId)
const exists = await _checkDirectory(compileDir) const exists = await _checkDirectory(compileDir)
if (!exists) { if (!exists) {
@@ -347,12 +351,15 @@ async function clearProject(projectId, userId) {
return return
} }
try { for (const pathInProject of allEntries) {
await execFile('rm', ['-r', '-f', '--', compileDir]) const path = Path.join(compileDir, pathInProject)
} catch (err) { if (path.endsWith('/')) {
OError.tag(err, `rm -r failed`, { compileDir, stderr: err.stderr }) await fsPromises.rmdir(path)
throw err } else {
await fsPromises.unlink(path)
}
} }
await fsPromises.rmdir(compileDir)
} }
async function _findAllDirs() { async function _findAllDirs() {

View File

@@ -1,95 +1,54 @@
let OutputFileFinder
const Path = require('path') const Path = require('path')
const _ = require('lodash') const fs = require('fs')
const { spawn } = require('child_process') const { callbackifyMultiResult } = require('@overleaf/promise-utils')
const logger = require('@overleaf/logger')
module.exports = OutputFileFinder = { async function walkFolder(compileDir, d, files, allEntries) {
findOutputFiles(resources, directory, callback) { const dirents = await fs.promises.readdir(Path.join(compileDir, d), {
const incomingResources = new Set(resources.map(resource => resource.path)) withFileTypes: true,
})
OutputFileFinder._getAllFiles(directory, function (error, allFiles) { for (const dirent of dirents) {
if (allFiles == null) { const p = Path.join(d, dirent.name)
allFiles = [] if (dirent.isDirectory()) {
} await walkFolder(compileDir, p, files, allEntries)
if (error) { allEntries.push(p + '/')
logger.err({ err: error }, 'error finding all output files') } else if (dirent.isFile()) {
return callback(error) files.push(p)
} allEntries.push(p)
const outputFiles = [] } else {
for (const file of allFiles) { allEntries.push(p)
if (!incomingResources.has(file)) { }
outputFiles.push({ }
path: file,
type: Path.extname(file).replace(/^\./, '') || undefined,
})
}
}
callback(null, outputFiles, allFiles)
})
},
_getAllFiles(directory, callback) {
callback = _.once(callback)
// don't include clsi-specific files/directories in the output list
const EXCLUDE_DIRS = [
'-name',
'.cache',
'-o',
'-name',
'.archive',
'-o',
'-name',
'.project-*',
]
const args = [
directory,
'(',
...EXCLUDE_DIRS,
')',
'-prune',
'-o',
'-type',
'f',
'-print',
]
logger.debug({ args }, 'running find command')
const proc = spawn('find', args)
let stdout = ''
proc.stdout.setEncoding('utf8').on('data', chunk => (stdout += chunk))
proc.on('error', callback)
proc.on('close', function (code) {
if (code !== 0) {
logger.warn(
{ directory, code },
"find returned error, directory likely doesn't exist"
)
return callback(null, [])
}
let fileList = stdout.trim().split('\n')
fileList = fileList.map(function (file) {
// Strip leading directory
return Path.relative(directory, file)
})
callback(null, fileList)
})
},
} }
module.exports.promises = { async function findOutputFiles(resources, directory) {
findOutputFiles: (resources, directory) => const files = []
new Promise((resolve, reject) => { const allEntries = []
OutputFileFinder.findOutputFiles( await walkFolder(directory, '', files, allEntries)
resources,
directory, const incomingResources = new Set(resources.map(resource => resource.path))
(err, outputFiles, allFiles) => {
if (err) { const outputFiles = []
reject(err) for (const path of files) {
} else { if (incomingResources.has(path)) continue
resolve({ outputFiles, allFiles }) if (path === '.project-sync-state') continue
} if (path === '.project-lock') continue
} outputFiles.push({
) path,
}), type: Path.extname(path).replace(/^\./, '') || undefined,
})
}
return {
outputFiles,
allEntries,
}
}
module.exports = {
findOutputFiles: callbackifyMultiResult(findOutputFiles, [
'outputFiles',
'allEntries',
]),
promises: {
findOutputFiles,
},
} }

View File

@@ -20,6 +20,7 @@
"@overleaf/logger": "*", "@overleaf/logger": "*",
"@overleaf/metrics": "*", "@overleaf/metrics": "*",
"@overleaf/o-error": "*", "@overleaf/o-error": "*",
"@overleaf/promise-utils": "*",
"@overleaf/settings": "*", "@overleaf/settings": "*",
"async": "3.2.2", "async": "3.2.2",
"body-parser": "^1.19.0", "body-parser": "^1.19.0",

View File

@@ -53,7 +53,10 @@ describe('CompileManager', function () {
} }
this.OutputFileFinder = { this.OutputFileFinder = {
promises: { promises: {
findOutputFiles: sinon.stub().resolves(this.outputFiles), findOutputFiles: sinon.stub().resolves({
outputFiles: this.outputFiles,
allEntries: this.outputFiles.map(f => f.path).concat(['main.tex']),
}),
}, },
} }
this.OutputCacheManager = { this.OutputCacheManager = {
@@ -117,6 +120,9 @@ describe('CompileManager', function () {
stat: sinon.stub(), stat: sinon.stub(),
readFile: sinon.stub(), readFile: sinon.stub(),
mkdir: sinon.stub().resolves(), mkdir: sinon.stub().resolves(),
rm: sinon.stub().resolves(),
unlink: sinon.stub().resolves(),
rmdir: sinon.stub().resolves(),
} }
this.fsPromises.lstat.withArgs(this.compileDir).resolves(this.dirStats) this.fsPromises.lstat.withArgs(this.compileDir).resolves(this.dirStats)
this.fsPromises.stat this.fsPromises.stat
@@ -319,12 +325,15 @@ describe('CompileManager', function () {
}) })
it('should clear the compile directory', function () { it('should clear the compile directory', function () {
expect(this.child_process.execFile).to.have.been.calledWith('rm', [ for (const { path } of this.buildFiles) {
'-r', expect(this.fsPromises.unlink).to.have.been.calledWith(
'-f', this.compileDir + '/' + path
'--', )
this.compileDir, }
]) expect(this.fsPromises.unlink).to.have.been.calledWith(
this.compileDir + '/main.tex'
)
expect(this.fsPromises.rmdir).to.have.been.calledWith(this.compileDir)
}) })
}) })
@@ -339,50 +348,29 @@ describe('CompileManager', function () {
}) })
it('should clear the compile directory', function () { it('should clear the compile directory', function () {
expect(this.child_process.execFile).to.have.been.calledWith('rm', [ for (const { path } of this.buildFiles) {
'-r', expect(this.fsPromises.unlink).to.have.been.calledWith(
'-f', this.compileDir + '/' + path
'--', )
this.compileDir, }
]) expect(this.fsPromises.unlink).to.have.been.calledWith(
this.compileDir + '/main.tex'
)
expect(this.fsPromises.rmdir).to.have.been.calledWith(this.compileDir)
}) })
}) })
}) })
describe('clearProject', function () { describe('clearProject', function () {
describe('successfully', function () { it('should clear the compile directory', async function () {
beforeEach(async function () { await this.CompileManager.promises.clearProject(
await this.CompileManager.promises.clearProject( this.projectId,
this.projectId, this.userId
this.userId )
)
})
it('should remove the project directory', function () { expect(this.fsPromises.rm).to.have.been.calledWith(this.compileDir, {
expect(this.child_process.execFile).to.have.been.calledWith('rm', [ force: true,
'-r', recursive: true,
'-f',
'--',
this.compileDir,
])
})
})
describe('with a non-success status code', function () {
beforeEach(async function () {
this.child_process.execFile.yields(new Error('oops'))
await expect(
this.CompileManager.promises.clearProject(this.projectId, this.userId)
).to.be.rejected
})
it('should remove the project directory', function () {
expect(this.child_process.execFile).to.have.been.calledWith('rm', [
'-r',
'-f',
'--',
this.compileDir,
])
}) })
}) })
}) })

View File

@@ -1,61 +1,55 @@
/* eslint-disable
no-return-assign,
no-unused-vars,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS102: Remove unnecessary code created because of implicit returns
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
const SandboxedModule = require('sandboxed-module') const SandboxedModule = require('sandboxed-module')
const sinon = require('sinon') const sinon = require('sinon')
const modulePath = require('path').join( const modulePath = require('path').join(
__dirname, __dirname,
'../../../app/js/OutputFileFinder' '../../../app/js/OutputFileFinder'
) )
const path = require('path')
const { expect } = require('chai') const { expect } = require('chai')
const { EventEmitter } = require('events') const mockFs = require('mock-fs')
describe('OutputFileFinder', function () { describe('OutputFileFinder', function () {
beforeEach(function () { beforeEach(function () {
this.OutputFileFinder = SandboxedModule.require(modulePath, { this.OutputFileFinder = SandboxedModule.require(modulePath, {})
requires: { this.directory = '/test/dir'
fs: (this.fs = {}), this.callback = sinon.stub()
child_process: { spawn: (this.spawn = sinon.stub()) },
}, mockFs({
globals: { [this.directory]: {
Math, // used by lodash resource: {
'path.tex': 'a source file',
},
'output.pdf': 'a generated pdf file',
extra: {
'file.tex': 'a generated tex file',
},
'sneaky-file': mockFs.symlink({
path: '../foo',
}),
}, },
}) })
this.directory = '/test/dir' })
return (this.callback = sinon.stub())
afterEach(function () {
mockFs.restore()
}) })
describe('findOutputFiles', function () { describe('findOutputFiles', function () {
beforeEach(function (done) { beforeEach(async function () {
this.resource_path = 'resource/path.tex' this.resource_path = 'resource/path.tex'
this.output_paths = ['output.pdf', 'extra/file.tex'] this.output_paths = ['output.pdf', 'extra/file.tex']
this.all_paths = this.output_paths.concat([this.resource_path]) this.all_paths = this.output_paths.concat([this.resource_path])
this.resources = [{ path: (this.resource_path = 'resource/path.tex') }] this.resources = [{ path: (this.resource_path = 'resource/path.tex') }]
this.OutputFileFinder._getAllFiles = sinon const { outputFiles, allEntries } =
.stub() await this.OutputFileFinder.promises.findOutputFiles(
.callsArgWith(1, null, this.all_paths) this.resources,
return this.OutputFileFinder.findOutputFiles( this.directory
this.resources, )
this.directory, this.outputFiles = outputFiles
(error, outputFiles) => { this.allEntries = allEntries
if (error) return done(error)
this.outputFiles = outputFiles
done()
}
)
}) })
return it('should only return the output files, not directories or resource paths', function () { it('should only return the output files, not directories or resource paths', function () {
return expect(this.outputFiles).to.deep.equal([ expect(this.outputFiles).to.have.deep.members([
{ {
path: 'output.pdf', path: 'output.pdf',
type: 'pdf', type: 'pdf',
@@ -65,44 +59,14 @@ describe('OutputFileFinder', function () {
type: 'tex', type: 'tex',
}, },
]) ])
}) expect(this.allEntries).to.deep.equal([
}) 'extra/file.tex',
'extra/',
return describe('_getAllFiles', function () { 'output.pdf',
beforeEach(function () { 'resource/path.tex',
this.proc = new EventEmitter() 'resource/',
this.proc.stdout = new EventEmitter() 'sneaky-file',
this.proc.stdout.setEncoding = sinon.stub().returns(this.proc.stdout) ])
this.spawn.returns(this.proc)
this.directory = '/base/dir'
return this.OutputFileFinder._getAllFiles(this.directory, this.callback)
})
describe('successfully', function () {
beforeEach(function () {
this.proc.stdout.emit(
'data',
['/base/dir/main.tex', '/base/dir/chapters/chapter1.tex'].join('\n') +
'\n'
)
return this.proc.emit('close', 0)
})
return it('should call the callback with the relative file paths', function () {
return this.callback
.calledWith(null, ['main.tex', 'chapters/chapter1.tex'])
.should.equal(true)
})
})
return describe("when the directory doesn't exist", function () {
beforeEach(function () {
return this.proc.emit('close', 1)
})
return it('should call the callback with a blank array', function () {
return this.callback.calledWith(null, []).should.equal(true)
})
}) })
}) })
}) })