Merge pull request #30045 from overleaf/bg-object-persistor-make-list-directory-safer

Improve safety of object persistor

GitOrigin-RevId: bced9814de6613b388ca288a5f72cd42cff6c1d3
This commit is contained in:
Brian Gough
2025-12-05 08:53:54 +00:00
committed by Copybot
parent 6f34fa869d
commit 7a57ef00cb
10 changed files with 550 additions and 40 deletions

View File

@@ -177,4 +177,21 @@ module.exports = class AbstractPersistor {
prefix,
})
}
/**
* List objects in a directory, returning key and size information.
*
* Suitable only for directories where the number of keys is known to be small.
*
* @param {string} location
* @param {string} prefix
* @returns {Promise<Array<{key: string, size: number}>>}
*/
async listDirectoryStats(location, prefix) {
throw new NotImplementedError('method not implemented in persistor', {
method: 'listDirectoryStats',
location,
prefix,
})
}
}

View File

@@ -196,7 +196,46 @@ module.exports = class FSPersistor extends AbstractPersistor {
async listDirectoryKeys(location, name) {
const fsPath = this._getFsPath(location, name)
return await this._listDirectory(fsPath)
const paths = await this._listDirectory(fsPath)
// Filter to only return files, not directories
const files = []
for (const path of paths) {
try {
const stat = await fsPromises.stat(path)
if (stat.isFile()) {
files.push(path)
}
} catch (err) {
// ignore files that may have just been deleted
if (err.code !== 'ENOENT') {
throw err
}
}
}
return files
}
async listDirectoryStats(location, name) {
const fsPath = this._getFsPath(location, name)
const paths = await this._listDirectory(fsPath)
// Filter to only return files, not directories, with their sizes
const stats = []
for (const path of paths) {
try {
const stat = await fsPromises.stat(path)
if (stat.isFile()) {
stats.push({ key: path, size: stat.size })
}
} catch (err) {
// ignore files that may have just been deleted
if (err.code !== 'ENOENT') {
throw err
}
}
}
return stats
}
async checkIfObjectExists(location, name) {

View File

@@ -322,6 +322,17 @@ module.exports = class GcsPersistor extends AbstractPersistor {
return files.map(file => file.name)
}
async listDirectoryStats(bucketName, prefix) {
const files = await this.#listDirectory(
bucketName,
ensurePrefixIsDirectory(prefix)
)
return files.map(file => ({
key: file.name,
size: parseInt(file.metadata.size, 10),
}))
}
async checkIfObjectExists(bucketName, key) {
try {
const [response] = await this.storage

View File

@@ -417,10 +417,20 @@ class CachedPerProjectEncryptedS3Persistor {
*
* @param {string} bucketName
* @param {string} path
* @return {Promise<ListDirectoryResult>}
* @return {Promise<string[]>}
*/
async listDirectory(bucketName, path) {
return await this.#parent.listDirectory(bucketName, path)
async listDirectoryKeys(bucketName, path) {
return await this.#parent.listDirectoryKeys(bucketName, path)
}
/**
*
* @param {string} bucketName
* @param {string} path
* @return {Promise<Array<{key: string, size: number}>>}
*/
async listDirectoryStats(bucketName, path) {
return await this.#parent.listDirectoryStats(bucketName, path)
}
/**

View File

@@ -266,7 +266,7 @@ class S3Persistor extends AbstractPersistor {
* @return {Promise<void>}
*/
async deleteDirectory(bucketName, key, continuationToken) {
const { contents, response } = await this.listDirectory(
const { contents, response } = await this.#listDirectory(
bucketName,
key,
continuationToken
@@ -309,7 +309,7 @@ class S3Persistor extends AbstractPersistor {
* @param {string} [continuationToken]
* @return {Promise<ListDirectoryResult>}
*/
async listDirectory(bucketName, key, continuationToken) {
async #listDirectory(bucketName, key, continuationToken) {
let response
const options = { Bucket: bucketName, Prefix: key }
if (continuationToken) {
@@ -535,6 +535,59 @@ class S3Persistor extends AbstractPersistor {
}
}
/**
* @param {string} bucketName
* @param {string} prefix
* @return {Promise<Array<string>>}
*/
async listDirectoryKeys(bucketName, prefix) {
const keys = []
let continuationToken
do {
const { contents, response } = await this.#listDirectory(
bucketName,
prefix,
continuationToken
)
keys.push(...contents.map(item => item.Key || ''))
continuationToken = response.IsTruncated
? response.NextContinuationToken
: undefined
} while (continuationToken)
return keys
}
/**
* @param {string} bucketName
* @param {string} prefix
* @return {Promise<Array<{key: string, size: number}>>}
*/
async listDirectoryStats(bucketName, prefix) {
const stats = []
let continuationToken
do {
const { contents, response } = await this.#listDirectory(
bucketName,
prefix,
continuationToken
)
stats.push(
...contents.map(item => ({
key: item.Key || '',
size: item.Size ?? -1,
}))
)
continuationToken = response.IsTruncated
? response.NextContinuationToken
: undefined
} while (continuationToken)
return stats
}
/**
* @param {string} bucket
* @return {S3Client}

View File

@@ -400,6 +400,56 @@ describe('FSPersistorTests', function () {
).to.equal(0)
})
})
describe('listDirectoryKeys', function () {
beforeEach(async function () {
for (const file of Object.values(files)) {
await persistor.sendFile(location, file, '/uploads/info.txt')
}
})
it('should list directory keys', async function () {
const keys = await persistor.listDirectoryKeys(location, 'animals')
expect(keys).to.have.lengthOf(2)
expect(keys).to.include(scenario.fsPath(files.wombat))
expect(keys).to.include(scenario.fsPath(files.giraffe))
})
it('should return empty array for non-existing directories', async function () {
const keys = await persistor.listDirectoryKeys(
location,
'does-not-exist'
)
expect(keys).to.deep.equal([])
})
})
describe('listDirectoryStats', function () {
beforeEach(async function () {
for (const file of Object.values(files)) {
await persistor.sendFile(location, file, '/uploads/info.txt')
}
})
it('should list directory stats', async function () {
const stats = await persistor.listDirectoryStats(location, 'animals')
expect(stats).to.have.lengthOf(2)
const keys = stats.map(s => s.key)
expect(keys).to.include(scenario.fsPath(files.wombat))
expect(keys).to.include(scenario.fsPath(files.giraffe))
for (const stat of stats) {
expect(stat.size).to.equal(localFiles['/uploads/info.txt'].length)
}
})
it('should return empty array for non-existing directories', async function () {
const stats = await persistor.listDirectoryStats(
location,
'does-not-exist'
)
expect(stats).to.deep.equal([])
})
})
})
}
})

View File

@@ -723,4 +723,127 @@ describe('GcsPersistorTests', function () {
})
})
})
describe('listDirectoryKeys', function () {
describe('with valid parameters', function () {
let keys
beforeEach(async function () {
const filesWithNames = files.map((file, i) => ({
...file,
name: i === 0 ? 'llama' : 'hippo',
}))
GcsBucket.getFiles.resolves([filesWithNames])
keys = await GcsPersistor.listDirectoryKeys(bucket, key)
})
it('should list the objects in the directory', function () {
expect(Storage.prototype.bucket).to.have.been.calledWith(bucket)
expect(GcsBucket.getFiles).to.have.been.calledWith({
prefix: `${key}/`,
})
})
it('should return the keys', function () {
expect(keys).to.deep.equal(['llama', 'hippo'])
})
})
describe('when there are no files', function () {
let keys
beforeEach(async function () {
GcsBucket.getFiles.resolves([[]])
keys = await GcsPersistor.listDirectoryKeys(bucket, key)
})
it('should return an empty array', function () {
expect(keys).to.deep.equal([])
})
})
describe('when there is an error listing the objects', function () {
let error
beforeEach(async function () {
GcsBucket.getFiles.rejects(genericError)
try {
await GcsPersistor.listDirectoryKeys(bucket, key)
} catch (err) {
error = err
}
})
it('should generate a ReadError', function () {
expect(error).to.be.an.instanceOf(Errors.ReadError)
})
it('should wrap the error', function () {
expect(error.cause).to.equal(genericError)
})
})
})
describe('listDirectoryStats', function () {
describe('with valid parameters', function () {
let stats
beforeEach(async function () {
const filesWithNames = files.map((file, i) => ({
...file,
name: i === 0 ? 'llama' : 'hippo',
}))
GcsBucket.getFiles.resolves([filesWithNames])
stats = await GcsPersistor.listDirectoryStats(bucket, key)
})
it('should list the objects in the directory', function () {
expect(Storage.prototype.bucket).to.have.been.calledWith(bucket)
expect(GcsBucket.getFiles).to.have.been.calledWith({
prefix: `${key}/`,
})
})
it('should return the stats', function () {
expect(stats).to.deep.equal([
{ key: 'llama', size: 11 },
{ key: 'hippo', size: 22 },
])
})
})
describe('when there are no files', function () {
let stats
beforeEach(async function () {
GcsBucket.getFiles.resolves([[]])
stats = await GcsPersistor.listDirectoryStats(bucket, key)
})
it('should return an empty array', function () {
expect(stats).to.deep.equal([])
})
})
describe('when there is an error listing the objects', function () {
let error
beforeEach(async function () {
GcsBucket.getFiles.rejects(genericError)
try {
await GcsPersistor.listDirectoryStats(bucket, key)
} catch (err) {
error = err
}
})
it('should generate a ReadError', function () {
expect(error).to.be.an.instanceOf(Errors.ReadError)
})
it('should wrap the error', function () {
expect(error.cause).to.equal(genericError)
})
})
})
})

View File

@@ -1046,4 +1046,215 @@ describe('S3PersistorTests', function () {
})
})
})
describe('listDirectoryKeys', function () {
describe('with valid parameters', function () {
let keys
beforeEach(async function () {
S3.mockSend(S3.ListObjectsV2Command, {
Contents: files.map(file => ({ Key: file.Key })),
IsTruncated: false,
})
keys = await S3Persistor.listDirectoryKeys(bucket, key)
})
it('should list the objects in the directory', function () {
S3.assertSendCalledWith(S3.ListObjectsV2Command, {
Bucket: bucket,
Prefix: key,
})
})
it('should return the keys', function () {
expect(keys).to.deep.equal(['llama', 'hippo'])
})
})
describe('when there are no files', function () {
let keys
beforeEach(async function () {
S3.mockSend(S3.ListObjectsV2Command, {
Contents: [],
IsTruncated: false,
})
keys = await S3Persistor.listDirectoryKeys(bucket, key)
})
it('should return an empty array', function () {
expect(keys).to.deep.equal([])
})
})
describe('when there are more files available', function () {
const continuationToken = 'wombat'
let keys
beforeEach(async function () {
S3.mockSend(
S3.ListObjectsV2Command,
{
Contents: [files[0]].map(file => ({ Key: file.Key })),
IsTruncated: true,
NextContinuationToken: continuationToken,
},
{
nextResponses: [
{
Contents: [files[1]].map(file => ({ Key: file.Key })),
IsTruncated: false,
},
],
}
)
keys = await S3Persistor.listDirectoryKeys(bucket, key)
})
it('should list the objects in multiple calls', function () {
S3.assertSendCallCount(S3.ListObjectsV2Command, 2)
expect(S3.s3ClientStub.send.firstCall.args[0].payload).to.deep.equal({
Bucket: bucket,
Prefix: key,
})
expect(S3.s3ClientStub.send.secondCall.args[0].payload).to.deep.equal({
Bucket: bucket,
Prefix: key,
ContinuationToken: continuationToken,
})
})
it('should return all keys', function () {
expect(keys).to.deep.equal(['llama', 'hippo'])
})
})
describe('when there is an error listing the objects', function () {
let error
beforeEach(async function () {
S3.mockSend(S3.ListObjectsV2Command, genericError, { rejects: true })
try {
await S3Persistor.listDirectoryKeys(bucket, key)
} catch (err) {
error = err
}
})
it('should generate a ReadError', function () {
expect(error).to.be.an.instanceOf(Errors.ReadError)
})
it('should wrap the error', function () {
expect(error.cause).to.exist
})
})
})
describe('listDirectoryStats', function () {
describe('with valid parameters', function () {
let stats
beforeEach(async function () {
S3.mockSend(S3.ListObjectsV2Command, {
Contents: files.map(file => ({ Key: file.Key, Size: file.Size })),
IsTruncated: false,
})
stats = await S3Persistor.listDirectoryStats(bucket, key)
})
it('should list the objects in the directory', function () {
S3.assertSendCalledWith(S3.ListObjectsV2Command, {
Bucket: bucket,
Prefix: key,
})
})
it('should return the stats', function () {
expect(stats).to.deep.equal([
{ key: 'llama', size: 11 },
{ key: 'hippo', size: 22 },
])
})
})
describe('when there are no files', function () {
let stats
beforeEach(async function () {
S3.mockSend(S3.ListObjectsV2Command, {
Contents: [],
IsTruncated: false,
})
stats = await S3Persistor.listDirectoryStats(bucket, key)
})
it('should return an empty array', function () {
expect(stats).to.deep.equal([])
})
})
describe('when there are more files available', function () {
const continuationToken = 'wombat'
let stats
beforeEach(async function () {
S3.mockSend(
S3.ListObjectsV2Command,
{
Contents: [files[0]].map(file => ({
Key: file.Key,
Size: file.Size,
})),
IsTruncated: true,
NextContinuationToken: continuationToken,
},
{
nextResponses: [
{
Contents: [files[1]].map(file => ({
Key: file.Key,
Size: file.Size,
})),
IsTruncated: false,
},
],
}
)
stats = await S3Persistor.listDirectoryStats(bucket, key)
})
it('should list the objects in multiple calls', function () {
S3.assertSendCallCount(S3.ListObjectsV2Command, 2)
})
it('should return all stats', function () {
expect(stats).to.deep.equal([
{ key: 'llama', size: 11 },
{ key: 'hippo', size: 22 },
])
})
})
describe('when there is an error listing the objects', function () {
let error
beforeEach(async function () {
S3.mockSend(S3.ListObjectsV2Command, genericError, { rejects: true })
try {
await S3Persistor.listDirectoryStats(bucket, key)
} catch (err) {
error = err
}
})
it('should generate a ReadError', function () {
expect(error).to.be.an.instanceOf(Errors.ReadError)
})
it('should wrap the error', function () {
expect(error.cause).to.exist
})
})
})
})