mirror of
https://github.com/yu-i-i/overleaf-cep.git
synced 2026-06-01 13:21:37 +02:00
Merge pull request #28533 from overleaf/mj-blob-ranges-maxsize
[web+history-v1] Use range request for binary blobs in project snapshot GitOrigin-RevId: e72a2d481bf163b3e85361b42e30e82cf867f1cf
This commit is contained in:
committed by
Copybot
parent
6ed14c56ae
commit
6c79ce97cc
@@ -284,21 +284,22 @@ async function headProjectBlob(req, res) {
|
||||
}
|
||||
|
||||
// Support simple, singular ranges starting from zero only, up-to 2MB = 2_000_000, 7 digits
|
||||
const RANGE_HEADER = /^bytes=0-(\d{1,7})$/
|
||||
const RANGE_HEADER = /^bytes=(\d{1,7})-(\d{1,7})$/
|
||||
|
||||
/**
|
||||
* @param {string} header
|
||||
* @return {{}|{start: number, end: number}}
|
||||
* @return {undefined | {start: number, end: number}}
|
||||
* @private
|
||||
*/
|
||||
function _getRangeOpts(header) {
|
||||
if (!header) return {}
|
||||
if (!header) return undefined
|
||||
const match = header.match(RANGE_HEADER)
|
||||
if (match) {
|
||||
const end = parseInt(match[1], 10)
|
||||
return { start: 0, end }
|
||||
const start = parseInt(match[1], 10)
|
||||
const end = parseInt(match[2], 10)
|
||||
return { start, end }
|
||||
}
|
||||
return {}
|
||||
return undefined
|
||||
}
|
||||
|
||||
async function getProjectBlob(req, res, next) {
|
||||
@@ -311,6 +312,31 @@ async function getProjectBlob(req, res, next) {
|
||||
try {
|
||||
let stream
|
||||
try {
|
||||
if (opts) {
|
||||
// This is a range request, so we need to set the appropriate headers
|
||||
// Browser caching only works if the total size is known, so we have
|
||||
// to fetch the blob metadata first.
|
||||
const metaData = await blobStore.getBlob(hash)
|
||||
if (metaData) {
|
||||
const blobLength = metaData.getByteLength()
|
||||
if (opts.start > opts.end || opts.start >= blobLength) {
|
||||
return res
|
||||
.status(416) // Requested Range Not Satisfiable
|
||||
.set('Content-Range', `bytes */${blobLength}`)
|
||||
.set('Content-Length', '0')
|
||||
.end()
|
||||
}
|
||||
// Valid range request
|
||||
const actualEnd = Math.min(opts.end, blobLength - 1)
|
||||
const returnedSize = actualEnd - opts.start + 1
|
||||
res.set('Content-Length', returnedSize)
|
||||
res.set(
|
||||
'Content-Range',
|
||||
`bytes ${opts.start}-${actualEnd}/${blobLength}`
|
||||
)
|
||||
res.status(206)
|
||||
}
|
||||
}
|
||||
stream = await blobStore.getStream(hash, opts)
|
||||
} catch (err) {
|
||||
if (err instanceof Blob.NotFoundError) {
|
||||
|
||||
@@ -190,6 +190,12 @@ exports.paths = {
|
||||
type: 'file',
|
||||
},
|
||||
},
|
||||
206: {
|
||||
description: 'Success',
|
||||
schema: {
|
||||
type: 'file',
|
||||
},
|
||||
},
|
||||
404: {
|
||||
description: 'Not Found',
|
||||
schema: {
|
||||
|
||||
@@ -116,7 +116,7 @@ describe('Project blobs API', function () {
|
||||
expect(payload).to.equal(fileContents.toString())
|
||||
})
|
||||
|
||||
it('supports range request', async function () {
|
||||
it('supports range request from beginning', async function () {
|
||||
const url = new URL(
|
||||
testServer.url(
|
||||
`/api/projects/${projectId}/blobs/${testFiles.HELLO_TXT_HASH}`
|
||||
@@ -125,7 +125,127 @@ describe('Project blobs API', function () {
|
||||
url.searchParams.append('token', token)
|
||||
const response = await fetch(url, { headers: { Range: 'bytes=0-4' } })
|
||||
const payload = await response.text()
|
||||
expect(payload).to.equal(fileContents.toString().slice(0, 4))
|
||||
// 0-4 is inclusive, so 5 bytes
|
||||
expect(payload).to.equal(fileContents.slice(0, 5).toString())
|
||||
expect(response.headers.get('Content-Range')).to.equal(
|
||||
`bytes 0-4/${testFiles.HELLO_TXT_BYTE_LENGTH}`
|
||||
)
|
||||
expect(response.headers.get('Content-Length')).to.equal('5')
|
||||
expect(response.status).to.equal(HTTPStatus.PARTIAL_CONTENT)
|
||||
})
|
||||
|
||||
it('supports range request in middle', async function () {
|
||||
const url = new URL(
|
||||
testServer.url(
|
||||
`/api/projects/${projectId}/blobs/${testFiles.HELLO_TXT_HASH}`
|
||||
)
|
||||
)
|
||||
url.searchParams.append('token', token)
|
||||
const response = await fetch(url, { headers: { Range: 'bytes=5-9' } })
|
||||
const payload = await response.text()
|
||||
// 5-9 is inclusive, so 5 bytes
|
||||
expect(payload).to.equal(fileContents.slice(5, 10).toString())
|
||||
expect(response.headers.get('Content-Range')).to.equal(
|
||||
`bytes 5-9/${testFiles.HELLO_TXT_BYTE_LENGTH}`
|
||||
)
|
||||
expect(response.headers.get('Content-Length')).to.equal('5')
|
||||
expect(response.status).to.equal(HTTPStatus.PARTIAL_CONTENT)
|
||||
})
|
||||
|
||||
it('supports range request past end', async function () {
|
||||
const url = new URL(
|
||||
testServer.url(
|
||||
`/api/projects/${projectId}/blobs/${testFiles.HELLO_TXT_HASH}`
|
||||
)
|
||||
)
|
||||
url.searchParams.append('token', token)
|
||||
const response = await fetch(url, { headers: { Range: 'bytes=5-100' } })
|
||||
const payload = await response.text()
|
||||
expect(payload).to.equal(fileContents.slice(5).toString())
|
||||
// Response range should be capped to the actual file size
|
||||
expect(response.headers.get('Content-Range')).to.equal(
|
||||
`bytes 5-10/${testFiles.HELLO_TXT_BYTE_LENGTH}`
|
||||
)
|
||||
expect(response.headers.get('Content-Length')).to.equal('6')
|
||||
expect(response.status).to.equal(HTTPStatus.PARTIAL_CONTENT)
|
||||
})
|
||||
|
||||
describe('with invalid request ranges', async function () {
|
||||
async function testInvalidRange(range) {
|
||||
const url = new URL(
|
||||
testServer.url(
|
||||
`/api/projects/${projectId}/blobs/${testFiles.HELLO_TXT_HASH}`
|
||||
)
|
||||
)
|
||||
url.searchParams.append('token', token)
|
||||
const response = await fetch(url, { headers: { Range: range } })
|
||||
|
||||
expect(response.headers.get('Content-Range')).to.equal(
|
||||
`bytes */${testFiles.HELLO_TXT_BYTE_LENGTH}`
|
||||
)
|
||||
expect(response.headers.get('Content-Length')).to.equal('0')
|
||||
expect(response.status).to.equal(
|
||||
HTTPStatus.REQUESTED_RANGE_NOT_SATISFIABLE
|
||||
)
|
||||
}
|
||||
|
||||
it('should fail on invalid range where start > end', async function () {
|
||||
testInvalidRange('bytes=10-5')
|
||||
})
|
||||
|
||||
it('should fail on invalid range where start = length', async function () {
|
||||
testInvalidRange(`bytes=${testFiles.HELLO_TXT_BYTE_LENGTH + 1}-105`)
|
||||
})
|
||||
|
||||
it('should fail on invalid range where start > length', async function () {
|
||||
testInvalidRange(`bytes=${testFiles.HELLO_TXT_BYTE_LENGTH}-105`)
|
||||
})
|
||||
})
|
||||
|
||||
describe('with an empty blob', async function () {
|
||||
beforeEach(async function () {
|
||||
const response = await fetch(
|
||||
testServer.url(
|
||||
`/api/projects/${projectId}/blobs/${testFiles.EMPTY_FILE_HASH}`
|
||||
),
|
||||
{
|
||||
method: 'PUT',
|
||||
headers: { Authorization: `Bearer ${token}` },
|
||||
body: '',
|
||||
}
|
||||
)
|
||||
expect(response.ok).to.be.true
|
||||
})
|
||||
|
||||
it('should not handle range requests', async function () {
|
||||
const url = new URL(
|
||||
testServer.url(
|
||||
`/api/projects/${projectId}/blobs/${testFiles.EMPTY_FILE_HASH}`
|
||||
)
|
||||
)
|
||||
url.searchParams.append('token', token)
|
||||
const response = await fetch(url, { headers: { Range: 'bytes=0-0' } })
|
||||
expect(response.headers.get('Content-Range')).to.equal(
|
||||
`bytes */${testFiles.EMPTY_FILE_BYTE_LENGTH}`
|
||||
)
|
||||
expect(response.headers.get('Content-Length')).to.equal('0')
|
||||
expect(response.status).to.equal(
|
||||
HTTPStatus.REQUESTED_RANGE_NOT_SATISFIABLE
|
||||
)
|
||||
})
|
||||
|
||||
it('should handle full requests', async function () {
|
||||
const url = new URL(
|
||||
testServer.url(
|
||||
`/api/projects/${projectId}/blobs/${testFiles.EMPTY_FILE_HASH}`
|
||||
)
|
||||
)
|
||||
url.searchParams.append('token', token)
|
||||
const response = await fetch(url)
|
||||
const payload = await response.text()
|
||||
expect(payload).to.equal('')
|
||||
expect(response.status).to.equal(HTTPStatus.OK)
|
||||
})
|
||||
})
|
||||
|
||||
it('supports HEAD request', async function () {
|
||||
|
||||
@@ -19,6 +19,9 @@ exports.NON_BMP_TXT_BYTE_LENGTH = 57
|
||||
exports.NULL_CHARACTERS_TXT_HASH = '4227ca4e8736af63036e7457e2db376ddf7e5795'
|
||||
exports.NULL_CHARACTERS_TXT_BYTE_LENGTH = 3
|
||||
|
||||
exports.EMPTY_FILE_HASH = 'e69de29bb2d1d6434b8b29ae775ad8c2e48c5391'
|
||||
exports.EMPTY_FILE_BYTE_LENGTH = 0
|
||||
|
||||
// git hashes of some short strings for testing
|
||||
exports.STRING_A_HASH = '2e65efe2a145dda7ee51d1741299f848e5bf752e'
|
||||
exports.STRING_AB_HASH = '9ae9e86b7bd6cb1472d9373702d8249973da0832'
|
||||
|
||||
@@ -66,9 +66,9 @@ async function requestBlob(method, req, res) {
|
||||
}
|
||||
|
||||
const range = req.get('Range')
|
||||
let stream, contentLength
|
||||
let stream, contentLength, contentRange
|
||||
try {
|
||||
;({ stream, contentLength } =
|
||||
;({ stream, contentLength, contentRange } =
|
||||
await HistoryManager.promises.requestBlobWithProjectId(
|
||||
projectId,
|
||||
hash,
|
||||
@@ -80,7 +80,11 @@ async function requestBlob(method, req, res) {
|
||||
throw err
|
||||
}
|
||||
|
||||
if (contentLength) res.setHeader('Content-Length', contentLength) // set on HEAD
|
||||
if (contentLength) res.setHeader('Content-Length', contentLength)
|
||||
if (contentRange) {
|
||||
res.setHeader('Content-Range', contentRange)
|
||||
res.status(206) // Partial Content
|
||||
}
|
||||
res.setHeader('Content-Type', 'application/octet-stream')
|
||||
setBlobCacheHeaders(res, hash)
|
||||
|
||||
|
||||
@@ -207,6 +207,7 @@ async function requestBlob(historyId, hash, method = 'GET', range = '') {
|
||||
url,
|
||||
stream,
|
||||
contentLength: parseInt(response.headers.get('Content-Length'), 10),
|
||||
contentRange: response.headers.get('Content-Range'),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -143,10 +143,18 @@ export class ProjectSnapshot {
|
||||
): Promise<any> {
|
||||
const file = this.snapshot.getFile(path)
|
||||
const hash = file?.getHash()
|
||||
const byteLength = file?.getByteLength()
|
||||
if (hash == null) {
|
||||
return null
|
||||
}
|
||||
return await this.blobStore.getString(hash, options)
|
||||
if (byteLength == null) {
|
||||
return null
|
||||
}
|
||||
let blobStoreOptions
|
||||
if (options?.maxSize != null && byteLength > options?.maxSize) {
|
||||
blobStoreOptions = { maxSize: options.maxSize }
|
||||
}
|
||||
return await this.blobStore.getString(hash, blobStoreOptions)
|
||||
}
|
||||
|
||||
getDocs(): Map<string, File> {
|
||||
@@ -323,44 +331,20 @@ async function fetchBlob(
|
||||
options?: { maxSize?: number }
|
||||
): Promise<string> {
|
||||
const url = `/project/${projectId}/blob/${hash}`
|
||||
if (options?.maxSize) {
|
||||
return await fetchTextFileWithSizeLimit(url, options.maxSize)
|
||||
let fetchOpts
|
||||
if (options?.maxSize === 0) {
|
||||
return ''
|
||||
}
|
||||
const res = await fetch(url)
|
||||
if (options?.maxSize) {
|
||||
fetchOpts = {
|
||||
headers: {
|
||||
Range: `bytes=0-${options.maxSize - 1}`,
|
||||
},
|
||||
}
|
||||
}
|
||||
const res = await fetch(url, fetchOpts)
|
||||
if (!res.ok) {
|
||||
throw new FetchError('Failed to fetch blob', url, undefined, res)
|
||||
}
|
||||
return await res.text()
|
||||
}
|
||||
|
||||
async function fetchTextFileWithSizeLimit(url: string, maxSize: number) {
|
||||
let result = ''
|
||||
try {
|
||||
const abortController = new AbortController()
|
||||
const response = await fetch(url, {
|
||||
signal: abortController.signal,
|
||||
})
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error('Failed to fetch blob')
|
||||
}
|
||||
if (!response.body) {
|
||||
throw new Error('Response body is empty')
|
||||
}
|
||||
|
||||
const reader = response.body.pipeThrough(new TextDecoderStream())
|
||||
for await (const chunk of reader) {
|
||||
result += chunk
|
||||
if (result.length > maxSize) {
|
||||
abortController.abort()
|
||||
}
|
||||
}
|
||||
} catch (error: any) {
|
||||
if (error?.name === 'AbortError') {
|
||||
// This is fine, we just return the result we have so far
|
||||
} else {
|
||||
throw error
|
||||
}
|
||||
}
|
||||
return result.slice(0, maxSize)
|
||||
}
|
||||
|
||||
@@ -44,6 +44,10 @@ describe('ProjectSnapshot', function () {
|
||||
), // 6.5MB
|
||||
hash: 'eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee',
|
||||
},
|
||||
'empty.png': {
|
||||
contents: '',
|
||||
hash: 'ffffffffffffffffffffffffffffffffffffffff',
|
||||
},
|
||||
}
|
||||
|
||||
const chunk = {
|
||||
@@ -82,6 +86,13 @@ describe('ProjectSnapshot', function () {
|
||||
byteLength: files['bibliography.bib'].contents.length,
|
||||
},
|
||||
},
|
||||
{
|
||||
pathname: 'empty.png',
|
||||
file: {
|
||||
hash: files['empty.png'].hash,
|
||||
byteLength: files['empty.png'].contents.length,
|
||||
},
|
||||
},
|
||||
],
|
||||
timestamp: '2025-01-01T12:00:00.000Z',
|
||||
},
|
||||
@@ -145,10 +156,24 @@ describe('ProjectSnapshot', function () {
|
||||
})
|
||||
}
|
||||
|
||||
// fetch-mock doesn't seem to expose the header to the response function,
|
||||
// so we just use a constant here
|
||||
const MOCKED_MAX_SIZE = 100
|
||||
|
||||
function mockBlobs(paths = Object.keys(files) as (keyof typeof files)[]) {
|
||||
for (const path of paths) {
|
||||
const file = files[path]
|
||||
fetchMock.get(`/project/${projectId}/blob/${file.hash}`, file.contents)
|
||||
fetchMock
|
||||
.get({
|
||||
url: `/project/${projectId}/blob/${file.hash}`,
|
||||
missingHeaders: ['Range'],
|
||||
response: file.contents,
|
||||
})
|
||||
.get({
|
||||
url: `/project/${projectId}/blob/${file.hash}`,
|
||||
headers: { Range: `bytes=0-${MOCKED_MAX_SIZE - 1}` },
|
||||
response: file.contents.slice(0, MOCKED_MAX_SIZE),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -243,13 +268,18 @@ describe('ProjectSnapshot', function () {
|
||||
hash: files['bibliography.bib'].hash,
|
||||
size: files['bibliography.bib'].contents.length,
|
||||
},
|
||||
{
|
||||
path: 'empty.png',
|
||||
hash: 'ffffffffffffffffffffffffffffffffffffffff',
|
||||
size: 0,
|
||||
},
|
||||
])
|
||||
})
|
||||
})
|
||||
|
||||
describe('getBinaryFileContents', function () {
|
||||
beforeEach(function () {
|
||||
mockBlobs(['bibliography.bib'])
|
||||
mockBlobs(['bibliography.bib', 'empty.png'])
|
||||
})
|
||||
|
||||
it('can fetch whole file', async function () {
|
||||
@@ -257,14 +287,20 @@ describe('ProjectSnapshot', function () {
|
||||
expect(blob).to.equal(files['bibliography.bib'].contents)
|
||||
})
|
||||
|
||||
// NOTE: fetch-mock does not support the .response.body.pipeThrough API,
|
||||
// so this test is skipped for now.
|
||||
// eslint-disable-next-line mocha/no-skipped-tests
|
||||
it.skip('can fetch part of file', async function () {
|
||||
it('can fetch part of file', async function () {
|
||||
const blob = await snapshot.getBinaryFileContents('bibliography.bib', {
|
||||
maxSize: 100,
|
||||
maxSize: MOCKED_MAX_SIZE,
|
||||
})
|
||||
expect(blob).to.equal(files['bibliography.bib'].contents.slice(0, 100))
|
||||
expect(blob).to.equal(
|
||||
files['bibliography.bib'].contents.slice(0, MOCKED_MAX_SIZE)
|
||||
)
|
||||
})
|
||||
|
||||
it('can fetch empty file with maxSize', async function () {
|
||||
const blob = await snapshot.getBinaryFileContents('empty.png', {
|
||||
maxSize: 200,
|
||||
})
|
||||
expect(blob).to.equal(files['empty.png'].contents)
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
@@ -32,6 +32,7 @@ describe('HistoryController', function () {
|
||||
ctx.HistoryManager = {
|
||||
promises: {
|
||||
injectUserDetails: sinon.stub(),
|
||||
requestBlobWithProjectId: sinon.stub(),
|
||||
},
|
||||
}
|
||||
|
||||
@@ -297,4 +298,81 @@ describe('HistoryController', function () {
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
describe('requestBlob', function () {
|
||||
describe('With Range header', function () {
|
||||
beforeEach(async function (ctx) {
|
||||
ctx.req = {
|
||||
params: { project_id: ctx.project_id, hash: 'foo' },
|
||||
body: {},
|
||||
get: sinon.stub(),
|
||||
}
|
||||
ctx.req.get.withArgs('Range').returns('bytes=0-42')
|
||||
ctx.res = { setHeader: sinon.stub(), status: sinon.stub() }
|
||||
ctx.HistoryManager.promises.requestBlobWithProjectId.resolves({
|
||||
stream: null,
|
||||
contentLength: '43',
|
||||
contentRange: 'bytes 0-42/100',
|
||||
})
|
||||
await ctx.HistoryController.getBlob(ctx.req, ctx.res, ctx.next)
|
||||
})
|
||||
|
||||
it('should forward the range request', function (ctx) {
|
||||
ctx.HistoryManager.promises.requestBlobWithProjectId.should.have.been.calledWith(
|
||||
ctx.project_id,
|
||||
'foo',
|
||||
'GET',
|
||||
'bytes=0-42'
|
||||
)
|
||||
})
|
||||
|
||||
it('should forward the Content-Range header', function (ctx) {
|
||||
ctx.res.setHeader.should.have.been.calledWith(
|
||||
'Content-Range',
|
||||
'bytes 0-42/100'
|
||||
)
|
||||
})
|
||||
|
||||
it('should forward the Content-Length header', function (ctx) {
|
||||
ctx.res.setHeader.should.have.been.calledWith('Content-Length', '43')
|
||||
})
|
||||
|
||||
it('should have status 206', function (ctx) {
|
||||
ctx.res.status.should.have.been.calledWith(206)
|
||||
})
|
||||
})
|
||||
|
||||
describe('Without Range header', function () {
|
||||
beforeEach(async function (ctx) {
|
||||
ctx.req = {
|
||||
params: { project_id: ctx.project_id, hash: 'foo' },
|
||||
body: {},
|
||||
get: sinon.stub(),
|
||||
}
|
||||
ctx.req.get.withArgs('Range').returns(null)
|
||||
ctx.res = { setHeader: sinon.stub(), status: sinon.stub() }
|
||||
ctx.HistoryManager.promises.requestBlobWithProjectId.resolves({
|
||||
stream: null,
|
||||
contentLength: '100',
|
||||
range: null,
|
||||
})
|
||||
await ctx.HistoryController.getBlob(ctx.req, ctx.res, ctx.next)
|
||||
})
|
||||
|
||||
it('should not have a Content-Range header', function (ctx) {
|
||||
expect(ctx.res.setHeader).to.not.have.been.calledWith(
|
||||
'Content-Range',
|
||||
sinon.match.string
|
||||
)
|
||||
})
|
||||
|
||||
it('should forward the Content-Length header', function (ctx) {
|
||||
ctx.res.setHeader.should.have.been.calledWith('Content-Length', '100')
|
||||
})
|
||||
|
||||
it('should not have status 206', function (ctx) {
|
||||
ctx.res.status.should.not.have.been.calledWith(206)
|
||||
})
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
Reference in New Issue
Block a user