diff --git a/services/history-v1/api/controllers/projects.js b/services/history-v1/api/controllers/projects.js index d5a6fe3afc..2ebe5d1771 100644 --- a/services/history-v1/api/controllers/projects.js +++ b/services/history-v1/api/controllers/projects.js @@ -284,21 +284,22 @@ async function headProjectBlob(req, res) { } // Support simple, singular ranges starting from zero only, up-to 2MB = 2_000_000, 7 digits -const RANGE_HEADER = /^bytes=0-(\d{1,7})$/ +const RANGE_HEADER = /^bytes=(\d{1,7})-(\d{1,7})$/ /** * @param {string} header - * @return {{}|{start: number, end: number}} + * @return {undefined | {start: number, end: number}} * @private */ function _getRangeOpts(header) { - if (!header) return {} + if (!header) return undefined const match = header.match(RANGE_HEADER) if (match) { - const end = parseInt(match[1], 10) - return { start: 0, end } + const start = parseInt(match[1], 10) + const end = parseInt(match[2], 10) + return { start, end } } - return {} + return undefined } async function getProjectBlob(req, res, next) { @@ -311,6 +312,31 @@ async function getProjectBlob(req, res, next) { try { let stream try { + if (opts) { + // This is a range request, so we need to set the appropriate headers + // Browser caching only works if the total size is known, so we have + // to fetch the blob metadata first. + const metaData = await blobStore.getBlob(hash) + if (metaData) { + const blobLength = metaData.getByteLength() + if (opts.start > opts.end || opts.start >= blobLength) { + return res + .status(416) // Requested Range Not Satisfiable + .set('Content-Range', `bytes */${blobLength}`) + .set('Content-Length', '0') + .end() + } + // Valid range request + const actualEnd = Math.min(opts.end, blobLength - 1) + const returnedSize = actualEnd - opts.start + 1 + res.set('Content-Length', returnedSize) + res.set( + 'Content-Range', + `bytes ${opts.start}-${actualEnd}/${blobLength}` + ) + res.status(206) + } + } stream = await blobStore.getStream(hash, opts) } catch (err) { if (err instanceof Blob.NotFoundError) { diff --git a/services/history-v1/api/swagger/projects.js b/services/history-v1/api/swagger/projects.js index fd68d570e9..b2aa875df0 100644 --- a/services/history-v1/api/swagger/projects.js +++ b/services/history-v1/api/swagger/projects.js @@ -190,6 +190,12 @@ exports.paths = { type: 'file', }, }, + 206: { + description: 'Success', + schema: { + type: 'file', + }, + }, 404: { description: 'Not Found', schema: { diff --git a/services/history-v1/test/acceptance/js/api/project_blobs.test.js b/services/history-v1/test/acceptance/js/api/project_blobs.test.js index f2677e4d5b..b325d11428 100644 --- a/services/history-v1/test/acceptance/js/api/project_blobs.test.js +++ b/services/history-v1/test/acceptance/js/api/project_blobs.test.js @@ -116,7 +116,7 @@ describe('Project blobs API', function () { expect(payload).to.equal(fileContents.toString()) }) - it('supports range request', async function () { + it('supports range request from beginning', async function () { const url = new URL( testServer.url( `/api/projects/${projectId}/blobs/${testFiles.HELLO_TXT_HASH}` @@ -125,7 +125,127 @@ describe('Project blobs API', function () { url.searchParams.append('token', token) const response = await fetch(url, { headers: { Range: 'bytes=0-4' } }) const payload = await response.text() - expect(payload).to.equal(fileContents.toString().slice(0, 4)) + // 0-4 is inclusive, so 5 bytes + expect(payload).to.equal(fileContents.slice(0, 5).toString()) + expect(response.headers.get('Content-Range')).to.equal( + `bytes 0-4/${testFiles.HELLO_TXT_BYTE_LENGTH}` + ) + expect(response.headers.get('Content-Length')).to.equal('5') + expect(response.status).to.equal(HTTPStatus.PARTIAL_CONTENT) + }) + + it('supports range request in middle', async function () { + const url = new URL( + testServer.url( + `/api/projects/${projectId}/blobs/${testFiles.HELLO_TXT_HASH}` + ) + ) + url.searchParams.append('token', token) + const response = await fetch(url, { headers: { Range: 'bytes=5-9' } }) + const payload = await response.text() + // 5-9 is inclusive, so 5 bytes + expect(payload).to.equal(fileContents.slice(5, 10).toString()) + expect(response.headers.get('Content-Range')).to.equal( + `bytes 5-9/${testFiles.HELLO_TXT_BYTE_LENGTH}` + ) + expect(response.headers.get('Content-Length')).to.equal('5') + expect(response.status).to.equal(HTTPStatus.PARTIAL_CONTENT) + }) + + it('supports range request past end', async function () { + const url = new URL( + testServer.url( + `/api/projects/${projectId}/blobs/${testFiles.HELLO_TXT_HASH}` + ) + ) + url.searchParams.append('token', token) + const response = await fetch(url, { headers: { Range: 'bytes=5-100' } }) + const payload = await response.text() + expect(payload).to.equal(fileContents.slice(5).toString()) + // Response range should be capped to the actual file size + expect(response.headers.get('Content-Range')).to.equal( + `bytes 5-10/${testFiles.HELLO_TXT_BYTE_LENGTH}` + ) + expect(response.headers.get('Content-Length')).to.equal('6') + expect(response.status).to.equal(HTTPStatus.PARTIAL_CONTENT) + }) + + describe('with invalid request ranges', async function () { + async function testInvalidRange(range) { + const url = new URL( + testServer.url( + `/api/projects/${projectId}/blobs/${testFiles.HELLO_TXT_HASH}` + ) + ) + url.searchParams.append('token', token) + const response = await fetch(url, { headers: { Range: range } }) + + expect(response.headers.get('Content-Range')).to.equal( + `bytes */${testFiles.HELLO_TXT_BYTE_LENGTH}` + ) + expect(response.headers.get('Content-Length')).to.equal('0') + expect(response.status).to.equal( + HTTPStatus.REQUESTED_RANGE_NOT_SATISFIABLE + ) + } + + it('should fail on invalid range where start > end', async function () { + testInvalidRange('bytes=10-5') + }) + + it('should fail on invalid range where start = length', async function () { + testInvalidRange(`bytes=${testFiles.HELLO_TXT_BYTE_LENGTH + 1}-105`) + }) + + it('should fail on invalid range where start > length', async function () { + testInvalidRange(`bytes=${testFiles.HELLO_TXT_BYTE_LENGTH}-105`) + }) + }) + + describe('with an empty blob', async function () { + beforeEach(async function () { + const response = await fetch( + testServer.url( + `/api/projects/${projectId}/blobs/${testFiles.EMPTY_FILE_HASH}` + ), + { + method: 'PUT', + headers: { Authorization: `Bearer ${token}` }, + body: '', + } + ) + expect(response.ok).to.be.true + }) + + it('should not handle range requests', async function () { + const url = new URL( + testServer.url( + `/api/projects/${projectId}/blobs/${testFiles.EMPTY_FILE_HASH}` + ) + ) + url.searchParams.append('token', token) + const response = await fetch(url, { headers: { Range: 'bytes=0-0' } }) + expect(response.headers.get('Content-Range')).to.equal( + `bytes */${testFiles.EMPTY_FILE_BYTE_LENGTH}` + ) + expect(response.headers.get('Content-Length')).to.equal('0') + expect(response.status).to.equal( + HTTPStatus.REQUESTED_RANGE_NOT_SATISFIABLE + ) + }) + + it('should handle full requests', async function () { + const url = new URL( + testServer.url( + `/api/projects/${projectId}/blobs/${testFiles.EMPTY_FILE_HASH}` + ) + ) + url.searchParams.append('token', token) + const response = await fetch(url) + const payload = await response.text() + expect(payload).to.equal('') + expect(response.status).to.equal(HTTPStatus.OK) + }) }) it('supports HEAD request', async function () { diff --git a/services/history-v1/test/acceptance/js/storage/support/test_files.js b/services/history-v1/test/acceptance/js/storage/support/test_files.js index c28443c4c8..5632aea753 100644 --- a/services/history-v1/test/acceptance/js/storage/support/test_files.js +++ b/services/history-v1/test/acceptance/js/storage/support/test_files.js @@ -19,6 +19,9 @@ exports.NON_BMP_TXT_BYTE_LENGTH = 57 exports.NULL_CHARACTERS_TXT_HASH = '4227ca4e8736af63036e7457e2db376ddf7e5795' exports.NULL_CHARACTERS_TXT_BYTE_LENGTH = 3 +exports.EMPTY_FILE_HASH = 'e69de29bb2d1d6434b8b29ae775ad8c2e48c5391' +exports.EMPTY_FILE_BYTE_LENGTH = 0 + // git hashes of some short strings for testing exports.STRING_A_HASH = '2e65efe2a145dda7ee51d1741299f848e5bf752e' exports.STRING_AB_HASH = '9ae9e86b7bd6cb1472d9373702d8249973da0832' diff --git a/services/web/app/src/Features/History/HistoryController.mjs b/services/web/app/src/Features/History/HistoryController.mjs index a393ab5702..047e860eb2 100644 --- a/services/web/app/src/Features/History/HistoryController.mjs +++ b/services/web/app/src/Features/History/HistoryController.mjs @@ -66,9 +66,9 @@ async function requestBlob(method, req, res) { } const range = req.get('Range') - let stream, contentLength + let stream, contentLength, contentRange try { - ;({ stream, contentLength } = + ;({ stream, contentLength, contentRange } = await HistoryManager.promises.requestBlobWithProjectId( projectId, hash, @@ -80,7 +80,11 @@ async function requestBlob(method, req, res) { throw err } - if (contentLength) res.setHeader('Content-Length', contentLength) // set on HEAD + if (contentLength) res.setHeader('Content-Length', contentLength) + if (contentRange) { + res.setHeader('Content-Range', contentRange) + res.status(206) // Partial Content + } res.setHeader('Content-Type', 'application/octet-stream') setBlobCacheHeaders(res, hash) diff --git a/services/web/app/src/Features/History/HistoryManager.js b/services/web/app/src/Features/History/HistoryManager.js index 30be65db38..e8f852780e 100644 --- a/services/web/app/src/Features/History/HistoryManager.js +++ b/services/web/app/src/Features/History/HistoryManager.js @@ -207,6 +207,7 @@ async function requestBlob(historyId, hash, method = 'GET', range = '') { url, stream, contentLength: parseInt(response.headers.get('Content-Length'), 10), + contentRange: response.headers.get('Content-Range'), } } diff --git a/services/web/frontend/js/infrastructure/project-snapshot.ts b/services/web/frontend/js/infrastructure/project-snapshot.ts index e20167e902..6cd0b20982 100644 --- a/services/web/frontend/js/infrastructure/project-snapshot.ts +++ b/services/web/frontend/js/infrastructure/project-snapshot.ts @@ -143,10 +143,18 @@ export class ProjectSnapshot { ): Promise { const file = this.snapshot.getFile(path) const hash = file?.getHash() + const byteLength = file?.getByteLength() if (hash == null) { return null } - return await this.blobStore.getString(hash, options) + if (byteLength == null) { + return null + } + let blobStoreOptions + if (options?.maxSize != null && byteLength > options?.maxSize) { + blobStoreOptions = { maxSize: options.maxSize } + } + return await this.blobStore.getString(hash, blobStoreOptions) } getDocs(): Map { @@ -323,44 +331,20 @@ async function fetchBlob( options?: { maxSize?: number } ): Promise { const url = `/project/${projectId}/blob/${hash}` - if (options?.maxSize) { - return await fetchTextFileWithSizeLimit(url, options.maxSize) + let fetchOpts + if (options?.maxSize === 0) { + return '' } - const res = await fetch(url) + if (options?.maxSize) { + fetchOpts = { + headers: { + Range: `bytes=0-${options.maxSize - 1}`, + }, + } + } + const res = await fetch(url, fetchOpts) if (!res.ok) { throw new FetchError('Failed to fetch blob', url, undefined, res) } return await res.text() } - -async function fetchTextFileWithSizeLimit(url: string, maxSize: number) { - let result = '' - try { - const abortController = new AbortController() - const response = await fetch(url, { - signal: abortController.signal, - }) - - if (!response.ok) { - throw new Error('Failed to fetch blob') - } - if (!response.body) { - throw new Error('Response body is empty') - } - - const reader = response.body.pipeThrough(new TextDecoderStream()) - for await (const chunk of reader) { - result += chunk - if (result.length > maxSize) { - abortController.abort() - } - } - } catch (error: any) { - if (error?.name === 'AbortError') { - // This is fine, we just return the result we have so far - } else { - throw error - } - } - return result.slice(0, maxSize) -} diff --git a/services/web/test/frontend/infrastructure/project-snapshot.test.ts b/services/web/test/frontend/infrastructure/project-snapshot.test.ts index d231e1c1c5..aba3c833a6 100644 --- a/services/web/test/frontend/infrastructure/project-snapshot.test.ts +++ b/services/web/test/frontend/infrastructure/project-snapshot.test.ts @@ -44,6 +44,10 @@ describe('ProjectSnapshot', function () { ), // 6.5MB hash: 'eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee', }, + 'empty.png': { + contents: '', + hash: 'ffffffffffffffffffffffffffffffffffffffff', + }, } const chunk = { @@ -82,6 +86,13 @@ describe('ProjectSnapshot', function () { byteLength: files['bibliography.bib'].contents.length, }, }, + { + pathname: 'empty.png', + file: { + hash: files['empty.png'].hash, + byteLength: files['empty.png'].contents.length, + }, + }, ], timestamp: '2025-01-01T12:00:00.000Z', }, @@ -145,10 +156,24 @@ describe('ProjectSnapshot', function () { }) } + // fetch-mock doesn't seem to expose the header to the response function, + // so we just use a constant here + const MOCKED_MAX_SIZE = 100 + function mockBlobs(paths = Object.keys(files) as (keyof typeof files)[]) { for (const path of paths) { const file = files[path] - fetchMock.get(`/project/${projectId}/blob/${file.hash}`, file.contents) + fetchMock + .get({ + url: `/project/${projectId}/blob/${file.hash}`, + missingHeaders: ['Range'], + response: file.contents, + }) + .get({ + url: `/project/${projectId}/blob/${file.hash}`, + headers: { Range: `bytes=0-${MOCKED_MAX_SIZE - 1}` }, + response: file.contents.slice(0, MOCKED_MAX_SIZE), + }) } } @@ -243,13 +268,18 @@ describe('ProjectSnapshot', function () { hash: files['bibliography.bib'].hash, size: files['bibliography.bib'].contents.length, }, + { + path: 'empty.png', + hash: 'ffffffffffffffffffffffffffffffffffffffff', + size: 0, + }, ]) }) }) describe('getBinaryFileContents', function () { beforeEach(function () { - mockBlobs(['bibliography.bib']) + mockBlobs(['bibliography.bib', 'empty.png']) }) it('can fetch whole file', async function () { @@ -257,14 +287,20 @@ describe('ProjectSnapshot', function () { expect(blob).to.equal(files['bibliography.bib'].contents) }) - // NOTE: fetch-mock does not support the .response.body.pipeThrough API, - // so this test is skipped for now. - // eslint-disable-next-line mocha/no-skipped-tests - it.skip('can fetch part of file', async function () { + it('can fetch part of file', async function () { const blob = await snapshot.getBinaryFileContents('bibliography.bib', { - maxSize: 100, + maxSize: MOCKED_MAX_SIZE, }) - expect(blob).to.equal(files['bibliography.bib'].contents.slice(0, 100)) + expect(blob).to.equal( + files['bibliography.bib'].contents.slice(0, MOCKED_MAX_SIZE) + ) + }) + + it('can fetch empty file with maxSize', async function () { + const blob = await snapshot.getBinaryFileContents('empty.png', { + maxSize: 200, + }) + expect(blob).to.equal(files['empty.png'].contents) }) }) }) diff --git a/services/web/test/unit/src/History/HistoryController.test.mjs b/services/web/test/unit/src/History/HistoryController.test.mjs index 38356108cf..a3e3e0a987 100644 --- a/services/web/test/unit/src/History/HistoryController.test.mjs +++ b/services/web/test/unit/src/History/HistoryController.test.mjs @@ -32,6 +32,7 @@ describe('HistoryController', function () { ctx.HistoryManager = { promises: { injectUserDetails: sinon.stub(), + requestBlobWithProjectId: sinon.stub(), }, } @@ -297,4 +298,81 @@ describe('HistoryController', function () { }) }) }) + + describe('requestBlob', function () { + describe('With Range header', function () { + beforeEach(async function (ctx) { + ctx.req = { + params: { project_id: ctx.project_id, hash: 'foo' }, + body: {}, + get: sinon.stub(), + } + ctx.req.get.withArgs('Range').returns('bytes=0-42') + ctx.res = { setHeader: sinon.stub(), status: sinon.stub() } + ctx.HistoryManager.promises.requestBlobWithProjectId.resolves({ + stream: null, + contentLength: '43', + contentRange: 'bytes 0-42/100', + }) + await ctx.HistoryController.getBlob(ctx.req, ctx.res, ctx.next) + }) + + it('should forward the range request', function (ctx) { + ctx.HistoryManager.promises.requestBlobWithProjectId.should.have.been.calledWith( + ctx.project_id, + 'foo', + 'GET', + 'bytes=0-42' + ) + }) + + it('should forward the Content-Range header', function (ctx) { + ctx.res.setHeader.should.have.been.calledWith( + 'Content-Range', + 'bytes 0-42/100' + ) + }) + + it('should forward the Content-Length header', function (ctx) { + ctx.res.setHeader.should.have.been.calledWith('Content-Length', '43') + }) + + it('should have status 206', function (ctx) { + ctx.res.status.should.have.been.calledWith(206) + }) + }) + + describe('Without Range header', function () { + beforeEach(async function (ctx) { + ctx.req = { + params: { project_id: ctx.project_id, hash: 'foo' }, + body: {}, + get: sinon.stub(), + } + ctx.req.get.withArgs('Range').returns(null) + ctx.res = { setHeader: sinon.stub(), status: sinon.stub() } + ctx.HistoryManager.promises.requestBlobWithProjectId.resolves({ + stream: null, + contentLength: '100', + range: null, + }) + await ctx.HistoryController.getBlob(ctx.req, ctx.res, ctx.next) + }) + + it('should not have a Content-Range header', function (ctx) { + expect(ctx.res.setHeader).to.not.have.been.calledWith( + 'Content-Range', + sinon.match.string + ) + }) + + it('should forward the Content-Length header', function (ctx) { + ctx.res.setHeader.should.have.been.calledWith('Content-Length', '100') + }) + + it('should not have status 206', function (ctx) { + ctx.res.status.should.not.have.been.calledWith(206) + }) + }) + }) })