diff --git a/services/history-v1/api/controllers/projects.js b/services/history-v1/api/controllers/projects.js index 2ebe5d1771..d5a6fe3afc 100644 --- a/services/history-v1/api/controllers/projects.js +++ b/services/history-v1/api/controllers/projects.js @@ -284,22 +284,21 @@ async function headProjectBlob(req, res) { } // Support simple, singular ranges starting from zero only, up-to 2MB = 2_000_000, 7 digits -const RANGE_HEADER = /^bytes=(\d{1,7})-(\d{1,7})$/ +const RANGE_HEADER = /^bytes=0-(\d{1,7})$/ /** * @param {string} header - * @return {undefined | {start: number, end: number}} + * @return {{}|{start: number, end: number}} * @private */ function _getRangeOpts(header) { - if (!header) return undefined + if (!header) return {} const match = header.match(RANGE_HEADER) if (match) { - const start = parseInt(match[1], 10) - const end = parseInt(match[2], 10) - return { start, end } + const end = parseInt(match[1], 10) + return { start: 0, end } } - return undefined + return {} } async function getProjectBlob(req, res, next) { @@ -312,31 +311,6 @@ async function getProjectBlob(req, res, next) { try { let stream try { - if (opts) { - // This is a range request, so we need to set the appropriate headers - // Browser caching only works if the total size is known, so we have - // to fetch the blob metadata first. - const metaData = await blobStore.getBlob(hash) - if (metaData) { - const blobLength = metaData.getByteLength() - if (opts.start > opts.end || opts.start >= blobLength) { - return res - .status(416) // Requested Range Not Satisfiable - .set('Content-Range', `bytes */${blobLength}`) - .set('Content-Length', '0') - .end() - } - // Valid range request - const actualEnd = Math.min(opts.end, blobLength - 1) - const returnedSize = actualEnd - opts.start + 1 - res.set('Content-Length', returnedSize) - res.set( - 'Content-Range', - `bytes ${opts.start}-${actualEnd}/${blobLength}` - ) - res.status(206) - } - } stream = await blobStore.getStream(hash, opts) } catch (err) { if (err instanceof Blob.NotFoundError) { diff --git a/services/history-v1/api/swagger/projects.js b/services/history-v1/api/swagger/projects.js index b2aa875df0..fd68d570e9 100644 --- a/services/history-v1/api/swagger/projects.js +++ b/services/history-v1/api/swagger/projects.js @@ -190,12 +190,6 @@ exports.paths = { type: 'file', }, }, - 206: { - description: 'Success', - schema: { - type: 'file', - }, - }, 404: { description: 'Not Found', schema: { diff --git a/services/history-v1/test/acceptance/js/api/project_blobs.test.js b/services/history-v1/test/acceptance/js/api/project_blobs.test.js index b325d11428..f2677e4d5b 100644 --- a/services/history-v1/test/acceptance/js/api/project_blobs.test.js +++ b/services/history-v1/test/acceptance/js/api/project_blobs.test.js @@ -116,7 +116,7 @@ describe('Project blobs API', function () { expect(payload).to.equal(fileContents.toString()) }) - it('supports range request from beginning', async function () { + it('supports range request', async function () { const url = new URL( testServer.url( `/api/projects/${projectId}/blobs/${testFiles.HELLO_TXT_HASH}` @@ -125,127 +125,7 @@ describe('Project blobs API', function () { url.searchParams.append('token', token) const response = await fetch(url, { headers: { Range: 'bytes=0-4' } }) const payload = await response.text() - // 0-4 is inclusive, so 5 bytes - expect(payload).to.equal(fileContents.slice(0, 5).toString()) - expect(response.headers.get('Content-Range')).to.equal( - `bytes 0-4/${testFiles.HELLO_TXT_BYTE_LENGTH}` - ) - expect(response.headers.get('Content-Length')).to.equal('5') - expect(response.status).to.equal(HTTPStatus.PARTIAL_CONTENT) - }) - - it('supports range request in middle', async function () { - const url = new URL( - testServer.url( - `/api/projects/${projectId}/blobs/${testFiles.HELLO_TXT_HASH}` - ) - ) - url.searchParams.append('token', token) - const response = await fetch(url, { headers: { Range: 'bytes=5-9' } }) - const payload = await response.text() - // 5-9 is inclusive, so 5 bytes - expect(payload).to.equal(fileContents.slice(5, 10).toString()) - expect(response.headers.get('Content-Range')).to.equal( - `bytes 5-9/${testFiles.HELLO_TXT_BYTE_LENGTH}` - ) - expect(response.headers.get('Content-Length')).to.equal('5') - expect(response.status).to.equal(HTTPStatus.PARTIAL_CONTENT) - }) - - it('supports range request past end', async function () { - const url = new URL( - testServer.url( - `/api/projects/${projectId}/blobs/${testFiles.HELLO_TXT_HASH}` - ) - ) - url.searchParams.append('token', token) - const response = await fetch(url, { headers: { Range: 'bytes=5-100' } }) - const payload = await response.text() - expect(payload).to.equal(fileContents.slice(5).toString()) - // Response range should be capped to the actual file size - expect(response.headers.get('Content-Range')).to.equal( - `bytes 5-10/${testFiles.HELLO_TXT_BYTE_LENGTH}` - ) - expect(response.headers.get('Content-Length')).to.equal('6') - expect(response.status).to.equal(HTTPStatus.PARTIAL_CONTENT) - }) - - describe('with invalid request ranges', async function () { - async function testInvalidRange(range) { - const url = new URL( - testServer.url( - `/api/projects/${projectId}/blobs/${testFiles.HELLO_TXT_HASH}` - ) - ) - url.searchParams.append('token', token) - const response = await fetch(url, { headers: { Range: range } }) - - expect(response.headers.get('Content-Range')).to.equal( - `bytes */${testFiles.HELLO_TXT_BYTE_LENGTH}` - ) - expect(response.headers.get('Content-Length')).to.equal('0') - expect(response.status).to.equal( - HTTPStatus.REQUESTED_RANGE_NOT_SATISFIABLE - ) - } - - it('should fail on invalid range where start > end', async function () { - testInvalidRange('bytes=10-5') - }) - - it('should fail on invalid range where start = length', async function () { - testInvalidRange(`bytes=${testFiles.HELLO_TXT_BYTE_LENGTH + 1}-105`) - }) - - it('should fail on invalid range where start > length', async function () { - testInvalidRange(`bytes=${testFiles.HELLO_TXT_BYTE_LENGTH}-105`) - }) - }) - - describe('with an empty blob', async function () { - beforeEach(async function () { - const response = await fetch( - testServer.url( - `/api/projects/${projectId}/blobs/${testFiles.EMPTY_FILE_HASH}` - ), - { - method: 'PUT', - headers: { Authorization: `Bearer ${token}` }, - body: '', - } - ) - expect(response.ok).to.be.true - }) - - it('should not handle range requests', async function () { - const url = new URL( - testServer.url( - `/api/projects/${projectId}/blobs/${testFiles.EMPTY_FILE_HASH}` - ) - ) - url.searchParams.append('token', token) - const response = await fetch(url, { headers: { Range: 'bytes=0-0' } }) - expect(response.headers.get('Content-Range')).to.equal( - `bytes */${testFiles.EMPTY_FILE_BYTE_LENGTH}` - ) - expect(response.headers.get('Content-Length')).to.equal('0') - expect(response.status).to.equal( - HTTPStatus.REQUESTED_RANGE_NOT_SATISFIABLE - ) - }) - - it('should handle full requests', async function () { - const url = new URL( - testServer.url( - `/api/projects/${projectId}/blobs/${testFiles.EMPTY_FILE_HASH}` - ) - ) - url.searchParams.append('token', token) - const response = await fetch(url) - const payload = await response.text() - expect(payload).to.equal('') - expect(response.status).to.equal(HTTPStatus.OK) - }) + expect(payload).to.equal(fileContents.toString().slice(0, 4)) }) it('supports HEAD request', async function () { diff --git a/services/history-v1/test/acceptance/js/storage/support/test_files.js b/services/history-v1/test/acceptance/js/storage/support/test_files.js index 5632aea753..c28443c4c8 100644 --- a/services/history-v1/test/acceptance/js/storage/support/test_files.js +++ b/services/history-v1/test/acceptance/js/storage/support/test_files.js @@ -19,9 +19,6 @@ exports.NON_BMP_TXT_BYTE_LENGTH = 57 exports.NULL_CHARACTERS_TXT_HASH = '4227ca4e8736af63036e7457e2db376ddf7e5795' exports.NULL_CHARACTERS_TXT_BYTE_LENGTH = 3 -exports.EMPTY_FILE_HASH = 'e69de29bb2d1d6434b8b29ae775ad8c2e48c5391' -exports.EMPTY_FILE_BYTE_LENGTH = 0 - // git hashes of some short strings for testing exports.STRING_A_HASH = '2e65efe2a145dda7ee51d1741299f848e5bf752e' exports.STRING_AB_HASH = '9ae9e86b7bd6cb1472d9373702d8249973da0832' diff --git a/services/web/app/src/Features/History/HistoryController.mjs b/services/web/app/src/Features/History/HistoryController.mjs index 047e860eb2..a393ab5702 100644 --- a/services/web/app/src/Features/History/HistoryController.mjs +++ b/services/web/app/src/Features/History/HistoryController.mjs @@ -66,9 +66,9 @@ async function requestBlob(method, req, res) { } const range = req.get('Range') - let stream, contentLength, contentRange + let stream, contentLength try { - ;({ stream, contentLength, contentRange } = + ;({ stream, contentLength } = await HistoryManager.promises.requestBlobWithProjectId( projectId, hash, @@ -80,11 +80,7 @@ async function requestBlob(method, req, res) { throw err } - if (contentLength) res.setHeader('Content-Length', contentLength) - if (contentRange) { - res.setHeader('Content-Range', contentRange) - res.status(206) // Partial Content - } + if (contentLength) res.setHeader('Content-Length', contentLength) // set on HEAD res.setHeader('Content-Type', 'application/octet-stream') setBlobCacheHeaders(res, hash) diff --git a/services/web/app/src/Features/History/HistoryManager.js b/services/web/app/src/Features/History/HistoryManager.js index e8f852780e..30be65db38 100644 --- a/services/web/app/src/Features/History/HistoryManager.js +++ b/services/web/app/src/Features/History/HistoryManager.js @@ -207,7 +207,6 @@ async function requestBlob(historyId, hash, method = 'GET', range = '') { url, stream, contentLength: parseInt(response.headers.get('Content-Length'), 10), - contentRange: response.headers.get('Content-Range'), } } diff --git a/services/web/frontend/js/infrastructure/project-snapshot.ts b/services/web/frontend/js/infrastructure/project-snapshot.ts index 6cd0b20982..e20167e902 100644 --- a/services/web/frontend/js/infrastructure/project-snapshot.ts +++ b/services/web/frontend/js/infrastructure/project-snapshot.ts @@ -143,18 +143,10 @@ export class ProjectSnapshot { ): Promise { const file = this.snapshot.getFile(path) const hash = file?.getHash() - const byteLength = file?.getByteLength() if (hash == null) { return null } - if (byteLength == null) { - return null - } - let blobStoreOptions - if (options?.maxSize != null && byteLength > options?.maxSize) { - blobStoreOptions = { maxSize: options.maxSize } - } - return await this.blobStore.getString(hash, blobStoreOptions) + return await this.blobStore.getString(hash, options) } getDocs(): Map { @@ -331,20 +323,44 @@ async function fetchBlob( options?: { maxSize?: number } ): Promise { const url = `/project/${projectId}/blob/${hash}` - let fetchOpts - if (options?.maxSize === 0) { - return '' - } if (options?.maxSize) { - fetchOpts = { - headers: { - Range: `bytes=0-${options.maxSize - 1}`, - }, - } + return await fetchTextFileWithSizeLimit(url, options.maxSize) } - const res = await fetch(url, fetchOpts) + const res = await fetch(url) if (!res.ok) { throw new FetchError('Failed to fetch blob', url, undefined, res) } return await res.text() } + +async function fetchTextFileWithSizeLimit(url: string, maxSize: number) { + let result = '' + try { + const abortController = new AbortController() + const response = await fetch(url, { + signal: abortController.signal, + }) + + if (!response.ok) { + throw new Error('Failed to fetch blob') + } + if (!response.body) { + throw new Error('Response body is empty') + } + + const reader = response.body.pipeThrough(new TextDecoderStream()) + for await (const chunk of reader) { + result += chunk + if (result.length > maxSize) { + abortController.abort() + } + } + } catch (error: any) { + if (error?.name === 'AbortError') { + // This is fine, we just return the result we have so far + } else { + throw error + } + } + return result.slice(0, maxSize) +} diff --git a/services/web/test/frontend/infrastructure/project-snapshot.test.ts b/services/web/test/frontend/infrastructure/project-snapshot.test.ts index aba3c833a6..d231e1c1c5 100644 --- a/services/web/test/frontend/infrastructure/project-snapshot.test.ts +++ b/services/web/test/frontend/infrastructure/project-snapshot.test.ts @@ -44,10 +44,6 @@ describe('ProjectSnapshot', function () { ), // 6.5MB hash: 'eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee', }, - 'empty.png': { - contents: '', - hash: 'ffffffffffffffffffffffffffffffffffffffff', - }, } const chunk = { @@ -86,13 +82,6 @@ describe('ProjectSnapshot', function () { byteLength: files['bibliography.bib'].contents.length, }, }, - { - pathname: 'empty.png', - file: { - hash: files['empty.png'].hash, - byteLength: files['empty.png'].contents.length, - }, - }, ], timestamp: '2025-01-01T12:00:00.000Z', }, @@ -156,24 +145,10 @@ describe('ProjectSnapshot', function () { }) } - // fetch-mock doesn't seem to expose the header to the response function, - // so we just use a constant here - const MOCKED_MAX_SIZE = 100 - function mockBlobs(paths = Object.keys(files) as (keyof typeof files)[]) { for (const path of paths) { const file = files[path] - fetchMock - .get({ - url: `/project/${projectId}/blob/${file.hash}`, - missingHeaders: ['Range'], - response: file.contents, - }) - .get({ - url: `/project/${projectId}/blob/${file.hash}`, - headers: { Range: `bytes=0-${MOCKED_MAX_SIZE - 1}` }, - response: file.contents.slice(0, MOCKED_MAX_SIZE), - }) + fetchMock.get(`/project/${projectId}/blob/${file.hash}`, file.contents) } } @@ -268,18 +243,13 @@ describe('ProjectSnapshot', function () { hash: files['bibliography.bib'].hash, size: files['bibliography.bib'].contents.length, }, - { - path: 'empty.png', - hash: 'ffffffffffffffffffffffffffffffffffffffff', - size: 0, - }, ]) }) }) describe('getBinaryFileContents', function () { beforeEach(function () { - mockBlobs(['bibliography.bib', 'empty.png']) + mockBlobs(['bibliography.bib']) }) it('can fetch whole file', async function () { @@ -287,20 +257,14 @@ describe('ProjectSnapshot', function () { expect(blob).to.equal(files['bibliography.bib'].contents) }) - it('can fetch part of file', async function () { + // NOTE: fetch-mock does not support the .response.body.pipeThrough API, + // so this test is skipped for now. + // eslint-disable-next-line mocha/no-skipped-tests + it.skip('can fetch part of file', async function () { const blob = await snapshot.getBinaryFileContents('bibliography.bib', { - maxSize: MOCKED_MAX_SIZE, + maxSize: 100, }) - expect(blob).to.equal( - files['bibliography.bib'].contents.slice(0, MOCKED_MAX_SIZE) - ) - }) - - it('can fetch empty file with maxSize', async function () { - const blob = await snapshot.getBinaryFileContents('empty.png', { - maxSize: 200, - }) - expect(blob).to.equal(files['empty.png'].contents) + expect(blob).to.equal(files['bibliography.bib'].contents.slice(0, 100)) }) }) }) diff --git a/services/web/test/unit/src/History/HistoryController.test.mjs b/services/web/test/unit/src/History/HistoryController.test.mjs index a3e3e0a987..38356108cf 100644 --- a/services/web/test/unit/src/History/HistoryController.test.mjs +++ b/services/web/test/unit/src/History/HistoryController.test.mjs @@ -32,7 +32,6 @@ describe('HistoryController', function () { ctx.HistoryManager = { promises: { injectUserDetails: sinon.stub(), - requestBlobWithProjectId: sinon.stub(), }, } @@ -298,81 +297,4 @@ describe('HistoryController', function () { }) }) }) - - describe('requestBlob', function () { - describe('With Range header', function () { - beforeEach(async function (ctx) { - ctx.req = { - params: { project_id: ctx.project_id, hash: 'foo' }, - body: {}, - get: sinon.stub(), - } - ctx.req.get.withArgs('Range').returns('bytes=0-42') - ctx.res = { setHeader: sinon.stub(), status: sinon.stub() } - ctx.HistoryManager.promises.requestBlobWithProjectId.resolves({ - stream: null, - contentLength: '43', - contentRange: 'bytes 0-42/100', - }) - await ctx.HistoryController.getBlob(ctx.req, ctx.res, ctx.next) - }) - - it('should forward the range request', function (ctx) { - ctx.HistoryManager.promises.requestBlobWithProjectId.should.have.been.calledWith( - ctx.project_id, - 'foo', - 'GET', - 'bytes=0-42' - ) - }) - - it('should forward the Content-Range header', function (ctx) { - ctx.res.setHeader.should.have.been.calledWith( - 'Content-Range', - 'bytes 0-42/100' - ) - }) - - it('should forward the Content-Length header', function (ctx) { - ctx.res.setHeader.should.have.been.calledWith('Content-Length', '43') - }) - - it('should have status 206', function (ctx) { - ctx.res.status.should.have.been.calledWith(206) - }) - }) - - describe('Without Range header', function () { - beforeEach(async function (ctx) { - ctx.req = { - params: { project_id: ctx.project_id, hash: 'foo' }, - body: {}, - get: sinon.stub(), - } - ctx.req.get.withArgs('Range').returns(null) - ctx.res = { setHeader: sinon.stub(), status: sinon.stub() } - ctx.HistoryManager.promises.requestBlobWithProjectId.resolves({ - stream: null, - contentLength: '100', - range: null, - }) - await ctx.HistoryController.getBlob(ctx.req, ctx.res, ctx.next) - }) - - it('should not have a Content-Range header', function (ctx) { - expect(ctx.res.setHeader).to.not.have.been.calledWith( - 'Content-Range', - sinon.match.string - ) - }) - - it('should forward the Content-Length header', function (ctx) { - ctx.res.setHeader.should.have.been.calledWith('Content-Length', '100') - }) - - it('should not have status 206', function (ctx) { - ctx.res.status.should.not.have.been.calledWith(206) - }) - }) - }) })