From 2640d4a566025e1e4c8b0ebbbeb0746efeb96175 Mon Sep 17 00:00:00 2001 From: Jakob Ackermann Date: Thu, 23 Jan 2025 09:35:16 +0000 Subject: [PATCH] [project-history] fixes for checking blob exists (#23051) * [project-history] skip HEAD on blob without file-hash * [project-history] only 404s indicate a missing blob * [project-history] error checking for existing blob is fatal * [project-history] always skip creating blob if already exists Ignore createdBlob detail after performing check on blob. It might not exist on old operations that have been queued. * [project-history] fix unit tests * [project-history] migrate test to new fetch call GitOrigin-RevId: 457963042e9d836647d3c122516329d4dc9b544f --- .../app/js/HistoryStoreManager.js | 14 +- .../test/acceptance/js/SyncTests.js | 346 ++++++++++++++++++ .../HistoryStoreManagerTests.js | 7 +- 3 files changed, 360 insertions(+), 7 deletions(-) diff --git a/services/project-history/app/js/HistoryStoreManager.js b/services/project-history/app/js/HistoryStoreManager.js index 641eb0f2fd..ac795cfad4 100644 --- a/services/project-history/app/js/HistoryStoreManager.js +++ b/services/project-history/app/js/HistoryStoreManager.js @@ -247,6 +247,7 @@ function createBlobFromString(historyId, data, fileId, callback) { } function _checkBlobExists(historyId, hash, callback) { + if (!hash) return callback(null, false) const url = `${Settings.overleaf.history.host}/projects/${historyId}/blobs/${hash}` fetchNothing(url, { method: 'HEAD', @@ -256,7 +257,7 @@ function _checkBlobExists(historyId, hash, callback) { callback(null, true) }) .catch(err => { - if (err instanceof RequestFailedError) { + if (err instanceof RequestFailedError && err.response.status === 404) { return callback(null, false) } callback(OError.tag(err), false) @@ -337,11 +338,14 @@ export function createBlobForUpdate(projectId, historyId, update, callback) { ) _checkBlobExists(historyId, update.hash, (err, blobExists) => { if (err) { - logger.warn( - { err, projectId, fileId, update }, - 'error checking whether blob exists, reading from filestore' + return callback( + new OError( + 'error checking whether blob exists', + { projectId, historyId, update }, + err + ) ) - } else if (update.createdBlob && blobExists) { + } else if (blobExists) { logger.debug( { projectId, fileId, update }, 'Skipping blob creation as it has already been created' diff --git a/services/project-history/test/acceptance/js/SyncTests.js b/services/project-history/test/acceptance/js/SyncTests.js index 27db3434b4..8a72ff2a79 100644 --- a/services/project-history/test/acceptance/js/SyncTests.js +++ b/services/project-history/test/acceptance/js/SyncTests.js @@ -4,8 +4,10 @@ import { expect } from 'chai' import request from 'request' import assert from 'node:assert' import mongodb from 'mongodb-legacy' +import logger from '@overleaf/logger' import * as ProjectHistoryClient from './helpers/ProjectHistoryClient.js' import * as ProjectHistoryApp from './helpers/ProjectHistoryApp.js' +import sinon from 'sinon' const { ObjectId } = mongodb const EMPTY_FILE_HASH = 'e69de29bb2d1d6434b8b29ae775ad8c2e48c5391' @@ -16,6 +18,15 @@ const MockWeb = () => nock('http://127.0.0.1:3000') describe('Syncing with web and doc-updater', function () { const historyId = new ObjectId().toString() + let loggerWarn, loggerError + beforeEach(function () { + loggerWarn = sinon.spy(logger, 'warn') + loggerError = sinon.spy(logger, 'error') + }) + afterEach(function () { + loggerWarn.restore() + loggerError.restore() + }) beforeEach(function (done) { this.timestamp = new Date() @@ -202,7 +213,113 @@ describe('Syncing with web and doc-updater', function () { MockFileStore() .get(`/project/${this.project_id}/file/${this.file_id}`) .reply(200, fileContents) + const headBlob = MockHistoryStore() + .head(`/api/projects/${historyId}/blobs/${fileHash}`) + .reply(404) + const createBlob = MockHistoryStore() + .put(`/api/projects/${historyId}/blobs/${fileHash}`, fileContents) + .reply(201) + const addFile = MockHistoryStore() + .post(`/api/projects/${historyId}/legacy_changes`, body => { + expect(body).to.deep.equal([ + { + v2Authors: [], + authors: [], + timestamp: this.timestamp.toJSON(), + operations: [ + { + pathname: 'test.png', + file: { + hash: fileHash, + }, + }, + ], + origin: { kind: 'test-origin' }, + }, + ]) + return true + }) + .query({ end_version: 0 }) + .reply(204) + + async.series( + [ + cb => { + ProjectHistoryClient.resyncHistory(this.project_id, cb) + }, + cb => { + const update = { + projectHistoryId: historyId, + resyncProjectStructure: { + docs: [], + files: [ + { + file: this.file_id, + path: '/test.png', + _hash: fileHash, + url: `http://127.0.0.1:3009/project/${this.project_id}/file/${this.file_id}`, + }, + { path: '/persistedFile' }, + ], + }, + meta: { + ts: this.timestamp, + }, + } + ProjectHistoryClient.pushRawUpdate(this.project_id, update, cb) + }, + cb => { + ProjectHistoryClient.flushProject(this.project_id, cb) + }, + ], + error => { + if (error) { + throw error + } + assert(!loggerWarn.called, 'no warning logged on 404') + assert( + headBlob.isDone(), + 'HEAD /api/projects/:historyId/blobs/:hash should have been called' + ) + assert( + createBlob.isDone(), + '/api/projects/:historyId/blobs/:hash should have been called' + ) + assert( + addFile.isDone(), + `/api/projects/${historyId}/changes should have been called` + ) + done() + } + ) + }) + it('should skip HEAD on blob without hash', function (done) { + MockHistoryStore() + .get(`/api/projects/${historyId}/latest/history`) + .reply(200, { + chunk: { + history: { + snapshot: { + files: { + persistedFile: { hash: EMPTY_FILE_HASH, byteLength: 0 }, + }, + }, + changes: [], + }, + startVersion: 0, + }, + }) + + const fileContents = Buffer.from([1, 2, 3]) + const fileHash = 'aed2973e4b8a7ff1b30ff5c4751e5a2b38989e74' + + MockFileStore() + .get(`/project/${this.project_id}/file/${this.file_id}`) + .reply(200, fileContents) + const headBlob = MockHistoryStore() + .head(`/api/projects/${historyId}/blobs/undefined`) + .reply(500) const createBlob = MockHistoryStore() .put(`/api/projects/${historyId}/blobs/${fileHash}`, fileContents) .reply(201) @@ -263,6 +380,11 @@ describe('Syncing with web and doc-updater', function () { if (error) { throw error } + assert(!loggerWarn.called, 'no warning logged on 404') + assert( + !headBlob.isDone(), + 'HEAD /api/projects/:historyId/blobs/:hash should have been skipped' + ) assert( createBlob.isDone(), '/api/projects/:historyId/blobs/:hash should have been called' @@ -275,6 +397,230 @@ describe('Syncing with web and doc-updater', function () { } ) }) + it('should record error when checking blob fails with 500', function (done) { + MockHistoryStore() + .get(`/api/projects/${historyId}/latest/history`) + .reply(200, { + chunk: { + history: { + snapshot: { + files: { + persistedFile: { hash: EMPTY_FILE_HASH, byteLength: 0 }, + }, + }, + changes: [], + }, + startVersion: 0, + }, + }) + + const fileContents = Buffer.from([1, 2, 3]) + const fileHash = 'aed2973e4b8a7ff1b30ff5c4751e5a2b38989e74' + + MockFileStore() + .get(`/project/${this.project_id}/file/${this.file_id}`) + .reply(200, fileContents) + const headBlob = MockHistoryStore() + .head(`/api/projects/${historyId}/blobs/${fileHash}`) + .reply(500) + const createBlob = MockHistoryStore() + .put(`/api/projects/${historyId}/blobs/${fileHash}`, fileContents) + .reply(201) + + const addFile = MockHistoryStore() + .post(`/api/projects/${historyId}/legacy_changes`, body => { + expect(body).to.deep.equal([ + { + v2Authors: [], + authors: [], + timestamp: this.timestamp.toJSON(), + operations: [ + { + pathname: 'test.png', + file: { + hash: fileHash, + }, + }, + ], + origin: { kind: 'test-origin' }, + }, + ]) + return true + }) + .query({ end_version: 0 }) + .reply(204) + + async.series( + [ + cb => { + ProjectHistoryClient.resyncHistory(this.project_id, cb) + }, + cb => { + const update = { + projectHistoryId: historyId, + resyncProjectStructure: { + docs: [], + files: [ + { + file: this.file_id, + path: '/test.png', + _hash: fileHash, + url: `http://127.0.0.1:3009/project/${this.project_id}/file/${this.file_id}`, + }, + { path: '/persistedFile' }, + ], + }, + meta: { + ts: this.timestamp, + }, + } + ProjectHistoryClient.pushRawUpdate(this.project_id, update, cb) + }, + cb => { + ProjectHistoryClient.flushProject( + this.project_id, + { + allowErrors: true, + }, + (err, res) => { + if (err) return cb(err) + assert(res.statusCode === 500, 'resync should have failed') + cb() + } + ) + }, + ], + error => { + if (error) { + throw error + } + assert( + loggerError.calledWithMatch( + sinon.match.any, + 'error checking whether blob exists' + ), + 'error logged on 500' + ) + assert( + headBlob.isDone(), + 'HEAD /api/projects/:historyId/blobs/:hash should have been called' + ) + assert( + !createBlob.isDone(), + '/api/projects/:historyId/blobs/:hash should have been skipped' + ) + assert( + !addFile.isDone(), + `/api/projects/${historyId}/changes should have been skipped` + ) + done() + } + ) + }) + it('should skip blob write when blob exists', function (done) { + MockHistoryStore() + .get(`/api/projects/${historyId}/latest/history`) + .reply(200, { + chunk: { + history: { + snapshot: { + files: { + persistedFile: { hash: EMPTY_FILE_HASH, byteLength: 0 }, + }, + }, + changes: [], + }, + startVersion: 0, + }, + }) + + const fileContents = Buffer.from([1, 2, 3]) + const fileHash = 'aed2973e4b8a7ff1b30ff5c4751e5a2b38989e74' + + MockFileStore() + .get(`/project/${this.project_id}/file/${this.file_id}`) + .reply(200, fileContents) + const headBlob = MockHistoryStore() + .head(`/api/projects/${historyId}/blobs/${fileHash}`) + .reply(200) + const createBlob = MockHistoryStore() + .put(`/api/projects/${historyId}/blobs/${fileHash}`, fileContents) + .reply(201) + + const addFile = MockHistoryStore() + .post(`/api/projects/${historyId}/legacy_changes`, body => { + expect(body).to.deep.equal([ + { + v2Authors: [], + authors: [], + timestamp: this.timestamp.toJSON(), + operations: [ + { + pathname: 'test.png', + file: { + hash: fileHash, + }, + }, + ], + origin: { kind: 'test-origin' }, + }, + ]) + return true + }) + .query({ end_version: 0 }) + .reply(204) + + async.series( + [ + cb => { + ProjectHistoryClient.resyncHistory(this.project_id, cb) + }, + cb => { + const update = { + projectHistoryId: historyId, + resyncProjectStructure: { + docs: [], + files: [ + { + file: this.file_id, + path: '/test.png', + _hash: fileHash, + url: `http://127.0.0.1:3009/project/${this.project_id}/file/${this.file_id}`, + }, + { path: '/persistedFile' }, + ], + }, + meta: { + ts: this.timestamp, + }, + } + ProjectHistoryClient.pushRawUpdate(this.project_id, update, cb) + }, + cb => { + ProjectHistoryClient.flushProject(this.project_id, cb) + }, + ], + error => { + if (error) { + throw error + } + assert(!loggerWarn.called, 'no warning logged on 404') + assert( + headBlob.isDone(), + 'HEAD /api/projects/:historyId/blobs/:hash should have been called' + ) + assert( + !createBlob.isDone(), + '/api/projects/:historyId/blobs/:hash should have been skipped' + ) + assert( + addFile.isDone(), + `/api/projects/${historyId}/changes should have been called` + ) + done() + } + ) + }) }) describe("when a file exists which shouldn't", function () { diff --git a/services/project-history/test/unit/js/HistoryStoreManager/HistoryStoreManagerTests.js b/services/project-history/test/unit/js/HistoryStoreManager/HistoryStoreManagerTests.js index 4b2545dd43..3edf4e1aa7 100644 --- a/services/project-history/test/unit/js/HistoryStoreManager/HistoryStoreManagerTests.js +++ b/services/project-history/test/unit/js/HistoryStoreManager/HistoryStoreManagerTests.js @@ -57,7 +57,6 @@ describe('HistoryStoreManager', function () { } this.request = sinon.stub() - this.request.get = sinon.stub() this.logger = { debug: sinon.stub(), @@ -382,6 +381,9 @@ describe('HistoryStoreManager', function () { this.fileStream = {} this.hash = 'random-hash' this.LocalFileWriter.bufferOnDisk.callsArgWith(4, null, this.hash) + this.FetchUtils.fetchNothing.rejects( + new RequestFailedError('', {}, { status: 404 }) + ) this.FetchUtils.fetchStream.resolves(this.fileStream) }) @@ -443,7 +445,7 @@ describe('HistoryStoreManager', function () { }) it('should not request the file from the filestore', function () { - expect(this.request.get).to.not.have.been.called + expect(this.FetchUtils.fetchStream).to.not.have.been.called }) }) @@ -503,6 +505,7 @@ describe('HistoryStoreManager', function () { }) describe('when history-v1 confirms that the blob exists', function () { beforeEach(function (done) { + this.FetchUtils.fetchNothing.resolves() this.HistoryStoreManager.createBlobForUpdate( this.projectId, this.historyId,