From da027f46cf848a530906e28558a227cb373de70a Mon Sep 17 00:00:00 2001 From: Malik Glossop Date: Tue, 28 Apr 2026 13:35:30 +0200 Subject: [PATCH] Merge pull request #32530 from overleaf/mg-stuck-sync-repro Detect and auto-clear stuck sync states GitOrigin-RevId: 680861a33e42432dab7d40ad421980b707eb6089 --- services/project-history/app/js/Errors.js | 2 + .../project-history/app/js/HttpController.js | 1 + .../project-history/app/js/RetryManager.js | 18 +- .../project-history/app/js/SyncManager.js | 94 ++++++++- .../test/acceptance/js/SyncTests.js | 198 ++++++++++++++++++ .../js/helpers/ProjectHistoryClient.js | 28 ++- .../unit/js/RetryManager/RetryManagerTests.js | 38 +++- .../unit/js/SyncManager/SyncManagerTests.js | 177 +++++++++++++++- 8 files changed, 538 insertions(+), 18 deletions(-) diff --git a/services/project-history/app/js/Errors.js b/services/project-history/app/js/Errors.js index 0b8d24b0d2..d2052d0169 100644 --- a/services/project-history/app/js/Errors.js +++ b/services/project-history/app/js/Errors.js @@ -1,5 +1,7 @@ import OError from '@overleaf/o-error' +export const SYNC_ONGOING_ERROR_MESSAGE = 'sync ongoing' + export class NotFoundError extends OError {} export class BadRequestError extends OError {} export class SyncError extends OError {} diff --git a/services/project-history/app/js/HttpController.js b/services/project-history/app/js/HttpController.js index 4c4931be48..57d32cee66 100644 --- a/services/project-history/app/js/HttpController.js +++ b/services/project-history/app/js/HttpController.js @@ -374,6 +374,7 @@ export function getResyncPending(req, res, next) { if (err) return next(err) res.json({ resyncPending: state.isSyncOngoing(), + syncStuck: state.isSyncStuck(), }) }) } diff --git a/services/project-history/app/js/RetryManager.js b/services/project-history/app/js/RetryManager.js index c9c7ac58e4..37d63dff9a 100644 --- a/services/project-history/app/js/RetryManager.js +++ b/services/project-history/app/js/RetryManager.js @@ -4,6 +4,7 @@ import logger from '@overleaf/logger' import OError from '@overleaf/o-error' import * as UpdatesProcessor from './UpdatesProcessor.js' import * as SyncManager from './SyncManager.js' +import { SYNC_ONGOING_ERROR_MESSAGE } from './Errors.js' import * as WebApiManager from './WebApiManager.js' import * as RedisManager from './RedisManager.js' import * as ErrorRecorder from './ErrorRecorder.js' @@ -44,9 +45,11 @@ promises.retryFailures = async (options = {}) => { } else if (failureType === 'hard') { const batch = await getFailureBatch(hardErrorSelector, limit) const result = await retryFailureBatch(batch, timeout, async failure => { - await resyncProject(failure.project_id, { - hard: failureRequiresHardResync(failure), - }) + // Ongoing-sync failures always use soft resync to preserve sync state. + // SyncManager needs existing state to detect and clear stuck syncs. + const hard = + failureRequiresHardResync(failure) && !isOngoingSyncFailure(failure) + await resyncProject(failure.project_id, { hard }) }) return result } @@ -62,6 +65,11 @@ function softErrorSelector(failure) { } function hardErrorSelector(failure) { + // Ongoing-sync failures are always retried via soft resync. + // SyncManager's stuck detection handles the retry limit (stuckClearCount). + if (isOngoingSyncFailure(failure)) return true + + // Other failures: retry hard/repeated ones, but stop after MAX_RESYNC_ATTEMPTS return ( (isHardFailure(failure) || isRepeatedFailure(failure)) && !isStuckFailure(failure) @@ -84,6 +92,10 @@ function isRepeatedFailure(failure) { return failure.attempts > 3 } +export function isOngoingSyncFailure(failure) { + return failure.error?.includes(SYNC_ONGOING_ERROR_MESSAGE) ?? false +} + function isStuckFailure(failure) { return ( failure.resyncAttempts != null && diff --git a/services/project-history/app/js/SyncManager.js b/services/project-history/app/js/SyncManager.js index 24c9c0a5d4..c8d29279c3 100644 --- a/services/project-history/app/js/SyncManager.js +++ b/services/project-history/app/js/SyncManager.js @@ -8,7 +8,11 @@ import logger from '@overleaf/logger' import Metrics from '@overleaf/metrics' import OError from '@overleaf/o-error' import { File, Range } from 'overleaf-editor-core' -import { NeedFullProjectStructureResyncError, SyncError } from './Errors.js' +import { + NeedFullProjectStructureResyncError, + SYNC_ONGOING_ERROR_MESSAGE, + SyncError, +} from './Errors.js' import { db, ObjectId } from './mongodb.js' import * as SnapshotManager from './SnapshotManager.js' import * as LockManager from './LockManager.js' @@ -30,6 +34,8 @@ import { isInsert, isDelete } from './Utils.js' */ const MAX_RESYNC_HISTORY_RECORDS = 100 // keep this many records of previous resyncs const EXPIRE_RESYNC_HISTORY_INTERVAL_MS = 90 * 24 * 3600 * 1000 // 90 days +const SYNC_STUCK_TIMEOUT_MS = 4 * 60 * 60 * 1000 // 4 hours +const MAX_STUCK_CLEAR_ATTEMPTS = 5 const keys = Settings.redis.lock.key_schema @@ -93,7 +99,42 @@ async function startResyncWithoutLock(projectId, options) { const syncState = await getResyncState(projectId) if (syncState.isSyncOngoing()) { - throw new OError('sync ongoing') + if (syncState.isSyncStuck()) { + const stuckDocPaths = Array.from(syncState.resyncDocContents) + const stuckClearCount = syncState.stuckClearCount + + if (stuckClearCount >= MAX_STUCK_CLEAR_ATTEMPTS) { + await _recordStuckClearInSyncState(projectId, stuckDocPaths) + Metrics.inc('project_history_sync_stuck_permanent') + // Log error only on first permanent-stuck detection to avoid spam + if (stuckClearCount === MAX_STUCK_CLEAR_ATTEMPTS) { + logger.error( + { + projectId, + stuckClearCount: stuckClearCount + 1, + stuckDocPaths, + resyncPendingSince: syncState.resyncPendingSince, + }, + 'sync permanently stuck — exceeded auto-clear limit' + ) + } + throw new OError('sync permanently stuck') + } + + logger.warn( + { + projectId, + stuckClearCount: stuckClearCount + 1, + stuckDocPaths, + resyncPendingSince: syncState.resyncPendingSince, + }, + 'sync stuck, clearing state and restarting' + ) + Metrics.inc('project_history_sync_stuck_cleared') + await _recordStuckClearInSyncState(projectId, stuckDocPaths) + } else { + throw new OError(SYNC_ONGOING_ERROR_MESSAGE) + } } syncState.setOrigin(options.origin || { kind: 'history-resync' }) syncState.startProjectStructureSync() @@ -167,7 +208,12 @@ async function setResyncState(projectId, syncState) { update.$set.expiresAt = new Date( Date.now() + EXPIRE_RESYNC_HISTORY_INTERVAL_MS ) - update.$unset = { resyncPendingSince: 1 } + update.$unset = { + resyncPendingSince: 1, + stuckClearCount: 1, + lastStuckClearAt: 1, + lastStuckDocPaths: 1, + } } // apply the update @@ -184,6 +230,17 @@ async function clearResyncState(projectId) { }) } +async function _recordStuckClearInSyncState(projectId, stuckDocPaths) { + await db.projectHistorySyncState.updateOne( + { project_id: new ObjectId(projectId) }, + { + $inc: { stuckClearCount: 1 }, + $set: { lastStuckClearAt: new Date(), lastStuckDocPaths: stuckDocPaths }, + $unset: { resyncPendingSince: 1 }, + } + ) +} + /** * @param {string} projectId * @param {Date} date @@ -223,6 +280,7 @@ async function skipUpdatesDuringSync(projectId, updates) { if (!shouldSkipUpdate) { filteredUpdates.push(update) } else { + Metrics.inc('project_history_sync_update_skipped') logger.debug({ projectId, update }, 'skipping update due to resync') } } @@ -297,7 +355,10 @@ class SyncState { resyncCount, resyncPendingSince, lastUpdated, - history + history, + stuckClearCount, + lastStuckClearAt, + lastStuckDocPaths ) { this.projectId = projectId this.resyncProjectStructure = resyncProjectStructure @@ -307,6 +368,9 @@ class SyncState { this.resyncPendingSince = resyncPendingSince this.lastUpdated = lastUpdated this.history = history + this.stuckClearCount = stuckClearCount + this.lastStuckClearAt = lastStuckClearAt + this.lastStuckDocPaths = lastStuckDocPaths } static fromRaw(projectId, rawSyncState) { @@ -336,6 +400,9 @@ class SyncState { } } const lastUpdated = rawSyncState.lastUpdated + const stuckClearCount = rawSyncState.stuckClearCount ?? 0 + const lastStuckClearAt = rawSyncState.lastStuckClearAt + const lastStuckDocPaths = rawSyncState.lastStuckDocPaths return new SyncState( projectId, resyncProjectStructure, @@ -344,7 +411,10 @@ class SyncState { resyncCount, resyncPendingSince, lastUpdated, - history + history, + stuckClearCount, + lastStuckClearAt, + lastStuckDocPaths ) } @@ -459,6 +529,20 @@ class SyncState { isSyncOngoing() { return this.isProjectStructureSyncing() || this.isAnyDocContentSyncing() } + + isSyncStuck() { + if (!this.isSyncOngoing()) { + return false + } + if (!this.resyncPendingSince) { + // No timestamp recorded — treat long-running syncs without a timestamp + // as potentially stuck (legacy state from before this field was added) + return true + } + return ( + Date.now() - this.resyncPendingSince.getTime() > SYNC_STUCK_TIMEOUT_MS + ) + } } class SyncUpdateExpander { diff --git a/services/project-history/test/acceptance/js/SyncTests.js b/services/project-history/test/acceptance/js/SyncTests.js index 28e489cacf..17314c77d4 100644 --- a/services/project-history/test/acceptance/js/SyncTests.js +++ b/services/project-history/test/acceptance/js/SyncTests.js @@ -2385,6 +2385,204 @@ describe('Syncing with web and doc-updater', function () { ) }) }) + + describe('stuck sync state (missing resyncDocContent updates)', function () { + beforeEach(function () { + MockHistoryStore() + .get(`/api/projects/${historyId}/latest/history`) + .reply(200, { + chunk: { + history: { + snapshot: { + files: { + 'main.tex': { + hash: '0a207c060e61f3b88eaee0a8cd0696f46fb155eb', + stringLength: 3, + }, + }, + }, + changes: [], + }, + startVersion: 0, + }, + }) + + MockHistoryStore() + .get( + `/api/projects/${historyId}/blobs/0a207c060e61f3b88eaee0a8cd0696f46fb155eb` + ) + .reply(200, 'a\nb') + }) + + it('should get stuck when resyncDocContent update is missing', async function () { + // Step 1: Start a resync + await ProjectHistoryClient.resyncHistory(this.project_id) + + // Step 2: Push resyncProjectStructure update (lists docs to sync) + // but do NOT push the corresponding resyncDocContent update + const structureUpdate = { + projectHistoryId: historyId, + resyncProjectStructure: { + docs: [{ path: '/main.tex' }], + files: [], + }, + meta: { + ts: this.timestamp, + }, + } + await ProjectHistoryClient.pushRawUpdate( + this.project_id, + structureUpdate + ) + + // Step 3: Flush — processes structure update, adds /main.tex to resyncDocContents + // This succeeds (204) but leaves sync in an incomplete state + await ProjectHistoryClient.flushProject(this.project_id) + + // Step 4: Verify the sync state is stuck + const syncState = await ProjectHistoryClient.getSyncState( + this.project_id + ) + expect(syncState).to.not.be.null + expect(syncState.resyncProjectStructure).to.equal(false) + expect(syncState.resyncDocContents).to.deep.equal(['/main.tex']) + expect(syncState.resyncPendingSince).to.be.a('date') + // Sync thinks it's ongoing because resyncDocContents is not empty + + // Step 5: Push a normal text update (simulating a user edit) + const textUpdate = { + doc: this.doc_id, + op: [{ p: 3, i: '\nc' }], + v: 1, + meta: { + ts: this.timestamp, + pathname: '/main.tex', + doc_length: 3, + }, + } + await ProjectHistoryClient.pushRawUpdate(this.project_id, textUpdate) + + // Step 6: Flush again — the text update should be silently skipped + // because the doc is in resyncDocContents (sync "ongoing") + await ProjectHistoryClient.flushProject(this.project_id) + + // Step 7: Verify the sync state is STILL stuck (text update was skipped) + const syncStateAfter = await ProjectHistoryClient.getSyncState( + this.project_id + ) + expect(syncStateAfter.resyncProjectStructure).to.equal(false) + expect(syncStateAfter.resyncDocContents).to.deep.equal(['/main.tex']) + + // Step 7b: Verify resync-pending reports ongoing but not yet stuck + const pending = await ProjectHistoryClient.getResyncPending( + this.project_id + ) + expect(pending.resyncPending).to.equal(true) + expect(pending.syncStuck).to.equal(false) + + // Step 8: Verify a normal resync FAILS because sync is "ongoing" + try { + await fetchNothing( + `http://127.0.0.1:3054/project/${this.project_id}/resync`, + { + method: 'POST', + json: { origin: { kind: 'test-origin' } }, + } + ) + assert.fail('resync should have failed') + } catch (error) { + if (error instanceof RequestFailedError) { + expect(error.response.status).to.equal(500) + } else { + throw error + } + } + }) + + it('should recover with a hard resync (force=true)', async function () { + // Set up the stuck state (same as above) + await ProjectHistoryClient.resyncHistory(this.project_id) + + const structureUpdate = { + projectHistoryId: historyId, + resyncProjectStructure: { + docs: [{ path: '/main.tex' }], + files: [], + }, + meta: { + ts: this.timestamp, + }, + } + await ProjectHistoryClient.pushRawUpdate( + this.project_id, + structureUpdate + ) + await ProjectHistoryClient.flushProject(this.project_id) + + // Verify stuck + const syncState = await ProjectHistoryClient.getSyncState( + this.project_id + ) + expect(syncState.resyncDocContents).to.deep.equal(['/main.tex']) + + // Mock the web resync endpoint for the hard resync + MockWeb() + .post(`/project/${this.project_id}/history/resync`) + .reply(204) + + // Hard resync should clear the stuck state and succeed + const response = await fetchNothing( + `http://127.0.0.1:3054/project/${this.project_id}/resync?force=true`, + { + method: 'POST', + json: { origin: { kind: 'test-origin' } }, + } + ) + expect(response.status).to.equal(204) + + // After hard resync, sync state should be cleared or reset + const syncStateAfter = await ProjectHistoryClient.getSyncState( + this.project_id + ) + // Hard resync clears and restarts — at this point the new resync + // should be in progress (resyncProjectStructure = true) or cleared + // depending on whether web sent updates + if (syncStateAfter) { + expect(syncStateAfter.resyncProjectStructure).to.equal(true) + } + }) + + it('should auto-recover when stuck (null resyncPendingSince)', async function () { + // Inject a stuck sync state: ongoing but no resyncPendingSince + // (legacy state from before the timestamp field was added) + await ProjectHistoryClient.injectStuckSyncState(this.project_id, [ + '/main.tex', + ]) + + // Verify the state is recognised as stuck + const pending = await ProjectHistoryClient.getResyncPending( + this.project_id + ) + expect(pending.resyncPending).to.equal(true) + expect(pending.syncStuck).to.equal(true) + + // Mock the web callback that requestResync triggers + MockWeb() + .post(`/project/${this.project_id}/history/resync`) + .reply(204) + + // A plain resync (no force) should detect stuck, clear, and restart + await ProjectHistoryClient.resyncHistory(this.project_id) + + // stuckClearCount incremented and a new structure sync started + const syncState = await ProjectHistoryClient.getSyncState( + this.project_id + ) + expect(syncState.stuckClearCount).to.equal(1) + expect(syncState.resyncProjectStructure).to.equal(true) + expect(syncState.lastStuckDocPaths).to.deep.equal(['/main.tex']) + }) + }) }) }) diff --git a/services/project-history/test/acceptance/js/helpers/ProjectHistoryClient.js b/services/project-history/test/acceptance/js/helpers/ProjectHistoryClient.js index 3fd3261a85..6d221bed3c 100644 --- a/services/project-history/test/acceptance/js/helpers/ProjectHistoryClient.js +++ b/services/project-history/test/acceptance/js/helpers/ProjectHistoryClient.js @@ -1,7 +1,7 @@ import { expect } from 'chai' import Settings from '@overleaf/settings' import RedisWrapper from '@overleaf/redis-wrapper' -import { db } from '../../../../app/js/mongodb.js' +import { db, ObjectId } from '../../../../app/js/mongodb.js' import { fetchJson, fetchJsonWithResponse, @@ -223,6 +223,32 @@ export async function getFailures() { return failures } +export async function getSyncState(projectId) { + return await db.projectHistorySyncState.findOne({ + project_id: new ObjectId(projectId), + }) +} + +export async function getResyncPending(projectId) { + return await fetchJson( + `http://127.0.0.1:3054/project/${projectId}/resync-pending` + ) +} + +export async function injectStuckSyncState(projectId, docPaths) { + await db.projectHistorySyncState.replaceOne( + { project_id: new ObjectId(projectId) }, + { + project_id: new ObjectId(projectId), + resyncProjectStructure: false, + resyncDocContents: docPaths, + stuckClearCount: 0, + history: [], + }, + { upsert: true } + ) +} + export async function deleteProject(projectId) { const response = await fetchNothing( `http://127.0.0.1:3054/project/${projectId}`, diff --git a/services/project-history/test/unit/js/RetryManager/RetryManagerTests.js b/services/project-history/test/unit/js/RetryManager/RetryManagerTests.js index e8443aa398..f0aec32123 100644 --- a/services/project-history/test/unit/js/RetryManager/RetryManagerTests.js +++ b/services/project-history/test/unit/js/RetryManager/RetryManagerTests.js @@ -12,6 +12,8 @@ describe('RetryManager', function () { this.projectId2 = new ObjectId().toString() this.projectId3 = new ObjectId().toString() this.projectId4 = new ObjectId().toString() + this.projectId5 = new ObjectId().toString() + this.projectId6 = new ObjectId().toString() this.historyId = 12345 this.WebApiManager = { @@ -39,13 +41,25 @@ describe('RetryManager', function () { }, { project_id: this.projectId3, - error: 'sync ongoing', - attempts: 10, + error: 'OpsOutOfOrderError: doc version out of order', + attempts: 5, resyncAttempts: 1, }, { project_id: this.projectId4, - error: 'sync ongoing', + error: 'OpsOutOfOrderError: doc version out of order', + attempts: 5, + resyncAttempts: 2, + }, + { + project_id: this.projectId5, + error: 'OError: sync ongoing', + attempts: 10, + resyncAttempts: 1, + }, + { + project_id: this.projectId6, + error: 'OError: sync ongoing', attempts: 10, resyncAttempts: 2, }, @@ -129,6 +143,24 @@ describe('RetryManager', function () { ).not.to.have.been.calledWith(this.projectId4) }) + it('should use soft resync for sync ongoing failures regardless of resyncAttempts', function () { + expect(this.SyncManager.promises.startResync).to.have.been.calledWith( + this.projectId5 + ) + expect( + this.SyncManager.promises.startHardResync + ).not.to.have.been.calledWith(this.projectId5) + }) + + it('should retry stuck sync ongoing failures via soft resync', function () { + expect(this.SyncManager.promises.startResync).to.have.been.calledWith( + this.projectId6 + ) + expect( + this.SyncManager.promises.startHardResync + ).not.to.have.been.calledWith(this.projectId6) + }) + it('should count the unprocessed updates', function () { expect( this.RedisManager.promises.countUnprocessedUpdates diff --git a/services/project-history/test/unit/js/SyncManager/SyncManagerTests.js b/services/project-history/test/unit/js/SyncManager/SyncManagerTests.js index 1004ffd78d..66a9a62542 100644 --- a/services/project-history/test/unit/js/SyncManager/SyncManagerTests.js +++ b/services/project-history/test/unit/js/SyncManager/SyncManagerTests.js @@ -194,11 +194,11 @@ describe('SyncManager', function () { project_id: new ObjectId(this.projectId), }, sinon.match({ - $set: { + $set: sinon.match({ resyncProjectStructure: true, resyncDocContents: [], origin: { kind: 'history-resync' }, - }, + }), $currentDate: { lastUpdated: true }, $inc: { resyncCount: 1 }, $unset: { expiresAt: true }, @@ -212,7 +212,10 @@ describe('SyncManager', function () { describe('if project structure sync is in progress', function () { beforeEach(function () { - const syncState = { resyncProjectStructure: true } + const syncState = { + resyncProjectStructure: true, + resyncPendingSince: new Date(), + } this.db.projectHistorySyncState.findOne.resolves(syncState) }) @@ -225,7 +228,10 @@ describe('SyncManager', function () { describe('if doc content sync in is progress', function () { beforeEach(async function () { - const syncState = { resyncDocContents: ['/foo.tex'] } + const syncState = { + resyncDocContents: ['/foo.tex'], + resyncPendingSince: new Date(), + } this.db.projectHistorySyncState.findOne.resolves(syncState) }) @@ -235,6 +241,112 @@ describe('SyncManager', function () { ).to.be.rejectedWith('sync ongoing') }) }) + + describe('if sync is stuck (no resyncPendingSince — legacy state)', function () { + beforeEach(function () { + const syncState = { resyncDocContents: ['/foo.tex'] } + this.db.projectHistorySyncState.findOne.resolves(syncState) + }) + + it('records the stuck clear, does not delete state, and starts a new resync', async function () { + await this.SyncManager.promises.startResync(this.projectId) + expect( + this.db.projectHistorySyncState.updateOne + ).to.have.been.calledWith( + { project_id: new ObjectId(this.projectId) }, + sinon.match({ + $inc: { stuckClearCount: 1 }, + $unset: { resyncPendingSince: 1 }, + }) + ) + expect(this.WebApiManager.promises.requestResync.calledOnce).to.be.true + expect( + this.Metrics.inc.calledWith('project_history_sync_stuck_cleared') + ).to.be.true + }) + }) + + describe('if sync is stuck (resyncPendingSince older than timeout)', function () { + beforeEach(function () { + const fiveHoursAgo = new Date(Date.now() - 5 * 60 * 60 * 1000) + const syncState = { + resyncDocContents: ['/foo.tex'], + resyncPendingSince: fiveHoursAgo, + } + this.db.projectHistorySyncState.findOne.resolves(syncState) + }) + + it('records the stuck clear and starts a new resync', async function () { + await this.SyncManager.promises.startResync(this.projectId) + expect( + this.db.projectHistorySyncState.updateOne + ).to.have.been.calledWith( + { project_id: new ObjectId(this.projectId) }, + sinon.match({ + $inc: { stuckClearCount: 1 }, + $unset: { resyncPendingSince: 1 }, + }) + ) + expect(this.WebApiManager.promises.requestResync.calledOnce).to.be.true + }) + }) + + describe('if sync is not stuck (resyncPendingSince within timeout)', function () { + beforeEach(function () { + const threeHoursAgo = new Date(Date.now() - 3 * 60 * 60 * 1000) + const syncState = { + resyncDocContents: ['/foo.tex'], + resyncPendingSince: threeHoursAgo, + } + this.db.projectHistorySyncState.findOne.resolves(syncState) + }) + + it('throws sync ongoing and does not clear', async function () { + await expect( + this.SyncManager.promises.startResync(this.projectId) + ).to.be.rejectedWith('sync ongoing') + expect( + this.Metrics.inc.calledWith('project_history_sync_stuck_cleared') + ).to.be.false + expect( + this.Metrics.inc.calledWith('project_history_sync_stuck_permanent') + ).to.be.false + }) + }) + + describe('if sync is permanently stuck (exceeded retry limit)', function () { + beforeEach(function () { + const syncState = { + resyncDocContents: ['/foo.tex'], + stuckClearCount: 5, + } + this.db.projectHistorySyncState.findOne.resolves(syncState) + }) + + it('throws permanently stuck, emits metric, and records the attempt', async function () { + await expect( + this.SyncManager.promises.startResync(this.projectId) + ).to.be.rejectedWith('sync permanently stuck') + expect( + this.Metrics.inc.calledWith('project_history_sync_stuck_permanent') + ).to.be.true + expect( + this.db.projectHistorySyncState.updateOne + ).to.have.been.calledWith( + { project_id: new ObjectId(this.projectId) }, + sinon.match({ $inc: { stuckClearCount: 1 } }) + ) + }) + + it('does not attempt a resync', async function () { + try { + await this.SyncManager.promises.startResync(this.projectId) + } catch (e) { + // expected + } + expect(this.WebApiManager.promises.requestResync.called).to.be.false + }) + }) }) describe('setResyncState', function () { @@ -265,7 +377,11 @@ describe('SyncManager', function () { ).to.have.been.calledWith( { project_id: new ObjectId(this.projectId) }, sinon.match({ - $set: this.syncState.toRaw(), + $set: sinon.match({ + resyncProjectStructure: true, + resyncDocContents: [], + origin: { kind: 'history-resync' }, + }), $currentDate: { lastUpdated: true }, $inc: { resyncCount: 1 }, $unset: { expiresAt: true }, @@ -289,7 +405,7 @@ describe('SyncManager', function () { } }) - it('sets the sync state entry in mongo to expire', async function () { + it('sets the sync state entry in mongo to expire and clears stuck fields', async function () { await this.SyncManager.promises.setResyncState( this.projectId, this.syncState @@ -307,6 +423,12 @@ describe('SyncManager', function () { expiresAt: new Date(this.now.getTime() + 90 * 24 * 3600 * 1000), }, $currentDate: { lastUpdated: true }, + $unset: { + resyncPendingSince: 1, + stuckClearCount: 1, + lastStuckClearAt: 1, + lastStuckDocPaths: 1, + }, }), { upsert: true } ) @@ -2005,4 +2127,47 @@ describe('SyncManager', function () { }) }) }) + + describe('stuck sync state reproduction', function () { + describe('when resyncDocContents has entries but no resyncDocContent update arrives', function () { + beforeEach(function () { + // Simulate the stuck state: project structure sync is done, + // but doc content sync never completed for /main.tex + const stuckSyncState = { + resyncProjectStructure: false, + resyncDocContents: ['/main.tex'], + origin: { kind: 'history-resync' }, + } + this.db.projectHistorySyncState.findOne.resolves(stuckSyncState) + }) + + it('should skip text updates for a doc stuck in resyncDocContents', async function () { + const textUpdate = { + doc: new ObjectId(), + op: [{ i: 'hello', p: 0 }], + meta: { + pathname: '/main.tex', + doc_length: 0, + }, + } + this.UpdateTranslator.isTextUpdate.returns(false) + this.UpdateTranslator.isTextUpdate.withArgs(textUpdate).returns(true) + + const { updates: filteredUpdates, syncState } = + await this.SyncManager.promises.skipUpdatesDuringSync( + this.projectId, + [textUpdate] + ) + + // The text update is silently dropped — this is the stuck state bug + expect(filteredUpdates).to.deep.equal([]) + // Sync state still shows /main.tex as syncing + expect(syncState.toRaw().resyncDocContents).to.deep.equal(['/main.tex']) + // Metric is emitted for the skipped update + expect( + this.Metrics.inc.calledWith('project_history_sync_update_skipped') + ).to.be.true + }) + }) + }) })