mirror of
https://github.com/yu-i-i/overleaf-cep.git
synced 2026-05-23 09:09:36 +02:00
Merge pull request #32530 from overleaf/mg-stuck-sync-repro
Detect and auto-clear stuck sync states GitOrigin-RevId: 680861a33e42432dab7d40ad421980b707eb6089
This commit is contained in:
@@ -1,5 +1,7 @@
|
||||
import OError from '@overleaf/o-error'
|
||||
|
||||
export const SYNC_ONGOING_ERROR_MESSAGE = 'sync ongoing'
|
||||
|
||||
export class NotFoundError extends OError {}
|
||||
export class BadRequestError extends OError {}
|
||||
export class SyncError extends OError {}
|
||||
|
||||
@@ -374,6 +374,7 @@ export function getResyncPending(req, res, next) {
|
||||
if (err) return next(err)
|
||||
res.json({
|
||||
resyncPending: state.isSyncOngoing(),
|
||||
syncStuck: state.isSyncStuck(),
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
@@ -4,6 +4,7 @@ import logger from '@overleaf/logger'
|
||||
import OError from '@overleaf/o-error'
|
||||
import * as UpdatesProcessor from './UpdatesProcessor.js'
|
||||
import * as SyncManager from './SyncManager.js'
|
||||
import { SYNC_ONGOING_ERROR_MESSAGE } from './Errors.js'
|
||||
import * as WebApiManager from './WebApiManager.js'
|
||||
import * as RedisManager from './RedisManager.js'
|
||||
import * as ErrorRecorder from './ErrorRecorder.js'
|
||||
@@ -44,9 +45,11 @@ promises.retryFailures = async (options = {}) => {
|
||||
} else if (failureType === 'hard') {
|
||||
const batch = await getFailureBatch(hardErrorSelector, limit)
|
||||
const result = await retryFailureBatch(batch, timeout, async failure => {
|
||||
await resyncProject(failure.project_id, {
|
||||
hard: failureRequiresHardResync(failure),
|
||||
})
|
||||
// Ongoing-sync failures always use soft resync to preserve sync state.
|
||||
// SyncManager needs existing state to detect and clear stuck syncs.
|
||||
const hard =
|
||||
failureRequiresHardResync(failure) && !isOngoingSyncFailure(failure)
|
||||
await resyncProject(failure.project_id, { hard })
|
||||
})
|
||||
return result
|
||||
}
|
||||
@@ -62,6 +65,11 @@ function softErrorSelector(failure) {
|
||||
}
|
||||
|
||||
function hardErrorSelector(failure) {
|
||||
// Ongoing-sync failures are always retried via soft resync.
|
||||
// SyncManager's stuck detection handles the retry limit (stuckClearCount).
|
||||
if (isOngoingSyncFailure(failure)) return true
|
||||
|
||||
// Other failures: retry hard/repeated ones, but stop after MAX_RESYNC_ATTEMPTS
|
||||
return (
|
||||
(isHardFailure(failure) || isRepeatedFailure(failure)) &&
|
||||
!isStuckFailure(failure)
|
||||
@@ -84,6 +92,10 @@ function isRepeatedFailure(failure) {
|
||||
return failure.attempts > 3
|
||||
}
|
||||
|
||||
export function isOngoingSyncFailure(failure) {
|
||||
return failure.error?.includes(SYNC_ONGOING_ERROR_MESSAGE) ?? false
|
||||
}
|
||||
|
||||
function isStuckFailure(failure) {
|
||||
return (
|
||||
failure.resyncAttempts != null &&
|
||||
|
||||
@@ -8,7 +8,11 @@ import logger from '@overleaf/logger'
|
||||
import Metrics from '@overleaf/metrics'
|
||||
import OError from '@overleaf/o-error'
|
||||
import { File, Range } from 'overleaf-editor-core'
|
||||
import { NeedFullProjectStructureResyncError, SyncError } from './Errors.js'
|
||||
import {
|
||||
NeedFullProjectStructureResyncError,
|
||||
SYNC_ONGOING_ERROR_MESSAGE,
|
||||
SyncError,
|
||||
} from './Errors.js'
|
||||
import { db, ObjectId } from './mongodb.js'
|
||||
import * as SnapshotManager from './SnapshotManager.js'
|
||||
import * as LockManager from './LockManager.js'
|
||||
@@ -30,6 +34,8 @@ import { isInsert, isDelete } from './Utils.js'
|
||||
*/
|
||||
const MAX_RESYNC_HISTORY_RECORDS = 100 // keep this many records of previous resyncs
|
||||
const EXPIRE_RESYNC_HISTORY_INTERVAL_MS = 90 * 24 * 3600 * 1000 // 90 days
|
||||
const SYNC_STUCK_TIMEOUT_MS = 4 * 60 * 60 * 1000 // 4 hours
|
||||
const MAX_STUCK_CLEAR_ATTEMPTS = 5
|
||||
|
||||
const keys = Settings.redis.lock.key_schema
|
||||
|
||||
@@ -93,7 +99,42 @@ async function startResyncWithoutLock(projectId, options) {
|
||||
|
||||
const syncState = await getResyncState(projectId)
|
||||
if (syncState.isSyncOngoing()) {
|
||||
throw new OError('sync ongoing')
|
||||
if (syncState.isSyncStuck()) {
|
||||
const stuckDocPaths = Array.from(syncState.resyncDocContents)
|
||||
const stuckClearCount = syncState.stuckClearCount
|
||||
|
||||
if (stuckClearCount >= MAX_STUCK_CLEAR_ATTEMPTS) {
|
||||
await _recordStuckClearInSyncState(projectId, stuckDocPaths)
|
||||
Metrics.inc('project_history_sync_stuck_permanent')
|
||||
// Log error only on first permanent-stuck detection to avoid spam
|
||||
if (stuckClearCount === MAX_STUCK_CLEAR_ATTEMPTS) {
|
||||
logger.error(
|
||||
{
|
||||
projectId,
|
||||
stuckClearCount: stuckClearCount + 1,
|
||||
stuckDocPaths,
|
||||
resyncPendingSince: syncState.resyncPendingSince,
|
||||
},
|
||||
'sync permanently stuck — exceeded auto-clear limit'
|
||||
)
|
||||
}
|
||||
throw new OError('sync permanently stuck')
|
||||
}
|
||||
|
||||
logger.warn(
|
||||
{
|
||||
projectId,
|
||||
stuckClearCount: stuckClearCount + 1,
|
||||
stuckDocPaths,
|
||||
resyncPendingSince: syncState.resyncPendingSince,
|
||||
},
|
||||
'sync stuck, clearing state and restarting'
|
||||
)
|
||||
Metrics.inc('project_history_sync_stuck_cleared')
|
||||
await _recordStuckClearInSyncState(projectId, stuckDocPaths)
|
||||
} else {
|
||||
throw new OError(SYNC_ONGOING_ERROR_MESSAGE)
|
||||
}
|
||||
}
|
||||
syncState.setOrigin(options.origin || { kind: 'history-resync' })
|
||||
syncState.startProjectStructureSync()
|
||||
@@ -167,7 +208,12 @@ async function setResyncState(projectId, syncState) {
|
||||
update.$set.expiresAt = new Date(
|
||||
Date.now() + EXPIRE_RESYNC_HISTORY_INTERVAL_MS
|
||||
)
|
||||
update.$unset = { resyncPendingSince: 1 }
|
||||
update.$unset = {
|
||||
resyncPendingSince: 1,
|
||||
stuckClearCount: 1,
|
||||
lastStuckClearAt: 1,
|
||||
lastStuckDocPaths: 1,
|
||||
}
|
||||
}
|
||||
|
||||
// apply the update
|
||||
@@ -184,6 +230,17 @@ async function clearResyncState(projectId) {
|
||||
})
|
||||
}
|
||||
|
||||
async function _recordStuckClearInSyncState(projectId, stuckDocPaths) {
|
||||
await db.projectHistorySyncState.updateOne(
|
||||
{ project_id: new ObjectId(projectId) },
|
||||
{
|
||||
$inc: { stuckClearCount: 1 },
|
||||
$set: { lastStuckClearAt: new Date(), lastStuckDocPaths: stuckDocPaths },
|
||||
$unset: { resyncPendingSince: 1 },
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} projectId
|
||||
* @param {Date} date
|
||||
@@ -223,6 +280,7 @@ async function skipUpdatesDuringSync(projectId, updates) {
|
||||
if (!shouldSkipUpdate) {
|
||||
filteredUpdates.push(update)
|
||||
} else {
|
||||
Metrics.inc('project_history_sync_update_skipped')
|
||||
logger.debug({ projectId, update }, 'skipping update due to resync')
|
||||
}
|
||||
}
|
||||
@@ -297,7 +355,10 @@ class SyncState {
|
||||
resyncCount,
|
||||
resyncPendingSince,
|
||||
lastUpdated,
|
||||
history
|
||||
history,
|
||||
stuckClearCount,
|
||||
lastStuckClearAt,
|
||||
lastStuckDocPaths
|
||||
) {
|
||||
this.projectId = projectId
|
||||
this.resyncProjectStructure = resyncProjectStructure
|
||||
@@ -307,6 +368,9 @@ class SyncState {
|
||||
this.resyncPendingSince = resyncPendingSince
|
||||
this.lastUpdated = lastUpdated
|
||||
this.history = history
|
||||
this.stuckClearCount = stuckClearCount
|
||||
this.lastStuckClearAt = lastStuckClearAt
|
||||
this.lastStuckDocPaths = lastStuckDocPaths
|
||||
}
|
||||
|
||||
static fromRaw(projectId, rawSyncState) {
|
||||
@@ -336,6 +400,9 @@ class SyncState {
|
||||
}
|
||||
}
|
||||
const lastUpdated = rawSyncState.lastUpdated
|
||||
const stuckClearCount = rawSyncState.stuckClearCount ?? 0
|
||||
const lastStuckClearAt = rawSyncState.lastStuckClearAt
|
||||
const lastStuckDocPaths = rawSyncState.lastStuckDocPaths
|
||||
return new SyncState(
|
||||
projectId,
|
||||
resyncProjectStructure,
|
||||
@@ -344,7 +411,10 @@ class SyncState {
|
||||
resyncCount,
|
||||
resyncPendingSince,
|
||||
lastUpdated,
|
||||
history
|
||||
history,
|
||||
stuckClearCount,
|
||||
lastStuckClearAt,
|
||||
lastStuckDocPaths
|
||||
)
|
||||
}
|
||||
|
||||
@@ -459,6 +529,20 @@ class SyncState {
|
||||
isSyncOngoing() {
|
||||
return this.isProjectStructureSyncing() || this.isAnyDocContentSyncing()
|
||||
}
|
||||
|
||||
isSyncStuck() {
|
||||
if (!this.isSyncOngoing()) {
|
||||
return false
|
||||
}
|
||||
if (!this.resyncPendingSince) {
|
||||
// No timestamp recorded — treat long-running syncs without a timestamp
|
||||
// as potentially stuck (legacy state from before this field was added)
|
||||
return true
|
||||
}
|
||||
return (
|
||||
Date.now() - this.resyncPendingSince.getTime() > SYNC_STUCK_TIMEOUT_MS
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
class SyncUpdateExpander {
|
||||
|
||||
@@ -2385,6 +2385,204 @@ describe('Syncing with web and doc-updater', function () {
|
||||
)
|
||||
})
|
||||
})
|
||||
|
||||
describe('stuck sync state (missing resyncDocContent updates)', function () {
|
||||
beforeEach(function () {
|
||||
MockHistoryStore()
|
||||
.get(`/api/projects/${historyId}/latest/history`)
|
||||
.reply(200, {
|
||||
chunk: {
|
||||
history: {
|
||||
snapshot: {
|
||||
files: {
|
||||
'main.tex': {
|
||||
hash: '0a207c060e61f3b88eaee0a8cd0696f46fb155eb',
|
||||
stringLength: 3,
|
||||
},
|
||||
},
|
||||
},
|
||||
changes: [],
|
||||
},
|
||||
startVersion: 0,
|
||||
},
|
||||
})
|
||||
|
||||
MockHistoryStore()
|
||||
.get(
|
||||
`/api/projects/${historyId}/blobs/0a207c060e61f3b88eaee0a8cd0696f46fb155eb`
|
||||
)
|
||||
.reply(200, 'a\nb')
|
||||
})
|
||||
|
||||
it('should get stuck when resyncDocContent update is missing', async function () {
|
||||
// Step 1: Start a resync
|
||||
await ProjectHistoryClient.resyncHistory(this.project_id)
|
||||
|
||||
// Step 2: Push resyncProjectStructure update (lists docs to sync)
|
||||
// but do NOT push the corresponding resyncDocContent update
|
||||
const structureUpdate = {
|
||||
projectHistoryId: historyId,
|
||||
resyncProjectStructure: {
|
||||
docs: [{ path: '/main.tex' }],
|
||||
files: [],
|
||||
},
|
||||
meta: {
|
||||
ts: this.timestamp,
|
||||
},
|
||||
}
|
||||
await ProjectHistoryClient.pushRawUpdate(
|
||||
this.project_id,
|
||||
structureUpdate
|
||||
)
|
||||
|
||||
// Step 3: Flush — processes structure update, adds /main.tex to resyncDocContents
|
||||
// This succeeds (204) but leaves sync in an incomplete state
|
||||
await ProjectHistoryClient.flushProject(this.project_id)
|
||||
|
||||
// Step 4: Verify the sync state is stuck
|
||||
const syncState = await ProjectHistoryClient.getSyncState(
|
||||
this.project_id
|
||||
)
|
||||
expect(syncState).to.not.be.null
|
||||
expect(syncState.resyncProjectStructure).to.equal(false)
|
||||
expect(syncState.resyncDocContents).to.deep.equal(['/main.tex'])
|
||||
expect(syncState.resyncPendingSince).to.be.a('date')
|
||||
// Sync thinks it's ongoing because resyncDocContents is not empty
|
||||
|
||||
// Step 5: Push a normal text update (simulating a user edit)
|
||||
const textUpdate = {
|
||||
doc: this.doc_id,
|
||||
op: [{ p: 3, i: '\nc' }],
|
||||
v: 1,
|
||||
meta: {
|
||||
ts: this.timestamp,
|
||||
pathname: '/main.tex',
|
||||
doc_length: 3,
|
||||
},
|
||||
}
|
||||
await ProjectHistoryClient.pushRawUpdate(this.project_id, textUpdate)
|
||||
|
||||
// Step 6: Flush again — the text update should be silently skipped
|
||||
// because the doc is in resyncDocContents (sync "ongoing")
|
||||
await ProjectHistoryClient.flushProject(this.project_id)
|
||||
|
||||
// Step 7: Verify the sync state is STILL stuck (text update was skipped)
|
||||
const syncStateAfter = await ProjectHistoryClient.getSyncState(
|
||||
this.project_id
|
||||
)
|
||||
expect(syncStateAfter.resyncProjectStructure).to.equal(false)
|
||||
expect(syncStateAfter.resyncDocContents).to.deep.equal(['/main.tex'])
|
||||
|
||||
// Step 7b: Verify resync-pending reports ongoing but not yet stuck
|
||||
const pending = await ProjectHistoryClient.getResyncPending(
|
||||
this.project_id
|
||||
)
|
||||
expect(pending.resyncPending).to.equal(true)
|
||||
expect(pending.syncStuck).to.equal(false)
|
||||
|
||||
// Step 8: Verify a normal resync FAILS because sync is "ongoing"
|
||||
try {
|
||||
await fetchNothing(
|
||||
`http://127.0.0.1:3054/project/${this.project_id}/resync`,
|
||||
{
|
||||
method: 'POST',
|
||||
json: { origin: { kind: 'test-origin' } },
|
||||
}
|
||||
)
|
||||
assert.fail('resync should have failed')
|
||||
} catch (error) {
|
||||
if (error instanceof RequestFailedError) {
|
||||
expect(error.response.status).to.equal(500)
|
||||
} else {
|
||||
throw error
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
it('should recover with a hard resync (force=true)', async function () {
|
||||
// Set up the stuck state (same as above)
|
||||
await ProjectHistoryClient.resyncHistory(this.project_id)
|
||||
|
||||
const structureUpdate = {
|
||||
projectHistoryId: historyId,
|
||||
resyncProjectStructure: {
|
||||
docs: [{ path: '/main.tex' }],
|
||||
files: [],
|
||||
},
|
||||
meta: {
|
||||
ts: this.timestamp,
|
||||
},
|
||||
}
|
||||
await ProjectHistoryClient.pushRawUpdate(
|
||||
this.project_id,
|
||||
structureUpdate
|
||||
)
|
||||
await ProjectHistoryClient.flushProject(this.project_id)
|
||||
|
||||
// Verify stuck
|
||||
const syncState = await ProjectHistoryClient.getSyncState(
|
||||
this.project_id
|
||||
)
|
||||
expect(syncState.resyncDocContents).to.deep.equal(['/main.tex'])
|
||||
|
||||
// Mock the web resync endpoint for the hard resync
|
||||
MockWeb()
|
||||
.post(`/project/${this.project_id}/history/resync`)
|
||||
.reply(204)
|
||||
|
||||
// Hard resync should clear the stuck state and succeed
|
||||
const response = await fetchNothing(
|
||||
`http://127.0.0.1:3054/project/${this.project_id}/resync?force=true`,
|
||||
{
|
||||
method: 'POST',
|
||||
json: { origin: { kind: 'test-origin' } },
|
||||
}
|
||||
)
|
||||
expect(response.status).to.equal(204)
|
||||
|
||||
// After hard resync, sync state should be cleared or reset
|
||||
const syncStateAfter = await ProjectHistoryClient.getSyncState(
|
||||
this.project_id
|
||||
)
|
||||
// Hard resync clears and restarts — at this point the new resync
|
||||
// should be in progress (resyncProjectStructure = true) or cleared
|
||||
// depending on whether web sent updates
|
||||
if (syncStateAfter) {
|
||||
expect(syncStateAfter.resyncProjectStructure).to.equal(true)
|
||||
}
|
||||
})
|
||||
|
||||
it('should auto-recover when stuck (null resyncPendingSince)', async function () {
|
||||
// Inject a stuck sync state: ongoing but no resyncPendingSince
|
||||
// (legacy state from before the timestamp field was added)
|
||||
await ProjectHistoryClient.injectStuckSyncState(this.project_id, [
|
||||
'/main.tex',
|
||||
])
|
||||
|
||||
// Verify the state is recognised as stuck
|
||||
const pending = await ProjectHistoryClient.getResyncPending(
|
||||
this.project_id
|
||||
)
|
||||
expect(pending.resyncPending).to.equal(true)
|
||||
expect(pending.syncStuck).to.equal(true)
|
||||
|
||||
// Mock the web callback that requestResync triggers
|
||||
MockWeb()
|
||||
.post(`/project/${this.project_id}/history/resync`)
|
||||
.reply(204)
|
||||
|
||||
// A plain resync (no force) should detect stuck, clear, and restart
|
||||
await ProjectHistoryClient.resyncHistory(this.project_id)
|
||||
|
||||
// stuckClearCount incremented and a new structure sync started
|
||||
const syncState = await ProjectHistoryClient.getSyncState(
|
||||
this.project_id
|
||||
)
|
||||
expect(syncState.stuckClearCount).to.equal(1)
|
||||
expect(syncState.resyncProjectStructure).to.equal(true)
|
||||
expect(syncState.lastStuckDocPaths).to.deep.equal(['/main.tex'])
|
||||
})
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { expect } from 'chai'
|
||||
import Settings from '@overleaf/settings'
|
||||
import RedisWrapper from '@overleaf/redis-wrapper'
|
||||
import { db } from '../../../../app/js/mongodb.js'
|
||||
import { db, ObjectId } from '../../../../app/js/mongodb.js'
|
||||
import {
|
||||
fetchJson,
|
||||
fetchJsonWithResponse,
|
||||
@@ -223,6 +223,32 @@ export async function getFailures() {
|
||||
return failures
|
||||
}
|
||||
|
||||
export async function getSyncState(projectId) {
|
||||
return await db.projectHistorySyncState.findOne({
|
||||
project_id: new ObjectId(projectId),
|
||||
})
|
||||
}
|
||||
|
||||
export async function getResyncPending(projectId) {
|
||||
return await fetchJson(
|
||||
`http://127.0.0.1:3054/project/${projectId}/resync-pending`
|
||||
)
|
||||
}
|
||||
|
||||
export async function injectStuckSyncState(projectId, docPaths) {
|
||||
await db.projectHistorySyncState.replaceOne(
|
||||
{ project_id: new ObjectId(projectId) },
|
||||
{
|
||||
project_id: new ObjectId(projectId),
|
||||
resyncProjectStructure: false,
|
||||
resyncDocContents: docPaths,
|
||||
stuckClearCount: 0,
|
||||
history: [],
|
||||
},
|
||||
{ upsert: true }
|
||||
)
|
||||
}
|
||||
|
||||
export async function deleteProject(projectId) {
|
||||
const response = await fetchNothing(
|
||||
`http://127.0.0.1:3054/project/${projectId}`,
|
||||
|
||||
@@ -12,6 +12,8 @@ describe('RetryManager', function () {
|
||||
this.projectId2 = new ObjectId().toString()
|
||||
this.projectId3 = new ObjectId().toString()
|
||||
this.projectId4 = new ObjectId().toString()
|
||||
this.projectId5 = new ObjectId().toString()
|
||||
this.projectId6 = new ObjectId().toString()
|
||||
this.historyId = 12345
|
||||
|
||||
this.WebApiManager = {
|
||||
@@ -39,13 +41,25 @@ describe('RetryManager', function () {
|
||||
},
|
||||
{
|
||||
project_id: this.projectId3,
|
||||
error: 'sync ongoing',
|
||||
attempts: 10,
|
||||
error: 'OpsOutOfOrderError: doc version out of order',
|
||||
attempts: 5,
|
||||
resyncAttempts: 1,
|
||||
},
|
||||
{
|
||||
project_id: this.projectId4,
|
||||
error: 'sync ongoing',
|
||||
error: 'OpsOutOfOrderError: doc version out of order',
|
||||
attempts: 5,
|
||||
resyncAttempts: 2,
|
||||
},
|
||||
{
|
||||
project_id: this.projectId5,
|
||||
error: 'OError: sync ongoing',
|
||||
attempts: 10,
|
||||
resyncAttempts: 1,
|
||||
},
|
||||
{
|
||||
project_id: this.projectId6,
|
||||
error: 'OError: sync ongoing',
|
||||
attempts: 10,
|
||||
resyncAttempts: 2,
|
||||
},
|
||||
@@ -129,6 +143,24 @@ describe('RetryManager', function () {
|
||||
).not.to.have.been.calledWith(this.projectId4)
|
||||
})
|
||||
|
||||
it('should use soft resync for sync ongoing failures regardless of resyncAttempts', function () {
|
||||
expect(this.SyncManager.promises.startResync).to.have.been.calledWith(
|
||||
this.projectId5
|
||||
)
|
||||
expect(
|
||||
this.SyncManager.promises.startHardResync
|
||||
).not.to.have.been.calledWith(this.projectId5)
|
||||
})
|
||||
|
||||
it('should retry stuck sync ongoing failures via soft resync', function () {
|
||||
expect(this.SyncManager.promises.startResync).to.have.been.calledWith(
|
||||
this.projectId6
|
||||
)
|
||||
expect(
|
||||
this.SyncManager.promises.startHardResync
|
||||
).not.to.have.been.calledWith(this.projectId6)
|
||||
})
|
||||
|
||||
it('should count the unprocessed updates', function () {
|
||||
expect(
|
||||
this.RedisManager.promises.countUnprocessedUpdates
|
||||
|
||||
@@ -194,11 +194,11 @@ describe('SyncManager', function () {
|
||||
project_id: new ObjectId(this.projectId),
|
||||
},
|
||||
sinon.match({
|
||||
$set: {
|
||||
$set: sinon.match({
|
||||
resyncProjectStructure: true,
|
||||
resyncDocContents: [],
|
||||
origin: { kind: 'history-resync' },
|
||||
},
|
||||
}),
|
||||
$currentDate: { lastUpdated: true },
|
||||
$inc: { resyncCount: 1 },
|
||||
$unset: { expiresAt: true },
|
||||
@@ -212,7 +212,10 @@ describe('SyncManager', function () {
|
||||
|
||||
describe('if project structure sync is in progress', function () {
|
||||
beforeEach(function () {
|
||||
const syncState = { resyncProjectStructure: true }
|
||||
const syncState = {
|
||||
resyncProjectStructure: true,
|
||||
resyncPendingSince: new Date(),
|
||||
}
|
||||
this.db.projectHistorySyncState.findOne.resolves(syncState)
|
||||
})
|
||||
|
||||
@@ -225,7 +228,10 @@ describe('SyncManager', function () {
|
||||
|
||||
describe('if doc content sync in is progress', function () {
|
||||
beforeEach(async function () {
|
||||
const syncState = { resyncDocContents: ['/foo.tex'] }
|
||||
const syncState = {
|
||||
resyncDocContents: ['/foo.tex'],
|
||||
resyncPendingSince: new Date(),
|
||||
}
|
||||
this.db.projectHistorySyncState.findOne.resolves(syncState)
|
||||
})
|
||||
|
||||
@@ -235,6 +241,112 @@ describe('SyncManager', function () {
|
||||
).to.be.rejectedWith('sync ongoing')
|
||||
})
|
||||
})
|
||||
|
||||
describe('if sync is stuck (no resyncPendingSince — legacy state)', function () {
|
||||
beforeEach(function () {
|
||||
const syncState = { resyncDocContents: ['/foo.tex'] }
|
||||
this.db.projectHistorySyncState.findOne.resolves(syncState)
|
||||
})
|
||||
|
||||
it('records the stuck clear, does not delete state, and starts a new resync', async function () {
|
||||
await this.SyncManager.promises.startResync(this.projectId)
|
||||
expect(
|
||||
this.db.projectHistorySyncState.updateOne
|
||||
).to.have.been.calledWith(
|
||||
{ project_id: new ObjectId(this.projectId) },
|
||||
sinon.match({
|
||||
$inc: { stuckClearCount: 1 },
|
||||
$unset: { resyncPendingSince: 1 },
|
||||
})
|
||||
)
|
||||
expect(this.WebApiManager.promises.requestResync.calledOnce).to.be.true
|
||||
expect(
|
||||
this.Metrics.inc.calledWith('project_history_sync_stuck_cleared')
|
||||
).to.be.true
|
||||
})
|
||||
})
|
||||
|
||||
describe('if sync is stuck (resyncPendingSince older than timeout)', function () {
|
||||
beforeEach(function () {
|
||||
const fiveHoursAgo = new Date(Date.now() - 5 * 60 * 60 * 1000)
|
||||
const syncState = {
|
||||
resyncDocContents: ['/foo.tex'],
|
||||
resyncPendingSince: fiveHoursAgo,
|
||||
}
|
||||
this.db.projectHistorySyncState.findOne.resolves(syncState)
|
||||
})
|
||||
|
||||
it('records the stuck clear and starts a new resync', async function () {
|
||||
await this.SyncManager.promises.startResync(this.projectId)
|
||||
expect(
|
||||
this.db.projectHistorySyncState.updateOne
|
||||
).to.have.been.calledWith(
|
||||
{ project_id: new ObjectId(this.projectId) },
|
||||
sinon.match({
|
||||
$inc: { stuckClearCount: 1 },
|
||||
$unset: { resyncPendingSince: 1 },
|
||||
})
|
||||
)
|
||||
expect(this.WebApiManager.promises.requestResync.calledOnce).to.be.true
|
||||
})
|
||||
})
|
||||
|
||||
describe('if sync is not stuck (resyncPendingSince within timeout)', function () {
|
||||
beforeEach(function () {
|
||||
const threeHoursAgo = new Date(Date.now() - 3 * 60 * 60 * 1000)
|
||||
const syncState = {
|
||||
resyncDocContents: ['/foo.tex'],
|
||||
resyncPendingSince: threeHoursAgo,
|
||||
}
|
||||
this.db.projectHistorySyncState.findOne.resolves(syncState)
|
||||
})
|
||||
|
||||
it('throws sync ongoing and does not clear', async function () {
|
||||
await expect(
|
||||
this.SyncManager.promises.startResync(this.projectId)
|
||||
).to.be.rejectedWith('sync ongoing')
|
||||
expect(
|
||||
this.Metrics.inc.calledWith('project_history_sync_stuck_cleared')
|
||||
).to.be.false
|
||||
expect(
|
||||
this.Metrics.inc.calledWith('project_history_sync_stuck_permanent')
|
||||
).to.be.false
|
||||
})
|
||||
})
|
||||
|
||||
describe('if sync is permanently stuck (exceeded retry limit)', function () {
|
||||
beforeEach(function () {
|
||||
const syncState = {
|
||||
resyncDocContents: ['/foo.tex'],
|
||||
stuckClearCount: 5,
|
||||
}
|
||||
this.db.projectHistorySyncState.findOne.resolves(syncState)
|
||||
})
|
||||
|
||||
it('throws permanently stuck, emits metric, and records the attempt', async function () {
|
||||
await expect(
|
||||
this.SyncManager.promises.startResync(this.projectId)
|
||||
).to.be.rejectedWith('sync permanently stuck')
|
||||
expect(
|
||||
this.Metrics.inc.calledWith('project_history_sync_stuck_permanent')
|
||||
).to.be.true
|
||||
expect(
|
||||
this.db.projectHistorySyncState.updateOne
|
||||
).to.have.been.calledWith(
|
||||
{ project_id: new ObjectId(this.projectId) },
|
||||
sinon.match({ $inc: { stuckClearCount: 1 } })
|
||||
)
|
||||
})
|
||||
|
||||
it('does not attempt a resync', async function () {
|
||||
try {
|
||||
await this.SyncManager.promises.startResync(this.projectId)
|
||||
} catch (e) {
|
||||
// expected
|
||||
}
|
||||
expect(this.WebApiManager.promises.requestResync.called).to.be.false
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
describe('setResyncState', function () {
|
||||
@@ -265,7 +377,11 @@ describe('SyncManager', function () {
|
||||
).to.have.been.calledWith(
|
||||
{ project_id: new ObjectId(this.projectId) },
|
||||
sinon.match({
|
||||
$set: this.syncState.toRaw(),
|
||||
$set: sinon.match({
|
||||
resyncProjectStructure: true,
|
||||
resyncDocContents: [],
|
||||
origin: { kind: 'history-resync' },
|
||||
}),
|
||||
$currentDate: { lastUpdated: true },
|
||||
$inc: { resyncCount: 1 },
|
||||
$unset: { expiresAt: true },
|
||||
@@ -289,7 +405,7 @@ describe('SyncManager', function () {
|
||||
}
|
||||
})
|
||||
|
||||
it('sets the sync state entry in mongo to expire', async function () {
|
||||
it('sets the sync state entry in mongo to expire and clears stuck fields', async function () {
|
||||
await this.SyncManager.promises.setResyncState(
|
||||
this.projectId,
|
||||
this.syncState
|
||||
@@ -307,6 +423,12 @@ describe('SyncManager', function () {
|
||||
expiresAt: new Date(this.now.getTime() + 90 * 24 * 3600 * 1000),
|
||||
},
|
||||
$currentDate: { lastUpdated: true },
|
||||
$unset: {
|
||||
resyncPendingSince: 1,
|
||||
stuckClearCount: 1,
|
||||
lastStuckClearAt: 1,
|
||||
lastStuckDocPaths: 1,
|
||||
},
|
||||
}),
|
||||
{ upsert: true }
|
||||
)
|
||||
@@ -2005,4 +2127,47 @@ describe('SyncManager', function () {
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
describe('stuck sync state reproduction', function () {
|
||||
describe('when resyncDocContents has entries but no resyncDocContent update arrives', function () {
|
||||
beforeEach(function () {
|
||||
// Simulate the stuck state: project structure sync is done,
|
||||
// but doc content sync never completed for /main.tex
|
||||
const stuckSyncState = {
|
||||
resyncProjectStructure: false,
|
||||
resyncDocContents: ['/main.tex'],
|
||||
origin: { kind: 'history-resync' },
|
||||
}
|
||||
this.db.projectHistorySyncState.findOne.resolves(stuckSyncState)
|
||||
})
|
||||
|
||||
it('should skip text updates for a doc stuck in resyncDocContents', async function () {
|
||||
const textUpdate = {
|
||||
doc: new ObjectId(),
|
||||
op: [{ i: 'hello', p: 0 }],
|
||||
meta: {
|
||||
pathname: '/main.tex',
|
||||
doc_length: 0,
|
||||
},
|
||||
}
|
||||
this.UpdateTranslator.isTextUpdate.returns(false)
|
||||
this.UpdateTranslator.isTextUpdate.withArgs(textUpdate).returns(true)
|
||||
|
||||
const { updates: filteredUpdates, syncState } =
|
||||
await this.SyncManager.promises.skipUpdatesDuringSync(
|
||||
this.projectId,
|
||||
[textUpdate]
|
||||
)
|
||||
|
||||
// The text update is silently dropped — this is the stuck state bug
|
||||
expect(filteredUpdates).to.deep.equal([])
|
||||
// Sync state still shows /main.tex as syncing
|
||||
expect(syncState.toRaw().resyncDocContents).to.deep.equal(['/main.tex'])
|
||||
// Metric is emitted for the skipped update
|
||||
expect(
|
||||
this.Metrics.inc.calledWith('project_history_sync_update_skipped')
|
||||
).to.be.true
|
||||
})
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
Reference in New Issue
Block a user