mirror of
https://github.com/yu-i-i/overleaf-cep.git
synced 2026-05-23 09:09:36 +02:00
[web] add admin page for project history failures (#32751)
GitOrigin-RevId: c9564e07239b761851c9a86894f9ef3023dfcf2b
This commit is contained in:
@@ -11,15 +11,18 @@ import { db } from './mongodb.js'
|
||||
*/
|
||||
|
||||
/**
|
||||
* @template {{error?: string}} T
|
||||
* @template {{error: string}|{}} T
|
||||
* @param {T} failure
|
||||
* @return {T}
|
||||
*/
|
||||
function normalizeFailure(failure) {
|
||||
return {
|
||||
...failure,
|
||||
error: failure.error?.replace('OError:', 'Error:'),
|
||||
if ('error' in failure && failure.error?.includes('OError:')) {
|
||||
return {
|
||||
...failure,
|
||||
error: failure.error.replace('OError:', 'Error:'),
|
||||
}
|
||||
}
|
||||
return failure
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -149,10 +152,10 @@ async function getFailuresByType() {
|
||||
const maxQueueSize = {}
|
||||
// count all the failures and number of attempts by type
|
||||
for (const result of results || []) {
|
||||
const failureType = result.error
|
||||
const failureType = 'error' in result ? result.error : 'resync'
|
||||
const attempts = result.attempts || 1 // allow for field to be absent
|
||||
const requests = result.requestCount || 0
|
||||
const queueSize = result.queueSize || 0
|
||||
const queueSize = 'queueSize' in result ? result.queueSize : 0
|
||||
if (failureCounts[failureType] > 0) {
|
||||
failureCounts[failureType]++
|
||||
failureAttempts[failureType] += attempts
|
||||
@@ -169,45 +172,60 @@ async function getFailuresByType() {
|
||||
return { failureCounts, failureAttempts, failureRequests, maxQueueSize }
|
||||
}
|
||||
|
||||
/**
|
||||
* Mapping between error messages and short labels.
|
||||
* @type {Record<string, string>}
|
||||
*/
|
||||
const SHORT_ERROR_NAMES = {
|
||||
'Error: bad response from filestore: 404': 'filestore-404',
|
||||
'Error: bad response from filestore: 500': 'filestore-500',
|
||||
'NotFoundError: got a 404 from web api': 'web-api-404',
|
||||
'Error: history store a non-success status code: 413': 'history-store-413',
|
||||
'Error: history store a non-success status code: 422': 'history-store-422',
|
||||
'Error: history store a non-success status code: 500': 'history-store-500',
|
||||
'Error: history store a non-success status code: 503': 'history-store-503',
|
||||
'Error: web returned a non-success status code: 500 (attempts: 2)': 'web-500',
|
||||
'Error: ESOCKETTIMEDOUT': 'socket-timeout',
|
||||
'Error: no project found': 'no-project-found',
|
||||
'OpsOutOfOrderError: project structure version out of order on incoming updates':
|
||||
'incoming-project-version-out-of-order',
|
||||
'OpsOutOfOrderError: doc version out of order on incoming updates':
|
||||
'incoming-doc-version-out-of-order',
|
||||
'OpsOutOfOrderError: project structure version out of order':
|
||||
'chunk-project-version-out-of-order',
|
||||
'OpsOutOfOrderError: doc version out of order':
|
||||
'chunk-doc-version-out-of-order',
|
||||
'Error: failed to extend lock': 'lock-overrun',
|
||||
'Error: tried to release timed out lock': 'lock-overrun',
|
||||
'Error: Timeout': 'lock-overrun',
|
||||
'Error: sync ongoing': 'sync-ongoing',
|
||||
'SyncError: unexpected resyncProjectStructure update': 'sync-error',
|
||||
'[object Error]': 'unknown-error-object',
|
||||
'UpdateWithUnknownFormatError: update with unknown format': 'unknown-format',
|
||||
'Error: update with unknown format': 'unknown-format',
|
||||
'TextOperationError: The base length of the second operation has to be the target length of the first operation':
|
||||
'text-op-error',
|
||||
'Error: ENOSPC: no space left on device, write': 'ENOSPC',
|
||||
'*': 'other',
|
||||
}
|
||||
|
||||
async function getFailuresFull() {
|
||||
const results = []
|
||||
for await (const failure of await getFailedProjects()) {
|
||||
results.push({
|
||||
category:
|
||||
'error' in failure ? SHORT_ERROR_NAMES[failure.error] : undefined,
|
||||
...failure,
|
||||
})
|
||||
}
|
||||
return results
|
||||
}
|
||||
|
||||
async function getFailures() {
|
||||
const { failureCounts, failureAttempts, failureRequests, maxQueueSize } =
|
||||
await getFailuresByType()
|
||||
|
||||
let attempts, failureType, label, requests
|
||||
const shortNames = {
|
||||
'Error: bad response from filestore: 404': 'filestore-404',
|
||||
'Error: bad response from filestore: 500': 'filestore-500',
|
||||
'NotFoundError: got a 404 from web api': 'web-api-404',
|
||||
'Error: history store a non-success status code: 413': 'history-store-413',
|
||||
'Error: history store a non-success status code: 422': 'history-store-422',
|
||||
'Error: history store a non-success status code: 500': 'history-store-500',
|
||||
'Error: history store a non-success status code: 503': 'history-store-503',
|
||||
'Error: web returned a non-success status code: 500 (attempts: 2)':
|
||||
'web-500',
|
||||
'Error: ESOCKETTIMEDOUT': 'socket-timeout',
|
||||
'Error: no project found': 'no-project-found',
|
||||
'OpsOutOfOrderError: project structure version out of order on incoming updates':
|
||||
'incoming-project-version-out-of-order',
|
||||
'OpsOutOfOrderError: doc version out of order on incoming updates':
|
||||
'incoming-doc-version-out-of-order',
|
||||
'OpsOutOfOrderError: project structure version out of order':
|
||||
'chunk-project-version-out-of-order',
|
||||
'OpsOutOfOrderError: doc version out of order':
|
||||
'chunk-doc-version-out-of-order',
|
||||
'Error: failed to extend lock': 'lock-overrun',
|
||||
'Error: tried to release timed out lock': 'lock-overrun',
|
||||
'Error: Timeout': 'lock-overrun',
|
||||
'Error: sync ongoing': 'sync-ongoing',
|
||||
'SyncError: unexpected resyncProjectStructure update': 'sync-error',
|
||||
'[object Error]': 'unknown-error-object',
|
||||
'UpdateWithUnknownFormatError: update with unknown format':
|
||||
'unknown-format',
|
||||
'Error: update with unknown format': 'unknown-format',
|
||||
'TextOperationError: The base length of the second operation has to be the target length of the first operation':
|
||||
'text-op-error',
|
||||
'Error: ENOSPC: no space left on device, write': 'ENOSPC',
|
||||
'*': 'other',
|
||||
}
|
||||
|
||||
// set all the known errors to zero if not present (otherwise gauges stay on their last value)
|
||||
const summaryCounts = {}
|
||||
@@ -215,8 +233,8 @@ async function getFailures() {
|
||||
const summaryRequests = {}
|
||||
const summaryMaxQueueSize = {}
|
||||
|
||||
for (failureType in shortNames) {
|
||||
label = shortNames[failureType]
|
||||
for (failureType in SHORT_ERROR_NAMES) {
|
||||
label = SHORT_ERROR_NAMES[failureType]
|
||||
summaryCounts[label] = 0
|
||||
summaryAttempts[label] = 0
|
||||
summaryRequests[label] = 0
|
||||
@@ -226,7 +244,7 @@ async function getFailures() {
|
||||
// record a metric for each type of failure
|
||||
for (failureType in failureCounts) {
|
||||
const failureCount = failureCounts[failureType]
|
||||
label = shortNames[failureType] || shortNames['*']
|
||||
label = SHORT_ERROR_NAMES[failureType] || SHORT_ERROR_NAMES['*']
|
||||
summaryCounts[label] += failureCount
|
||||
summaryAttempts[label] += failureAttempts[failureType]
|
||||
summaryRequests[label] += failureRequests[failureType]
|
||||
@@ -266,6 +284,7 @@ async function getFailures() {
|
||||
|
||||
// EXPORTS
|
||||
|
||||
const getFailuresFullCb = callbackify(getFailuresFull)
|
||||
const getFailedProjectsCb = callbackify(getFailedProjects)
|
||||
const getFailureRecordCb = callbackify(getFailureRecord)
|
||||
const getFailuresCb = callbackify(getFailures)
|
||||
@@ -278,6 +297,7 @@ const setForceDebugCb = callbackify(setForceDebug)
|
||||
|
||||
export {
|
||||
cloneFailureCb as cloneFailure,
|
||||
getFailuresFullCb as getFailuresFull,
|
||||
getFailedProjectsCb as getFailedProjects,
|
||||
getFailureRecordCb as getFailureRecord,
|
||||
getLastFailureCb as getLastFailure,
|
||||
|
||||
@@ -719,6 +719,13 @@ export function getFailures(req, res, next) {
|
||||
})
|
||||
}
|
||||
|
||||
export function getFailuresFull(req, res, next) {
|
||||
ErrorRecorder.getFailuresFull((error, result) => {
|
||||
if (error) return next(error)
|
||||
res.send(result)
|
||||
})
|
||||
}
|
||||
|
||||
export function getQueueCounts(req, res, next) {
|
||||
RedisManager.getProjectIdsWithHistoryOpsCount((err, queuedProjectsCount) => {
|
||||
if (err != null) {
|
||||
|
||||
@@ -87,6 +87,8 @@ export function initialize(app) {
|
||||
|
||||
app.get('/status/failures', HttpController.getFailures)
|
||||
|
||||
app.get('/status/failures-full', HttpController.getFailuresFull)
|
||||
|
||||
app.get('/status/queue', HttpController.getQueueCounts)
|
||||
|
||||
app.post('/retry/failures', HttpController.retryFailures)
|
||||
|
||||
@@ -7,8 +7,10 @@ export type ProjectHistoryFailure = {
|
||||
resyncAttempts: number
|
||||
resyncStartedAt: Date
|
||||
requestCount?: number
|
||||
history: (ErrorRecord | SyncStartRecord)[]
|
||||
} & ErrorRecord
|
||||
history: FailureRecord[]
|
||||
} & FailureRecord
|
||||
|
||||
type FailureRecord = ErrorRecord | SyncStartRecord
|
||||
|
||||
type ErrorRecord = {
|
||||
error: string
|
||||
|
||||
@@ -70,6 +70,11 @@ describe('Retrying failed projects', function () {
|
||||
attempts: 1,
|
||||
error: 'OError: ESOCKETTIMEDOUT',
|
||||
},
|
||||
{
|
||||
project_id: new ObjectId(),
|
||||
attempts: 1,
|
||||
resyncStartedAt: new Date(),
|
||||
},
|
||||
])
|
||||
|
||||
const body = await ProjectHistoryClient.getFailures()
|
||||
@@ -77,9 +82,11 @@ describe('Retrying failed projects', function () {
|
||||
_.merge(baseLineFailures, {
|
||||
attempts: {
|
||||
'socket-timeout': 2,
|
||||
other: 1,
|
||||
},
|
||||
counts: {
|
||||
'socket-timeout': 2,
|
||||
other: 1,
|
||||
},
|
||||
})
|
||||
)
|
||||
|
||||
@@ -316,6 +316,12 @@ async function getDebugInfo(projectId) {
|
||||
)
|
||||
}
|
||||
|
||||
async function getHistoryFailures() {
|
||||
return await fetchJson(
|
||||
`${settings.apis.project_history.url}/status/failures-full`
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Get history changes since a given version
|
||||
*
|
||||
@@ -492,5 +498,6 @@ export default {
|
||||
getLatestHistoryWithHistoryId,
|
||||
ensureNoResyncPending,
|
||||
getDebugInfo,
|
||||
getHistoryFailures,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -68,6 +68,7 @@ import { AlgoliaConfig } from '../../../modules/algolia-search/frontend/js/types
|
||||
import { WritefullPublicEnv } from '@wf/domain/writefull-public-env'
|
||||
import { UserNotificationPreferences } from '../../../types/notifications'
|
||||
import { SharingPermissions } from '../../../modules/sharing-permissions/app/src/types'
|
||||
import { FullHistoryFailure } from '@ol-types/history/projectHistory'
|
||||
|
||||
export interface Meta {
|
||||
'ol-ExposedSettings': ExposedSettings
|
||||
@@ -249,6 +250,7 @@ export interface Meta {
|
||||
'ol-primaryEmail': { email: string; confirmed: boolean }
|
||||
'ol-project': any // TODO
|
||||
'ol-projectEntityCounts': { files: number; docs: number }
|
||||
'ol-projectHistoryFailures': FullHistoryFailure[]
|
||||
'ol-projectName': string
|
||||
'ol-projectSyncSuccessMessage': string
|
||||
'ol-projectTags': Tag[]
|
||||
|
||||
50
services/web/types/history/projectHistory.ts
Normal file
50
services/web/types/history/projectHistory.ts
Normal file
@@ -0,0 +1,50 @@
|
||||
export type FullHistoryFailure = {
|
||||
category?: string
|
||||
} & ProjectHistoryFailureRecordSchema
|
||||
|
||||
export type ProjectHistoryFailureRecordSchema = {
|
||||
_id: string
|
||||
project_id: string
|
||||
attempts: number
|
||||
resyncAttempts: number
|
||||
resyncStartedAt: string
|
||||
requestCount?: number
|
||||
history: ProjectHistoryFailureEntrySchema[]
|
||||
} & ProjectHistoryFailureEntrySchema
|
||||
|
||||
export type ProjectHistoryFailureEntrySchema =
|
||||
| SyncStartRecordSchema
|
||||
| ErrorRecordSchema
|
||||
|
||||
export type ErrorRecordSchema = {
|
||||
error: string
|
||||
stack: string
|
||||
queueSize: number
|
||||
ts: string
|
||||
}
|
||||
|
||||
export type SyncStartRecordSchema = {
|
||||
resyncStartedAt: string
|
||||
}
|
||||
|
||||
export type SyncStateSchema = {
|
||||
resyncProjectStructure: boolean
|
||||
resyncDocContents: string[]
|
||||
origin: { kind: string }
|
||||
}
|
||||
|
||||
export type SyncStateHistoryEntry = {
|
||||
syncState: SyncStateSchema
|
||||
timestamp: string
|
||||
}
|
||||
|
||||
export type HistoryDebugInfoResponse = {
|
||||
failureRecord?: ProjectHistoryFailureRecordSchema
|
||||
syncState: SyncStateSchema & {
|
||||
resyncPending: boolean
|
||||
resyncCount: number
|
||||
resyncPendingSince?: string
|
||||
lastUpdated: string
|
||||
history: SyncStateHistoryEntry[]
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user