feat: add max retries to monitor to tolerate network fluctuation
This commit is contained in:
parent
461e23be2f
commit
97d55da454
@ -19,6 +19,7 @@ const defaultValues: Omit<MonitorInfoEditorValues, 'payload'> = {
|
||||
type: monitorProviders[0].name,
|
||||
active: true,
|
||||
interval: 60,
|
||||
maxRetries: 0,
|
||||
};
|
||||
|
||||
interface MonitorInfoEditorProps {
|
||||
@ -90,6 +91,14 @@ export const MonitorInfoEditor: React.FC<MonitorInfoEditorProps> = React.memo(
|
||||
/>
|
||||
</Form.Item>
|
||||
|
||||
<Form.Item
|
||||
label="Max Retries"
|
||||
name="maxRetries"
|
||||
tooltip="Maximum retries before the service is marked as down and a notification is sent"
|
||||
>
|
||||
<InputNumber min={0} max={10} defaultValue={0} />
|
||||
</Form.Item>
|
||||
|
||||
{formEl}
|
||||
|
||||
<Form.Item label="Notification" name="notificationIds">
|
||||
|
@ -1,4 +1,5 @@
|
||||
import { z } from 'zod';
|
||||
import { MonitorModelSchema } from '../../prisma/zod';
|
||||
|
||||
// Match prisma `JsonValue`
|
||||
export const jsonFieldSchema = z.union([
|
||||
@ -53,18 +54,7 @@ export const websiteInfoSchema = z.object({
|
||||
deletedAt: z.date().nullable(),
|
||||
});
|
||||
|
||||
export const monitorInfoSchema = z.object({
|
||||
id: z.string(),
|
||||
workspaceId: z.string(),
|
||||
name: z.string(),
|
||||
type: z.string(),
|
||||
active: z.boolean(),
|
||||
interval: z.number(),
|
||||
payload: jsonFieldSchema,
|
||||
createdAt: z.date(),
|
||||
});
|
||||
|
||||
export const monitorInfoWithNotificationIdSchema = monitorInfoSchema.and(
|
||||
export const monitorInfoWithNotificationIdSchema = MonitorModelSchema.and(
|
||||
z.object({
|
||||
notifications: z.array(z.object({ id: z.string() })),
|
||||
})
|
||||
|
@ -5,7 +5,7 @@ import { logger } from '../../utils/logger';
|
||||
|
||||
export type MonitorUpsertData = Pick<
|
||||
Monitor,
|
||||
'workspaceId' | 'name' | 'type' | 'interval'
|
||||
'workspaceId' | 'name' | 'type' | 'interval' | 'maxRetries'
|
||||
> & {
|
||||
id?: string;
|
||||
active?: boolean;
|
||||
|
@ -15,6 +15,7 @@ import { createAuditLog } from '../auditLog';
|
||||
export class MonitorRunner {
|
||||
isStopped = false;
|
||||
timer: NodeJS.Timeout | null = null;
|
||||
retriedNum = 0;
|
||||
|
||||
constructor(public monitor: Monitor & { notifications: Notification[] }) {}
|
||||
|
||||
@ -23,7 +24,7 @@ export class MonitorRunner {
|
||||
*/
|
||||
async startMonitor() {
|
||||
const monitor = this.monitor;
|
||||
const { type, interval, workspaceId } = monitor;
|
||||
const { type, interval, workspaceId, maxRetries } = monitor;
|
||||
|
||||
const provider = monitorProviders[type];
|
||||
if (!provider) {
|
||||
@ -58,30 +59,41 @@ export class MonitorRunner {
|
||||
value = -1;
|
||||
}
|
||||
|
||||
// check event update
|
||||
if (value < 0 && currentStatus === 'UP') {
|
||||
await this.createEvent(
|
||||
'DOWN',
|
||||
`Monitor [${monitor.name}] has been down`
|
||||
);
|
||||
await this.notify(`[${monitor.name}] 🔴 Down`, [
|
||||
token.text(
|
||||
`[${monitor.name}] 🔴 Down\nTime: ${dayjs().format(
|
||||
'YYYY-MM-DD HH:mm:ss (z)'
|
||||
)}`
|
||||
),
|
||||
]);
|
||||
currentStatus = 'DOWN';
|
||||
} else if (value > 0 && currentStatus === 'DOWN') {
|
||||
await this.createEvent('UP', `Monitor [${monitor.name}] has been up`);
|
||||
await this.notify(`[${monitor.name}] ✅ Up`, [
|
||||
token.text(
|
||||
`[${monitor.name}] ✅ Up\nTime: ${dayjs().format(
|
||||
'YYYY-MM-DD HH:mm:ss (z)'
|
||||
)}`
|
||||
),
|
||||
]);
|
||||
currentStatus = 'UP';
|
||||
if (this.retriedNum < maxRetries) {
|
||||
// can still retry
|
||||
this.retriedNum++;
|
||||
} else {
|
||||
// check event update
|
||||
if (value < 0 && currentStatus === 'UP') {
|
||||
// UP -> DOWN
|
||||
await this.createEvent(
|
||||
'DOWN',
|
||||
`Monitor [${monitor.name}] has been down`
|
||||
);
|
||||
await this.notify(`[${monitor.name}] 🔴 Down`, [
|
||||
token.text(
|
||||
`[${monitor.name}] 🔴 Down\nTime: ${dayjs().format(
|
||||
'YYYY-MM-DD HH:mm:ss (z)'
|
||||
)}`
|
||||
),
|
||||
]);
|
||||
currentStatus = 'DOWN';
|
||||
} else if (value > 0 && currentStatus === 'DOWN') {
|
||||
// DOWN -> UP
|
||||
this.retriedNum = 0;
|
||||
await this.createEvent(
|
||||
'UP',
|
||||
`Monitor [${monitor.name}] has been up`
|
||||
);
|
||||
await this.notify(`[${monitor.name}] ✅ Up`, [
|
||||
token.text(
|
||||
`[${monitor.name}] ✅ Up\nTime: ${dayjs().format(
|
||||
'YYYY-MM-DD HH:mm:ss (z)'
|
||||
)}`
|
||||
),
|
||||
]);
|
||||
currentStatus = 'UP';
|
||||
}
|
||||
}
|
||||
|
||||
// insert into data
|
||||
@ -93,9 +105,6 @@ export class MonitorRunner {
|
||||
});
|
||||
|
||||
subscribeEventBus.emit('onMonitorReceiveNewData', workspaceId, data);
|
||||
|
||||
// Run next loop
|
||||
nextAction();
|
||||
} catch (err) {
|
||||
logger.error('[Monitor] Run monitor error,', monitor.id, String(err));
|
||||
createAuditLog({
|
||||
@ -104,6 +113,9 @@ export class MonitorRunner {
|
||||
relatedType: 'Monitor',
|
||||
content: `Run monitor(id: ${monitor.id}) error: ${String(err)}`,
|
||||
});
|
||||
} finally {
|
||||
// Run next loop
|
||||
nextAction();
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -0,0 +1,2 @@
|
||||
-- AlterTable
|
||||
ALTER TABLE "Monitor" ADD COLUMN "maxRetries" INTEGER NOT NULL DEFAULT 0;
|
@ -265,6 +265,7 @@ model Monitor {
|
||||
type String @db.VarChar(100)
|
||||
active Boolean @default(true) @db.Boolean
|
||||
interval Int @default(20) @db.Integer
|
||||
maxRetries Int @default(0)
|
||||
// TODO
|
||||
// maxRetry Int @default(0) @db.Integer
|
||||
// retryInterval Int @default(0) @db.Integer
|
||||
|
@ -15,6 +15,7 @@ export const MonitorModelSchema = z.object({
|
||||
type: z.string(),
|
||||
active: z.boolean(),
|
||||
interval: z.number().int(),
|
||||
maxRetries: z.number().int(),
|
||||
/**
|
||||
* [CommonPayload]
|
||||
*/
|
||||
|
@ -16,13 +16,15 @@ import {
|
||||
import dayjs from 'dayjs';
|
||||
import {
|
||||
monitorEventSchema,
|
||||
monitorInfoSchema,
|
||||
monitorInfoWithNotificationIdSchema,
|
||||
monitorStatusSchema,
|
||||
} from '../../model/_schema';
|
||||
import { OPENAPI_TAG } from '../../utils/const';
|
||||
import { OpenApiMeta } from 'trpc-openapi';
|
||||
import { MonitorStatusPageModelSchema } from '../../prisma/zod';
|
||||
import {
|
||||
MonitorModelSchema,
|
||||
MonitorStatusPageModelSchema,
|
||||
} from '../../prisma/zod';
|
||||
import { runCodeInVM } from '../../model/monitor/provider/custom';
|
||||
import { createAuditLog } from '../../model/auditLog';
|
||||
import {
|
||||
@ -124,11 +126,12 @@ export const monitorRouter = router({
|
||||
type: z.string(),
|
||||
active: z.boolean().default(true),
|
||||
interval: z.number().int().min(5).max(10000).default(20),
|
||||
maxRetries: z.number().int().min(0).max(10).default(0),
|
||||
notificationIds: z.array(z.string()).default([]),
|
||||
payload: z.object({}).passthrough(),
|
||||
})
|
||||
)
|
||||
.output(monitorInfoSchema)
|
||||
.output(MonitorModelSchema)
|
||||
.mutation(async ({ input }) => {
|
||||
const {
|
||||
id,
|
||||
@ -137,6 +140,7 @@ export const monitorRouter = router({
|
||||
type,
|
||||
active,
|
||||
interval,
|
||||
maxRetries,
|
||||
notificationIds,
|
||||
payload,
|
||||
} = input;
|
||||
@ -148,6 +152,7 @@ export const monitorRouter = router({
|
||||
type,
|
||||
active,
|
||||
interval,
|
||||
maxRetries,
|
||||
notificationIds,
|
||||
payload,
|
||||
});
|
||||
@ -166,7 +171,7 @@ export const monitorRouter = router({
|
||||
monitorId: z.string().cuid2(),
|
||||
})
|
||||
)
|
||||
.output(monitorInfoSchema)
|
||||
.output(MonitorModelSchema)
|
||||
.mutation(async ({ input }) => {
|
||||
const { workspaceId, monitorId } = input;
|
||||
|
||||
@ -239,7 +244,7 @@ export const monitorRouter = router({
|
||||
active: z.boolean(),
|
||||
})
|
||||
)
|
||||
.output(monitorInfoSchema)
|
||||
.output(MonitorModelSchema)
|
||||
.mutation(async ({ input, ctx }) => {
|
||||
const { workspaceId, monitorId, active } = input;
|
||||
const user = ctx.user;
|
||||
|
Loading…
Reference in New Issue
Block a user