From 5214ae17601856d349db98a5fa1d071dd2d72829 Mon Sep 17 00:00:00 2001
From: yusing
Date: Thu, 7 Nov 2024 11:44:01 +0800
Subject: [PATCH] uptime metrics

Add a "health_status" gauge in the "service" namespace, labelled by
service. checkUpdateHealth sets it to 1 when the service is healthy and
to 0 otherwise, and only touches it when common.PrometheusEnabled is set.

Also:
- add the StreamRouteMetricLabels and HealthMetricLabels label types,
  each with a toPromLabels method;
- switch HTTPRouteMetricLabels.toPromLabels to a pointer receiver;
- reword the help text of the req_elapsed_ms gauge.
---
Review notes: the gauge is set inline in checkUpdateHealth rather than
from a detached goroutine, so that consecutive status changes cannot be
applied out of order. The blob ids on the "index" lines were not
regenerated for this revision.

 internal/metrics/http_metrics.go   | 29 +++++++++++++++++++++++++++--
 internal/watcher/health/monitor.go | 12 ++++++++++++
 2 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/internal/metrics/http_metrics.go b/internal/metrics/http_metrics.go
index fde8ea1..8d280e0 100644
--- a/internal/metrics/http_metrics.go
+++ b/internal/metrics/http_metrics.go
@@ -14,10 +14,15 @@ type (
 		HTTP4xx,
 		HTTP5xx *Counter
 		HTTPReqElapsed *Gauge
+		HealthStatus   *Gauge
 	}
 	HTTPRouteMetricLabels struct {
 		Service, Method, Host, Visitor, Path string
 	}
+	StreamRouteMetricLabels struct {
+		Service, Visitor string
+	}
+	HealthMetricLabels string
 )
 
 var rm RouteMetrics
@@ -25,13 +30,15 @@ var rm RouteMetrics
 const (
 	routerNamespace     = "router"
 	routerHTTPSubsystem = "http"
+
+	serviceNamespace = "service"
 )
 
 func GetRouteMetrics() *RouteMetrics {
 	return &rm
 }
 
-func (lbl HTTPRouteMetricLabels) toPromLabels() prometheus.Labels {
+func (lbl *HTTPRouteMetricLabels) toPromLabels() prometheus.Labels {
 	return prometheus.Labels{
 		"service": lbl.Service,
 		"method":  lbl.Method,
@@ -41,6 +48,19 @@ func (lbl HTTPRouteMetricLabels) toPromLabels() prometheus.Labels {
 	}
 }
 
+func (lbl *StreamRouteMetricLabels) toPromLabels() prometheus.Labels {
+	return prometheus.Labels{
+		"service": lbl.Service,
+		"visitor": lbl.Visitor,
+	}
+}
+
+func (lbl HealthMetricLabels) toPromLabels() prometheus.Labels {
+	return prometheus.Labels{
+		"service": string(lbl),
+	}
+}
+
 func init() {
 	if !common.PrometheusEnabled {
 		return
@@ -76,7 +96,12 @@ func init() {
 			Namespace: routerNamespace,
 			Subsystem: routerHTTPSubsystem,
 			Name:      "req_elapsed_ms",
-			Help:      "How long it took to process the request" + partitionsHelp,
+			Help:      "How long it took to process the request and respond a status code" + partitionsHelp,
 		}, lbls...),
+		HealthStatus: NewGauge(prometheus.GaugeOpts{
+			Namespace: serviceNamespace,
+			Name:      "health_status",
+			Help:      "The health status of the router by service",
+		}, "service"),
 	}
 }
diff --git a/internal/watcher/health/monitor.go b/internal/watcher/health/monitor.go
index 9790755..bd98987 100644
--- a/internal/watcher/health/monitor.go
+++ b/internal/watcher/health/monitor.go
@@ -7,8 +7,10 @@ import (
 	"strings"
 	"time"
 
+	"github.com/yusing/go-proxy/internal/common"
 	E "github.com/yusing/go-proxy/internal/error"
 	"github.com/yusing/go-proxy/internal/logging"
+	"github.com/yusing/go-proxy/internal/metrics"
 	"github.com/yusing/go-proxy/internal/net/types"
 	"github.com/yusing/go-proxy/internal/notif"
 	"github.com/yusing/go-proxy/internal/task"
@@ -172,6 +174,16 @@ func (mon *monitor) checkUpdateHealth() error {
 			logger.Debug().Msg(detail)
 			notif.Notify(mon.service, "server is down")
 		}
+		if common.PrometheusEnabled {
+			// Set the gauge inline: a detached goroutine per status change
+			// could apply two quick updates out of order.
+			m := metrics.GetRouteMetrics()
+			var up float64
+			if healthy {
+				up = 1
+			}
+			m.HealthStatus.With(metrics.HealthMetricLabels(mon.service)).Set(up)
+		}
 	}
 
 	return nil