From f0198616ad71a2ebea2d182f0ff8fa010fd53317 Mon Sep 17 00:00:00 2001
From: yusing
Date: Fri, 14 Feb 2025 03:49:56 +0800
Subject: [PATCH] improve error handling for system info metrics

---
 internal/common/env.go                     |   7 ++
 internal/error/builder.go                  |   8 ++
 internal/metrics/systeminfo/system_info.go | 136 +++++++++++++++------
 3 files changed, 116 insertions(+), 35 deletions(-)

diff --git a/internal/common/env.go b/internal/common/env.go
index a3f8bba..33d4743 100644
--- a/internal/common/env.go
+++ b/internal/common/env.go
@@ -53,6 +53,13 @@ var (
 	OIDCScopes        = GetEnvString("OIDC_SCOPES", "openid, profile, email")
 	OIDCAllowedUsers  = GetCommaSepEnv("OIDC_ALLOWED_USERS", "")
 	OIDCAllowedGroups = GetCommaSepEnv("OIDC_ALLOWED_GROUPS", "")
+
+	// metrics configuration
+	MetricsDisableCPU     = GetEnvBool("METRICS_DISABLE_CPU", false)
+	MetricsDisableMemory  = GetEnvBool("METRICS_DISABLE_MEMORY", false)
+	MetricsDisableDisk    = GetEnvBool("METRICS_DISABLE_DISK", false)
+	MetricsDisableNetwork = GetEnvBool("METRICS_DISABLE_NETWORK", false)
+	MetricsDisableSensors = GetEnvBool("METRICS_DISABLE_SENSORS", false)
 )
 
 func GetEnv[T any](key string, defaultValue T, parser func(string) (T, error)) T {
diff --git a/internal/error/builder.go b/internal/error/builder.go
index 96fb886..6a23f25 100644
--- a/internal/error/builder.go
+++ b/internal/error/builder.go
@@ -122,3 +122,11 @@ func (b *Builder) AddRange(errs ...error) *Builder {
 
 	return b
 }
+
+// ForEach calls fn once for every error held by the builder, in slice order.
+// NOTE(review): no nil-receiver check or locking here; confirm that matches the other Builder methods.
+func (b *Builder) ForEach(fn func(error)) {
+	for _, err := range b.errs {
+		fn(err)
+	}
+}
diff --git a/internal/metrics/systeminfo/system_info.go b/internal/metrics/systeminfo/system_info.go
index 80808d4..e33e73f 100644
--- a/internal/metrics/systeminfo/system_info.go
+++ b/internal/metrics/systeminfo/system_info.go
@@ -3,6 +3,7 @@ package systeminfo
 import (
 	"context"
 	"encoding/json"
+	"errors"
 	"time"
 
 	"github.com/shirou/gopsutil/v4/cpu"
@@ -10,6 +11,9 @@ import (
 	"github.com/shirou/gopsutil/v4/mem"
 	"github.com/shirou/gopsutil/v4/net"
 	"github.com/shirou/gopsutil/v4/sensors"
+	"github.com/yusing/go-proxy/internal/common"
+	E "github.com/yusing/go-proxy/internal/error"
+	"github.com/yusing/go-proxy/internal/logging"
 	"github.com/yusing/go-proxy/internal/metrics/period"
 	"github.com/yusing/go-proxy/internal/utils/strutils"
 )
@@ -31,44 +35,106 @@ func init() {
 	Poller.Start()
 }
 
+// Compile-time check that sensors.Warnings and disk.Warnings are the same type;
+// getSystemInfo relies on this when it separates warnings from errors.
+func _() {
+	var _ sensors.Warnings = disk.Warnings{}
+}
+
+// getSystemInfo samples every metric that is not disabled through the
+// common.MetricsDisable* settings and returns the combined snapshot.
+//
+// gopsutil warnings are logged and the snapshot is still returned; any other
+// error makes the call fail. lastResult, when non-nil, supplies the earlier
+// counters and timestamp used to derive network throughput.
 func getSystemInfo(ctx context.Context, lastResult *SystemInfo) (*SystemInfo, error) {
-	memoryInfo, err := mem.VirtualMemory()
-	if err != nil {
-		return nil, err
-	}
-	cpuAverage, err := cpu.PercentWithContext(ctx, 150*time.Millisecond, false)
-	if err != nil {
-		return nil, err
-	}
-	diskInfo, err := disk.Usage("/")
-	if err != nil {
-		return nil, err
-	}
-	networkIO, err := net.IOCounters(false)
-	if err != nil {
-		return nil, err
-	}
-	sensors, err := sensors.SensorsTemperatures()
-	if err != nil {
-		return nil, err
-	}
-	var networkUp, networkDown float64
-	if lastResult != nil {
-		interval := time.Since(lastResult.Timestamp).Seconds()
-		networkUp = float64(networkIO[0].BytesSent-lastResult.NetworkIO.BytesSent) / interval
-		networkDown = float64(networkIO[0].BytesRecv-lastResult.NetworkIO.BytesRecv) / interval
+	errs := E.NewBuilder("failed to get system info")
+	var systemInfo SystemInfo
+
+	if !common.MetricsDisableCPU {
+		cpuAverage, err := cpu.PercentWithContext(ctx, 150*time.Millisecond, false)
+		if err != nil {
+			errs.Add(err)
+		} else {
+			systemInfo.CPUAverage = cpuAverage[0]
+		}
 	}
 
-	return &SystemInfo{
-		Timestamp:   time.Now(),
-		CPUAverage:  cpuAverage[0],
-		Memory:      memoryInfo,
-		Disk:        diskInfo,
-		NetworkIO:   &networkIO[0],
-		NetworkUp:   networkUp,
-		NetworkDown: networkDown,
-		Sensors:     sensors,
-	}, nil
+	if !common.MetricsDisableMemory {
+		memoryInfo, err := mem.VirtualMemory()
+		if err != nil {
+			errs.Add(err)
+		}
+		systemInfo.Memory = memoryInfo
+	}
+
+	if !common.MetricsDisableDisk {
+		diskInfo, err := disk.Usage("/")
+		if err != nil {
+			errs.Add(err)
+		}
+		systemInfo.Disk = diskInfo
+	}
+
+	if !common.MetricsDisableNetwork {
+		networkIO, err := net.IOCounters(false)
+		if err != nil {
+			errs.Add(err)
+		} else {
+			networkIO := networkIO[0]
+			systemInfo.NetworkIO = &networkIO
+			var networkUp, networkDown float64
+			// lastResult.NetworkIO stays nil if that poll's counter read failed
+			// with only a warning, so guard before dereferencing it.
+			if lastResult != nil && lastResult.NetworkIO != nil {
+				// a non-positive interval would turn the rates into Inf/NaN
+				if interval := time.Since(lastResult.Timestamp).Seconds(); interval > 0 {
+					networkUp = float64(networkIO.BytesSent-lastResult.NetworkIO.BytesSent) / interval
+					networkDown = float64(networkIO.BytesRecv-lastResult.NetworkIO.BytesRecv) / interval
+				}
+			}
+			systemInfo.NetworkUp = networkUp
+			systemInfo.NetworkDown = networkDown
+		}
+	}
+
+	if !common.MetricsDisableSensors {
+		sensorsInfo, err := sensors.SensorsTemperatures()
+		if err != nil {
+			errs.Add(err)
+		}
+		systemInfo.Sensors = sensorsInfo
+	}
+
+	if errs.HasError() {
+		allWarnings := E.NewBuilder("")
+		allErrors := E.NewBuilder("failed to get system info")
+		errs.ForEach(func(err error) {
+			// disk.Warnings has the same type: all Warnings are aliases of
+			// common.Warnings from "github.com/shirou/gopsutil/v4/internal/common"
+			// (asserted at compile time by func _ above).
+			// gopsutil's Warnings implements error on the pointer receiver and
+			// errors.As panics on a target type that is not an error, so use *Warnings.
+			var warnings *sensors.Warnings
+			if errors.As(err, &warnings) {
+				for _, warning := range warnings.List {
+					allWarnings.Add(warning)
+				}
+			} else {
+				allErrors.Add(err)
+			}
+		})
+		if allWarnings.HasError() {
+			logging.Warn().Msg(allWarnings.String())
+		}
+		if allErrors.HasError() {
+			return nil, allErrors.Error()
+		}
+	}
+
+	// stamp the snapshot after sampling; the next poll measures its interval from this
+	systemInfo.Timestamp = time.Now()
+	return &systemInfo, nil
 }
 
 func (s *SystemInfo) MarshalJSON() ([]byte, error) {