idlewatcher: refactor and fix data race

Author: yusing
Date:   2025-02-25 04:29:07 +08:00
Parent: 8074b93992
Commit: 767560804d
7 changed files with 71 additions and 68 deletions

View file

@@ -18,11 +18,6 @@ type (
 		StopMethod    StopMethod `json:"stop_method,omitempty"`
 		StopSignal    Signal     `json:"stop_signal,omitempty"`
 		StartEndpoint string     `json:"start_endpoint,omitempty"` // Optional path that must be hit to start container
-
-		DockerHost       string `json:"docker_host,omitempty"`
-		ContainerName    string `json:"container_name,omitempty"`
-		ContainerID      string `json:"container_id,omitempty"`
-		ContainerRunning bool   `json:"container_running,omitempty"`
 	}
 	StopMethod string
 	Signal     string
@@ -41,19 +36,10 @@ var validSignals = map[string]struct{}{
 }
 
 func ValidateConfig(cont *docker.Container) (*Config, gperr.Error) {
-	if cont == nil {
+	if cont == nil || cont.IdleTimeout == "" {
 		return nil, nil
 	}
-	if cont.IdleTimeout == "" {
-		return &Config{
-			DockerHost:       cont.DockerHost,
-			ContainerName:    cont.ContainerName,
-			ContainerID:      cont.ContainerID,
-			ContainerRunning: cont.Running,
-		}, nil
-	}
 
 	errs := gperr.NewBuilder("invalid idlewatcher config")
 	idleTimeout := gperr.Collect(errs, validateDurationPostitive, cont.IdleTimeout)
@@ -74,11 +60,6 @@ func ValidateConfig(cont *docker.Container) (*Config, gperr.Error) {
 		StopMethod:    stopMethod,
 		StopSignal:    signal,
 		StartEndpoint: startEndpoint,
-		DockerHost:       cont.DockerHost,
-		ContainerName:    cont.ContainerName,
-		ContainerID:      cont.ContainerID,
-		ContainerRunning: cont.Running,
 	}, nil
 }
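The net effect of these hunks: ValidateConfig no longer returns a partially filled Config just to carry container identity, so a (nil, nil) return now simply means "idlewatcher disabled". A minimal sketch of the resulting caller-side contract, assuming the packages referenced above; the helper name and wiring are illustrative, not from the repo:

// Sketch only: enableIdleWatching is a hypothetical caller.
func enableIdleWatching(cont *docker.Container) gperr.Error {
	cfg, err := idlewatcher.ValidateConfig(cont)
	if err != nil {
		return err
	}
	if cfg == nil {
		// No idle_timeout set (or cont is nil): idlewatcher stays disabled.
		// Container identity (DockerHost, ContainerID, ContainerName, Running)
		// is now read from the docker.Container itself, not from Config.
		return nil
	}
	// ... register a watcher using cfg and cont ...
	return nil
}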

View file

@@ -1,7 +1,7 @@
 package idlewatcher
 
 import (
-	"sync/atomic"
+	"errors"
 	"time"
 
 	"github.com/yusing/go-proxy/internal/docker/idlewatcher/types"
@@ -12,6 +12,7 @@ import (
 	route "github.com/yusing/go-proxy/internal/route/types"
 	"github.com/yusing/go-proxy/internal/task"
 	U "github.com/yusing/go-proxy/internal/utils"
+	"github.com/yusing/go-proxy/internal/utils/atomic"
 	"github.com/yusing/go-proxy/internal/watcher/health"
 	"github.com/yusing/go-proxy/internal/watcher/health/monitor"
 )
@@ -25,8 +26,7 @@ type (
 		stream net.Stream
 		hc     health.HealthChecker
 		metric *metrics.Gauge
-		lastErr error
-		ready   atomic.Bool
+		lastErr atomic.Value[error]
 	}
 )
@@ -35,6 +35,8 @@ const (
 	idleWakerCheckTimeout = time.Second
 )
 
+var noErr = errors.New("no error")
+
 // TODO: support stream
 func newWaker(parent task.Parent, route route.Route, rp *reverseproxy.ReverseProxy, stream net.Stream) (Waker, gperr.Error) {
@@ -127,7 +129,7 @@ func (w *Watcher) Status() health.Status {
 }
 
 func (w *Watcher) getStatusUpdateReady() health.Status {
-	if !w.ContainerRunning {
+	if !w.running.Load() {
 		return health.StatusNapping
 	}
@@ -138,19 +140,26 @@ func (w *Watcher) getStatusUpdateReady() health.Status {
 	result, err := w.hc.CheckHealth()
 	switch {
 	case err != nil:
-		w.lastErr = err
+		w.lastErr.Store(err)
 		w.ready.Store(false)
 		return health.StatusError
 	case result.Healthy:
-		w.lastErr = nil
+		w.lastErr.Store(noErr)
 		w.ready.Store(true)
 		return health.StatusHealthy
 	default:
-		w.lastErr = nil
+		w.lastErr.Store(noErr)
 		return health.StatusStarting
 	}
 }
 
+func (w *Watcher) LastError() error {
+	if err := w.lastErr.Load(); err != noErr {
+		return err
+	}
+	return nil
+}
+
 // MarshalJSON implements health.HealthMonitor.
 func (w *Watcher) MarshalJSON() ([]byte, error) {
 	var url *net.URL
@@ -158,8 +167,8 @@ func (w *Watcher) MarshalJSON() ([]byte, error) {
 		url = w.hc.URL()
 	}
 	var detail string
-	if w.lastErr != nil {
-		detail = w.lastErr.Error()
+	if err := w.LastError(); err != nil {
+		detail = err.Error()
 	}
 	return (&monitor.JSONRepresentation{
 		Name:   w.Name(),
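The data-race fix in this file replaces the plain `lastErr error` field with an atomic value. Because a `sync/atomic`-style Value cannot store nil (and, for the standard-library type, must always hold the same concrete type), the commit stores the `noErr` sentinel instead of nil and maps it back to nil in `LastError`. A standalone sketch of the same pattern built only on the standard library; the `errBox` wrapper is an extra guard for `sync/atomic.Value`'s same-concrete-type rule and is not part of the diff:

package main

import (
	"errors"
	"fmt"
	"sync/atomic"
)

// noErr is a sentinel meaning "no error": atomic.Value panics on Store(nil),
// so the absence of an error is represented by a fixed non-nil error.
var noErr = errors.New("no error")

// errBox gives atomic.Value a single concrete type to hold, no matter which
// error implementation is stored.
type errBox struct{ err error }

type waker struct {
	lastErr atomic.Value // always holds an errBox
}

func (w *waker) storeErr(err error) {
	if err == nil {
		err = noErr
	}
	w.lastErr.Store(errBox{err})
}

// LastError mirrors the accessor added in the diff: the sentinel is filtered
// out so callers keep the usual "nil means healthy" contract.
func (w *waker) LastError() error {
	box, ok := w.lastErr.Load().(errBox)
	if !ok || box.err == noErr {
		return nil
	}
	return box.err
}

func main() {
	var w waker
	w.storeErr(nil)
	fmt.Println(w.LastError()) // <nil>
	w.storeErr(errors.New("health check failed"))
	fmt.Println(w.LastError()) // health check failed
}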

View file

@@ -15,7 +15,7 @@ import (
 	route "github.com/yusing/go-proxy/internal/route/types"
 	"github.com/yusing/go-proxy/internal/task"
 	U "github.com/yusing/go-proxy/internal/utils"
-	F "github.com/yusing/go-proxy/internal/utils/functional"
+	"github.com/yusing/go-proxy/internal/utils/atomic"
 	"github.com/yusing/go-proxy/internal/watcher"
 	"github.com/yusing/go-proxy/internal/watcher/events"
 )
@@ -26,15 +26,22 @@ type (
 		zerolog.Logger
 
-		*idlewatcher.Config
 		*waker
+		*containerMeta
+		*idlewatcher.Config
 
 		client *docker.SharedClient
 
+		running atomic.Bool
+		ready   atomic.Bool
 		stopByMethod StopCallback // send a docker command w.r.t. `stop_method`
 		ticker       *time.Ticker
 		lastReset    time.Time
 		task         *task.Task
 	}
 
+	containerMeta struct {
+		ContainerID, ContainerName string
+	}
+
 	WakeDone <-chan error
 	WakeFunc func() WakeDone
@@ -42,8 +49,8 @@ type (
 )
 
 var (
-	watcherMap   = F.NewMapOf[string, *Watcher]()
-	watcherMapMu sync.Mutex
+	watcherMap   = make(map[string]*Watcher)
+	watcherMapMu sync.RWMutex
 	errShouldNotReachHere = errors.New("should not reach here")
 )
@@ -57,39 +64,44 @@ func registerWatcher(watcherTask *task.Task, route route.Route, waker *waker) (*
 		panic(errShouldNotReachHere)
 	}
+	cont := route.ContainerInfo()
+	key := cont.ContainerID
 	watcherMapMu.Lock()
 	defer watcherMapMu.Unlock()
-	key := cfg.ContainerID
-	if w, ok := watcherMap.Load(key); ok {
-		w.Config = cfg
-		w.waker = waker
-		w.resetIdleTimer()
-		watcherTask.Finish("used existing watcher")
-		return w, nil
+	// cancel previous watcher
+	if w, ok := watcherMap[key]; ok {
+		defer w.Finish("new request with same container id")
 	}
-	client, err := docker.NewClient(cfg.DockerHost)
+	client, err := docker.NewClient(cont.DockerHost)
 	if err != nil {
 		return nil, err
 	}
 	w := &Watcher{
-		Logger: logging.With().Str("name", cfg.ContainerName).Logger(),
-		Config: cfg,
+		Logger: logging.With().Str("name", cont.ContainerName).Logger(),
 		waker:  waker,
+		containerMeta: &containerMeta{
+			ContainerID:   cont.ContainerID,
+			ContainerName: cont.ContainerName,
+		},
+		Config: cfg,
 		client: client,
 		task:   watcherTask,
 		ticker: time.NewTicker(cfg.IdleTimeout),
 	}
+	w.running.Store(cont.Running)
 	w.stopByMethod = w.getStopCallback()
-	watcherMap.Store(key, w)
+	watcherMap[key] = w
 	go func() {
 		cause := w.watchUntilDestroy()
-		watcherMap.Delete(w.ContainerID)
+		watcherMapMu.Lock()
+		defer watcherMapMu.Unlock()
+		delete(watcherMap, key)
 		w.ticker.Stop()
 		w.client.Close()
@@ -118,10 +130,6 @@ func (w *Watcher) WakeError(err error) {
 	w.Err(err).Str("action", "wake").Msg("error")
 }
 
-func (w *Watcher) LogReason(action, reason string) {
-	w.Info().Str("reason", reason).Msg(action)
-}
-
 func (w *Watcher) containerStop(ctx context.Context) error {
 	return w.client.ContainerStop(ctx, w.ContainerID, container.StopOptions{
 		Signal: string(w.StopSignal),
@@ -156,7 +164,7 @@ func (w *Watcher) containerStatus() (string, error) {
 }
 
 func (w *Watcher) wakeIfStopped() error {
-	if w.ContainerRunning {
+	if w.running.Load() {
 		return nil
 	}
@@ -239,7 +247,7 @@ func (w *Watcher) getEventCh(dockerWatcher *watcher.DockerWatcher) (eventCh <-ch
 // it exits only if the context is canceled, the container is destroyed,
 // errors occurred on docker client, or route provider died (mainly caused by config reload).
 func (w *Watcher) watchUntilDestroy() (returnCause error) {
-	dockerWatcher := watcher.NewDockerWatcher(w.Config.DockerHost)
+	dockerWatcher := watcher.NewDockerWatcher(w.client.DaemonHost())
 	dockerEventCh, dockerEventErrCh := w.getEventCh(dockerWatcher)
 	for {
@@ -254,17 +262,17 @@ func (w *Watcher) watchUntilDestroy() (returnCause error) {
 		case e := <-dockerEventCh:
 			switch {
 			case e.Action == events.ActionContainerDestroy:
-				w.ContainerRunning = false
+				w.running.Store(false)
 				w.ready.Store(false)
-				w.LogReason("watcher stopped", "container destroyed")
+				w.Info().Str("reason", "container destroyed").Msg("watcher stopped")
 				return errors.New("container destroyed")
 			// create / start / unpause
 			case e.Action.IsContainerWake():
-				w.ContainerRunning = true
+				w.running.Store(true)
 				w.resetIdleTimer()
 				w.Info().Msg("awaken")
 			case e.Action.IsContainerSleep(): // stop / pause / kil
-				w.ContainerRunning = false
+				w.running.Store(false)
 				w.ready.Store(false)
 				w.ticker.Stop()
 			default:
@@ -283,7 +291,7 @@ func (w *Watcher) watchUntilDestroy() (returnCause error) {
 			}
 		case <-w.ticker.C:
 			w.ticker.Stop()
-			if w.ContainerRunning {
+			if w.running.Load() {
 				err := w.stopByMethod()
 				switch {
 				case errors.Is(err, context.Canceled):
@@ -294,7 +302,7 @@ func (w *Watcher) watchUntilDestroy() (returnCause error) {
 				}
 				w.Err(err).Msgf("container stop with method %q failed", w.StopMethod)
 			default:
-				w.LogReason("container stopped", "idle timeout")
+				w.Info().Str("reason", "idle timeout").Msg("container stopped")
 			}
 		}
 	}
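The races addressed in this file are unsynchronized access to the watcher registry (previously a functional map combined inconsistently with a separate mutex) and to the old ContainerRunning flag. A condensed, standalone sketch of the locking scheme the file now uses; names are simplified, and the real code also manages tasks, tickers and the Docker client:

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

type watcher struct {
	id      string
	running atomic.Bool // replaces the old unsynchronized ContainerRunning field
}

var (
	watcherMap   = make(map[string]*watcher)
	watcherMapMu sync.RWMutex // writers take Lock, readers take RLock
)

// register installs a watcher under its container ID; the real code also
// cancels any previous watcher registered under the same key.
func register(id string, running bool) *watcher {
	watcherMapMu.Lock()
	defer watcherMapMu.Unlock()
	w := &watcher{id: id}
	w.running.Store(running)
	watcherMap[id] = w
	return w
}

// unregister is what the goroutine in the diff does after watchUntilDestroy
// returns: re-acquire the write lock and delete the entry.
func unregister(id string) {
	watcherMapMu.Lock()
	defer watcherMapMu.Unlock()
	delete(watcherMap, id)
}

func lookup(id string) (*watcher, bool) {
	watcherMapMu.RLock()
	defer watcherMapMu.RUnlock()
	w, ok := watcherMap[id]
	return w, ok
}

func main() {
	w := register("abc123", true)
	fmt.Println(w.running.Load()) // true
	if got, ok := lookup("abc123"); ok {
		got.running.Store(false) // safe from any goroutine
	}
	unregister("abc123")
}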

View file

@@ -113,10 +113,10 @@ func (r *ReveseProxyRoute) Start(parent task.Parent) gperr.Error {
 		r.HealthMon = waker
 	case r.UseHealthCheck():
 		if r.IsDocker() {
-			client, err := docker.NewClient(r.Idlewatcher.DockerHost)
+			client, err := docker.NewClient(r.Container.DockerHost)
 			if err == nil {
 				fallback := r.newHealthMonitor()
-				r.HealthMon = monitor.NewDockerHealthMonitor(client, r.Idlewatcher.ContainerID, r.TargetName(), r.HealthCheck, fallback)
+				r.HealthMon = monitor.NewDockerHealthMonitor(client, r.Container.ContainerID, r.TargetName(), r.HealthCheck, fallback)
 				r.task.OnCancel("close_docker_client", client.Close)
 			}
 		}

View file

@@ -67,10 +67,10 @@ func (r *StreamRoute) Start(parent task.Parent) gperr.Error {
 		r.HealthMon = waker
 	case r.UseHealthCheck():
 		if r.IsDocker() {
-			client, err := docker.NewClient(r.IdlewatcherConfig().DockerHost)
+			client, err := docker.NewClient(r.Container.DockerHost)
 			if err == nil {
 				fallback := monitor.NewRawHealthChecker(r.TargetURL(), r.HealthCheck)
-				r.HealthMon = monitor.NewDockerHealthMonitor(client, r.IdlewatcherConfig().ContainerID, r.TargetName(), r.HealthCheck, fallback)
+				r.HealthMon = monitor.NewDockerHealthMonitor(client, r.Container.ContainerID, r.TargetName(), r.HealthCheck, fallback)
 				r.task.OnCancel("close_docker_client", client.Close)
 			}
 		}
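Both route types make the same substitution (the reverse-proxy route above and the stream route here): Docker host and container ID are now read from the route's container info rather than from the idlewatcher config, which, after the change in the first file, is nil whenever idle watching is disabled. A small self-contained sketch of the shape being relied on; the types below are stand-ins for the route structs in the hunks, not the repo's definitions:

package main

import "fmt"

// Stand-ins for the fields the hunks above use.
type container struct{ DockerHost, ContainerID string }
type idlewatcherConfig struct{} // container identity fields were removed by this commit

type route struct {
	Container   *container
	Idlewatcher *idlewatcherConfig // nil when no idle_timeout label is set
}

func main() {
	r := &route{Container: &container{DockerHost: "unix:///var/run/docker.sock", ContainerID: "abc123"}}
	// The container always carries its own identity, so no nil check on the
	// idlewatcher config is needed to build the Docker health monitor.
	fmt.Println(r.Container.DockerHost, r.Container.ContainerID)
}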

View file

@@ -0,0 +1,5 @@
+package atomic
+
+import "sync/atomic"
+
+type Bool = atomic.Bool

View file

@@ -22,11 +22,11 @@ func (a *Value[T]) Store(v T) {
 }
 
 func (a *Value[T]) Swap(v T) T {
-	return a.Value.Swap(v).(T)
-}
-
-func (a *Value[T]) CompareAndSwap(oldV, newV T) bool {
-	return a.Value.CompareAndSwap(oldV, newV)
+	if v := a.Value.Swap(v); v != nil {
+		return v.(T)
+	}
+	var zero T
+	return zero
 }
 
 func (a *Value[T]) MarshalJSON() ([]byte, error) {
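The last two files round out the new internal/utils/atomic package: a type alias so callers can take both Bool and the generic Value from a single import, and a nil-safe Swap that returns the zero value instead of panicking when nothing has been stored yet (sync/atomic.Value.Swap returns nil in that case, so the old unconditional .(T) assertion would panic; CompareAndSwap is dropped). A sketch of how the wrapper reads after this commit; Load and Store are assumed from the unchanged parts of the file and MarshalJSON is omitted, so details may differ:

package atomic

import "sync/atomic"

// Bool is re-exported so callers need only this package.
type Bool = atomic.Bool

// Value is a typed wrapper around sync/atomic.Value.
type Value[T any] struct {
	atomic.Value
}

// Load (assumed unchanged by the diff) maps the "never stored" case to the
// zero value of T.
func (a *Value[T]) Load() T {
	if v := a.Value.Load(); v != nil {
		return v.(T)
	}
	var zero T
	return zero
}

func (a *Value[T]) Store(v T) {
	a.Value.Store(v)
}

// Swap as rewritten by the diff: guard the type assertion so swapping on an
// empty Value returns T's zero value instead of panicking.
func (a *Value[T]) Swap(v T) T {
	if old := a.Value.Swap(v); old != nil {
		return old.(T)
	}
	var zero T
	return zero
}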