improved docker reconnect mechanism, removed redundant checks, refactor

This commit is contained in:
yusing 2025-02-24 07:50:23 +08:00
parent 5f1b78ec84
commit bda547198e
9 changed files with 84 additions and 86 deletions

View file

@@ -156,12 +156,19 @@ func (cfg *AgentConfig) Transport() *http.Transport {
 			if addr != AgentHost+":443" {
 				return nil, &net.AddrError{Err: "invalid address", Addr: addr}
 			}
-			return gphttp.DefaultDialer.DialContext(ctx, network, cfg.Addr)
+			if network != "tcp" {
+				return nil, &net.OpError{Op: "dial", Net: network, Source: nil, Addr: nil}
+			}
+			return cfg.DialContext(ctx)
 		},
 		TLSClientConfig: cfg.tlsConfig,
 	}
 }
 
+func (cfg *AgentConfig) DialContext(ctx context.Context) (net.Conn, error) {
+	return gphttp.DefaultDialer.DialContext(ctx, "tcp", cfg.Addr)
+}
+
 func (cfg *AgentConfig) Name() string {
 	return cfg.name
 }
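The new dial path pins every connection to the agent's address and refuses non-TCP networks before dialing. A minimal standalone sketch of the same pattern, where agentAddr is an illustrative stand-in rather than the real AgentConfig; unlike the committed line, it fills in the OpError's Err field, since net.OpError.Error() dereferences Err and would panic when formatting an OpError built with Err left nil:

package main

import (
	"context"
	"errors"
	"net"
	"net/http"
	"time"
)

// pinnedTransport returns a transport that ignores the requested address
// and always dials one fixed backend over TCP.
func pinnedTransport(agentAddr string) *http.Transport {
	dialer := &net.Dialer{Timeout: 5 * time.Second}
	return &http.Transport{
		DialContext: func(ctx context.Context, network, _ string) (net.Conn, error) {
			if network != "tcp" {
				// refuse non-TCP dials, as the commit does; Err is set here
				// so the error formats without panicking
				return nil, &net.OpError{Op: "dial", Net: network, Err: errors.New("unsupported network")}
			}
			return dialer.DialContext(ctx, "tcp", agentAddr)
		},
	}
}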

View file

@@ -36,7 +36,7 @@ func getDockerClients() (DockerClients, gperr.Error) {
 	connErrs := gperr.NewBuilder("failed to connect to docker")
 
 	for name, host := range dockerHosts {
-		dockerClient, err := docker.ConnectClient(host)
+		dockerClient, err := docker.NewClient(host)
 		if err != nil {
 			connErrs.Add(err)
 			continue
@@ -45,7 +45,7 @@ func getDockerClients() (DockerClients, gperr.Error) {
 	}
 
 	for _, agent := range cfg.ListAgents() {
-		dockerClient, err := docker.ConnectClient(agent.FakeDockerHost())
+		dockerClient, err := docker.NewClient(agent.FakeDockerHost())
 		if err != nil {
 			connErrs.Add(err)
 			continue
@@ -74,7 +74,7 @@ func getDockerClient(w http.ResponseWriter, server string) (*docker.SharedClient
 	if host == "" {
 		return nil, false, nil
 	}
-	dockerClient, err := docker.ConnectClient(host)
+	dockerClient, err := docker.NewClient(host)
 	if err != nil {
 		return nil, false, err
 	}

View file

@@ -1,8 +1,10 @@
 package docker
 
 import (
+	"context"
 	"errors"
 	"fmt"
+	"net"
 	"net/http"
 	"sync"
 	"time"
@@ -23,11 +25,14 @@ type (
 		key      string
 		refCount uint32
 		closedOn int64
+
+		addr string
+		dial func(ctx context.Context) (net.Conn, error)
 	}
 )
 
 var (
-	clientMap   = make(map[string]*SharedClient, 5)
+	clientMap   = make(map[string]*SharedClient, 10)
 	clientMapMu sync.RWMutex
 
 	clientOptEnvHost = []client.Opt{
@@ -64,9 +69,7 @@ func init() {
 		for _, c := range clientMap {
 			delete(clientMap, c.key)
-			if c.Connected() {
-				c.Client.Close()
-			}
+			c.Client.Close()
 		}
 	})
 }
@@ -78,10 +81,6 @@ func closeTimedOutClients() {
 	now := time.Now().Unix()
 
 	for _, c := range clientMap {
-		if !c.Connected() {
-			delete(clientMap, c.key)
-			continue
-		}
 		if c.closedOn == 0 {
 			continue
 		}
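With the Connected() guard gone from the sweep above, eviction is purely time-based: Close() stamps closedOn once the last reference is dropped, and the sweep removes the entry after a grace period. A sketch of that shape, where the clientTTL constant and the function name are assumptions, not taken from the commit:

const clientTTL = 10 * time.Minute // assumed grace period, not from the commit

func evictIdle(clients map[string]*SharedClient) {
	now := time.Now().Unix()
	for key, c := range clients {
		if c.closedOn == 0 {
			continue // still referenced; Close() has not stamped it yet
		}
		if now-c.closedOn > int64(clientTTL.Seconds()) {
			delete(clients, key)
			c.Client.Close() // safe unconditionally: clients are always fully constructed now
		}
	}
}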
@@ -93,8 +92,17 @@
 	}
 }
 
-func (c *SharedClient) Connected() bool {
-	return c != nil && c.Client != nil
+func (c *SharedClient) Address() string {
+	return c.addr
+}
+
+func (c *SharedClient) CheckConnection(ctx context.Context) error {
+	conn, err := c.dial(ctx)
+	if err != nil {
+		return err
+	}
+	conn.Close()
+	return nil
 }
 
 // if the client is still referenced, this is no-op.
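CheckConnection replaces the old Connected() nil-check with a real probe: it opens and immediately closes a connection on the same path requests use, so a dead socket or unreachable agent surfaces as an error instead of a stale flag. From the caller's side it pairs naturally with a short timeout; a hypothetical call site:

ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
defer cancel()
if err := dockerClient.CheckConnection(ctx); err != nil {
	// unreachable right now; back off and let the watcher retry
	logging.Debug().Err(err).Str("host", dockerClient.Address()).Msg("docker unreachable")
}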
@@ -103,7 +111,7 @@ func (c *SharedClient) Close() {
 	c.refCount--
 }
 
-// ConnectClient creates a new Docker client connection to the specified host.
+// NewClient creates a new Docker client connection to the specified host.
 //
 // Returns existing client if available.
 //
@@ -113,7 +121,7 @@ func (c *SharedClient) Close() {
 // Returns:
 //   - Client: the Docker client connection.
 //   - error: an error if the connection failed.
-func ConnectClient(host string) (*SharedClient, error) {
+func NewClient(host string) (*SharedClient, error) {
 	clientMapMu.Lock()
 	defer clientMapMu.Unlock()
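The rename from ConnectClient to NewClient matches the new semantics: the constructor hands back a handle (possibly a cached one) without implying a live connection, and liveness is checked separately via CheckConnection. Per the doc comment above, each caller holds one reference and releases it with Close; a hypothetical call site:

dockerClient, err := docker.NewClient(host) // may return the cached shared client
if err != nil {
	return err
}
// Close only decrements the reference count; closeTimedOutClients
// closes the underlying client once it has been idle long enough.
defer dockerClient.Close()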
@@ -125,6 +133,8 @@ func ConnectClient(host string) (*SharedClient, error) {
 	// create client
 	var opt []client.Opt
+	var addr string
+	var dial func(ctx context.Context) (net.Conn, error)
 
 	if agent.IsDockerHostAgent(host) {
 		cfg, ok := config.GetInstance().GetAgent(host)
@@ -136,6 +146,8 @@ func ConnectClient(host string) (*SharedClient, error) {
 			client.WithHTTPClient(cfg.NewHTTPClient()),
 			client.WithAPIVersionNegotiation(),
 		}
+		addr = "tcp://" + cfg.Addr
+		dial = cfg.DialContext
 	} else {
 		switch host {
 		case "":
@@ -177,9 +189,16 @@ func ConnectClient(host string) (*SharedClient, error) {
 		Client:   client,
 		key:      host,
 		refCount: 1,
+		addr:     addr,
+		dial:     dial,
 	}
 
-	defer logging.Debug().Str("host", host).Msg("docker client connected")
+	// non-agent client
+	if c.dial == nil {
+		c.dial = client.Dialer()
+	}
+
+	defer logging.Debug().Str("host", host).Msg("docker client initialized")
 
 	clientMap[c.key] = c
 	return c, nil

View file

@@ -8,7 +8,7 @@ import (
 	"github.com/docker/docker/api/types/container"
 	"github.com/rs/zerolog"
 
-	D "github.com/yusing/go-proxy/internal/docker"
+	"github.com/yusing/go-proxy/internal/docker"
 	idlewatcher "github.com/yusing/go-proxy/internal/docker/idlewatcher/types"
 	"github.com/yusing/go-proxy/internal/gperr"
 	"github.com/yusing/go-proxy/internal/logging"
@@ -29,7 +29,7 @@ type (
 		*idlewatcher.Config
 		*waker
 
-		client       *D.SharedClient
+		client       *docker.SharedClient
 		stopByMethod StopCallback // send a docker command w.r.t. `stop_method`
 		ticker       *time.Ticker
 		lastReset    time.Time
@@ -70,7 +70,7 @@ func registerWatcher(watcherTask *task.Task, route route.Route, waker *waker) (*
 		return w, nil
 	}
 
-	client, err := D.ConnectClient(cfg.DockerHost)
+	client, err := docker.NewClient(cfg.DockerHost)
 	if err != nil {
 		return nil, err
 	}
@@ -146,9 +146,6 @@ func (w *Watcher) containerStart(ctx context.Context) error {
 }
 
 func (w *Watcher) containerStatus() (string, error) {
-	if !w.client.Connected() {
-		return "", errors.New("docker client not connected")
-	}
 	ctx, cancel := context.WithTimeoutCause(w.task.Context(), dockerReqTimeout, errors.New("docker request timeout"))
 	defer cancel()
 	json, err := w.client.ContainerInspect(ctx, w.ContainerID)
@@ -242,7 +239,7 @@ func (w *Watcher) getEventCh(dockerWatcher *watcher.DockerWatcher) (eventCh <-ch
 // it exits only if the context is canceled, the container is destroyed,
 // errors occurred on docker client, or route provider died (mainly caused by config reload).
 func (w *Watcher) watchUntilDestroy() (returnCause error) {
-	dockerWatcher := watcher.NewDockerWatcherWithClient(w.client)
+	dockerWatcher := watcher.NewDockerWatcher(w.Config.DockerHost)
 	dockerEventCh, dockerEventErrCh := w.getEventCh(dockerWatcher)
 
 	for {
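With the pre-flight Connected() check deleted, containerStatus relies entirely on the request context, and context.WithTimeoutCause (Go 1.21+) makes that timeout self-describing: a hung daemon reports the supplied cause rather than a bare deadline error. A standalone sketch of the idiom:

package main

import (
	"context"
	"errors"
	"fmt"
	"time"
)

func main() {
	// the cause is reported via context.Cause; ctx.Err() stays DeadlineExceeded
	ctx, cancel := context.WithTimeoutCause(context.Background(),
		50*time.Millisecond, errors.New("docker request timeout"))
	defer cancel()

	<-ctx.Done()
	fmt.Println(context.Cause(ctx))                              // docker request timeout
	fmt.Println(errors.Is(ctx.Err(), context.DeadlineExceeded)) // true
}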

View file

@@ -7,7 +7,7 @@ import (
 )
 
 func Inspect(dockerHost string, containerID string) (*Container, error) {
-	client, err := ConnectClient(dockerHost)
+	client, err := NewClient(dockerHost)
 	if err != nil {
 		return nil, err
 	}

View file

@@ -5,7 +5,6 @@ import (
 	"errors"
 	"time"
 
-	"github.com/docker/docker/api/types"
 	"github.com/docker/docker/api/types/container"
 	"github.com/docker/docker/client"
 )
@@ -22,8 +21,8 @@ var listOptions = container.ListOptions{
 	All: true,
 }
 
-func ListContainers(clientHost string) ([]types.Container, error) {
-	dockerClient, err := ConnectClient(clientHost)
+func ListContainers(clientHost string) ([]container.Summary, error) {
+	dockerClient, err := NewClient(clientHost)
 	if err != nil {
 		return nil, err
 	}
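Changing the return type from types.Container to container.Summary follows the Docker SDK itself, which moved the list-result type into the container package and deprecated the old types alias in recent releases. Call sites read the same; a minimal standalone sketch against the SDK, assuming a daemon reachable through the usual environment variables:

package main

import (
	"context"
	"fmt"
	"log"

	"github.com/docker/docker/api/types/container"
	"github.com/docker/docker/client"
)

func main() {
	cli, err := client.NewClientWithOpts(client.FromEnv, client.WithAPIVersionNegotiation())
	if err != nil {
		log.Fatal(err)
	}
	defer cli.Close()

	// ContainerList yields container.Summary values in current SDK versions
	summaries, err := cli.ContainerList(context.Background(), container.ListOptions{All: true})
	if err != nil {
		log.Fatal(err)
	}
	for _, s := range summaries {
		fmt.Println(s.ID[:12], s.Names, s.State)
	}
}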

View file

@@ -113,7 +113,7 @@ func (r *ReveseProxyRoute) Start(parent task.Parent) gperr.Error {
 		r.HealthMon = waker
 	case r.UseHealthCheck():
 		if r.IsDocker() {
-			client, err := docker.ConnectClient(r.Idlewatcher.DockerHost)
+			client, err := docker.NewClient(r.Idlewatcher.DockerHost)
 			if err == nil {
 				fallback := r.newHealthMonitor()
 				r.HealthMon = monitor.NewDockerHealthMonitor(client, r.Idlewatcher.ContainerID, r.TargetName(), r.HealthCheck, fallback)

View file

@@ -67,7 +67,7 @@ func (r *StreamRoute) Start(parent task.Parent) gperr.Error {
 		r.HealthMon = waker
 	case r.UseHealthCheck():
 		if r.IsDocker() {
-			client, err := docker.ConnectClient(r.IdlewatcherConfig().DockerHost)
+			client, err := docker.NewClient(r.IdlewatcherConfig().DockerHost)
 			if err == nil {
 				fallback := monitor.NewRawHealthChecker(r.TargetURL(), r.HealthCheck)
 				r.HealthMon = monitor.NewDockerHealthMonitor(client, r.IdlewatcherConfig().ContainerID, r.TargetName(), r.HealthCheck, fallback)

View file

@@ -16,9 +16,8 @@ import (
 type (
 	DockerWatcher struct {
-		host        string
-		client      *docker.SharedClient
-		clientOwned bool
+		host   string
+		client *docker.SharedClient
 	}
 
 	DockerListOptions = docker_events.ListOptions
 )
@@ -60,29 +59,14 @@ func DockerFilterContainerNameID(nameOrID string) filters.KeyValuePair {
 }
 
 func NewDockerWatcher(host string) *DockerWatcher {
-	return &DockerWatcher{
-		host:        host,
-		clientOwned: true,
-	}
-}
-
-func NewDockerWatcherWithClient(client *docker.SharedClient) *DockerWatcher {
-	return &DockerWatcher{
-		client: client,
-	}
+	return &DockerWatcher{host: host}
 }
 
 func (w *DockerWatcher) Events(ctx context.Context) (<-chan Event, <-chan gperr.Error) {
 	return w.EventsWithOptions(ctx, optionsDefault)
 }
 
-func (w *DockerWatcher) Close() {
-	if w.clientOwned && w.client.Connected() {
-		w.client.Close()
-	}
-}
-
-func (w *DockerWatcher) parseError(err error) gperr.Error {
+func (w DockerWatcher) parseError(err error) gperr.Error {
 	if errors.Is(err, context.DeadlineExceeded) {
 		return gperr.New("docker client connection timeout")
 	}
@@ -95,69 +79,61 @@ func (w *DockerWatcher) parseError(err error) gperr.Error {
 func (w *DockerWatcher) checkConnection(ctx context.Context) bool {
 	ctx, cancel := context.WithTimeout(ctx, dockerWatcherRetryInterval)
 	defer cancel()
-	_, err := w.client.Ping(ctx)
+	err := w.client.CheckConnection(ctx)
 	if err != nil {
+		logging.Debug().Err(err).Msg("docker watcher: connection failed")
 		return false
 	}
 	return true
 }
 
+func (w *DockerWatcher) handleEvent(event docker_events.Message, ch chan<- Event) {
+	action, ok := events.DockerEventMap[event.Action]
+	if !ok {
+		return
+	}
+	ch <- Event{
+		Type:            events.EventTypeDocker,
+		ActorID:         event.Actor.ID,
+		ActorAttributes: event.Actor.Attributes, // labels
+		ActorName:       event.Actor.Attributes["name"],
+		Action:          action,
+	}
+}
+
 func (w *DockerWatcher) EventsWithOptions(ctx context.Context, options DockerListOptions) (<-chan Event, <-chan gperr.Error) {
 	eventCh := make(chan Event)
 	errCh := make(chan gperr.Error)
 
 	go func() {
+		var err error
+		w.client, err = docker.NewClient(w.host)
+		if err != nil {
+			errCh <- gperr.Wrap(err, "docker watcher: failed to initialize client")
+			return
+		}
+
 		defer func() {
 			close(eventCh)
 			close(errCh)
-			w.Close()
+			w.client.Close()
 		}()
 
-		if !w.client.Connected() {
-			var err error
-			w.client, err = docker.ConnectClient(w.host)
-			attempts := 0
-			retryTicker := time.NewTicker(dockerWatcherRetryInterval)
-			for err != nil {
-				attempts++
-				errCh <- gperr.Errorf("docker connection attempt #%d: %w", attempts, err)
-				select {
-				case <-ctx.Done():
-					retryTicker.Stop()
-					return
-				case <-retryTicker.C:
-					w.client, err = docker.ConnectClient(w.host)
-				}
-			}
-			retryTicker.Stop()
-		}
-
-		defer w.Close()
-
 		cEventCh, cErrCh := w.client.Events(ctx, options)
-		defer logging.Debug().Str("host", w.host).Msg("docker watcher closed")
+		defer logging.Debug().Str("host", w.client.Address()).Msg("docker watcher closed")
 
 		for {
			select {
			case <-ctx.Done():
				return
			case msg := <-cEventCh:
-				action, ok := events.DockerEventMap[msg.Action]
-				if !ok {
-					continue
-				}
-				event := Event{
-					Type:            events.EventTypeDocker,
-					ActorID:         msg.Actor.ID,
-					ActorAttributes: msg.Actor.Attributes, // labels
-					ActorName:       msg.Actor.Attributes["name"],
-					Action:          action,
-				}
-				eventCh <- event
+				w.handleEvent(msg, eventCh)
			case err := <-cErrCh:
				if err == nil {
					continue
				}
				errCh <- w.parseError(err)
+				// release the error because reopening event channel may block
+				err = nil
				// trigger reload (clear routes)
				eventCh <- reloadTrigger
				for !w.checkConnection(ctx) {
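This last hunk carries the actual reconnect change: rather than retrying the client constructor in a ticker loop, the watcher creates one shared client up front, and when the event stream errors it reports the error, emits a reload trigger, then polls checkConnection until the daemon answers before resubscribing. A generic standalone sketch of that loop shape, with illustrative types and an assumed one-second poll interval:

package watchutil

import (
	"context"
	"time"
)

// watchLoop consumes events from subscribe and, when the stream errors,
// polls probe until the backend answers again before resubscribing.
func watchLoop(
	ctx context.Context,
	subscribe func(context.Context) (<-chan string, <-chan error),
	probe func(context.Context) bool,
	out chan<- string,
) {
	for {
		eventCh, errCh := subscribe(ctx)
	stream:
		for {
			select {
			case <-ctx.Done():
				return
			case ev := <-eventCh:
				out <- ev
			case err := <-errCh:
				if err == nil {
					continue
				}
				break stream // stream broke; fall through to the reconnect poll
			}
		}
		for !probe(ctx) { // wait until the backend is reachable again
			select {
			case <-ctx.Done():
				return
			case <-time.After(time.Second):
			}
		}
	}
}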