fix: cert renewal failure causes scheduler to get stuck forever

This commit is contained in:
yusing 2025-01-09 02:53:04 +08:00
parent abb0124011
commit 7fe03be73f

View file

@@ -7,6 +7,7 @@ import (
"os" "os"
"path" "path"
"reflect" "reflect"
"runtime"
"sort" "sort"
"time" "time"
@@ -148,28 +149,40 @@ func (p *Provider) ShouldRenewOn() time.Time {
panic("no certificate available") panic("no certificate available")
} }
func (p *Provider) ScheduleRenewal() { func (p *Provider) ScheduleRenewal(parent task.Parent) {
if p.GetName() == ProviderLocal { if p.GetName() == ProviderLocal {
return return
} }
go func() { go func() {
task := task.RootTask("cert-renew-scheduler", true) lastErrOn := time.Time{}
renewalTime := p.ShouldRenewOn()
timer := time.NewTimer(time.Until(renewalTime))
defer timer.Stop()
task := parent.Subtask("cert-renew-scheduler")
defer task.Finish(nil) defer task.Finish(nil)
for { for {
renewalTime := p.ShouldRenewOn()
timer := time.NewTimer(time.Until(renewalTime))
select { select {
case <-task.Context().Done(): case <-task.Context().Done():
timer.Stop()
return return
case <-timer.C: case <-timer.C:
// Retry after 1 hour on failure
if time.Now().Before(lastErrOn.Add(time.Hour)) {
continue
}
if err := p.renewIfNeeded(); err != nil { if err := p.renewIfNeeded(); err != nil {
E.LogWarn("cert renew failed", err, &logger) E.LogWarn("cert renew failed", err, &logger)
// Retry after 1 hour on failure lastErrOn = time.Now()
time.Sleep(time.Hour) continue
} }
// Reset on success
lastErrOn = time.Time{}
renewalTime = p.ShouldRenewOn()
timer.Reset(time.Until(renewalTime))
default:
// Allow other tasks to run
runtime.Gosched()
} }
} }
}() }()