3f170293f5
release / release (push) Successful in 1m19s
Keep the N always-on tunnel slots fixed but let each slot's WireGuard config
rotate through a larger pool, so a 10-concurrent provider cap (e.g. Proton) can
still cycle 50-100 profiles.
- lxc/rotate.sh + weircon-rotate.{service,timer}: round-robin one slot at a
time through wg-pool/, repointing a symlink and restarting only that slot.
- service: proxyManager tracks per-slot in-flight + drain/undrain state; a
localhost admin server (WEIRCON_ADMIN_LISTEN) lets rotate.sh drain a slot
before teardown and warm it back in after, so no request is routed to a
tunnel mid-rotation. Slots self-heal if undrain never arrives.
- GET /status: poll-friendly JSON of per-slot egress IP/state plus inferred
next-rotation slot + ETA, fed by a background egress-IP prober.
- docs + env examples for all new knobs.
195 lines
5.7 KiB
Go
195 lines
5.7 KiB
Go
package main
|
|
|
|
import (
|
|
"errors"
|
|
"math/rand/v2"
|
|
"sync"
|
|
"time"
|
|
)
|
|
|
|
// Sentinel errors returned by the slot-acquisition paths.
var (
	// errDraining is what a caller gets when it pins a specific slot and that
	// slot is currently being rotated out.
	errDraining = errors.New("proxy is draining for rotation")

	// errNoProxies is what a caller gets when not a single slot can be
	// selected right now.
	errNoProxies = errors.New("no proxy currently available")
)
|
|
|
|
// proxyManager tracks per-slot availability so the rotation tooling can take a
|
|
// slot out of service a moment before its WireGuard tunnel is torn down and
|
|
// bring it back a moment after the new tunnel is up — without ever handing an
|
|
// in-flight request to a slot that is mid-rotation.
|
|
//
|
|
// Lifecycle of a rotation (driven by rotate.sh via the admin server):
|
|
//
|
|
// drain(id) -> stop routing NEW requests to id, block until in-flight
|
|
// drains to zero (bounded by timeout) + a short settle.
|
|
// (restart the tunnel unit)
|
|
// undrain(id) -> mark id available again, but keep it out of the candidate
|
|
// set for `warmup` so the fresh handshake can settle first.
|
|
//
|
|
// A drained slot self-heals: if undrain never arrives (e.g. rotate.sh died
|
|
// mid-run) the slot returns to service after maxHold so the pool can't shrink
|
|
// permanently.
|
|
type proxyManager struct {
|
|
mu sync.Mutex
|
|
n int
|
|
inflight []int
|
|
draining []bool
|
|
warmupAt []time.Time // slot not selectable until now >= warmupAt[id]
|
|
healTmr []*time.Timer
|
|
egress []egressInfo // last observed egress IP per slot (from the prober)
|
|
|
|
// Rotation cadence, inferred from drain() calls. rotate.sh rotates exactly
|
|
// one slot per timer tick in round-robin order, so the gap between drains is
|
|
// the rotation interval and the next slot to rotate is (lastSlot+1) % n.
|
|
lastSlot int
|
|
lastRotated time.Time
|
|
interval time.Duration
|
|
|
|
// afterUndrain, if set, is called (outside the lock) when a slot returns to
|
|
// service — used by the prober to refresh that slot's egress IP promptly.
|
|
afterUndrain func(id int)
|
|
|
|
settle time.Duration // quiet period after in-flight hits zero (the "before")
|
|
warmup time.Duration // post-undrain cool-in before re-selection (the "after")
|
|
maxHold time.Duration // safety: auto-undrain if rotate.sh never calls undrain
|
|
}
|
|
|
|
// egressInfo holds what the prober last saw for one slot's public egress.
type egressInfo struct {
	ip        string        // egress IP reported by the probe
	latency   time.Duration // latency recorded alongside that probe
	ok        bool          // outcome flag supplied by the prober
	checkedAt time.Time     // when this observation was recorded
}
|
|
|
|
func newProxyManager(n int, settle, warmup, maxHold time.Duration) *proxyManager {
|
|
return &proxyManager{
|
|
n: n,
|
|
inflight: make([]int, n),
|
|
draining: make([]bool, n),
|
|
warmupAt: make([]time.Time, n),
|
|
healTmr: make([]*time.Timer, n),
|
|
egress: make([]egressInfo, n),
|
|
lastSlot: -1,
|
|
settle: settle,
|
|
warmup: warmup,
|
|
maxHold: maxHold,
|
|
}
|
|
}
|
|
|
|
// setEgress records the prober's latest observation for a slot.
|
|
func (m *proxyManager) setEgress(id int, ip string, latency time.Duration, ok bool) {
|
|
m.mu.Lock()
|
|
m.egress[id] = egressInfo{ip: ip, latency: latency, ok: ok, checkedAt: time.Now()}
|
|
m.mu.Unlock()
|
|
}
|
|
|
|
// selectableLocked reports whether slot id may take a new request right now.
|
|
// Caller must hold m.mu.
|
|
func (m *proxyManager) selectableLocked(id int) bool {
|
|
if m.draining[id] {
|
|
return false
|
|
}
|
|
if w := m.warmupAt[id]; !w.IsZero() && time.Now().Before(w) {
|
|
return false
|
|
}
|
|
return true
|
|
}
|
|
|
|
// acquirePinned reserves a specific slot for one request. It combines the
|
|
// availability check and the in-flight increment under the lock so a drain
|
|
// can't slip in between. Call release(id) when the request finishes.
|
|
func (m *proxyManager) acquirePinned(id int) error {
|
|
m.mu.Lock()
|
|
defer m.mu.Unlock()
|
|
if !m.selectableLocked(id) {
|
|
return errDraining
|
|
}
|
|
m.inflight[id]++
|
|
return nil
|
|
}
|
|
|
|
// acquireAny picks a random selectable slot and reserves it. Returns errNoProxies
|
|
// if every slot is draining/warming. Call release(id) when the request finishes.
|
|
func (m *proxyManager) acquireAny() (int, error) {
|
|
m.mu.Lock()
|
|
defer m.mu.Unlock()
|
|
cands := make([]int, 0, m.n)
|
|
for i := 0; i < m.n; i++ {
|
|
if m.selectableLocked(i) {
|
|
cands = append(cands, i)
|
|
}
|
|
}
|
|
if len(cands) == 0 {
|
|
return 0, errNoProxies
|
|
}
|
|
id := cands[rand.IntN(len(cands))]
|
|
m.inflight[id]++
|
|
return id, nil
|
|
}
|
|
|
|
// release marks one in-flight request on id as finished.
|
|
func (m *proxyManager) release(id int) {
|
|
m.mu.Lock()
|
|
if m.inflight[id] > 0 {
|
|
m.inflight[id]--
|
|
}
|
|
m.mu.Unlock()
|
|
}
|
|
|
|
// drain stops new traffic to id and waits until in-flight reaches zero (bounded
|
|
// by timeout) followed by a settle period. Returns the number of requests still
|
|
// in flight when it gave up (0 on a clean drain). A self-heal timer is armed so
|
|
// the slot recovers even if undrain is never called.
|
|
func (m *proxyManager) drain(id int, timeout time.Duration) int {
|
|
m.mu.Lock()
|
|
m.draining[id] = true
|
|
// Infer rotation cadence: the gap since the previous drain is the interval.
|
|
now := time.Now()
|
|
if !m.lastRotated.IsZero() {
|
|
if d := now.Sub(m.lastRotated); d > 0 {
|
|
m.interval = d
|
|
}
|
|
}
|
|
m.lastSlot = id
|
|
m.lastRotated = now
|
|
if m.healTmr[id] != nil {
|
|
m.healTmr[id].Stop()
|
|
}
|
|
m.healTmr[id] = time.AfterFunc(m.maxHold, func() { m.undrain(id) })
|
|
m.mu.Unlock()
|
|
|
|
deadline := time.Now().Add(timeout)
|
|
for {
|
|
m.mu.Lock()
|
|
inf := m.inflight[id]
|
|
m.mu.Unlock()
|
|
if inf == 0 {
|
|
time.Sleep(m.settle) // let the last connection's FIN flush
|
|
return 0
|
|
}
|
|
if time.Now().After(deadline) {
|
|
return inf
|
|
}
|
|
time.Sleep(50 * time.Millisecond)
|
|
}
|
|
}
|
|
|
|
// undrain returns id to service after a warmup window during which it stays out
|
|
// of the candidate set (so the just-rebuilt tunnel's handshake can complete).
|
|
func (m *proxyManager) undrain(id int) {
|
|
m.mu.Lock()
|
|
if m.healTmr[id] != nil {
|
|
m.healTmr[id].Stop()
|
|
m.healTmr[id] = nil
|
|
}
|
|
m.draining[id] = false
|
|
m.warmupAt[id] = time.Now().Add(m.warmup)
|
|
hook := m.afterUndrain
|
|
m.mu.Unlock()
|
|
if hook != nil {
|
|
hook(id)
|
|
}
|
|
}
|