|
func (h *gpbftRunner) computeNextInstanceStart(cert *certs.FinalityCertificate) (_nextStart time.Time) { |
|
ecDelay := time.Duration(h.manifest.EC.DelayMultiplier * float64(h.manifest.EC.Period)) |
|
|
|
head, err := h.ec.GetHead(h.runningCtx) |
|
if err != nil { |
|
// this should not happen |
|
log.Errorf("ec.GetHead returned error: %+v", err) |
|
return h.clock.Now().Add(ecDelay) |
|
} |
|
|
|
// the head of the cert becomes the new base |
|
baseTipSet := cert.ECChain.Head() |
|
// we are not trying to fetch the new base tipset from EC as it might not be available |
|
// instead we compute the relative time from the EC.Head |
|
baseTimestamp := computeTipsetTimestampAtEpoch(head, baseTipSet.Epoch, h.manifest.EC.Period) |
|
|
|
// Try to align instances while catching up, if configured. |
|
if h.manifest.CatchUpAlignment > 0 { |
|
defer func() { |
|
now := h.clock.Now() |
|
|
|
// If we were supposed to start this instance more than one GPBFT round ago, assume |
|
// we're behind and try to align our start times. This helps keep nodes |
|
// in-sync when bootstrapping and catching up. |
|
if _nextStart.Before(now.Add(-h.manifest.CatchUpAlignment)) { |
|
delay := now.Sub(baseTimestamp) |
|
if offset := delay % h.manifest.CatchUpAlignment; offset > 0 { |
|
delay += h.manifest.CatchUpAlignment - offset |
|
} |
|
_nextStart = baseTimestamp.Add(delay) |
|
} |
|
}() |
|
} |
|
|
|
lookbackDelay := h.manifest.EC.Period * time.Duration(h.manifest.EC.HeadLookback) |
|
|
|
if cert.ECChain.HasSuffix() { |
|
// we decided on something new, the tipset that got finalized can at minimum be 30-60s old. |
|
return baseTimestamp.Add(ecDelay).Add(lookbackDelay) |
|
} |
|
if cert.GPBFTInstance == h.manifest.InitialInstance { |
|
// if we are at initial instance, there is no history to look at |
|
return baseTimestamp.Add(ecDelay).Add(lookbackDelay) |
|
} |
|
backoffTable := h.manifest.EC.BaseDecisionBackoffTable |
|
|
|
attempts := 0 |
|
backoffMultipler := 2.0 // baseline 1 + 1 to account for the one ECDelay after which we got the base decistion |
|
for instance := cert.GPBFTInstance - 1; instance > h.manifest.InitialInstance; instance-- { |
|
cert, err := h.certStore.Get(h.runningCtx, instance) |
|
if err != nil { |
|
log.Errorf("error while getting instance %d from certstore: %+v", instance, err) |
|
break |
|
} |
|
if cert.ECChain.HasSuffix() { |
|
break |
|
} |
|
|
|
attempts += 1 |
|
if attempts < len(backoffTable) { |
|
backoffMultipler += backoffTable[attempts] |
|
} else { |
|
// if we are beyond backoffTable, reuse the last element |
|
backoffMultipler += backoffTable[len(backoffTable)-1] |
|
} |
|
} |
|
|
|
backoff := time.Duration(float64(ecDelay) * backoffMultipler) |
|
log.Debugf("backing off for: %v", backoff) |
|
|
|
return baseTimestamp.Add(backoff).Add(lookbackDelay) |
|
} |
The current logic is overly complicated, and burried in
gpbftRunnerdeep enough that unit testing the raw heuristic itself in terms of conformity to basic design goals is cumbersome if not impossible.The heuristic itself uses a number of parameters from manifest, and it's not entirely clear if we really need so many knobs to tune the instance start.
go-f3/host.go
Lines 389 to 460 in f77d509