Simplify the heuristic for computing the next instance start time · Issue #955 · filecoin-project/go-f3

The current logic is overly complicated, and burried in gpbftRunner deep enough that unit testing the raw heuristic itself in terms of conformity to basic design goals is cumbersome if not impossible.

The heuristic itself uses a number of parameters from manifest, and it's not entirely clear if we really need so many knobs to tune the instance start.

Refactor the heuristic into a stateless function that takes clear set of parameters.
Add unit tests to assert what the design goals are/should be.
Then simplify the logic and distil to 1 but no more than 2 parameters.

go-f3/host.go

Lines 389 to 460 in f77d509

    
           func (h *gpbftRunner) computeNextInstanceStart(cert *certs.FinalityCertificate) (_nextStart time.Time) { 
        
           	ecDelay := time.Duration(h.manifest.EC.DelayMultiplier * float64(h.manifest.EC.Period)) 
        
           	head, err := h.ec.GetHead(h.runningCtx) 
        
           	if err != nil { 
        
           		// this should not happen 
        
           		log.Errorf("ec.GetHead returned error: %+v", err) 
        
           		return h.clock.Now().Add(ecDelay) 
        
           	} 
        
           	// the head of the cert becomes the new base 
        
           	baseTipSet := cert.ECChain.Head() 
        
           	// we are not trying to fetch the new base tipset from EC as it might not be available 
        
           	// instead we compute the relative time from the EC.Head 
        
           	baseTimestamp := computeTipsetTimestampAtEpoch(head, baseTipSet.Epoch, h.manifest.EC.Period) 
        
           	// Try to align instances while catching up, if configured. 
        
           	if h.manifest.CatchUpAlignment > 0 { 
        
           		defer func() { 
        
           			now := h.clock.Now() 
        
           			// If we were supposed to start this instance more than one GPBFT round ago, assume 
        
           			// we're behind and try to align our start times. This helps keep nodes 
        
           			// in-sync when bootstrapping and catching up. 
        
           			if _nextStart.Before(now.Add(-h.manifest.CatchUpAlignment)) { 
        
           				delay := now.Sub(baseTimestamp) 
        
           				if offset := delay % h.manifest.CatchUpAlignment; offset > 0 { 
        
           					delay += h.manifest.CatchUpAlignment - offset 
        
           				} 
        
           				_nextStart = baseTimestamp.Add(delay) 
        
           			} 
        
           		}() 
        
           	} 
        
           	lookbackDelay := h.manifest.EC.Period * time.Duration(h.manifest.EC.HeadLookback) 
        
           	if cert.ECChain.HasSuffix() { 
        
           		// we decided on something new, the tipset that got finalized can at minimum be 30-60s old. 
        
           		return baseTimestamp.Add(ecDelay).Add(lookbackDelay) 
        
           	} 
        
           	if cert.GPBFTInstance == h.manifest.InitialInstance { 
        
           		// if we are at initial instance, there is no history to look at 
        
           		return baseTimestamp.Add(ecDelay).Add(lookbackDelay) 
        
           	} 
        
           	backoffTable := h.manifest.EC.BaseDecisionBackoffTable 
        
           	attempts := 0 
        
           	backoffMultipler := 2.0 // baseline 1 + 1 to account for the one ECDelay after which we got the base decistion 
        
           	for instance := cert.GPBFTInstance - 1; instance > h.manifest.InitialInstance; instance-- { 
        
           		cert, err := h.certStore.Get(h.runningCtx, instance) 
        
           		if err != nil { 
        
           			log.Errorf("error while getting instance %d from certstore: %+v", instance, err) 
        
           			break 
        
           		} 
        
           		if cert.ECChain.HasSuffix() { 
        
           			break 
        
           		} 
        
           		attempts += 1 
        
           		if attempts < len(backoffTable) { 
        
           			backoffMultipler += backoffTable[attempts] 
        
           		} else { 
        
           			// if we are beyond backoffTable, reuse the last element 
        
           			backoffMultipler += backoffTable[len(backoffTable)-1] 
        
           		} 
        
           	} 
        
           	backoff := time.Duration(float64(ecDelay) * backoffMultipler) 
        
           	log.Debugf("backing off for: %v", backoff) 
        
           	return baseTimestamp.Add(backoff).Add(lookbackDelay) 
        
           }

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Simplify the heuristic for computing the next instance start time #955

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

	func (h gpbftRunner) computeNextInstanceStart(cert certs.FinalityCertificate) (_nextStart time.Time) {
	ecDelay := time.Duration(h.manifest.EC.DelayMultiplier * float64(h.manifest.EC.Period))

	head, err := h.ec.GetHead(h.runningCtx)
	if err != nil {
	// this should not happen
	log.Errorf("ec.GetHead returned error: %+v", err)
	return h.clock.Now().Add(ecDelay)
	}

	// the head of the cert becomes the new base
	baseTipSet := cert.ECChain.Head()
	// we are not trying to fetch the new base tipset from EC as it might not be available
	// instead we compute the relative time from the EC.Head
	baseTimestamp := computeTipsetTimestampAtEpoch(head, baseTipSet.Epoch, h.manifest.EC.Period)

	// Try to align instances while catching up, if configured.
	if h.manifest.CatchUpAlignment > 0 {
	defer func() {
	now := h.clock.Now()

	// If we were supposed to start this instance more than one GPBFT round ago, assume
	// we're behind and try to align our start times. This helps keep nodes
	// in-sync when bootstrapping and catching up.
	if _nextStart.Before(now.Add(-h.manifest.CatchUpAlignment)) {
	delay := now.Sub(baseTimestamp)
	if offset := delay % h.manifest.CatchUpAlignment; offset > 0 {
	delay += h.manifest.CatchUpAlignment - offset
	}
	_nextStart = baseTimestamp.Add(delay)
	}
	}()
	}

	lookbackDelay := h.manifest.EC.Period * time.Duration(h.manifest.EC.HeadLookback)

	if cert.ECChain.HasSuffix() {
	// we decided on something new, the tipset that got finalized can at minimum be 30-60s old.
	return baseTimestamp.Add(ecDelay).Add(lookbackDelay)
	}
	if cert.GPBFTInstance == h.manifest.InitialInstance {
	// if we are at initial instance, there is no history to look at
	return baseTimestamp.Add(ecDelay).Add(lookbackDelay)
	}
	backoffTable := h.manifest.EC.BaseDecisionBackoffTable

	attempts := 0
	backoffMultipler := 2.0 // baseline 1 + 1 to account for the one ECDelay after which we got the base decistion
	for instance := cert.GPBFTInstance - 1; instance > h.manifest.InitialInstance; instance-- {
	cert, err := h.certStore.Get(h.runningCtx, instance)
	if err != nil {
	log.Errorf("error while getting instance %d from certstore: %+v", instance, err)
	break
	}
	if cert.ECChain.HasSuffix() {
	break
	}

	attempts += 1
	if attempts < len(backoffTable) {
	backoffMultipler += backoffTable[attempts]
	} else {
	// if we are beyond backoffTable, reuse the last element
	backoffMultipler += backoffTable[len(backoffTable)-1]
	}
	}

	backoff := time.Duration(float64(ecDelay) * backoffMultipler)
	log.Debugf("backing off for: %v", backoff)

	return baseTimestamp.Add(backoff).Add(lookbackDelay)
	}

Simplify the heuristic for computing the next instance start time #955

Description

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions