Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/network-spec/miniprotocols.tex
Original file line number Diff line number Diff line change
Expand Up @@ -872,7 +872,7 @@ \subsection{Timeouts per state}
\header{state} & \header{timeout} \\\hline
\StIdle & \texttt{3673}s \\
\StCanAwait & \texttt{10}s \\
\StMustReply & random between \texttt{135}s and \texttt{269}s \\
\StMustReply & random between \texttt{601}s and \texttt{911}s \\
\StIntersect & \texttt{10}s \\
\end{tabular}
\caption{timeouts per state}
Expand Down
4 changes: 4 additions & 0 deletions ouroboros-network/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@

### Non-breaking changes

* Limit the number of failures to 5 before a peer that isn't a localroot, bootstrap peer or public root peer is forgotten.
* Decrease the time blockfetch waits for chainsync to exit in case of an error.
* Increase the timeout for chainsync in state StMustReply to between 601 and 911 seconds.

## 0.21.4.0 -- 2025-10-05

### Non-breaking changes
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2378,7 +2378,7 @@ prop_diffusion_target_established_local ioSimTrace traceNumber =
(fromMaybe Set.empty)
. Signal.fromEvents
. Signal.selectEvents
(\case TracePromoteColdFailed _ _ peer _ _ ->
(\case TracePromoteColdFailed _ _ peer _ _ _ ->
Just (Set.singleton peer)
--TODO: what about TraceDemoteWarmDone ?
-- these are also not immediate candidates
Expand Down Expand Up @@ -3018,7 +3018,7 @@ prop_diffusion_async_demotions ioSimTrace traceNumber =
Just $ Stop failures
where
failures = Set.singleton peeraddr
TracePromoteColdFailed _ _ peeraddr _ _ ->
TracePromoteColdFailed _ _ peeraddr _ _ _ ->
Just $ Stop failures
where
failures = Set.singleton peeraddr
Expand All @@ -3030,7 +3030,7 @@ prop_diffusion_async_demotions ioSimTrace traceNumber =
Just $ Stop failures
where
failures = Set.singleton peeraddr
TracePromoteColdBigLedgerPeerFailed _ _ peeraddr _ _ ->
TracePromoteColdBigLedgerPeerFailed _ _ peeraddr _ _ _ ->
Just $ Stop failures
where
failures = Set.singleton peeraddr
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2514,7 +2514,7 @@ prop_governor_target_established_below (MaxTime maxTime) env =
(fromMaybe Set.empty)
. Signal.fromEvents
. Signal.selectEvents
(\case TracePromoteColdFailed _ _ peer _ _ ->
(\case TracePromoteColdFailed _ _ peer _ _ _ ->
--TODO: the environment does not yet cause this to happen
-- it requires synchronous failure in the establish action
Just $! Set.singleton peer
Expand Down Expand Up @@ -2623,7 +2623,7 @@ prop_governor_target_established_big_ledger_peers_below (MaxTime maxTime) env =
(fromMaybe Set.empty)
. Signal.fromEvents
. Signal.selectEvents
(\case TracePromoteColdBigLedgerPeerFailed _ _ peer _ _ ->
(\case TracePromoteColdBigLedgerPeerFailed _ _ peer _ _ _ ->
--TODO: the environment does not yet cause this to happen
-- it requires synchronous failure in the establish action
Just (Set.singleton peer)
Expand Down Expand Up @@ -3261,7 +3261,7 @@ prop_governor_target_established_local (MaxTime maxTime) env =
(fromMaybe Set.empty)
. Signal.fromEvents
. Signal.selectEvents
(\case TracePromoteColdFailed _ _ peer _ _ ->
(\case TracePromoteColdFailed _ _ peer _ _ _ ->
--TODO: the environment does not yet cause this to happen
-- it requires synchronous failure in the establish action
Just (Set.singleton peer)
Expand Down Expand Up @@ -4108,6 +4108,8 @@ _governorFindingPublicRoots targetNumberOfRootPeers readDomains readUseBootstrap
policyPeerShareBatchWaitTime = 0, -- seconds
policyPeerShareOverallTimeout = 0, -- seconds
policyPeerShareActivationDelay = 2, -- seconds
policyMaxConnectionRetries = 5,
policyClearFailCountDelay = 120, --seconds
policyErrorDelay = 0 -- seconds
}
pickTrivially :: Applicative m => Set SockAddr -> Int -> m (Set SockAddr)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -723,6 +723,8 @@ mockPeerSelectionPolicy GovernorMockEnvironment {
policyPeerShareBatchWaitTime = 3, -- seconds
policyPeerShareOverallTimeout = 10, -- seconds
policyPeerShareActivationDelay = 300, -- seconds
policyMaxConnectionRetries = 5,
policyClearFailCountDelay = 120, -- seconds
policyErrorDelay = 10 -- seconds
}

Expand Down Expand Up @@ -751,64 +753,64 @@ tracerTracePeerSelection = contramap f tracerTestTraceEvent
-- make the tracer strict
f :: TracePeerSelection extraState extraFlags extraPeers PeerAddr
-> TestTraceEvent extraState extraFlags extraPeers extraCounters
f a@(TraceLocalRootPeersChanged !_ !_) = GovernorEvent a
f a@(TraceTargetsChanged !_ !_) = GovernorEvent a
f a@(TracePublicRootsRequest !_ !_) = GovernorEvent a
f a@(TracePublicRootsResults !_ !_ !_) = GovernorEvent a
f a@(TracePublicRootsFailure !_ !_ !_) = GovernorEvent a
f a@(TraceForgetColdPeers !_ !_ !_) = GovernorEvent a
f a@(TraceBigLedgerPeersRequest !_ !_) = GovernorEvent a
f a@(TraceBigLedgerPeersResults !_ !_ !_) = GovernorEvent a
f a@(TraceBigLedgerPeersFailure !_ !_ !_) = GovernorEvent a
f a@(TraceForgetBigLedgerPeers !_ !_ !_) = GovernorEvent a
f a@(TracePickInboundPeers !_ !_ !_ !_) = GovernorEvent a
f a@(TracePeerShareRequests !_ !_ !_ !_ !_) = GovernorEvent a
f a@(TracePeerShareResults !_) = GovernorEvent a
f a@(TracePeerShareResultsFiltered !_) = GovernorEvent a
f a@(TracePromoteColdPeers !_ !_ !_) = GovernorEvent a
f a@(TracePromoteColdLocalPeers !_ !_) = GovernorEvent a
f a@(TracePromoteColdFailed !_ !_ !_ !_ !_) = GovernorEvent a
f a@(TracePromoteColdDone !_ !_ !_) = GovernorEvent a
f a@(TracePromoteColdBigLedgerPeers !_ !_ !_) = GovernorEvent a
f a@(TracePromoteColdBigLedgerPeerFailed !_ !_ !_ !_ !_) = GovernorEvent a
f a@(TracePromoteColdBigLedgerPeerDone !_ !_ !_) = GovernorEvent a
f a@(TracePromoteWarmPeers !_ !_ !_) = GovernorEvent a
f a@(TracePromoteWarmLocalPeers !_ !_) = GovernorEvent a
f a@(TracePromoteWarmFailed !_ !_ !_ !_) = GovernorEvent a
f a@(TracePromoteWarmDone !_ !_ !_) = GovernorEvent a
f a@(TracePromoteWarmAborted !_ !_ !_) = GovernorEvent a
f a@(TracePromoteWarmBigLedgerPeers !_ !_ !_) = GovernorEvent a
f a@(TracePromoteWarmBigLedgerPeerFailed !_ !_ !_ !_) = GovernorEvent a
f a@(TracePromoteWarmBigLedgerPeerDone !_ !_ !_) = GovernorEvent a
f a@(TracePromoteWarmBigLedgerPeerAborted !_ !_ !_) = GovernorEvent a
f a@(TraceDemoteWarmPeers !_ !_ !_) = GovernorEvent a
f a@(TraceDemoteWarmFailed !_ !_ !_ !_) = GovernorEvent a
f a@(TraceDemoteWarmDone !_ !_ !_) = GovernorEvent a
f a@(TraceDemoteWarmBigLedgerPeers !_ !_ !_) = GovernorEvent a
f a@(TraceDemoteWarmBigLedgerPeerFailed !_ !_ !_ !_) = GovernorEvent a
f a@(TraceDemoteWarmBigLedgerPeerDone !_ !_ !_) = GovernorEvent a
f a@(TraceDemoteHotPeers !_ !_ !_) = GovernorEvent a
f a@(TraceDemoteLocalHotPeers !_ !_) = GovernorEvent a
f a@(TraceDemoteHotFailed !_ !_ !_ !_) = GovernorEvent a
f a@(TraceDemoteHotDone !_ !_ !_) = GovernorEvent a
f a@(TraceDemoteHotBigLedgerPeers !_ !_ !_) = GovernorEvent a
f a@(TraceDemoteHotBigLedgerPeerFailed !_ !_ !_ !_) = GovernorEvent a
f a@(TraceDemoteHotBigLedgerPeerDone !_ !_ !_) = GovernorEvent a
f a@(TraceDemoteAsynchronous !_) = GovernorEvent a
f a@(TraceDemoteLocalAsynchronous !_) = GovernorEvent a
f a@(TraceDemoteBigLedgerPeersAsynchronous !_) = GovernorEvent a
f a@TraceGovernorWakeup = GovernorEvent a
f a@(TraceChurnWait !_) = GovernorEvent a
f a@(TraceChurnMode !_) = GovernorEvent a
f a@(TraceLedgerStateJudgementChanged !_) = GovernorEvent a
f a@TraceOnlyBootstrapPeers = GovernorEvent a
f a@TraceBootstrapPeersFlagChangedWhilstInSensitiveState = GovernorEvent a
f a@(TraceUseBootstrapPeersChanged !_) = GovernorEvent a
f a@(TraceOutboundGovernorCriticalFailure !_) = GovernorEvent a
f a@(TraceDebugState !_ !_) = GovernorEvent a
f a@(TraceChurnAction !_ !_ !_) = GovernorEvent a
f a@(TraceChurnTimeout !_ !_ !_) = GovernorEvent a
f a@(TraceVerifyPeerSnapshot !_) = GovernorEvent a
f a@(TraceLocalRootPeersChanged !_ !_) = GovernorEvent a
f a@(TraceTargetsChanged !_ !_) = GovernorEvent a
f a@(TracePublicRootsRequest !_ !_) = GovernorEvent a
f a@(TracePublicRootsResults !_ !_ !_) = GovernorEvent a
f a@(TracePublicRootsFailure !_ !_ !_) = GovernorEvent a
f a@(TraceForgetColdPeers !_ !_ !_) = GovernorEvent a
f a@(TraceBigLedgerPeersRequest !_ !_) = GovernorEvent a
f a@(TraceBigLedgerPeersResults !_ !_ !_) = GovernorEvent a
f a@(TraceBigLedgerPeersFailure !_ !_ !_) = GovernorEvent a
f a@(TraceForgetBigLedgerPeers !_ !_ !_) = GovernorEvent a
f a@(TracePickInboundPeers !_ !_ !_ !_) = GovernorEvent a
f a@(TracePeerShareRequests !_ !_ !_ !_ !_) = GovernorEvent a
f a@(TracePeerShareResults !_) = GovernorEvent a
f a@(TracePeerShareResultsFiltered !_) = GovernorEvent a
f a@(TracePromoteColdPeers !_ !_ !_) = GovernorEvent a
f a@(TracePromoteColdLocalPeers !_ !_) = GovernorEvent a
f a@(TracePromoteColdFailed !_ !_ !_ !_ !_ !_) = GovernorEvent a
f a@(TracePromoteColdDone !_ !_ !_) = GovernorEvent a
f a@(TracePromoteColdBigLedgerPeers !_ !_ !_) = GovernorEvent a
f a@(TracePromoteColdBigLedgerPeerFailed !_ !_ !_ !_ !_ !_) = GovernorEvent a
f a@(TracePromoteColdBigLedgerPeerDone !_ !_ !_) = GovernorEvent a
f a@(TracePromoteWarmPeers !_ !_ !_) = GovernorEvent a
f a@(TracePromoteWarmLocalPeers !_ !_) = GovernorEvent a
f a@(TracePromoteWarmFailed !_ !_ !_ !_) = GovernorEvent a
f a@(TracePromoteWarmDone !_ !_ !_) = GovernorEvent a
f a@(TracePromoteWarmAborted !_ !_ !_) = GovernorEvent a
f a@(TracePromoteWarmBigLedgerPeers !_ !_ !_) = GovernorEvent a
f a@(TracePromoteWarmBigLedgerPeerFailed !_ !_ !_ !_) = GovernorEvent a
f a@(TracePromoteWarmBigLedgerPeerDone !_ !_ !_) = GovernorEvent a
f a@(TracePromoteWarmBigLedgerPeerAborted !_ !_ !_) = GovernorEvent a
f a@(TraceDemoteWarmPeers !_ !_ !_) = GovernorEvent a
f a@(TraceDemoteWarmFailed !_ !_ !_ !_) = GovernorEvent a
f a@(TraceDemoteWarmDone !_ !_ !_) = GovernorEvent a
f a@(TraceDemoteWarmBigLedgerPeers !_ !_ !_) = GovernorEvent a
f a@(TraceDemoteWarmBigLedgerPeerFailed !_ !_ !_ !_) = GovernorEvent a
f a@(TraceDemoteWarmBigLedgerPeerDone !_ !_ !_) = GovernorEvent a
f a@(TraceDemoteHotPeers !_ !_ !_) = GovernorEvent a
f a@(TraceDemoteLocalHotPeers !_ !_) = GovernorEvent a
f a@(TraceDemoteHotFailed !_ !_ !_ !_) = GovernorEvent a
f a@(TraceDemoteHotDone !_ !_ !_) = GovernorEvent a
f a@(TraceDemoteHotBigLedgerPeers !_ !_ !_) = GovernorEvent a
f a@(TraceDemoteHotBigLedgerPeerFailed !_ !_ !_ !_) = GovernorEvent a
f a@(TraceDemoteHotBigLedgerPeerDone !_ !_ !_) = GovernorEvent a
f a@(TraceDemoteAsynchronous !_) = GovernorEvent a
f a@(TraceDemoteLocalAsynchronous !_) = GovernorEvent a
f a@(TraceDemoteBigLedgerPeersAsynchronous !_) = GovernorEvent a
f a@TraceGovernorWakeup = GovernorEvent a
f a@(TraceChurnWait !_) = GovernorEvent a
f a@(TraceChurnMode !_) = GovernorEvent a
f a@(TraceLedgerStateJudgementChanged !_) = GovernorEvent a
f a@TraceOnlyBootstrapPeers = GovernorEvent a
f a@TraceBootstrapPeersFlagChangedWhilstInSensitiveState = GovernorEvent a
f a@(TraceUseBootstrapPeersChanged !_) = GovernorEvent a
f a@(TraceOutboundGovernorCriticalFailure !_) = GovernorEvent a
f a@(TraceDebugState !_ !_) = GovernorEvent a
f a@(TraceChurnAction !_ !_ !_) = GovernorEvent a
f a@(TraceChurnTimeout !_ !_ !_) = GovernorEvent a
f a@(TraceVerifyPeerSnapshot !_) = GovernorEvent a

tracerDebugPeerSelection :: Tracer (IOSim s) (DebugPeerSelection Cardano.ExtraState PeerTrustable (Cardano.ExtraPeers PeerAddr) PeerAddr)
tracerDebugPeerSelection = GovernorDebug `contramap` tracerTestTraceEvent
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,11 @@ bracketFetchClient (FetchClientRegistry ctxVar
fetchRegistry syncRegistry dqRegistry keepRegistry dyingRegistry)
_version peer action = do
ksVar <- newEmptyTMVarIO
bracket (register ksVar) (uncurry (unregister ksVar)) (action . fst)
fst <$> generalBracket (register ksVar) (unregister ksVar) (action . fst)
where
onExceptionTimeout :: DiffTime
onExceptionTimeout = 1

register :: StrictTMVar m ()
-> m ( FetchClientContext header block m
, (ThreadId m, StrictTMVar m ()) )
Expand Down Expand Up @@ -157,11 +160,15 @@ bracketFetchClient (FetchClientRegistry ctxVar
)

unregister :: StrictTMVar m ()
-> FetchClientContext header block m
-> (ThreadId m, StrictTMVar m ())
-> ( FetchClientContext header block m
, (ThreadId m, StrictTMVar m ()) )
-> ExitCase a
-> m ()
unregister ksVar FetchClientContext { fetchClientCtxStateVars = stateVars }
(tid, doneVar) = uninterruptibleMask $ \unmask -> do
unregister ksVar (FetchClientContext { fetchClientCtxStateVars = stateVars },
(tid, doneVar)) exitCase = uninterruptibleMask $ \unmask -> do
let timeoutLimit = case exitCase of
ExitCaseSuccess _ -> deactivateTimeout
_ -> onExceptionTimeout
dead <- do
-- Signal we are shutting down
dieFast <- atomically $ do
Expand All @@ -183,7 +190,7 @@ bracketFetchClient (FetchClientRegistry ctxVar
else do
-- Give the sync client a chance to exit cleanly before killing it.
res <- onException
(unmask $ timeout deactivateTimeout $ atomically $ readTMVar doneVar)
(unmask $ timeout timeoutLimit $ atomically $ readTMVar doneVar)
(-- no time to wait, die die die!
uninterruptibleMask_ $ do
throwTo tid AsyncCancelled
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -991,10 +991,10 @@ fetchRequestDecisions fetchDecisionPolicy fetchMode chains =
nActivePeers :: Set peer
nActivePeers =
Set.fromList
. map snd
. filter (\(inFlight, _) -> inFlight > 0)
. map (\(_, _, PeerFetchInFlight{peerFetchReqsInFlight}, _, p, _) ->
(peerFetchReqsInFlight, p))
. map (\(_,_,e) -> e)
. filter (\(s, inFlight, _) -> inFlight > 0 && s /= PeerFetchStatusShutdown)
. map (\(_, s, PeerFetchInFlight{peerFetchReqsInFlight}, _, p, _) ->
(s, peerFetchReqsInFlight, p))
$ chains

-- Order the peers based on current PeerGSV. The top performing peers will be
Expand All @@ -1009,6 +1009,7 @@ fetchRequestDecisions fetchDecisionPolicy fetchMode chains =
. take (fromIntegral maxConcurrentFetchPeers)
. sortBy (\a b -> comparePeerGSV nActivePeers (peerSalt fetchDecisionPolicy) a b)
. map (\(_, _, _, gsv, p, _) -> (gsv, p))
. filter (\(_, s, _, _, _, _) -> s /= PeerFetchStatusShutdown)
$ chains

maxConcurrentFetchPeers :: Word
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,13 @@ closeConnectionTimeout = 120
-- | Chain sync `mustReplyTimeout` lower bound.
--
minChainSyncTimeout :: DiffTime
minChainSyncTimeout = 135
minChainSyncTimeout = 601


-- | Chain sync `mustReplyTimeout` upper bound.
--
maxChainSyncTimeout :: DiffTime
maxChainSyncTimeout = 269
maxChainSyncTimeout = 911

-- | Churn timeouts after 60s trying to establish a connection.
--
Expand Down Expand Up @@ -126,6 +126,8 @@ simplePeerSelectionPolicy rngVar metrics errorDelay = PeerSelectionPolicy {
policyPeerShareBatchWaitTime = 3, -- seconds
policyPeerShareOverallTimeout = 10, -- seconds
policyPeerShareActivationDelay = 300, -- seconds
policyMaxConnectionRetries = 5,
policyClearFailCountDelay = 120, -- seconds

policyErrorDelay = ExitPolicy.repromoteDelay errorDelay
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -665,7 +665,7 @@ peerSelectionGovernorLoop tracer
-- Make sure preBlocking set is in the right place
preBlocking policy actions st

<> Monitor.connections actions st
<> Monitor.connections actions policy st
<> Monitor.jobs jobPool st
-- This job monitors for changes in big ledger peer snapshot file (eg. reload)
-- and copies it into the governor's private state. When a change is detected,
Expand Down
Loading
Loading