-
Notifications
You must be signed in to change notification settings - Fork 485
fix(CubeMaster): expire node health after stale nodemeta heartbeat #455
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -54,7 +54,11 @@ type Node struct { | |
|
|
||
| MetaDataUpdateAt time.Time `json:"MetaDataUpdateAt,omitempty"` | ||
|
|
||
| Healthy bool `json:"Healthy,omitempty"` | ||
| ReportedReady bool `json:"-"` | ||
|
|
||
| Healthy bool `json:"Healthy"` | ||
|
|
||
| UnhealthyReason string `json:"UnhealthyReason,omitempty"` | ||
|
|
||
| Score float64 `json:"Score,omitempty"` | ||
|
|
||
|
|
@@ -84,6 +88,22 @@ type Node struct { | |
| NicQueues int64 `json:"nic_queues,omitempty"` | ||
| } | ||
|
|
||
| func (n *Node) Clone() *Node { | ||
| if n == nil { | ||
| return nil | ||
| } | ||
| // Clone provides a best-effort read-side snapshot. Mutable counters such | ||
| // as LocalCreateNum are refreshed via atomic loads after the structural | ||
| // copy so cloned read models stay aligned with the write path. | ||
| localCreateNum := atomic.LoadInt64(&n.LocalCreateNum) | ||
| cloned := *n | ||
| cloned.LocalCreateNum = localCreateNum | ||
| if n.VirtualNodeQuotaArray != nil { | ||
| cloned.VirtualNodeQuotaArray = append([]int64(nil), n.VirtualNodeQuotaArray...) | ||
| } | ||
| return &cloned | ||
| } | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
The struct copy Per the Go memory model this is a data race and would be flagged by If this project only targets amd64, a brief comment acknowledging the deliberate non-atomic copy would help future readers (and suppress race-detector noise). Otherwise, consider using |
||
|
|
||
| func (n *Node) ID() string { | ||
| if n.InsID == "" { | ||
| return n.IP | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,44 @@ | ||
| package nodehealth | ||
|
|
||
| import ( | ||
| "time" | ||
|
|
||
| corev1 "k8s.io/api/core/v1" | ||
| ) | ||
|
|
||
| const ( | ||
| ReasonReportedNotReady = "ReportedNotReady" | ||
| ReasonHeartbeatExpired = "HeartbeatExpired" | ||
| ) | ||
|
|
||
| type Status struct { | ||
| Healthy bool | ||
| UnhealthyReason string | ||
| } | ||
|
|
||
| func MetadataTimeout(syncMetaDataInterval time.Duration) time.Duration { | ||
| return syncMetaDataInterval + 10*time.Second | ||
| } | ||
|
|
||
| func ReadyConditionTrue(conditions []corev1.NodeCondition) bool { | ||
| for _, cond := range conditions { | ||
| if cond.Type == corev1.NodeReady { | ||
| return cond.Status == corev1.ConditionTrue | ||
| } | ||
| } | ||
| return false | ||
| } | ||
|
|
||
| func EvaluateFromFacts(reportedReady bool, heartbeatTime, now time.Time, timeout time.Duration) Status { | ||
| if heartbeatTime.IsZero() || now.Sub(heartbeatTime) > timeout { | ||
| return Status{Healthy: false, UnhealthyReason: ReasonHeartbeatExpired} | ||
| } | ||
| if !reportedReady { | ||
| return Status{Healthy: false, UnhealthyReason: ReasonReportedNotReady} | ||
| } | ||
| return Status{Healthy: true} | ||
| } | ||
|
|
||
| func Evaluate(conditions []corev1.NodeCondition, heartbeatTime, now time.Time, timeout time.Duration) Status { | ||
| return EvaluateFromFacts(ReadyConditionTrue(conditions), heartbeatTime, now, timeout) | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
📢 API contract change:
omitemptyremovedThe
Healthyfield changed fromjson:"Healthy,omitempty"tojson:"Healthy", meaning"healthy":falseis now always present in JSON output instead of being omitted. This is deliberate (the nodemeta test confirms it), but it is a wire-format change that existing API consumers may depend on.Consider calling this out in the PR description and the next changelog entry so integrators know to expect the new field shape.