diff --git a/api/flowcollector/v1beta2/flowcollector_alert_types.go b/api/flowcollector/v1beta2/flowcollector_alert_types.go
index bb6b84c63b..9bd10ef80e 100644
--- a/api/flowcollector/v1beta2/flowcollector_alert_types.go
+++ b/api/flowcollector/v1beta2/flowcollector_alert_types.go
@@ -34,7 +34,7 @@ const (
HealthRuleExternalEgressHighTrend HealthRuleTemplate = "ExternalEgressHighTrend"
HealthRuleExternalIngressHighTrend HealthRuleTemplate = "ExternalIngressHighTrend"
HealthRuleIngress5xxErrors HealthRuleTemplate = "Ingress5xxErrors"
- HealthRuleIngressLatencyTrend HealthRuleTemplate = "IngressLatencyTrend"
+ HealthRuleIngressHTTPLatencyTrend HealthRuleTemplate = "IngressHTTPLatencyTrend"
GroupByNode HealthRuleGroupBy = "Node"
GroupByNamespace HealthRuleGroupBy = "Namespace"
@@ -47,10 +47,10 @@ const (
type FLPHealthRule struct {
// Health rule template name.
// Possible values are: `PacketDropsByKernel`, `PacketDropsByDevice`, `IPsecErrors`, `NetpolDenied`,
- // `LatencyHighTrend`, `DNSErrors`, `DNSNxDomain`, `ExternalEgressHighTrend`, `ExternalIngressHighTrend`, `Ingress5xxErrors`, `IngressLatencyTrend`.
+ // `LatencyHighTrend`, `DNSErrors`, `DNSNxDomain`, `ExternalEgressHighTrend`, `ExternalIngressHighTrend`, `Ingress5xxErrors`, `IngressHTTPLatencyTrend`.
// Note: `NetObservNoFlows` and `NetObservLokiError` are alert-only and cannot be used as health rules.
- // More information on health rules: https://github.com/netobserv/network-observability-operator/blob/main/docs/Alerts.md
- // +kubebuilder:validation:Enum:="PacketDropsByKernel";"PacketDropsByDevice";"IPsecErrors";"NetpolDenied";"LatencyHighTrend";"DNSErrors";"DNSNxDomain";"ExternalEgressHighTrend";"ExternalIngressHighTrend";"Ingress5xxErrors";"IngressLatencyTrend"
+ // More information on health rules: https://github.com/netobserv/network-observability-operator/blob/main/docs/HealthRules.md
+ // +kubebuilder:validation:Enum:="PacketDropsByKernel";"PacketDropsByDevice";"IPsecErrors";"NetpolDenied";"LatencyHighTrend";"DNSErrors";"DNSNxDomain";"ExternalEgressHighTrend";"ExternalIngressHighTrend";"Ingress5xxErrors";"IngressHTTPLatencyTrend"
// +required
Template HealthRuleTemplate `json:"template,omitempty"`
@@ -211,7 +211,7 @@ func (g *FLPHealthRule) IsAllowed(spec *FlowCollectorSpec) (bool, string) {
if !spec.Agent.EBPF.IsNetworkEventsEnabled() {
return false, fmt.Sprintf("HealthRule %s requires the %s agent feature to be enabled", g.Template, NetworkEvents)
}
- case AlertNoFlows, AlertLokiError, HealthRulePacketDropsByDevice, HealthRuleExternalEgressHighTrend, HealthRuleExternalIngressHighTrend, HealthRuleIngress5xxErrors, HealthRuleIngressLatencyTrend:
+ case AlertNoFlows, AlertLokiError, HealthRulePacketDropsByDevice, HealthRuleExternalEgressHighTrend, HealthRuleExternalIngressHighTrend, HealthRuleIngress5xxErrors, HealthRuleIngressHTTPLatencyTrend:
return true, ""
}
return true, ""
diff --git a/api/flowcollector/v1beta2/flowcollector_defaults.go b/api/flowcollector/v1beta2/flowcollector_defaults.go
index d18de2e172..0b91ededc3 100644
--- a/api/flowcollector/v1beta2/flowcollector_defaults.go
+++ b/api/flowcollector/v1beta2/flowcollector_defaults.go
@@ -204,7 +204,7 @@ var (
},
},
{
- Template: HealthRuleIngressLatencyTrend,
+ Template: HealthRuleIngressHTTPLatencyTrend,
Variants: []HealthRuleVariant{
{
Thresholds: HealthRuleThresholds{
diff --git a/api/flowcollector/v1beta2/flowcollector_types.go b/api/flowcollector/v1beta2/flowcollector_types.go
index 3be46f5318..8ceef368ec 100644
--- a/api/flowcollector/v1beta2/flowcollector_types.go
+++ b/api/flowcollector/v1beta2/flowcollector_types.go
@@ -588,14 +588,14 @@ type FLPMetrics struct {
// `disableAlerts` is a list of alert groups that should be disabled from the default set of alerts.
// Possible values are: `NetObservNoFlows`, `NetObservLokiError`, `PacketDropsByKernel`, `PacketDropsByDevice`, `IPsecErrors`, `NetpolDenied`,
- // `LatencyHighTrend`, `DNSErrors`, `DNSNxDomain`, `ExternalEgressHighTrend`, `ExternalIngressHighTrend`.
- // More information on alerts: https://github.com/netobserv/network-observability-operator/blob/main/docs/Alerts.md
+ // `LatencyHighTrend`, `DNSErrors`, `DNSNxDomain`, `ExternalEgressHighTrend`, `ExternalIngressHighTrend`, `Ingress5xxErrors`, `IngressHTTPLatencyTrend`.
+ // More information on alerts: https://github.com/netobserv/network-observability-operator/blob/main/docs/HealthRules.md
// +optional
DisableAlerts []HealthRuleTemplate `json:"disableAlerts"`
// `healthRules` is a list of health rules to be created for Prometheus, organized by templates and variants.
// Each health rule can be configured to generate either alerts or recording rules based on the mode field.
- // More information on health rules: https://github.com/netobserv/network-observability-operator/blob/main/docs/Alerts.md
+ // More information on health rules: https://github.com/netobserv/network-observability-operator/blob/main/docs/HealthRules.md
// +optional
HealthRules *[]FLPHealthRule `json:"healthRules"`
}
@@ -901,6 +901,7 @@ type LokiMicroservicesParams struct {
type LokiMonolithParams struct {
// Set `installDemoLoki` to `true` to automatically create Loki deployment, service and storage.
// This is useful for development and demo purposes. Do not use it in production.
+ // [Unsupported (*)].
//+kubebuilder:default:=false
InstallDemoLoki *bool `json:"installDemoLoki,omitempty"`
diff --git a/api/flowcollector/v1beta2/flowcollector_validation_webhook.go b/api/flowcollector/v1beta2/flowcollector_validation_webhook.go
index 5ae837743b..418bca691e 100644
--- a/api/flowcollector/v1beta2/flowcollector_validation_webhook.go
+++ b/api/flowcollector/v1beta2/flowcollector_validation_webhook.go
@@ -394,7 +394,7 @@ func (v *validator) isFLPHealthRuleGroupBySupported(template HealthRuleTemplate,
return variant.GroupBy != GroupByWorkload
case HealthRuleIPsecErrors:
return variant.GroupBy != GroupByWorkload && variant.GroupBy != GroupByNamespace
- case HealthRuleIngress5xxErrors, HealthRuleIngressLatencyTrend:
+ case HealthRuleIngress5xxErrors, HealthRuleIngressHTTPLatencyTrend:
return variant.GroupBy != GroupByNode && variant.GroupBy != GroupByWorkload
case HealthRulePacketDropsByKernel, HealthRuleDNSErrors, HealthRuleDNSNxDomain, HealthRuleExternalEgressHighTrend, HealthRuleExternalIngressHighTrend, HealthRuleLatencyHighTrend, HealthRuleNetpolDenied:
return true
@@ -463,7 +463,7 @@ func GetElligibleMetricsForAlert(template HealthRuleTemplate, alertDef *HealthRu
case HealthRuleNetpolDenied:
metricPatterns = []string{`%s_network_policy_events_total`}
totalMetricPatterns = []string{"%s_flows_total"}
- case AlertNoFlows, AlertLokiError, HealthRulePacketDropsByDevice, HealthRuleIngress5xxErrors, HealthRuleIngressLatencyTrend:
+ case AlertNoFlows, AlertLokiError, HealthRulePacketDropsByDevice, HealthRuleIngress5xxErrors, HealthRuleIngressHTTPLatencyTrend:
// nothing - these rules don't use NetObserv metrics
return nil, nil
}
diff --git a/bundle/manifests/flows.netobserv.io_flowcollectors.yaml b/bundle/manifests/flows.netobserv.io_flowcollectors.yaml
index 9872b57ad7..168fcc5420 100644
--- a/bundle/manifests/flows.netobserv.io_flowcollectors.yaml
+++ b/bundle/manifests/flows.netobserv.io_flowcollectors.yaml
@@ -4156,6 +4156,7 @@ spec:
description: |-
Set `installDemoLoki` to `true` to automatically create Loki deployment, service and storage.
This is useful for development and demo purposes. Do not use it in production.
+ [Unsupported (*)].
type: boolean
tenantID:
default: netobserv
@@ -5835,8 +5836,8 @@ spec:
description: |-
`disableAlerts` is a list of alert groups that should be disabled from the default set of alerts.
Possible values are: `NetObservNoFlows`, `NetObservLokiError`, `PacketDropsByKernel`, `PacketDropsByDevice`, `IPsecErrors`, `NetpolDenied`,
- `LatencyHighTrend`, `DNSErrors`, `DNSNxDomain`, `ExternalEgressHighTrend`, `ExternalIngressHighTrend`.
- More information on alerts: https://github.com/netobserv/network-observability-operator/blob/main/docs/Alerts.md
+ `LatencyHighTrend`, `DNSErrors`, `DNSNxDomain`, `ExternalEgressHighTrend`, `ExternalIngressHighTrend`, `Ingress5xxErrors`, `IngressHTTPLatencyTrend`.
+ More information on alerts: https://github.com/netobserv/network-observability-operator/blob/main/docs/HealthRules.md
items:
type: string
type: array
@@ -5844,7 +5845,7 @@ spec:
description: |-
`healthRules` is a list of health rules to be created for Prometheus, organized by templates and variants.
Each health rule can be configured to generate either alerts or recording rules based on the mode field.
- More information on health rules: https://github.com/netobserv/network-observability-operator/blob/main/docs/Alerts.md
+ More information on health rules: https://github.com/netobserv/network-observability-operator/blob/main/docs/HealthRules.md
items:
properties:
mode:
@@ -5863,9 +5864,9 @@ spec:
description: |-
Health rule template name.
Possible values are: `PacketDropsByKernel`, `PacketDropsByDevice`, `IPsecErrors`, `NetpolDenied`,
- `LatencyHighTrend`, `DNSErrors`, `DNSNxDomain`, `ExternalEgressHighTrend`, `ExternalIngressHighTrend`, `Ingress5xxErrors`, `IngressLatencyTrend`.
+ `LatencyHighTrend`, `DNSErrors`, `DNSNxDomain`, `ExternalEgressHighTrend`, `ExternalIngressHighTrend`, `Ingress5xxErrors`, `IngressHTTPLatencyTrend`.
Note: `NetObservNoFlows` and `NetObservLokiError` are alert-only and cannot be used as health rules.
- More information on health rules: https://github.com/netobserv/network-observability-operator/blob/main/docs/Alerts.md
+ More information on health rules: https://github.com/netobserv/network-observability-operator/blob/main/docs/HealthRules.md
enum:
- PacketDropsByKernel
- PacketDropsByDevice
@@ -5877,7 +5878,7 @@ spec:
- ExternalEgressHighTrend
- ExternalIngressHighTrend
- Ingress5xxErrors
- - IngressLatencyTrend
+ - IngressHTTPLatencyTrend
type: string
variants:
description: A list of variants for this template
diff --git a/bundle/manifests/netobserv-operator.clusterserviceversion.yaml b/bundle/manifests/netobserv-operator.clusterserviceversion.yaml
index 15d5f5d5b5..662e6bba73 100644
--- a/bundle/manifests/netobserv-operator.clusterserviceversion.yaml
+++ b/bundle/manifests/netobserv-operator.clusterserviceversion.yaml
@@ -525,10 +525,10 @@ spec:
name: flowmetrics.flows.netobserv.io
version: v1alpha1
description: |-
- NetObserv Operator is an OpenShift / Kubernetes operator for network observability. It deploys a monitoring pipeline that consists in:
+NetObserv Operator is an OpenShift / Kubernetes operator for network observability. It deploys a monitoring pipeline consisting of:
- an eBPF agent, that generates network flows from captured packets
- flowlogs-pipeline, a component that collects, enriches and exports these flows
- - when used in OpenShift, a Console plugin for flows visualization with powerful filtering options, a topology representation and more
+ - a web console for flows visualization with powerful filtering options, a topology representation and more
Flow data is then available in multiple ways, each optional:
@@ -548,16 +548,20 @@ spec:
- Installing using [Grafana's official documentation](https://grafana.com/docs/loki/latest/). Here also we wrote a ["distributed Loki" step by step guide](https://github.com/netobserv/documents/blob/main/loki_distributed.md).
- For a quick try that is not suitable for production and not scalable (it deploys a single pod, configures a 10GB storage PVC, with 24 hours of retention), you can simply run the following commands:
+ For a quick try that is not suitable for production and not scalable, the demo mode can be configured in `FlowCollector` with:
- ```
- kubectl create namespace netobserv
- kubectl apply -f <(curl -L https://raw.githubusercontent.com/netobserv/documents/5410e65b8e05aaabd1244a9524cfedd8ac8c56b5/examples/zero-click-loki/1-storage.yaml) -n netobserv
- kubectl apply -f <(curl -L https://raw.githubusercontent.com/netobserv/documents/5410e65b8e05aaabd1244a9524cfedd8ac8c56b5/examples/zero-click-loki/2-loki.yaml) -n netobserv
+ ```yaml
+ spec:
+ loki:
+ mode: Monolithic
+ monolithic:
+ installDemoLoki: true
```
+ It deploys a single pod, configures a 10GB storage PVC, with 24 hours of retention.
+
If you prefer to not use Loki, you must set `spec.loki.enable` to `false` in `FlowCollector`.
- In that case, you can still get the Prometheus metrics or export raw flows to a custom collector. But be aware that some of the Console plugin features will be disabled. For instance, you will not be able to view raw flows there, and the metrics / topology will have a more limited level of details, missing information such as pods or IPs.
+ In that case, you still get the Prometheus metrics or export raw flows to a custom collector. But be aware that some of the Console plugin features will be disabled. For instance, you will not be able to view raw flows there, and the metrics / topology will have a more limited level of details, missing information such as pods or IPs.
### Kafka
@@ -585,8 +589,6 @@ spec:
- Loki (`spec.loki`): configure here how to reach Loki. The default values match the Loki quick install paths mentioned above, but you might have to configure differently if you used another installation method. Make sure to disable it (`spec.loki.enable`) if you don't want to use Loki.
- - Quick filters (`spec.consolePlugin.quickFilters`): configure preset filters to be displayed in the Console plugin. They offer a way to quickly switch from filters to others, such as showing / hiding pods network, or infrastructure network, or application network, etc. They can be tuned to reflect the different workloads running on your cluster. For a list of available filters, [check this page](https://github.com/netobserv/network-observability-operator/blob/1.10.1-community/docs/QuickFilters.md).
-
- Kafka (`spec.deploymentModel: Kafka` and `spec.kafka`): when enabled, integrates the flow collection pipeline with Kafka, by splitting ingestion from transformation (kube enrichment, derived metrics, ...). Kafka can provide better scalability, resiliency and high availability ([view more details](https://www.redhat.com/en/topics/integration/what-is-apache-kafka)). Assumes Kafka is already deployed and a topic is created.
- Exporters (`spec.exporters`) an optional list of exporters to which to send enriched flows. KAFKA and IPFIX exporters are supported. This allows you to define any custom storage or processing that can read from Kafka or use the IPFIX standard.
diff --git a/config/crd/bases/flows.netobserv.io_flowcollectors.yaml b/config/crd/bases/flows.netobserv.io_flowcollectors.yaml
index b1f7cf234f..3ccc59be57 100644
--- a/config/crd/bases/flows.netobserv.io_flowcollectors.yaml
+++ b/config/crd/bases/flows.netobserv.io_flowcollectors.yaml
@@ -3824,6 +3824,7 @@ spec:
description: |-
Set `installDemoLoki` to `true` to automatically create Loki deployment, service and storage.
This is useful for development and demo purposes. Do not use it in production.
+ [Unsupported (*)].
type: boolean
tenantID:
default: netobserv
@@ -5397,8 +5398,8 @@ spec:
description: |-
`disableAlerts` is a list of alert groups that should be disabled from the default set of alerts.
Possible values are: `NetObservNoFlows`, `NetObservLokiError`, `PacketDropsByKernel`, `PacketDropsByDevice`, `IPsecErrors`, `NetpolDenied`,
- `LatencyHighTrend`, `DNSErrors`, `DNSNxDomain`, `ExternalEgressHighTrend`, `ExternalIngressHighTrend`.
- More information on alerts: https://github.com/netobserv/network-observability-operator/blob/main/docs/Alerts.md
+ `LatencyHighTrend`, `DNSErrors`, `DNSNxDomain`, `ExternalEgressHighTrend`, `ExternalIngressHighTrend`, `Ingress5xxErrors`, `IngressHTTPLatencyTrend`.
+ More information on alerts: https://github.com/netobserv/network-observability-operator/blob/main/docs/HealthRules.md
items:
type: string
type: array
@@ -5406,7 +5407,7 @@ spec:
description: |-
`healthRules` is a list of health rules to be created for Prometheus, organized by templates and variants.
Each health rule can be configured to generate either alerts or recording rules based on the mode field.
- More information on health rules: https://github.com/netobserv/network-observability-operator/blob/main/docs/Alerts.md
+ More information on health rules: https://github.com/netobserv/network-observability-operator/blob/main/docs/HealthRules.md
items:
properties:
mode:
@@ -5425,9 +5426,9 @@ spec:
description: |-
Health rule template name.
Possible values are: `PacketDropsByKernel`, `PacketDropsByDevice`, `IPsecErrors`, `NetpolDenied`,
- `LatencyHighTrend`, `DNSErrors`, `DNSNxDomain`, `ExternalEgressHighTrend`, `ExternalIngressHighTrend`, `Ingress5xxErrors`, `IngressLatencyTrend`.
+ `LatencyHighTrend`, `DNSErrors`, `DNSNxDomain`, `ExternalEgressHighTrend`, `ExternalIngressHighTrend`, `Ingress5xxErrors`, `IngressHTTPLatencyTrend`.
Note: `NetObservNoFlows` and `NetObservLokiError` are alert-only and cannot be used as health rules.
- More information on health rules: https://github.com/netobserv/network-observability-operator/blob/main/docs/Alerts.md
+ More information on health rules: https://github.com/netobserv/network-observability-operator/blob/main/docs/HealthRules.md
enum:
- PacketDropsByKernel
- PacketDropsByDevice
@@ -5439,7 +5440,7 @@ spec:
- ExternalEgressHighTrend
- ExternalIngressHighTrend
- Ingress5xxErrors
- - IngressLatencyTrend
+ - IngressHTTPLatencyTrend
type: string
variants:
description: A list of variants for this template
diff --git a/config/descriptions/ocp.md b/config/descriptions/ocp.md
index 9007a96851..dd19fe78ee 100644
--- a/config/descriptions/ocp.md
+++ b/config/descriptions/ocp.md
@@ -13,20 +13,24 @@ Flow data is then available in multiple ways, each optional:
### Loki
-[Loki](https://grafana.com/oss/loki/), from GrafanaLabs, can optionally be used as the backend to store all collected flows. The Network Observability operator does not install Loki directly, however we provide some guidance to help you there.
+[Loki](https://grafana.com/oss/loki/), from GrafanaLabs, can optionally be used as the backend to store all collected flows. The Network Observability operator does not install Loki directly, except in demo mode; however we provide some guidance to help you there.
- For a production or production-like environment usage, refer to [the operator documentation](https://docs.redhat.com/en/documentation/openshift_container_platform/latest/html/network_observability/installing-network-observability-operators).
-- For a quick try that is not suitable for production and not scalable (it deploys a single pod, configures a 10GB storage PVC, with 24 hours of retention), you can simply run the following commands:
+- For a quick try that is not suitable for production and not scalable, the demo mode can be configured in `FlowCollector` with:
+```yaml
+spec:
+ loki:
+ mode: Monolithic
+ monolithic:
+ installDemoLoki: true
```
-oc create namespace netobserv
-oc apply -f <(curl -L https://raw.githubusercontent.com/netobserv/documents/5410e65b8e05aaabd1244a9524cfedd8ac8c56b5/examples/zero-click-loki/1-storage.yaml) -n netobserv
-oc apply -f <(curl -L https://raw.githubusercontent.com/netobserv/documents/5410e65b8e05aaabd1244a9524cfedd8ac8c56b5/examples/zero-click-loki/2-loki.yaml) -n netobserv
-```
+
+It deploys a single pod, configures a 10GB storage PVC, with 24 hours of retention.
If you prefer to not use Loki, you must set `spec.loki.enable` to `false` in `FlowCollector`.
-In that case, you can still get the Prometheus metrics or export raw flows to a custom collector. But be aware that some of the Console plugin features will be disabled. For instance, you will not be able to view raw flows there, and the metrics / topology will have a more limited level of details, missing information such as pods or IPs.
+In that case, you still get the Prometheus metrics or export raw flows to a custom collector. But be aware that some of the Console plugin features will be disabled. For instance, you will not be able to view raw flows there, and the metrics / topology will have a more limited level of details, missing information such as pods or IPs.
### Kafka
@@ -54,8 +58,6 @@ A couple of settings deserve special attention:
- Loki (`spec.loki`): configure here how to reach Loki. The default values match the Loki quick install paths mentioned above, but you might have to configure differently if you used another installation method. Make sure to disable it (`spec.loki.enable`) if you don't want to use Loki.
-- Quick filters (`spec.consolePlugin.quickFilters`): configure preset filters to be displayed in the Console plugin. They offer a way to quickly switch from filters to others, such as showing / hiding pods network, or infrastructure network, or application network, etc. They can be tuned to reflect the different workloads running on your cluster. For a list of available filters, [check this page](https://github.com/netobserv/network-observability-operator/blob/1.10.1-community/docs/QuickFilters.md).
-
- Kafka (`spec.deploymentModel: Kafka` and `spec.kafka`): when enabled, integrates the flow collection pipeline with Kafka, by splitting ingestion from transformation (kube enrichment, derived metrics, ...). Kafka can provide better scalability, resiliency and high availability ([view more details](https://www.redhat.com/en/topics/integration/what-is-apache-kafka)). Assumes Kafka is already deployed and a topic is created.
- Exporters (`spec.exporters`) an optional list of exporters to which to send enriched flows. KAFKA and IPFIX exporters are supported. This allows you to define any custom storage or processing that can read from Kafka or use the IPFIX standard.
diff --git a/config/descriptions/upstream.md b/config/descriptions/upstream.md
index 78c6f76af1..884196ac06 100644
--- a/config/descriptions/upstream.md
+++ b/config/descriptions/upstream.md
@@ -1,7 +1,7 @@
-NetObserv Operator is an OpenShift / Kubernetes operator for network observability. It deploys a monitoring pipeline that consists in:
+NetObserv Operator is an OpenShift / Kubernetes operator for network observability. It deploys a monitoring pipeline consisting of:
- an eBPF agent, that generates network flows from captured packets
- flowlogs-pipeline, a component that collects, enriches and exports these flows
-- when used in OpenShift, a Console plugin for flows visualization with powerful filtering options, a topology representation and more
+- a web console for flows visualization with powerful filtering options, a topology representation and more
Flow data is then available in multiple ways, each optional:
@@ -21,16 +21,20 @@ For normal usage, we recommend two options:
- Installing using [Grafana's official documentation](https://grafana.com/docs/loki/latest/). Here also we wrote a ["distributed Loki" step by step guide](https://github.com/netobserv/documents/blob/main/loki_distributed.md).
-For a quick try that is not suitable for production and not scalable (it deploys a single pod, configures a 10GB storage PVC, with 24 hours of retention), you can simply run the following commands:
+For a quick try that is not suitable for production and not scalable, the demo mode can be configured in `FlowCollector` with:
+```yaml
+spec:
+ loki:
+ mode: Monolithic
+ monolithic:
+ installDemoLoki: true
```
-kubectl create namespace netobserv
-kubectl apply -f <(curl -L https://raw.githubusercontent.com/netobserv/documents/5410e65b8e05aaabd1244a9524cfedd8ac8c56b5/examples/zero-click-loki/1-storage.yaml) -n netobserv
-kubectl apply -f <(curl -L https://raw.githubusercontent.com/netobserv/documents/5410e65b8e05aaabd1244a9524cfedd8ac8c56b5/examples/zero-click-loki/2-loki.yaml) -n netobserv
-```
+
+It deploys a single pod, configures a 10GB storage PVC, with 24 hours of retention.
If you prefer to not use Loki, you must set `spec.loki.enable` to `false` in `FlowCollector`.
-In that case, you can still get the Prometheus metrics or export raw flows to a custom collector. But be aware that some of the Console plugin features will be disabled. For instance, you will not be able to view raw flows there, and the metrics / topology will have a more limited level of details, missing information such as pods or IPs.
+In that case, you still get the Prometheus metrics or export raw flows to a custom collector. But be aware that some of the Console plugin features will be disabled. For instance, you will not be able to view raw flows there, and the metrics / topology will have a more limited level of details, missing information such as pods or IPs.
### Kafka
@@ -58,8 +62,6 @@ A couple of settings deserve special attention:
- Loki (`spec.loki`): configure here how to reach Loki. The default values match the Loki quick install paths mentioned above, but you might have to configure differently if you used another installation method. Make sure to disable it (`spec.loki.enable`) if you don't want to use Loki.
-- Quick filters (`spec.consolePlugin.quickFilters`): configure preset filters to be displayed in the Console plugin. They offer a way to quickly switch from filters to others, such as showing / hiding pods network, or infrastructure network, or application network, etc. They can be tuned to reflect the different workloads running on your cluster. For a list of available filters, [check this page](https://github.com/netobserv/network-observability-operator/blob/1.10.1-community/docs/QuickFilters.md).
-
- Kafka (`spec.deploymentModel: Kafka` and `spec.kafka`): when enabled, integrates the flow collection pipeline with Kafka, by splitting ingestion from transformation (kube enrichment, derived metrics, ...). Kafka can provide better scalability, resiliency and high availability ([view more details](https://www.redhat.com/en/topics/integration/what-is-apache-kafka)). Assumes Kafka is already deployed and a topic is created.
- Exporters (`spec.exporters`) an optional list of exporters to which to send enriched flows. KAFKA and IPFIX exporters are supported. This allows you to define any custom storage or processing that can read from Kafka or use the IPFIX standard.
diff --git a/docs/FlowCollector.md b/docs/FlowCollector.md
index 7dca466269..6b9a932f30 100644
--- a/docs/FlowCollector.md
+++ b/docs/FlowCollector.md
@@ -8155,7 +8155,8 @@ It is ignored for other modes.
boolean |
Set `installDemoLoki` to `true` to automatically create Loki deployment, service and storage.
-This is useful for development and demo purposes. Do not use it in production.
+This is useful for development and demo purposes. Do not use it in production.
+[Unsupported (*)].
Default: false
|
@@ -11544,8 +11545,8 @@ available.
`disableAlerts` is a list of alert groups that should be disabled from the default set of alerts.
Possible values are: `NetObservNoFlows`, `NetObservLokiError`, `PacketDropsByKernel`, `PacketDropsByDevice`, `IPsecErrors`, `NetpolDenied`,
-`LatencyHighTrend`, `DNSErrors`, `DNSNxDomain`, `ExternalEgressHighTrend`, `ExternalIngressHighTrend`.
-More information on alerts: https://github.com/netobserv/network-observability-operator/blob/main/docs/Alerts.md
+`LatencyHighTrend`, `DNSErrors`, `DNSNxDomain`, `ExternalEgressHighTrend`, `ExternalIngressHighTrend`, `Ingress5xxErrors`, `IngressHTTPLatencyTrend`.
+More information on alerts: https://github.com/netobserv/network-observability-operator/blob/main/docs/HealthRules.md
|
false |
@@ -11554,7 +11555,7 @@ More information on alerts: https://github.com/netobserv/network-observability-o
`healthRules` is a list of health rules to be created for Prometheus, organized by templates and variants.
Each health rule can be configured to generate either alerts or recording rules based on the mode field.
-More information on health rules: https://github.com/netobserv/network-observability-operator/blob/main/docs/Alerts.md
+More information on health rules: https://github.com/netobserv/network-observability-operator/blob/main/docs/HealthRules.md
|
false |
@@ -11606,11 +11607,11 @@ More information, with full list of available metrics: https://github.com/netobs
Health rule template name.
Possible values are: `PacketDropsByKernel`, `PacketDropsByDevice`, `IPsecErrors`, `NetpolDenied`,
-`LatencyHighTrend`, `DNSErrors`, `DNSNxDomain`, `ExternalEgressHighTrend`, `ExternalIngressHighTrend`, `Ingress5xxErrors`, `IngressLatencyTrend`.
+`LatencyHighTrend`, `DNSErrors`, `DNSNxDomain`, `ExternalEgressHighTrend`, `ExternalIngressHighTrend`, `Ingress5xxErrors`, `IngressHTTPLatencyTrend`.
Note: `NetObservNoFlows` and `NetObservLokiError` are alert-only and cannot be used as health rules.
-More information on health rules: https://github.com/netobserv/network-observability-operator/blob/main/docs/Alerts.md
+More information on health rules: https://github.com/netobserv/network-observability-operator/blob/main/docs/HealthRules.md
- Enum: PacketDropsByKernel, PacketDropsByDevice, IPsecErrors, NetpolDenied, LatencyHighTrend, DNSErrors, DNSNxDomain, ExternalEgressHighTrend, ExternalIngressHighTrend, Ingress5xxErrors, IngressLatencyTrend
+ Enum: PacketDropsByKernel, PacketDropsByDevice, IPsecErrors, NetpolDenied, LatencyHighTrend, DNSErrors, DNSNxDomain, ExternalEgressHighTrend, ExternalIngressHighTrend, Ingress5xxErrors, IngressHTTPLatencyTrend
|
true |
diff --git a/docs/Alerts.md b/docs/HealthRules.md
similarity index 65%
rename from docs/Alerts.md
rename to docs/HealthRules.md
index 5a1f971b12..47964d5563 100644
--- a/docs/Alerts.md
+++ b/docs/HealthRules.md
@@ -1,25 +1,29 @@
-# Alerts in the NetObserv Operator
+# Health rules in NetObserv
-The NetObserv operator comes with a set of predefined alerts, based on its [metrics](./Metrics.md), that you can configure, extend or disable.
-The configured alerts generate a `PrometheusRule` resource that is used to feed Prometheus AlertManager.
+The NetObserv operator comes with a set of predefined health rules, based on its [metrics](./Metrics.md), that you can configure, extend or disable.
+These rules are converted into a `PrometheusRule` resource, either as Alerts or as Recording rules. The alerts are then managed by Prometheus AlertManager. Both recording rules and alerts are displayed in the Network Health page of the Console.
-These alerts are provided as a convenience, to take the most of NetObserv built-in metrics without requiring you to write complexe PromQL or to do fine-tuning. They give a health indication of your cluster network.
+These health rules are provided as a convenience, to make the most of NetObserv built-in metrics without requiring you to write complex PromQL or to do fine-tuning. They give a health indication of your cluster network.
-## Default alerts
+To get a detailed description of the rules, [check the runbooks](https://github.com/openshift/runbooks/tree/master/alerts/network-observability-operator).
-By default, NetObserv creates some alerts, contextual to the enabled features. For example, packet drops related alerts are only created if the `PacketDrop` feature is enabled. Because alerts are built upon metrics, you may also see configuration warnings if some enabled alerts are missing their required metrics, which can be configured in `spec.processor.metrics.includeList` (see [Metrics.md](./Metrics.md)).
+## Default rules
-Here is the list of alerts installed by default:
+By default, NetObserv creates health rules contextual to the enabled features. For example, packet drops related rules are only created if the `PacketDrop` feature is enabled. Because rules are built upon metrics, you may also see configuration warnings if some enabled rules are missing their required metrics, which can be configured in `spec.processor.metrics.includeList` (see [Metrics.md](./Metrics.md)).
-- `PacketDropsByDevice`: triggered on high percentage of packet drops from devices (`/proc/net/dev`).
-- `PacketDropsByKernel`: triggered on high percentage of packet drops by the kernel; it requires the `PacketDrop` agent feature.
-- `IPsecErrors`: triggered when NetObserv detects IPsec encyption errors; it requires the `IPSec` agent feature.
-- `NetpolDenied`: triggered when NetObserv detects traffic denied by network policies; it requires the `NetworkEvents` agent feature.
-- `LatencyHighTrend`: triggered when NetObserv detects an increase of TCP latency; it requires the `FlowRTT` agent feature.
-- `DNSErrors`: triggered when NetObserv detects DNS errors, other than NX_DOMAIN; it requires the `DNSTracking` agent feature.
-- `DNSNxDomain`: triggered when NetObserv detects DNS NX_DOMAIN errors; it requires the `DNSTracking` agent feature.
-- `ExternalEgressHighTrend`: triggered when NetObserv detects an important increase of external egress traffic.
-- `ExternalIngressHighTrend`: triggered when NetObserv detects an important increase of external ingress traffic.
+These rules are installed by default:
+
+- `PacketDropsByDevice`
+- `PacketDropsByKernel`
+- `IPsecErrors`
+- `NetpolDenied`
+- `LatencyHighTrend`
+- `DNSErrors`
+- `DNSNxDomain`
+- `ExternalEgressHighTrend`
+- `ExternalIngressHighTrend`
+- `Ingress5xxErrors`
+- `IngressHTTPLatencyTrend`
On top of that, there are also some operational alerts that relate to NetObserv's self health:
@@ -42,8 +46,9 @@ Example:
spec:
processor:
metrics:
- alerts:
+ healthRules:
- template: PacketDropsByKernel
+ mode: Alert # or Recording
variants:
# triggered when the whole cluster traffic (no grouping) reaches 10% of drops
- thresholds:
@@ -56,17 +61,19 @@ spec:
groupBy: Node
```
-When you configure an alert, it overrides (replaces) the default configuration for that template. So, if you want to add a new alert on top of the default ones for a template, you may want to replicate the default configuration manually, which is described in the section above.
+The `mode` setting can be either defined per variant, or for the whole template.
+
+When you configure a template, it overrides the default configuration for that template. So, if you want to add a new rule on top of the default ones for a template, you may want to replicate the default configuration manually. All defaults are described in the [runbooks](https://github.com/openshift/runbooks/tree/master/alerts/network-observability-operator).
## Disable predefined alerts
Alert templates can be disabled in `spec.processor.metrics.disableAlerts`. This settings accepts a list of template names, as listed above.
-If a template is disabled _and_ overridden in `spec.processor.metrics.alerts`, the disable setting takes precedence: the alert rule will not be created.
+If a template is disabled _and_ overridden in `spec.processor.metrics.healthRules`, the disable setting takes precedence: the alert rule will not be created.
-## Creating your own alerts that contribute to the Health dashboard
+## Creating your own rules that contribute to the Health dashboard
-This alerting API in NetObserv `FlowCollector` is simply a mapping to the Prometheus operator API, generating a `PrometheusRule`.
+This health rule API in NetObserv `FlowCollector` is simply a mapping to the Prometheus operator API, generating a `PrometheusRule`.
You can check what is the actual generated resource by running:
@@ -74,7 +81,7 @@ You can check what is the actual generated resource by running:
kubectl get prometheusrules -n netobserv -oyaml
```
-While the above sections explain how you can customize those opinionated alerts, you are not limited to them: you can go further and create your own `AlertingRule` (or `PrometheusRule`) resources. You'll just need to be familiar with PromQL (or to learn).
+While the above sections explain how you can customize those opinionated rules, you are not limited to them: you can go further and create your own `AlertingRule` (or `PrometheusRule`) resources. You'll just need to be familiar with PromQL (or to learn).
[Click here](../config/samples/alerts) to see sample alerts, that are not built-in NetObserv.
@@ -125,7 +132,7 @@ As you can see, you can leverage the output labels from the PromQL defined previ
The severity label should be "critical", "warning" or "info".
-On top of that, in order to have the alert picked up in the Health dashboard, NetObserv needs other information:
+On top of that, in order to have the rule picked up in the Health dashboard, NetObserv needs other information:
```yaml
annotations:
@@ -139,12 +146,16 @@ The label `netobserv: "true"` is required.
The annotation `netobserv_io_network_health` is optional, and gives you some control on how the alert renders in the Health page. It is a JSON string that consists in:
- `namespaceLabels`: one or more labels that hold namespaces. When provided, the alert will show up under the "Namespaces" tab.
- `nodeLabels`: one or more labels that hold node names. When provided, the alert will show up under the "Nodes" tab.
+- `ownerLabels`: one or more labels that hold owner/workload names. When provided, the alert will show up under the "Owners" tab.
- `threshold`: the alert threshold as a string, expected to match the one defined in PromQL.
- `unit`: the data unit, used only for display purpose.
- `upperBound`: an upper bound value used to compute score on a closed scale. It doesn't necessarily have to be a maximum of the metric values, but metric values will be clamped if they are above the upper bound.
- `links`: a list of links to be displayed contextually to the alert. Each link consists in:
- `name`: display name.
- `url`: the link URL.
-- `trafficLinkFilter`: an additional filter to inject into the URL for the Network Traffic page.
+- `trafficLink`: information related to the link to the Network Traffic page, for URL building. Some filters will be set automatically, such as the node or namespace filter.
+  - `extraFilter`: an additional filter to inject (e.g., a DNS response code, for DNS-related alerts).
+ - `backAndForth`: should the filter include return traffic? (true/false)
+ - `filterDestination`: should the filter target the destination of the traffic instead of the source? (true/false)
`namespaceLabels` and `nodeLabels` are mutually exclusive. If none of them is provided, the alert will show up under the "Global" tab.
diff --git a/docs/Metrics.md b/docs/Metrics.md
index 622cad9e51..ec6d0d9598 100644
--- a/docs/Metrics.md
+++ b/docs/Metrics.md
@@ -8,7 +8,7 @@ There are two ways to configure metrics:
- By enabling or disabling any of the predefined metrics
- Using the FlowMetrics API to create custom metrics
-For alerts documentation, see [Alerts.md](./Alerts.md).
+For alerts and health documentation, see [HealthRules.md](./HealthRules.md).
## Predefined metrics
diff --git a/docs/flowcollector-flows-netobserv-io-v1beta2.adoc b/docs/flowcollector-flows-netobserv-io-v1beta2.adoc
index dbf0aa9631..91e27d5ce1 100644
--- a/docs/flowcollector-flows-netobserv-io-v1beta2.adoc
+++ b/docs/flowcollector-flows-netobserv-io-v1beta2.adoc
@@ -94,11 +94,15 @@ Type::
| `string`
| `deploymentModel` defines the desired type of deployment for flow processing. Possible values are: +
-- `Direct` (default) to make the flow processor listen directly from the agents. +
+- `Service` (default) to make the flow processor listen as a Kubernetes Service, backed by a scalable Deployment. +
- `Kafka` to make flows sent to a Kafka pipeline before consumption by the processor. +
-Kafka can provide better scalability, resiliency, and high availability (for more details, see https://www.redhat.com/en/topics/integration/what-is-apache-kafka).
+- `Direct` to make the flow processor listen directly from the agents using the host network, backed by a DaemonSet. Only recommended on small clusters, below 15 nodes. +
+
+Kafka can provide better scalability, resiliency, and high availability (for more details, see https://www.redhat.com/en/topics/integration/what-is-apache-kafka). +
+
+`Direct` is not recommended on large clusters as it is less memory efficient.
| `exporters`
| `array`
@@ -802,7 +806,8 @@ such as `GOGC` and `GOMAXPROCS` environment variables. Set these values at your
| `autoscaler`
| `object`
-| `autoscaler` spec of a horizontal pod autoscaler to set up for the plugin Deployment. Refer to HorizontalPodAutoscaler documentation (autoscaling/v2).
+| `autoscaler` [deprecated (*)] spec of a horizontal pod autoscaler to set up for the plugin Deployment.
+Deprecation notice: managed autoscaler will be removed in a future version. You might configure instead an autoscaler of your choice, and set `spec.consolePlugin.unmanagedReplicas` to `true`. Refer to HorizontalPodAutoscaler documentation (autoscaling/v2).
| `enable`
| `boolean`
@@ -810,19 +815,20 @@ such as `GOGC` and `GOMAXPROCS` environment variables. Set these values at your
| `imagePullPolicy`
| `string`
-| `imagePullPolicy` is the Kubernetes pull policy for the image defined above
+| `imagePullPolicy` is the Kubernetes pull policy for the image defined above.
| `logLevel`
| `string`
-| `logLevel` for the console plugin backend
+| `logLevel` for the console plugin backend.
| `portNaming`
| `object`
-| `portNaming` defines the configuration of the port-to-service name translation
+| `portNaming` defines the configuration of the port-to-service name translation.
| `quickFilters`
| `array`
-| `quickFilters` configures quick filter presets for the Console plugin
+| `quickFilters` configures quick filter presets for the Console plugin.
+Filters for external traffic assume the subnet labels are configured to distinguish internal and external traffic (see `spec.processor.subnetLabels`).
| `replicas`
| `integer`
@@ -831,7 +837,17 @@ such as `GOGC` and `GOMAXPROCS` environment variables. Set these values at your
| `resources`
| `object`
| `resources`, in terms of compute resources, required by this container.
-For more information, see https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+For more information, see https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/.
+
+| `standalone`
+| `boolean`
+| Deploy as a standalone console, instead of a plugin of the {product-title} Console.
+This is not recommended when using with {product-title}, as it doesn't provide an integrated experience.
+[Unsupported (*)].
+
+| `unmanagedReplicas`
+| `boolean`
+| If `unmanagedReplicas` is `true`, the operator will not reconcile `replicas`. This is useful when using a pod autoscaler.
|===
== .spec.consolePlugin.advanced
@@ -950,7 +966,8 @@ Type::
Description::
+
--
-`autoscaler` spec of a horizontal pod autoscaler to set up for the plugin Deployment. Refer to HorizontalPodAutoscaler documentation (autoscaling/v2).
+`autoscaler` [deprecated (*)] spec of a horizontal pod autoscaler to set up for the plugin Deployment.
+Deprecation notice: managed autoscaler will be removed in a future version. You might configure instead an autoscaler of your choice, and set `spec.consolePlugin.unmanagedReplicas` to `true`. Refer to HorizontalPodAutoscaler documentation (autoscaling/v2).
--
Type::
@@ -963,7 +980,7 @@ Type::
Description::
+
--
-`portNaming` defines the configuration of the port-to-service name translation
+`portNaming` defines the configuration of the port-to-service name translation.
--
Type::
@@ -990,7 +1007,8 @@ for example, `portNames: {"3100": "loki"}`.
Description::
+
--
-`quickFilters` configures quick filter presets for the Console plugin
+`quickFilters` configures quick filter presets for the Console plugin.
+Filters for external traffic assume the subnet labels are configured to distinguish internal and external traffic (see `spec.processor.subnetLabels`).
--
Type::
@@ -1038,7 +1056,7 @@ Description::
+
--
`resources`, in terms of compute resources, required by this container.
-For more information, see https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+For more information, see https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/.
--
Type::
@@ -1124,6 +1142,7 @@ Type::
`object`
Required::
+ - `enterpriseID`
- `targetHost`
- `targetPort`
@@ -1133,6 +1152,12 @@ Required::
|===
| Property | Type | Description
+| `enterpriseID`
+| `integer`
+| EnterpriseID, or Private Enterprise Number (PEN). To date, Network Observability does not own an assigned number,
+so it is left open for configuration. The PEN is needed to collect non standard data, such as Kubernetes names,
+RTT, etc.
+
| `targetHost`
| `string`
| Address of the IPFIX external receiver.
@@ -2521,6 +2546,12 @@ Type::
|===
| Property | Type | Description
+| `installDemoLoki`
+| `boolean`
+| Set `installDemoLoki` to `true` to automatically create Loki deployment, service and storage.
+This is useful for development and demo purposes. Do not use it in production.
+[Unsupported (*)].
+
| `tenantID`
| `string`
| `tenantID` is the Loki `X-Scope-OrgID` header that identifies the tenant for each request.
@@ -2672,9 +2703,10 @@ configuration, you can disable it and install your own instead.
| `enable`
| `boolean`
-| Set `enable` to `true` to deploy network policies on the namespaces used by Network Observability (main and privileged). It is disabled by default.
-These network policies better isolate the Network Observability components to prevent undesired connections to them.
-This option is enabled by default, disable it to manually manage network policies
+| Deploys network policies on the namespaces used by Network Observability (main and privileged).
+These network policies better isolate the Network Observability components to prevent undesired connections from and to them.
+This option is enabled by default when using OVNKubernetes, and disabled otherwise (it has not been tested with other CNIs).
+When disabled, you can manually create the network policies for the Network Observability components.
|===
== .spec.processor
@@ -2697,7 +2729,7 @@ Type::
| `addZone`
| `boolean`
-| `addZone` allows availability zone awareness by labelling flows with their source and destination zones.
+| `addZone` allows availability zone awareness by labeling flows with their source and destination zones.
This feature requires the "topology.kubernetes.io/zone" label to be set on nodes.
| `advanced`
@@ -2710,6 +2742,11 @@ such as `GOGC` and `GOMAXPROCS` environment variables. Set these values at your
| `string`
| `clusterName` is the name of the cluster to appear in the flows data. This is useful in a multi-cluster context. When using {product-title}, leave empty to make it automatically determined.
+| `consumerReplicas`
+| `integer`
+| `consumerReplicas` defines the number of replicas (pods) to start for `flowlogs-pipeline`, default is 3.
+This setting is ignored when `spec.deploymentModel` is `Direct` or when `spec.processor.unmanagedReplicas` is `true`.
+
| `deduper`
| `object`
| `deduper` allows you to sample or drop flows identified as duplicates, in order to save on resource usage.
@@ -2726,8 +2763,9 @@ but with a lesser improvement in performance.
| `kafkaConsumerAutoscaler`
| `object`
-| `kafkaConsumerAutoscaler` is the spec of a horizontal pod autoscaler to set up for `flowlogs-pipeline-transformer`, which consumes Kafka messages.
-This setting is ignored when Kafka is disabled. Refer to HorizontalPodAutoscaler documentation (autoscaling/v2).
+| `kafkaConsumerAutoscaler` [deprecated (*)] is the spec of a horizontal pod autoscaler to set up for `flowlogs-pipeline-transformer`, which consumes Kafka messages.
+This setting is ignored when Kafka is disabled.
+Deprecation notice: managed autoscaler will be removed in a future version. You might configure instead an autoscaler of your choice, and set `spec.processor.unmanagedReplicas` to `true`. Refer to HorizontalPodAutoscaler documentation (autoscaling/v2).
| `kafkaConsumerBatchSize`
| `integer`
@@ -2739,8 +2777,9 @@ This setting is ignored when Kafka is disabled. Refer to HorizontalPodAutoscaler
| `kafkaConsumerReplicas`
| `integer`
-| `kafkaConsumerReplicas` defines the number of replicas (pods) to start for `flowlogs-pipeline-transformer`, which consumes Kafka messages.
+| `kafkaConsumerReplicas` [deprecated (*)] defines the number of replicas (pods) to start for `flowlogs-pipeline-transformer`, which consumes Kafka messages.
This setting is ignored when Kafka is disabled.
+Deprecation notice: use `spec.processor.consumerReplicas` instead.
| `logLevel`
| `string`
@@ -2772,11 +2811,19 @@ This setting is ignored when Kafka is disabled.
| `resources` are the compute resources required by this container.
For more information, see https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+| `slicesConfig`
+| `object`
+| Global configuration managing FlowCollectorSlices custom resources.
+
| `subnetLabels`
| `object`
-| `subnetLabels` allows to define custom labels on subnets and IPs or to enable automatic labelling of recognized subnets in {product-title}, which is used to identify cluster external traffic.
+| `subnetLabels` allows to define custom labels on subnets and IPs or to enable automatic labeling of recognized subnets in {product-title}, which is used to identify cluster external traffic.
When a subnet matches the source or destination IP of a flow, a corresponding field is added: `SrcSubnetLabel` or `DstSubnetLabel`.
+| `unmanagedReplicas`
+| `boolean`
+| If `unmanagedReplicas` is `true`, the operator will not reconcile `consumerReplicas`. This is useful when using a pod autoscaler.
+
|===
== .spec.processor.advanced
Description::
@@ -3042,8 +3089,9 @@ Type::
Description::
+
--
-`kafkaConsumerAutoscaler` is the spec of a horizontal pod autoscaler to set up for `flowlogs-pipeline-transformer`, which consumes Kafka messages.
-This setting is ignored when Kafka is disabled. Refer to HorizontalPodAutoscaler documentation (autoscaling/v2).
+`kafkaConsumerAutoscaler` [deprecated (*)] is the spec of a horizontal pod autoscaler to set up for `flowlogs-pipeline-transformer`, which consumes Kafka messages.
+This setting is ignored when Kafka is disabled.
+Deprecation notice: managed autoscaler will be removed in a future version. You might configure instead an autoscaler of your choice, and set `spec.processor.unmanagedReplicas` to `true`. Refer to HorizontalPodAutoscaler documentation (autoscaling/v2).
--
Type::
@@ -3069,18 +3117,18 @@ Type::
|===
| Property | Type | Description
-| `alerts`
-| `array`
-| `alerts` is a list of alerts to be created for Prometheus AlertManager, organized by templates and variants [Unsupported (*)].
-This is currently an experimental feature behind a feature gate. To enable, edit `spec.processor.advanced.env` by adding `EXPERIMENTAL_ALERTS_HEALTH` set to `true`.
-More information on alerts: https://github.com/netobserv/network-observability-operator/blob/main/docs/Alerts.md
-
| `disableAlerts`
| `array (string)`
| `disableAlerts` is a list of alert groups that should be disabled from the default set of alerts.
Possible values are: `NetObservNoFlows`, `NetObservLokiError`, `PacketDropsByKernel`, `PacketDropsByDevice`, `IPsecErrors`, `NetpolDenied`,
-`LatencyHighTrend`, `DNSErrors`, `ExternalEgressHighTrend`, `ExternalIngressHighTrend`.
-More information on alerts: https://github.com/netobserv/network-observability-operator/blob/main/docs/Alerts.md
+`LatencyHighTrend`, `DNSErrors`, `DNSNxDomain`, `ExternalEgressHighTrend`, `ExternalIngressHighTrend`, `Ingress5xxErrors`, `IngressHTTPLatencyTrend`.
+More information on alerts: https://github.com/netobserv/network-observability-operator/blob/main/docs/HealthRules.md
+
+| `healthRules`
+| `array`
+| `healthRules` is a list of health rules to be created for Prometheus, organized by templates and variants.
+Each health rule can be configured to generate either alerts or recording rules based on the mode field.
+More information on health rules: https://github.com/netobserv/network-observability-operator/blob/main/docs/HealthRules.md
| `includeList`
| `array (string)`
@@ -3100,13 +3148,13 @@ More information, with full list of available metrics: https://github.com/netobs
| Metrics server endpoint configuration for Prometheus scraper
|===
-== .spec.processor.metrics.alerts
+== .spec.processor.metrics.healthRules
Description::
+
--
-`alerts` is a list of alerts to be created for Prometheus AlertManager, organized by templates and variants [Unsupported (*)].
-This is currently an experimental feature behind a feature gate. To enable, edit `spec.processor.advanced.env` by adding `EXPERIMENTAL_ALERTS_HEALTH` set to `true`.
-More information on alerts: https://github.com/netobserv/network-observability-operator/blob/main/docs/Alerts.md
+`healthRules` is a list of health rules to be created for Prometheus, organized by templates and variants.
+Each health rule can be configured to generate either alerts or recording rules based on the mode field.
+More information on health rules: https://github.com/netobserv/network-observability-operator/blob/main/docs/HealthRules.md
--
Type::
@@ -3115,7 +3163,7 @@ Type::
-== .spec.processor.metrics.alerts[]
+== .spec.processor.metrics.healthRules[]
Description::
+
--
@@ -3135,19 +3183,28 @@ Required::
|===
| Property | Type | Description
+| `mode`
+| `string`
+| Mode defines whether this health rule should be generated as an alert or a recording rule.
+Possible values are: `Alert` (default), `Recording`.
+Recording rules violations are visible in the Network Health dashboard without generating any Prometheus alert.
+This provides an alternative way of getting Health information for SRE and cluster admins who might find
+many new alerts burdensome.
+
| `template`
| `string`
-| Alert template name.
+| Health rule template name.
Possible values are: `PacketDropsByKernel`, `PacketDropsByDevice`, `IPsecErrors`, `NetpolDenied`,
-`LatencyHighTrend`, `DNSErrors`, `ExternalEgressHighTrend`, `ExternalIngressHighTrend`.
-More information on alerts: https://github.com/netobserv/network-observability-operator/blob/main/docs/Alerts.md
+`LatencyHighTrend`, `DNSErrors`, `DNSNxDomain`, `ExternalEgressHighTrend`, `ExternalIngressHighTrend`, `Ingress5xxErrors`, `IngressHTTPLatencyTrend`.
+Note: `NetObservNoFlows` and `NetObservLokiError` are alert-only and cannot be used as health rules.
+More information on health rules: https://github.com/netobserv/network-observability-operator/blob/main/docs/HealthRules.md
| `variants`
| `array`
| A list of variants for this template
|===
-== .spec.processor.metrics.alerts[].variants
+== .spec.processor.metrics.healthRules[].variants
Description::
+
--
@@ -3160,7 +3217,7 @@ Type::
-== .spec.processor.metrics.alerts[].variants[]
+== .spec.processor.metrics.healthRules[].variants[]
Description::
+
--
@@ -3189,26 +3246,34 @@ Required::
It is provided as an absolute rate (bytes per second or packets per second, depending on the context).
When provided, it must be parsable as a float.
+| `mode`
+| `string`
+| Mode overrides the health rule mode for this specific variant.
+If not specified, inherits from the parent health rule's mode.
+Possible values are: `Alert`, `Recording`.
+
| `thresholds`
| `object`
-| Thresholds of the alert per severity.
+| Thresholds of the health rule per severity.
They are expressed as a percentage of errors above which the alert is triggered. They must be parsable as floats.
+Required for both alert and recording modes.
| `trendDuration`
| `string`
-| For trending alerts, the duration interval for baseline comparison. For example, "2h" means comparing against a 2-hours average. Defaults to 2h.
+| For trending health rules, the duration interval for baseline comparison. For example, "2h" means comparing against a 2-hours average. Defaults to 2h.
| `trendOffset`
| `string`
-| For trending alerts, the time offset for baseline comparison. For example, "1d" means comparing against yesterday. Defaults to 1d.
+| For trending health rules, the time offset for baseline comparison. For example, "1d" means comparing against yesterday. Defaults to 1d.
|===
-== .spec.processor.metrics.alerts[].variants[].thresholds
+== .spec.processor.metrics.healthRules[].variants[].thresholds
Description::
+
--
-Thresholds of the alert per severity.
+Thresholds of the health rule per severity.
They are expressed as a percentage of errors above which the alert is triggered. They must be parsable as floats.
+Required for both alert and recording modes.
--
Type::
@@ -3405,12 +3470,51 @@ If Requests is omitted for a container, it defaults to Limits if that is explici
otherwise to an implementation-defined value. Requests cannot exceed Limits.
More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+|===
+== .spec.processor.slicesConfig
+Description::
++
+--
+Global configuration managing FlowCollectorSlices custom resources.
+--
+
+Type::
+ `object`
+
+Required::
+ - `enable`
+
+
+
+[cols="1,1,1",options="header"]
+|===
+| Property | Type | Description
+
+| `collectionMode`
+| `string`
+| `collectionMode` determines how the FlowCollectorSlice custom resources impact the flow collection process: +
+
+- When set to `AlwaysCollect`, all flows are collected regardless of the presence of FlowCollectorSlice. +
+
+- When set to `AllowList`, only the flows related to namespaces where a FlowCollectorSlice resource is present, or configured via the global `namespacesAllowList`, are collected. +
+
+
+| `enable`
+| `boolean`
+| `enable` determines if the FlowCollectorSlice feature is enabled. If not, all resources of kind FlowCollectorSlice are simply ignored.
+
+| `namespacesAllowList`
+| `array (string)`
+| `namespacesAllowList` is a list of namespaces for which flows are always collected, regardless of the presence of FlowCollectorSlice in those namespaces.
+An entry enclosed by slashes, such as `/openshift-.*/`, is matched as a regular expression.
+This setting is ignored if `collectionMode` is different from `AllowList`.
+
|===
== .spec.processor.subnetLabels
Description::
+
--
-`subnetLabels` allows to define custom labels on subnets and IPs or to enable automatic labelling of recognized subnets in {product-title}, which is used to identify cluster external traffic.
+`subnetLabels` allows to define custom labels on subnets and IPs or to enable automatic labeling of recognized subnets in {product-title}, which is used to identify cluster external traffic.
When a subnet matches the source or destination IP of a flow, a corresponding field is added: `SrcSubnetLabel` or `DstSubnetLabel`.
--
@@ -3426,8 +3530,13 @@ Type::
| `customLabels`
| `array`
-| `customLabels` allows to customize subnets and IPs labelling, such as to identify cluster-external workloads or web services.
-If you enable `openShiftAutoDetect`, `customLabels` can override the detected subnets in case they overlap.
+| `customLabels` allows you to customize subnets and IPs labeling, such as to identify cluster external workloads or web services.
+External subnets must be labeled with the prefix `EXT:`, or not labeled at all, in order to work with default quick filters and some metrics examples provided. +
+
+If `openShiftAutoDetect` is disabled or you are not using {product-title}, it is recommended to manually configure labels for the cluster subnets, to distinguish internal traffic from external traffic. +
+
+If `openShiftAutoDetect` is enabled, `customLabels` overrides the detected subnets when they overlap. +
+
| `openShiftAutoDetect`
| `boolean`
@@ -3440,8 +3549,13 @@ external traffic: flows that are not labeled for those subnets are external to t
Description::
+
--
-`customLabels` allows to customize subnets and IPs labelling, such as to identify cluster-external workloads or web services.
-If you enable `openShiftAutoDetect`, `customLabels` can override the detected subnets in case they overlap.
+`customLabels` allows you to customize subnets and IPs labeling, such as to identify cluster external workloads or web services.
+External subnets must be labeled with the prefix `EXT:`, or not labeled at all, in order to work with default quick filters and some metrics examples provided. +
+
+If `openShiftAutoDetect` is disabled or you are not using {product-title}, it is recommended to manually configure labels for the cluster subnets, to distinguish internal traffic from external traffic. +
+
+If `openShiftAutoDetect` is enabled, `customLabels` overrides the detected subnets when they overlap. +
+
--
Type::
@@ -3538,9 +3652,9 @@ If they are both disabled, the Console plugin is not deployed.
| `string`
| `mode` must be set according to the type of Prometheus installation that stores Network Observability metrics: +
-- Use `Auto` to try configuring automatically. In {product-title}, it uses the Thanos querier from {product-title} Cluster Monitoring +
+- Use `Auto` to try configuring automatically. In {product-title}, it uses the Thanos querier from {product-title} Cluster Monitoring. +
-- Use `Manual` for a manual setup +
+- Use `Manual` for a manual setup. +
| `timeout`
@@ -3566,6 +3680,12 @@ Type::
|===
| Property | Type | Description
+| `alertManager`
+| `object`
+| AlertManager configuration. This is used in the console to query silenced alerts, for displaying health information.
+When used in {product-title} it can be left empty to use the Console API instead.
+[Unsupported (*)].
+
| `forwardUserToken`
| `boolean`
| Set `true` to forward logged in user token in queries to Prometheus
@@ -3578,6 +3698,147 @@ Type::
| `string`
| `url` is the address of an existing Prometheus service to use for querying metrics.
+|===
+== .spec.prometheus.querier.manual.alertManager
+Description::
++
+--
+AlertManager configuration. This is used in the console to query silenced alerts, for displaying health information.
+When used in {product-title}, it can be left empty to use the Console API instead.
+[Unsupported (*)].
+--
+
+Type::
+ `object`
+
+
+
+
+[cols="1,1,1",options="header"]
+|===
+| Property | Type | Description
+
+| `tls`
+| `object`
+| TLS client configuration for Prometheus AlertManager URL.
+
+| `url`
+| `string`
+| `url` is the address of an existing Prometheus AlertManager service to use for querying alerts.
+
+|===
+== .spec.prometheus.querier.manual.alertManager.tls
+Description::
++
+--
+TLS client configuration for Prometheus AlertManager URL.
+--
+
+Type::
+ `object`
+
+
+
+
+[cols="1,1,1",options="header"]
+|===
+| Property | Type | Description
+
+| `caCert`
+| `object`
+| `caCert` defines the reference of the certificate for the Certificate Authority.
+
+| `enable`
+| `boolean`
+| Enable TLS
+
+| `insecureSkipVerify`
+| `boolean`
+| `insecureSkipVerify` allows skipping client-side verification of the server certificate.
+If set to `true`, the `caCert` field is ignored.
+
+| `userCert`
+| `object`
+| `userCert` defines the user certificate reference and is used for mTLS. When you use one-way TLS, you can ignore this property.
+
+|===
+== .spec.prometheus.querier.manual.alertManager.tls.caCert
+Description::
++
+--
+`caCert` defines the reference of the certificate for the Certificate Authority.
+--
+
+Type::
+ `object`
+
+
+
+
+[cols="1,1,1",options="header"]
+|===
+| Property | Type | Description
+
+| `certFile`
+| `string`
+| `certFile` defines the path to the certificate file name within the config map or secret.
+
+| `certKey`
+| `string`
+| `certKey` defines the path to the certificate private key file name within the config map or secret. Omit when the key is not necessary.
+
+| `name`
+| `string`
+| Name of the config map or secret containing certificates.
+
+| `namespace`
+| `string`
+| Namespace of the config map or secret containing certificates. If omitted, the default is to use the same namespace as where Network Observability is deployed.
+If the namespace is different, the config map or the secret is copied so that it can be mounted as required.
+
+| `type`
+| `string`
+| Type for the certificate reference: `configmap` or `secret`.
+
+|===
+== .spec.prometheus.querier.manual.alertManager.tls.userCert
+Description::
++
+--
+`userCert` defines the user certificate reference and is used for mTLS. When you use one-way TLS, you can ignore this property.
+--
+
+Type::
+ `object`
+
+
+
+
+[cols="1,1,1",options="header"]
+|===
+| Property | Type | Description
+
+| `certFile`
+| `string`
+| `certFile` defines the path to the certificate file name within the config map or secret.
+
+| `certKey`
+| `string`
+| `certKey` defines the path to the certificate private key file name within the config map or secret. Omit when the key is not necessary.
+
+| `name`
+| `string`
+| Name of the config map or secret containing certificates.
+
+| `namespace`
+| `string`
+| Namespace of the config map or secret containing certificates. If omitted, the default is to use the same namespace as where Network Observability is deployed.
+If the namespace is different, the config map or the secret is copied so that it can be mounted as required.
+
+| `type`
+| `string`
+| Type for the certificate reference: `configmap` or `secret`.
+
|===
== .spec.prometheus.querier.manual.tls
Description::
diff --git a/docs/flowcollectorslice-flows-netobserv-io-v1alpha1.adoc b/docs/flowcollectorslice-flows-netobserv-io-v1alpha1.adoc
new file mode 100644
index 0000000000..87acb2cffd
--- /dev/null
+++ b/docs/flowcollectorslice-flows-netobserv-io-v1alpha1.adoc
@@ -0,0 +1,128 @@
+// Automatically generated by 'openshift-apidocs-gen'. Do not edit.
+:_mod-docs-content-type: REFERENCE
+[id="flowcollectorslice-flows-netobserv-io-v1alpha1"]
+= FlowCollectorSlice [flows.netobserv.io/v1alpha1]
+
+
+
+Description::
++
+--
+FlowCollectorSlice is the API that allows customizing flow collection settings, such as sampling and subnet labels, on a per-namespace basis.
+--
+
+Type::
+ `object`
+
+
+
+
+[cols="1,1,1",options="header"]
+|===
+| Property | Type | Description
+
+| `apiVersion`
+| `string`
+| APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and might reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+
+| `kind`
+| `string`
+| Kind is a string value representing the REST resource this object represents. Servers might infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+
+| `metadata`
+| `object`
+| Standard object's metadata. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#metadata
+
+| `spec`
+| `object`
+| FlowCollectorSliceSpec defines the desired state of FlowCollectorSlice
+
+|===
+== .metadata
+Description::
++
+--
+Standard object's metadata. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#metadata
+--
+
+Type::
+ `object`
+
+
+
+
+== .spec
+Description::
++
+--
+FlowCollectorSliceSpec defines the desired state of FlowCollectorSlice
+--
+
+Type::
+ `object`
+
+
+
+
+[cols="1,1,1",options="header"]
+|===
+| Property | Type | Description
+
+| `sampling`
+| `integer`
+| `sampling` is an optional sampling interval to apply to this slice. For example, a value of `50` means that 1 matching flow in 50 is sampled.
+
+| `subnetLabels`
+| `array`
+| `subnetLabels` allows you to customize subnets and IPs labeling, such as to identify cluster-external workloads or web services.
+Beware that the subnet labels configured in FlowCollectorSlice are not limited to the flows of the related namespace: any flow
+in the whole cluster can be labeled using this configuration. However, subnet labels defined in the cluster-scoped FlowCollector take
+precedence in case of conflicting rules.
+
+|===
+== .spec.subnetLabels
+Description::
++
+--
+`subnetLabels` allows you to customize subnets and IPs labeling, such as to identify cluster-external workloads or web services.
+Beware that the subnet labels configured in FlowCollectorSlice are not limited to the flows of the related namespace: any flow
+in the whole cluster can be labeled using this configuration. However, subnet labels defined in the cluster-scoped FlowCollector take
+precedence in case of conflicting rules.
+--
+
+Type::
+ `array`
+
+
+
+
+== .spec.subnetLabels[]
+Description::
++
+--
+SubnetLabel allows to label subnets and IPs, such as to identify cluster-external workloads or web services.
+--
+
+Type::
+ `object`
+
+Required::
+ - `cidrs`
+ - `name`
+
+
+
+[cols="1,1,1",options="header"]
+|===
+| Property | Type | Description
+
+| `cidrs`
+| `array (string)`
+| List of CIDRs, such as `["1.2.3.4/32"]`.
+
+| `name`
+| `string`
+| Label name, used to flag matching flows.
+
+|===
+
diff --git a/docs/flowmetric-flows-netobserv-io-v1alpha1.adoc b/docs/flowmetric-flows-netobserv-io-v1alpha1.adoc
index 24b966849a..d117023063 100644
--- a/docs/flowmetric-flows-netobserv-io-v1alpha1.adoc
+++ b/docs/flowmetric-flows-netobserv-io-v1alpha1.adoc
@@ -110,6 +110,10 @@ Refer to the documentation for the list of available fields: https://docs.redhat
| `flatten` is a list of array-type fields that must be flattened, such as Interfaces or NetworkEvents. Flattened fields generate one metric per item in that field.
For instance, when flattening `Interfaces` on a bytes counter, a flow having Interfaces [br-ex, ens5] increases one counter for `br-ex` and another for `ens5`.
+| `help`
+| `string`
+| Help text of the metric, as it appears in Prometheus.
+
| `labels`
| `array (string)`
| `labels` is a list of fields that should be used as Prometheus labels, also known as dimensions (for example: `SrcK8S_Namespace`).
diff --git a/docs/flows-format.adoc b/docs/flows-format.adoc
index dbda95d5b6..b2d1c329e7 100644
--- a/docs/flows-format.adoc
+++ b/docs/flows-format.adoc
@@ -57,6 +57,13 @@ The "Cardinality" column gives information about the implied metric cardinality
| no
| avoid
| dns.latency
+| `DnsName`
+| string
+| DNS queried name
+| `dns_name`
+| no
+| careful
+| n/a
| `Dscp`
| number
| Differentiated Services Code Point (DSCP) value
diff --git a/hack/asciidoc-gen.sh b/hack/asciidoc-gen.sh
index 1dc3aabdf6..73df225711 100755
--- a/hack/asciidoc-gen.sh
+++ b/hack/asciidoc-gen.sh
@@ -50,7 +50,7 @@ amend_doc() {
sed -i -r '/^= API endpoints/Q' docs/$filename
sed -i -r 's/OpenShift/{product-title}/g' docs/$filename
sed -i -r 's/\/Network Observability/g' docs/$filename
- sed -i -r 's/<br>/ +\n/g' docs/$filename
+ sed -i -r 's~<br>~ +\n~g' docs/$filename
sed -i -r 's/<i>/_/g' docs/$filename
sed -i -r 's/<\/i>/_/g' docs/$filename
sed -i -r 's/ may / might /g' docs/$filename
diff --git a/helm/crds/flows.netobserv.io_flowcollectors.yaml b/helm/crds/flows.netobserv.io_flowcollectors.yaml
index 39d7af3cd3..360026bde7 100644
--- a/helm/crds/flows.netobserv.io_flowcollectors.yaml
+++ b/helm/crds/flows.netobserv.io_flowcollectors.yaml
@@ -3828,6 +3828,7 @@ spec:
description: |-
Set `installDemoLoki` to `true` to automatically create Loki deployment, service and storage.
This is useful for development and demo purposes. Do not use it in production.
+ [Unsupported (*)].
type: boolean
tenantID:
default: netobserv
@@ -5401,8 +5402,8 @@ spec:
description: |-
`disableAlerts` is a list of alert groups that should be disabled from the default set of alerts.
Possible values are: `NetObservNoFlows`, `NetObservLokiError`, `PacketDropsByKernel`, `PacketDropsByDevice`, `IPsecErrors`, `NetpolDenied`,
- `LatencyHighTrend`, `DNSErrors`, `DNSNxDomain`, `ExternalEgressHighTrend`, `ExternalIngressHighTrend`.
- More information on alerts: https://github.com/netobserv/network-observability-operator/blob/main/docs/Alerts.md
+ `LatencyHighTrend`, `DNSErrors`, `DNSNxDomain`, `ExternalEgressHighTrend`, `ExternalIngressHighTrend`, `Ingress5xxErrors`, `IngressHTTPLatencyTrend`.
+ More information on alerts: https://github.com/netobserv/network-observability-operator/blob/main/docs/HealthRules.md
items:
type: string
type: array
@@ -5410,7 +5411,7 @@ spec:
description: |-
`healthRules` is a list of health rules to be created for Prometheus, organized by templates and variants.
Each health rule can be configured to generate either alerts or recording rules based on the mode field.
- More information on health rules: https://github.com/netobserv/network-observability-operator/blob/main/docs/Alerts.md
+ More information on health rules: https://github.com/netobserv/network-observability-operator/blob/main/docs/HealthRules.md
items:
properties:
mode:
@@ -5429,9 +5430,9 @@ spec:
description: |-
Health rule template name.
Possible values are: `PacketDropsByKernel`, `PacketDropsByDevice`, `IPsecErrors`, `NetpolDenied`,
- `LatencyHighTrend`, `DNSErrors`, `DNSNxDomain`, `ExternalEgressHighTrend`, `ExternalIngressHighTrend`, `Ingress5xxErrors`, `IngressLatencyTrend`.
+ `LatencyHighTrend`, `DNSErrors`, `DNSNxDomain`, `ExternalEgressHighTrend`, `ExternalIngressHighTrend`, `Ingress5xxErrors`, `IngressHTTPLatencyTrend`.
Note: `NetObservNoFlows` and `NetObservLokiError` are alert-only and cannot be used as health rules.
- More information on health rules: https://github.com/netobserv/network-observability-operator/blob/main/docs/Alerts.md
+ More information on health rules: https://github.com/netobserv/network-observability-operator/blob/main/docs/HealthRules.md
enum:
- PacketDropsByKernel
- PacketDropsByDevice
@@ -5443,7 +5444,7 @@ spec:
- ExternalEgressHighTrend
- ExternalIngressHighTrend
- Ingress5xxErrors
- - IngressLatencyTrend
+ - IngressHTTPLatencyTrend
type: string
variants:
description: A list of variants for this template
diff --git a/internal/pkg/helper/cardinality/cardinality.json b/internal/pkg/helper/cardinality/cardinality.json
index eef215fc73..b9c1d2c9fb 100644
--- a/internal/pkg/helper/cardinality/cardinality.json
+++ b/internal/pkg/helper/cardinality/cardinality.json
@@ -48,6 +48,7 @@
"DnsLatencyMs": "avoid",
"DnsFlags": "fine",
"DnsFlagsResponseCode": "fine",
+ "DnsName": "careful",
"DnsErrno": "fine",
"TimeFlowRttNs": "avoid",
"NetworkEvents": "avoid",
diff --git a/internal/pkg/metrics/alerts/alerts.go b/internal/pkg/metrics/alerts/alerts.go
index 614e769599..c08ecf213a 100644
--- a/internal/pkg/metrics/alerts/alerts.go
+++ b/internal/pkg/metrics/alerts/alerts.go
@@ -329,7 +329,7 @@ func (rb *ruleBuilder) ingressErrors() (*monitoringv1.Rule, error) {
return rb.createRule(promql, summary, description)
}
-func (rb *ruleBuilder) ingressLatencyTrend() (*monitoringv1.Rule, error) {
+func (rb *ruleBuilder) ingressHTTPLatencyTrend() (*monitoringv1.Rule, error) {
if rb.side == asDest {
return nil, nil
}
@@ -341,13 +341,13 @@ func (rb *ruleBuilder) ingressLatencyTrend() (*monitoringv1.Rule, error) {
switch rb.healthRule.GroupBy {
case flowslatest.GroupByNode:
- return nil, fmt.Errorf("IngressLatencyTrend health rule does not support grouping per node")
+ return nil, fmt.Errorf("IngressHTTPLatencyTrend health rule does not support grouping per node")
case flowslatest.GroupByNamespace:
legend = " [namespace={{ $labels.namespace }}]"
currentMetric = `avg(label_replace(haproxy_server_http_average_response_latency_milliseconds, "namespace", "$1", "exported_namespace", "(.*)")) by (namespace)`
baselineMetric = fmt.Sprintf(`avg(label_replace(haproxy_server_http_average_response_latency_milliseconds offset %s, "namespace", "$1", "exported_namespace", "(.*)")) by (namespace)`, offset)
case flowslatest.GroupByWorkload:
- return nil, fmt.Errorf("IngressLatencyTrend health rule does not support grouping per workload")
+ return nil, fmt.Errorf("IngressHTTPLatencyTrend health rule does not support grouping per workload")
default:
currentMetric = `avg(haproxy_server_http_average_response_latency_milliseconds)`
baselineMetric = fmt.Sprintf(`avg(haproxy_server_http_average_response_latency_milliseconds offset %s)`, offset)
diff --git a/internal/pkg/metrics/alerts/alerts_test.go b/internal/pkg/metrics/alerts/alerts_test.go
index c2f390c6f2..57ede896ab 100644
--- a/internal/pkg/metrics/alerts/alerts_test.go
+++ b/internal/pkg/metrics/alerts/alerts_test.go
@@ -24,7 +24,7 @@ func allTemplates() []flowslatest.HealthRuleTemplate {
flowslatest.HealthRuleExternalEgressHighTrend,
flowslatest.HealthRuleExternalIngressHighTrend,
flowslatest.HealthRuleIngress5xxErrors,
- flowslatest.HealthRuleIngressLatencyTrend,
+ flowslatest.HealthRuleIngressHTTPLatencyTrend,
}
}
@@ -48,7 +48,7 @@ func TestBuildRules_DefaultWithDisabled(t *testing.T) {
flowslatest.HealthRuleExternalEgressHighTrend,
flowslatest.HealthRuleExternalIngressHighTrend,
flowslatest.HealthRuleIngress5xxErrors,
- flowslatest.HealthRuleIngressLatencyTrend,
+ flowslatest.HealthRuleIngressHTTPLatencyTrend,
},
},
},
@@ -117,8 +117,8 @@ func TestBuildRules_DefaultWithFeaturesAndDisabled(t *testing.T) {
"LatencyHighTrend_PerDstNamespaceInfo",
"Ingress5xxErrors_PerSrcNamespaceWarning",
"Ingress5xxErrors_PerSrcNamespaceInfo",
- "IngressLatencyTrend_PerSrcNamespaceWarning",
- "IngressLatencyTrend_PerSrcNamespaceInfo",
+ "IngressHTTPLatencyTrend_PerSrcNamespaceWarning",
+ "IngressHTTPLatencyTrend_PerSrcNamespaceInfo",
"NetObservNoFlows",
}, allNames(rules))
assert.Contains(t, rules[0].Annotations["description"], "NetObserv is detecting more than 20% of packets dropped by the kernel [source namespace={{ $labels.namespace }}]")
@@ -126,7 +126,7 @@ func TestBuildRules_DefaultWithFeaturesAndDisabled(t *testing.T) {
assert.Contains(t, rules[3].Annotations["description"], "NetObserv is detecting more than 10% of packets dropped by the kernel [dest. namespace={{ $labels.namespace }}]")
assert.Equal(t, `{"links":[{"name":"View runbook","url":"`+runbookURLBase+`/PacketDropsByKernel.md"}],"namespaceLabels":["namespace"],"threshold":"10","unit":"%"}`, rules[3].Annotations["netobserv_io_network_health"])
assert.Contains(t, rules[4].Annotations["description"], "NetObserv is detecting more than 10% of packets dropped by the kernel [source node={{ $labels.node }}]")
- assert.Contains(t, rules[8].Annotations["description"], "node-exporter is detecting more than 5% of dropped packets [node={{ $labels.instance }}]")
+ assert.Contains(t, rules[8].Annotations["description"], "node-exporter is reporting more than 5% of dropped packets [node={{ $labels.instance }}]")
assert.Contains(t, rules[len(rules)-1].Annotations["description"], "NetObserv flowlogs-pipeline is not receiving any flow")
}
diff --git a/internal/pkg/metrics/alerts/builder.go b/internal/pkg/metrics/alerts/builder.go
index b2f67232aa..a83d4280be 100644
--- a/internal/pkg/metrics/alerts/builder.go
+++ b/internal/pkg/metrics/alerts/builder.go
@@ -179,8 +179,8 @@ func (rb *ruleBuilder) convertToRule() (*monitoringv1.Rule, error) {
return rb.externalTrend(true)
case flowslatest.HealthRuleIngress5xxErrors:
return rb.ingressErrors()
- case flowslatest.HealthRuleIngressLatencyTrend:
- return rb.ingressLatencyTrend()
+ case flowslatest.HealthRuleIngressHTTPLatencyTrend:
+ return rb.ingressHTTPLatencyTrend()
case flowslatest.AlertLokiError, flowslatest.AlertNoFlows:
// error
}
diff --git a/internal/pkg/metrics/alerts/templates.go b/internal/pkg/metrics/alerts/templates.go
index 27ca6ecd6d..966444b72f 100644
--- a/internal/pkg/metrics/alerts/templates.go
+++ b/internal/pkg/metrics/alerts/templates.go
@@ -19,7 +19,7 @@ var TemplateMetadata = map[flowslatest.HealthRuleTemplate]TemplateInfo{
},
flowslatest.HealthRulePacketDropsByDevice: {
Summary: "Too many drops from device",
- DescriptionPattern: "node-exporter is detecting more than %s%% of dropped packets%s",
+ DescriptionPattern: "node-exporter is reporting more than %s%% of dropped packets%s",
},
flowslatest.HealthRuleIPsecErrors: {
Summary: "Too many IPsec errors",
@@ -53,8 +53,8 @@ var TemplateMetadata = map[flowslatest.HealthRuleTemplate]TemplateInfo{
Summary: "Too many ingress 5xx errors",
DescriptionPattern: "HAProxy is reporting more than %s%% of 5xx HTTP response codes from ingress traffic%s",
},
- flowslatest.HealthRuleIngressLatencyTrend: {
- Summary: "Ingress latency increased",
- DescriptionPattern: "HAProxy ingress average response latency increased by more than %s%%%s, compared to baseline (offset: %s)",
+ flowslatest.HealthRuleIngressHTTPLatencyTrend: {
+ Summary: "Ingress HTTP latency increase",
+ DescriptionPattern: "HAProxy ingress average HTTP response latency increased by more than %s%%%s, compared to baseline (offset: %s)",
},
}