Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: [NPM] Reduce/Refactor Noisy NPM Logs #3468

Open
wants to merge 35 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
221479e
removed all logs from npm dataplane (except error/warning logs)
rayaisaiah Mar 3, 2025
d603453
removed all logs from npm controller (except error/warning logs)
rayaisaiah Mar 3, 2025
36eb2d6
restored logs that are ununused by current npm (v2)
rayaisaiah Mar 3, 2025
13501a7
removed test files
rayaisaiah Mar 3, 2025
6a2eda5
resolved comments
rayaisaiah Mar 4, 2025
a373499
keep log related to reconciling chain placement when the chain is not…
rayaisaiah Mar 4, 2025
2004053
added bootup logs back
rayaisaiah Mar 4, 2025
8a8eff1
Removed two more noisy logs
rayaisaiah Mar 4, 2025
ba84f91
Add loglevel config option when printing application insight logs
rayaisaiah Mar 5, 2025
b72bec7
Updated all non-error/warning logs to commented out and with a vap TODO
rayaisaiah Mar 5, 2025
821c9ef
fixed typo
rayaisaiah Mar 5, 2025
207dadb
small typo fix
rayaisaiah Mar 5, 2025
f208a60
updated configmap with loglevel
rayaisaiah Mar 6, 2025
3004ec6
updated default value
rayaisaiah Mar 6, 2025
d8ec874
added a default value for loglevel
rayaisaiah Mar 6, 2025
28c5322
fixed typo in json
rayaisaiah Mar 6, 2025
1449775
removed comma
rayaisaiah Mar 6, 2025
38cbdd9
changed loglevel to info in configmap
rayaisaiah Mar 6, 2025
cf04343
add a short sleep in TestNetPolInBackgroundSkipAddAfterRemove
rayaisaiah Mar 6, 2025
08697eb
test remove dataplane changes to see if race condition fixes
rayaisaiah Mar 6, 2025
394dd0b
Revert "test remove dataplane changes to see if race condition fixes"
rayaisaiah Mar 6, 2025
449c2af
test
rayaisaiah Mar 6, 2025
a69d632
Revert "test"
rayaisaiah Mar 6, 2025
1aef051
test
rayaisaiah Mar 6, 2025
dd0869f
update dataplane to test if changes are flagged in race
rayaisaiah Mar 6, 2025
34c3aea
added stop channels to unit tests to avoid race condtiions
rayaisaiah Mar 7, 2025
52b8247
add non noisy logs back
rayaisaiah Mar 7, 2025
0e91284
increased time
rayaisaiah Mar 7, 2025
b416f07
revert time change after RunPeriodicTasks
rayaisaiah Mar 7, 2025
f92231b
test with 1000 seconds
rayaisaiah Mar 7, 2025
1e41f52
5000 milliseconds
rayaisaiah Mar 7, 2025
0a2d1ba
tweaked the delay
rayaisaiah Mar 7, 2025
117c5d8
update to 1500 for defer
rayaisaiah Mar 7, 2025
cabf807
increased to 1500
rayaisaiah Mar 7, 2025
4e88cd2
increase to 2000
rayaisaiah Mar 7, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion npm/cmd/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ func start(config npmconfig.Config, flags npmconfig.Flags) error {
)
}

err = metrics.CreateTelemetryHandle(config.NPMVersion(), version, npm.GetAIMetadata())
err = metrics.CreateTelemetryHandle(config.NPMVersion(), version, npm.GetAIMetadata(), npmconfig.DefaultConfig.LogLevel)
if err != nil {
klog.Infof("CreateTelemetryHandle failed with error %v. AITelemetry is not initialized.", err)
}
Expand Down
2 changes: 1 addition & 1 deletion npm/cmd/start_daemon.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ func startDaemon(config npmconfig.Config) error {
return fmt.Errorf("failed to create dataplane: %w", err)
}

err = metrics.CreateTelemetryHandle(config.NPMVersion(), version, npm.GetAIMetadata())
err = metrics.CreateTelemetryHandle(config.NPMVersion(), version, npm.GetAIMetadata(), npmconfig.DefaultConfig.LogLevel)
if err != nil {
klog.Infof("CreateTelemetryHandle failed with error %v. AITelemetry is not initialized.", err)
}
Expand Down
2 changes: 1 addition & 1 deletion npm/cmd/start_server.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ func startControlplane(config npmconfig.Config, flags npmconfig.Flags) error {
return fmt.Errorf("failed to create NPM controlplane manager: %w", err)
}

err = metrics.CreateTelemetryHandle(config.NPMVersion(), version, npm.GetAIMetadata())
err = metrics.CreateTelemetryHandle(config.NPMVersion(), version, npm.GetAIMetadata(), npmconfig.DefaultConfig.LogLevel)
if err != nil {
klog.Infof("CreateTelemetryHandle failed with error %v. AITelemetry is not initialized.", err)
}
Expand Down
4 changes: 4 additions & 0 deletions npm/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ var DefaultConfig = Config{
NetPolInBackground: true,
EnableNPMLite: false,
},

// Setting LogLevel to "info" by default. Set to "debug" to get application insight logs (creates a listener that outputs diagnosticMessageWriter logs).
LogLevel: "debug",
}

type GrpcServerConfig struct {
Expand Down Expand Up @@ -82,6 +85,7 @@ type Config struct {
MaxPendingNetPols int `json:"MaxPendingNetPols,omitempty"`
NetPolInvervalInMilliseconds int `json:"NetPolInvervalInMilliseconds,omitempty"`
Toggles Toggles `json:"Toggles,omitempty"`
LogLevel string `json:"LogLevel,omitempty"`
}

type Toggles struct {
Expand Down
8 changes: 6 additions & 2 deletions npm/metrics/ai-utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package metrics
import (
"fmt"
"strconv"
"strings"
"time"

"github.com/Azure/azure-container-networking/aitelemetry"
Expand All @@ -21,15 +22,18 @@ var (
)

// CreateTelemetryHandle creates a handler to initialize AI telemetry
func CreateTelemetryHandle(npmVersionNum int, imageVersion, aiMetadata string) error {
func CreateTelemetryHandle(npmVersionNum int, imageVersion, aiMetadata, logLevel string) error {
npmVersion = npmVersionNum
debugMode := strings.EqualFold(logLevel, "debug")
klog.Infof("LogLevel is %s. Debugmode is set to: %v.", logLevel, debugMode)

aiConfig := aitelemetry.AIConfig{
AppName: util.AzureNpmFlag,
AppVersion: imageVersion,
BatchSize: util.BatchSizeInBytes,
BatchInterval: util.BatchIntervalInSecs,
RefreshTimeout: util.RefreshTimeoutInSecs,
DebugMode: util.DebugMode,
DebugMode: debugMode,
GetEnvRetryCount: util.GetEnvRetryCount,
GetEnvRetryWaitTimeInSecs: util.GetEnvRetryWaitTimeInSecs,
}
Expand Down
15 changes: 0 additions & 15 deletions npm/pkg/controlplane/controllers/v2/namespaceController.go
Original file line number Diff line number Diff line change
Expand Up @@ -164,14 +164,10 @@ func (nsc *NamespaceController) Run(stopCh <-chan struct{}) {
defer utilruntime.HandleCrash()
defer nsc.workqueue.ShutDown()

klog.Info("Starting Namespace controller\n")
klog.Info("Starting workers")
// Launch workers to process namespace resources
go wait.Until(nsc.runWorker, time.Second, stopCh)

klog.Info("Started workers")
<-stopCh
klog.Info("Shutting down workers")
}

func (nsc *NamespaceController) runWorker() {
Expand Down Expand Up @@ -209,7 +205,6 @@ func (nsc *NamespaceController) processNextWorkItem() bool {
// Finally, if no error occurs we Forget this item so it does not
// get queued again until another change happens.
nsc.workqueue.Forget(obj)
klog.Infof("Successfully synced '%s'", key)
return nil
}(obj)
if err != nil {
Expand Down Expand Up @@ -321,7 +316,6 @@ func (nsc *NamespaceController) syncAddNamespace(nsObj *corev1.Namespace) error
// Add the namespace to its label's ipset list.
for nsLabelKey, nsLabelVal := range nsObj.ObjectMeta.Labels {
nsLabelKeyValue := util.GetIpSetFromLabelKV(nsLabelKey, nsLabelVal)
klog.Infof("Adding namespace %s to ipset list %s and %s", nsObj.ObjectMeta.Name, nsLabelKey, nsLabelKeyValue)
labelIPSets := []*ipsets.IPSetMetadata{
ipsets.NewIPSetMetadata(nsLabelKey, ipsets.KeyLabelOfNamespace),
ipsets.NewIPSetMetadata(nsLabelKeyValue, ipsets.KeyValueLabelOfNamespace),
Expand All @@ -344,7 +338,6 @@ func (nsc *NamespaceController) syncAddNamespace(nsObj *corev1.Namespace) error
func (nsc *NamespaceController) syncUpdateNamespace(newNsObj *corev1.Namespace) (metrics.OperationKind, error) {
var err error
newNsName, newNsLabel := newNsObj.ObjectMeta.Name, newNsObj.ObjectMeta.Labels
klog.Infof("NAMESPACE UPDATING:\n namespace: [%s/%v]", newNsName, newNsLabel)

// If previous syncAddNamespace failed for some reasons
// before caching npm namespace object or syncUpdateNamespace is called due to namespace creation event,
Expand Down Expand Up @@ -373,7 +366,6 @@ func (nsc *NamespaceController) syncUpdateNamespace(newNsObj *corev1.Namespace)
}
toBeRemoved := []*ipsets.IPSetMetadata{ipsets.NewIPSetMetadata(newNsName, ipsets.Namespace)}

klog.Infof("Deleting namespace %s from ipset list %s", newNsName, nsLabelVal)
if err = nsc.dp.RemoveFromList(labelSet, toBeRemoved); err != nil {
metrics.SendErrorLogAndMetric(util.NSID, "[UpdateNamespace] Error: failed to delete namespace %s from ipset list %s with err: %v", newNsName, nsLabelVal, err)
return metrics.UpdateOp, fmt.Errorf("failed to remove from list during sync update namespace with err %w", err)
Expand All @@ -389,8 +381,6 @@ func (nsc *NamespaceController) syncUpdateNamespace(newNsObj *corev1.Namespace)

// Add the namespace to its label's ipset list.
for _, nsLabelVal := range addToIPSets {
klog.Infof("Adding namespace %s to ipset list %s", newNsName, nsLabelVal)

var labelSet []*ipsets.IPSetMetadata
if util.IsKeyValueLabelSetName(nsLabelVal) {
labelSet = []*ipsets.IPSetMetadata{ipsets.NewIPSetMetadata(nsLabelVal, ipsets.KeyValueLabelOfNamespace)}
Expand Down Expand Up @@ -422,29 +412,24 @@ func (nsc *NamespaceController) syncUpdateNamespace(newNsObj *corev1.Namespace)

// cleanDeletedNamespace handles deleting namespace from ipset.
func (nsc *NamespaceController) cleanDeletedNamespace(cachedNsKey string) error {
klog.Infof("NAMESPACE DELETING: [%s]", cachedNsKey)
cachedNsObj, exists := nsc.npmNamespaceCache.NsMap[cachedNsKey]
if !exists {
return nil
}

klog.Infof("NAMESPACE DELETING cached labels: [%s/%v]", cachedNsKey, cachedNsObj.LabelsMap)

var err error
toBeDeletedNs := []*ipsets.IPSetMetadata{ipsets.NewIPSetMetadata(cachedNsKey, ipsets.Namespace)}
// Delete the namespace from its label's ipset list.
for nsLabelKey, nsLabelVal := range cachedNsObj.LabelsMap {

labelKey := ipsets.NewIPSetMetadata(nsLabelKey, ipsets.KeyLabelOfNamespace)
klog.Infof("Deleting namespace %s from ipset list %s", cachedNsKey, labelKey)
if err = nsc.dp.RemoveFromList(labelKey, toBeDeletedNs); err != nil {
metrics.SendErrorLogAndMetric(util.NSID, "[DeleteNamespace] Error: failed to delete namespace %s from ipset list %s with err: %v", cachedNsKey, labelKey, err)
return fmt.Errorf("failed to clean deleted namespace when deleting key with err %w", err)
}

labelIpsetName := util.GetIpSetFromLabelKV(nsLabelKey, nsLabelVal)
labelKeyValue := ipsets.NewIPSetMetadata(labelIpsetName, ipsets.KeyValueLabelOfNamespace)
klog.Infof("Deleting namespace %s from ipset list %s", cachedNsKey, labelIpsetName)
if err = nsc.dp.RemoveFromList(labelKeyValue, toBeDeletedNs); err != nil {
metrics.SendErrorLogAndMetric(util.NSID, "[DeleteNamespace] Error: failed to delete namespace %s from ipset list %s with err: %v", cachedNsKey, labelIpsetName, err)
return fmt.Errorf("failed to clean deleted namespace when deleting key value with err %w", err)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -148,12 +148,9 @@ func (c *NetworkPolicyController) Run(stopCh <-chan struct{}) {
defer utilruntime.HandleCrash()
defer c.workqueue.ShutDown()

klog.Infof("Starting Network Policy worker")
go wait.Until(c.runWorker, time.Second, stopCh)

klog.Infof("Started Network Policy worker")
<-stopCh
klog.Info("Shutting down Network Policy workers")
}

func (c *NetworkPolicyController) runWorker() {
Expand Down Expand Up @@ -190,7 +187,6 @@ func (c *NetworkPolicyController) processNextWorkItem() bool {
// Finally, if no error occurs we Forget this item so it does not
// get queued again until another change happens.
c.workqueue.Forget(obj)
klog.Infof("Successfully synced '%s'", key)
return nil
}(obj)
if err != nil {
Expand Down
24 changes: 0 additions & 24 deletions npm/pkg/controlplane/controllers/v2/podController.go
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,6 @@ func (c *PodController) deletePod(obj interface{}) {
}
}

klog.Infof("[POD DELETE EVENT] for %s in %s", podObj.Name, podObj.Namespace)
if isHostNetworkPod(podObj) {
return
}
Expand All @@ -191,12 +190,9 @@ func (c *PodController) Run(stopCh <-chan struct{}) {
defer utilruntime.HandleCrash()
defer c.workqueue.ShutDown()

klog.Infof("Starting Pod worker")
go wait.Until(c.runWorker, time.Second, stopCh)

klog.Info("Started Pod workers")
<-stopCh
klog.Info("Shutting down Pod workers")
}

func (c *PodController) runWorker() {
Expand Down Expand Up @@ -234,7 +230,6 @@ func (c *PodController) processNextWorkItem() bool {
// Finally, if no error occurs we Forget this item so it does not
// get queued again until another change happens.
c.workqueue.Forget(obj)
klog.Infof("Successfully synced '%s'", key)
return nil
}(obj)
if err != nil {
Expand Down Expand Up @@ -347,9 +342,6 @@ func (c *PodController) syncPod(key string) error {
}

func (c *PodController) syncAddedPod(podObj *corev1.Pod) error {
klog.Infof("POD CREATING: [%s/%s/%s/%s/%+v/%s]", string(podObj.GetUID()), podObj.Namespace,
podObj.Name, podObj.Spec.NodeName, podObj.Labels, podObj.Status.PodIP)

if !util.IsIPV4(podObj.Status.PodIP) {
msg := fmt.Sprintf("[syncAddedPod] warning: ADD POD [%s/%s/%s/%+v] ignored as the PodIP is not valid ipv4 address. ip: [%s]", podObj.Namespace,
podObj.Name, podObj.Spec.NodeName, podObj.Labels, podObj.Status.PodIP)
Expand All @@ -369,7 +361,6 @@ func (c *PodController) syncAddedPod(podObj *corev1.Pod) error {
namespaceSet := []*ipsets.IPSetMetadata{ipsets.NewIPSetMetadata(podObj.Namespace, ipsets.Namespace)}

// Add the pod ip information into namespace's ipset.
klog.Infof("Adding pod %s (ip : %s) to ipset %s", podKey, podObj.Status.PodIP, podObj.Namespace)
if err = c.dp.AddToSets(namespaceSet, podMetadata); err != nil {
return fmt.Errorf("[syncAddedPod] Error: failed to add pod to namespace ipset with err: %w", err)
}
Expand All @@ -387,16 +378,13 @@ func (c *PodController) syncAddedPod(podObj *corev1.Pod) error {
targetSetKeyValue := ipsets.NewIPSetMetadata(labelKeyValue, ipsets.KeyValueLabelOfPod)
allSets := []*ipsets.IPSetMetadata{targetSetKey, targetSetKeyValue}

klog.Infof("Creating ipsets %+v and %+v if they do not exist", targetSetKey, targetSetKeyValue)
klog.Infof("Adding pod %s (ip : %s) to ipset %s and %s", podKey, npmPodObj.PodIP, labelKey, labelKeyValue)
if err = c.dp.AddToSets(allSets, podMetadata); err != nil {
return fmt.Errorf("[syncAddedPod] Error: failed to add pod to label ipset with err: %w", err)
}
npmPodObj.AppendLabels(map[string]string{labelKey: labelVal}, common.AppendToExistingLabels)
}

// Add pod's named ports from its ipset.
klog.Infof("Adding named port ipsets")
containerPorts := common.GetContainerPortList(podObj)
if err = c.manageNamedPortIpsets(containerPorts, podKey, npmPodObj.PodIP, podObj.Spec.NodeName, addNamedPort); err != nil {
return fmt.Errorf("[syncAddedPod] Error: failed to add pod to named port ipset with err: %w", err)
Expand Down Expand Up @@ -430,7 +418,6 @@ func (c *PodController) syncAddAndUpdatePod(newPodObj *corev1.Pod) (metrics.Oper
c.npmNamespaceCache.Unlock()

cachedNpmPod, exists := c.podMap[podKey]
klog.Infof("[syncAddAndUpdatePod] updating Pod with key %s", podKey)
// No cached npmPod exists. start adding the pod in a cache
if !exists {
return metrics.CreateOp, c.syncAddedPod(newPodObj)
Expand All @@ -446,15 +433,11 @@ func (c *PodController) syncAddAndUpdatePod(newPodObj *corev1.Pod) (metrics.Oper
// NPM should clean up existing references of cached pod obj and its IP.
// then, re-add new pod obj.
if cachedNpmPod.PodIP != newPodObj.Status.PodIP {
klog.Infof("Pod (Namespace:%s, Name:%s, newUid:%s), has cachedPodIp:%s which is different from PodIp:%s",
newPodObj.Namespace, newPodObj.Name, string(newPodObj.UID), cachedNpmPod.PodIP, newPodObj.Status.PodIP)

klog.Infof("Deleting cached Pod with key:%s first due to IP Mistmatch", podKey)
if er := c.cleanUpDeletedPod(podKey); er != nil {
return metrics.UpdateOp, er
}

klog.Infof("Adding back Pod with key:%s after IP Mistmatch", podKey)
return metrics.UpdateOp, c.syncAddedPod(newPodObj)
}

Expand All @@ -468,7 +451,6 @@ func (c *PodController) syncAddAndUpdatePod(newPodObj *corev1.Pod) (metrics.Oper
cachedPodMetadata := dataplane.NewPodMetadata(podKey, cachedNpmPod.PodIP, newPodMetadata.NodeName)
// Delete the pod from its label's ipset.
for _, removeIPSetName := range deleteFromIPSets {
klog.Infof("Deleting pod %s (ip : %s) from ipset %s", podKey, cachedNpmPod.PodIP, removeIPSetName)

var toRemoveSet *ipsets.IPSetMetadata
if util.IsKeyValueLabelSetName(removeIPSetName) {
Expand All @@ -490,16 +472,13 @@ func (c *PodController) syncAddAndUpdatePod(newPodObj *corev1.Pod) (metrics.Oper
// Add the pod to its label's ipset.
for _, addIPSetName := range addToIPSets {

klog.Infof("Creating ipset %s if it doesn't already exist", addIPSetName)

var toAddSet *ipsets.IPSetMetadata
if util.IsKeyValueLabelSetName(addIPSetName) {
toAddSet = ipsets.NewIPSetMetadata(addIPSetName, ipsets.KeyValueLabelOfPod)
} else {
toAddSet = ipsets.NewIPSetMetadata(addIPSetName, ipsets.KeyLabelOfPod)
}

klog.Infof("Adding pod %s (ip : %s) to ipset %s", podKey, newPodObj.Status.PodIP, addIPSetName)
if err = c.dp.AddToSets([]*ipsets.IPSetMetadata{toAddSet}, newPodMetadata); err != nil {
return metrics.UpdateOp, fmt.Errorf("[syncAddAndUpdatePod] Error: failed to add pod to label ipset with err: %w", err)
}
Expand Down Expand Up @@ -542,7 +521,6 @@ func (c *PodController) syncAddAndUpdatePod(newPodObj *corev1.Pod) (metrics.Oper

// cleanUpDeletedPod cleans up all ipset associated with this pod
func (c *PodController) cleanUpDeletedPod(cachedNpmPodKey string) error {
klog.Infof("[cleanUpDeletedPod] deleting Pod with key %s", cachedNpmPodKey)
// If cached npmPod does not exist, return nil
cachedNpmPod, exist := c.podMap[cachedNpmPodKey]
if !exist {
Expand All @@ -562,7 +540,6 @@ func (c *PodController) cleanUpDeletedPod(cachedNpmPodKey string) error {
// Get lists of podLabelKey and podLabelKey + podLavelValue ,and then start deleting them from ipsets
for labelKey, labelVal := range cachedNpmPod.Labels {
labelKeyValue := util.GetIpSetFromLabelKV(labelKey, labelVal)
klog.Infof("Deleting pod %s (ip : %s) from ipsets %s and %s", cachedNpmPodKey, cachedNpmPod.PodIP, labelKey, labelKeyValue)
if err = c.dp.RemoveFromSets(
[]*ipsets.IPSetMetadata{
ipsets.NewIPSetMetadata(labelKey, ipsets.KeyLabelOfPod),
Expand Down Expand Up @@ -595,7 +572,6 @@ func (c *PodController) manageNamedPortIpsets(portList []corev1.ContainerPort, p
return nil
}
for _, port := range portList {
klog.Infof("port is %+v", port)
if port.Name == "" {
continue
}
Expand Down
Loading
Loading