From ecba504974efac6ff2945f470e0f958594db6310 Mon Sep 17 00:00:00 2001 From: Yassine TIJANI Date: Thu, 14 Dec 2017 00:57:23 +0000 Subject: [PATCH] implementing predicates ordering --- .../algorithm/predicates/predicates.go | 45 ++++++++++++++-- .../pkg/scheduler/core/generic_scheduler.go | 53 ++++++++++--------- .../scheduler/core/generic_scheduler_test.go | 11 +++- plugin/pkg/scheduler/scheduler_test.go | 3 ++ 4 files changed, 83 insertions(+), 29 deletions(-) diff --git a/plugin/pkg/scheduler/algorithm/predicates/predicates.go b/plugin/pkg/scheduler/algorithm/predicates/predicates.go index 6a8d78ada1d15..8d458401f98e0 100644 --- a/plugin/pkg/scheduler/algorithm/predicates/predicates.go +++ b/plugin/pkg/scheduler/algorithm/predicates/predicates.go @@ -49,9 +49,25 @@ import ( ) const ( - MatchInterPodAffinity = "MatchInterPodAffinity" - CheckVolumeBinding = "CheckVolumeBinding" - + MatchInterPodAffinity = "MatchInterPodAffinity" + CheckVolumeBinding = "CheckVolumeBinding" + CheckNodeConditionPred = "CheckNodeCondition" + GeneralPred = "GeneralPredicates" + HostNamePred = "HostName" + PodFitsHostPortsPred = "PodFitsHostPorts" + MatchNodeSelectorPred = "MatchNodeSelector" + PodFitsResourcesPred = "PodFitsResources" + NoDiskConflictPred = "NoDiskConflict" + PodToleratesNodeTaintsPred = "PodToleratesNodeTaints" + PodToleratesNodeNoExecuteTaintsPred = "PodToleratesNodeNoExecuteTaints" + CheckNodeLabelPresencePred = "CheckNodeLabelPresence" + checkServiceAffinityPred = "checkServiceAffinity" + MaxEBSVolumeCountPred = "MaxEBSVolumeCount" + MaxGCEPDVolumeCountPred = "MaxGCEPDVolumeCount" + MaxAzureDiskVolumeCountPred = "MaxAzureDiskVolumeCount" + NoVolumeZoneConflictPred = "NoVolumeZoneConflict" + CheckNodeMemoryPressurePred = "CheckNodeMemoryPressure" + CheckNodeDiskPressure = "CheckNodeDiskPressure" // DefaultMaxGCEPDVolumes defines the maximum number of PD Volumes for GCE // GCE instances can have up to 16 PD volumes attached. 
DefaultMaxGCEPDVolumes = 16 @@ -79,6 +95,21 @@ const ( // For example: // https://github.com/kubernetes/kubernetes/blob/36a218e/plugin/pkg/scheduler/factory/factory.go#L422 +// IMPORTANT: this list contains the ordering of the predicates, if you develop a new predicate +// it is mandatory to add its name to this list. +// Otherwise it won't be processed, see generic_scheduler#podFitsOnNode() +// the order is based on the restrictiveness & complexity of predicates +// design doc: https://github.com/kubernetes/community/blob/master/contributors/design-proposals/scheduling/predicates-ordering.md +var ( +predicatesOrdering = []string{CheckNodeConditionPred, +GeneralPred, HostNamePred, PodFitsHostPortsPred, +MatchNodeSelectorPred, PodFitsResourcesPred, NoDiskConflictPred, +PodToleratesNodeTaintsPred, PodToleratesNodeNoExecuteTaintsPred, CheckNodeLabelPresencePred, +checkServiceAffinityPred, MaxEBSVolumeCountPred, MaxGCEPDVolumeCountPred, +MaxAzureDiskVolumeCountPred, CheckVolumeBinding, NoVolumeZoneConflictPred, +CheckNodeMemoryPressurePred, CheckNodeDiskPressure, MatchInterPodAffinity} +) + +// NodeInfo: Other types for predicate functions... 
type NodeInfo interface { GetNodeInfo(nodeID string) (*v1.Node, error) @@ -93,6 +124,14 @@ type CachedPersistentVolumeInfo struct { corelisters.PersistentVolumeLister } +func GetPredicatesOrdering() []string { + return predicatesOrdering +} + +func SetPredicatesOrdering(names []string) { + predicatesOrdering = names +} + func (c *CachedPersistentVolumeInfo) GetPersistentVolumeInfo(pvID string) (*v1.PersistentVolume, error) { return c.Get(pvID) } diff --git a/plugin/pkg/scheduler/core/generic_scheduler.go b/plugin/pkg/scheduler/core/generic_scheduler.go index 6505cbe58a4d2..ae2ccdfac3ef1 100644 --- a/plugin/pkg/scheduler/core/generic_scheduler.go +++ b/plugin/pkg/scheduler/core/generic_scheduler.go @@ -444,34 +444,37 @@ func podFitsOnNode( // TODO(bsalamat): consider using eCache and adding proper eCache invalidations // when pods are nominated or their nominations change. eCacheAvailable = eCacheAvailable && !podsAdded - for predicateKey, predicate := range predicateFuncs { - if eCacheAvailable { - // PredicateWithECache will return its cached predicate results. - fit, reasons, invalid = ecache.PredicateWithECache(pod.GetName(), info.Node().GetName(), predicateKey, equivalenceHash) - } - - // TODO(bsalamat): When one predicate fails and fit is false, why do we continue - // checking other predicates? - if !eCacheAvailable || invalid { - // we need to execute predicate functions since equivalence cache does not work - fit, reasons, err = predicate(pod, metaToUse, nodeInfoToUse) - if err != nil { - return false, []algorithm.PredicateFailureReason{}, err - } + for _, predicateKey := range predicates.GetPredicatesOrdering() { + //TODO (yastij) : compute average predicate restrictiveness to export it as prometheus metric + if predicate, exist := predicateFuncs[predicateKey]; exist { if eCacheAvailable { - // Store data to update eCache after this loop. 
- if res, exists := predicateResults[predicateKey]; exists { - res.Fit = res.Fit && fit - res.FailReasons = append(res.FailReasons, reasons...) - predicateResults[predicateKey] = res - } else { - predicateResults[predicateKey] = HostPredicate{Fit: fit, FailReasons: reasons} + // PredicateWithECache will return its cached predicate results. + fit, reasons, invalid = ecache.PredicateWithECache(pod.GetName(), info.Node().GetName(), predicateKey, equivalenceHash) + } + + // TODO(bsalamat): When one predicate fails and fit is false, why do we continue + // checking other predicates? + if !eCacheAvailable || invalid { + // we need to execute predicate functions since equivalence cache does not work + fit, reasons, err = predicate(pod, metaToUse, nodeInfoToUse) + if err != nil { + return false, []algorithm.PredicateFailureReason{}, err + } + if eCacheAvailable { + // Store data to update eCache after this loop. + if res, exists := predicateResults[predicateKey]; exists { + res.Fit = res.Fit && fit + res.FailReasons = append(res.FailReasons, reasons...) + predicateResults[predicateKey] = res + } else { + predicateResults[predicateKey] = HostPredicate{Fit: fit, FailReasons: reasons} + } } } - } - if !fit { - // eCache is available and valid, and predicates result is unfit, record the fail reasons - failedPredicates = append(failedPredicates, reasons...) + if !fit { + // eCache is available and valid, and predicates result is unfit, record the fail reasons + failedPredicates = append(failedPredicates, reasons...) 
+ } } } } diff --git a/plugin/pkg/scheduler/core/generic_scheduler_test.go b/plugin/pkg/scheduler/core/generic_scheduler_test.go index baa76414f09cc..1b1b9d15c6f33 100644 --- a/plugin/pkg/scheduler/core/generic_scheduler_test.go +++ b/plugin/pkg/scheduler/core/generic_scheduler_test.go @@ -42,6 +42,10 @@ import ( schedulertesting "k8s.io/kubernetes/plugin/pkg/scheduler/testing" ) +var ( + order = []string{"false", "true", "matches", "nopods", predicates.MatchInterPodAffinity} +) + func falsePredicate(pod *v1.Pod, meta algorithm.PredicateMetadata, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) { return false, []algorithm.PredicateFailureReason{algorithmpredicates.ErrFakePredicate}, nil } @@ -181,6 +185,7 @@ func TestSelectHost(t *testing.T) { } func TestGenericScheduler(t *testing.T) { + predicates.SetPredicatesOrdering(order) tests := []struct { name string predicates map[string]algorithm.FitPredicate @@ -401,6 +406,7 @@ func TestGenericScheduler(t *testing.T) { } func TestFindFitAllError(t *testing.T) { + predicates.SetPredicatesOrdering(order) nodes := []string{"3", "2", "1"} predicates := map[string]algorithm.FitPredicate{"true": truePredicate, "false": falsePredicate} nodeNameToInfo := map[string]*schedulercache.NodeInfo{ @@ -430,8 +436,9 @@ func TestFindFitAllError(t *testing.T) { } func TestFindFitSomeError(t *testing.T) { + predicates.SetPredicatesOrdering(order) nodes := []string{"3", "2", "1"} - predicates := map[string]algorithm.FitPredicate{"true": truePredicate, "match": matchesPredicate} + predicates := map[string]algorithm.FitPredicate{"true": truePredicate, "matches": matchesPredicate} pod := &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "1"}} nodeNameToInfo := map[string]*schedulercache.NodeInfo{ "3": schedulercache.NewNodeInfo(), @@ -741,6 +748,7 @@ var negPriority, lowPriority, midPriority, highPriority, veryHighPriority = int3 // TestSelectNodesForPreemption tests selectNodesForPreemption. 
This test assumes // that podsFitsOnNode works correctly and is tested separately. func TestSelectNodesForPreemption(t *testing.T) { + predicates.SetPredicatesOrdering(order) tests := []struct { name string predicates map[string]algorithm.FitPredicate @@ -879,6 +887,7 @@ func TestSelectNodesForPreemption(t *testing.T) { // TestPickOneNodeForPreemption tests pickOneNodeForPreemption. func TestPickOneNodeForPreemption(t *testing.T) { + predicates.SetPredicatesOrdering(order) tests := []struct { name string predicates map[string]algorithm.FitPredicate diff --git a/plugin/pkg/scheduler/scheduler_test.go b/plugin/pkg/scheduler/scheduler_test.go index c3c8ccab9a672..c017cc4219cf8 100644 --- a/plugin/pkg/scheduler/scheduler_test.go +++ b/plugin/pkg/scheduler/scheduler_test.go @@ -43,6 +43,8 @@ import ( "k8s.io/kubernetes/plugin/pkg/scheduler/volumebinder" ) +var order = []string{"VolumeBindingChecker"} + type fakeBinder struct { b func(binding *v1.Binding) error } @@ -637,6 +639,7 @@ func makePredicateError(failReason string) error { } func TestSchedulerWithVolumeBinding(t *testing.T) { + predicates.SetPredicatesOrdering(order) findErr := fmt.Errorf("find err") assumeErr := fmt.Errorf("assume err") bindErr := fmt.Errorf("bind err")