extensions/gc_nova_stable-2023.2.patch

diff --git a/nova/conf/compute.py b/nova/conf/compute.py
index de2743d850..9cf7b3499c 100644
--- a/nova/conf/compute.py
+++ b/nova/conf/compute.py
@@ -878,7 +878,56 @@ Related options:
   where ``VCPU`` resources should be allocated from.
 * ``vcpu_pin_set``: A legacy option that this option partially replaces.
 """),
-    cfg.BoolOpt('live_migration_wait_for_vif_plug',
+cfg.StrOpt('cpu_dynamic_set',
+        help="""
+Mask of host CPUs that can undergone dynamic performance changes, such as being strictly low powered.
+
+Possible values:
+
+* A comma-separated list of physical CPU numbers that instance VCPUs can be
+  allocated from. Each element should be either a single CPU number, a range of
+  CPU numbers, or a caret followed by a CPU number to be excluded from a
+  previous range. For example::
+
+    cpu_dedicated_set = "4-12,^8,15"
+
+Related options:
+
+* ``[compute] cpu_stable_set``: This is the parent option for defining the superset
+  where ``VCPU`` resources should be allocated from. ``[compute] cpu_dynamic_set`` option complements 
+  this option by enabling physical CPU prioritization.
+"""),
+   cfg.StrOpt('cpu_stable_set',
+        help="""
+Mask of host CPUs that can be used for prioritized ``PCPU`` resources.
+
+Possible values:
+
+* A comma-separated list of physical CPU numbers that instance VCPUs can be
+  allocated from. Each element should be either a single CPU number, a range of
+  CPU numbers, or a caret followed by a CPU number to be excluded from a
+  previous range. For example::
+
+    cpu_dedicated_set = "4-12,^8,15"
+
+Related options:
+
+* ``[compute] cpu_dedicated_set``: This is the parent option for defining the superset
+  where ``VCPU`` resources should be allocated from. ``[compute] cpu_stable_set`` option complements 
+  this option by enabling physical CPU prioritization.
+"""),
+   cfg.StrOpt('cpu_sleep_info_endpoint',
+        help="""
+An endpoint which provides information cpu ids that are at the sleep mode. Such cpus are then considered offline in 
+openstack.
+
+Possible values:
+
+* A url to which this node can make a HTTP GET request without authentication (a security established resource). For example::
+
+    cpu_sleep_info_endpoint = "http://localhost:3000/gc-controller/is-asleep"
+"""),
+   cfg.BoolOpt('live_migration_wait_for_vif_plug',
         default=True,
         help="""
 Determine if the source compute host should wait for a ``network-vif-plugged``
diff --git a/nova/objects/numa.py b/nova/objects/numa.py
index 36f51201b0..d443581221 100644
--- a/nova/objects/numa.py
+++ b/nova/objects/numa.py
@@ -98,6 +98,14 @@ class NUMACell(base.NovaObject):
         self.pinned_cpus |= cpus
 
     def unpin_cpus(self, cpus):
+        # if the core is offline, below check can fail, although there are cpus that are not a part of available cpus.
+        # For openstack-gc, we disable this check, assuming its a core that went to sleep. When prepping for production,
+        # an additional check to verify that state (call external endpoint to see if the cpu is actually green and went
+        # asleep) can be included.
+        if (cpus - self.pcpuset) and ((self.pinned_cpus & cpus) != cpus):
+            self.pinned_cpus -= cpus
+            return
+
         if cpus - self.pcpuset:
             raise exception.CPUUnpinningUnknown(requested=list(cpus),
                                                 available=list(self.pcpuset))
diff --git a/nova/scheduler/manager.py b/nova/scheduler/manager.py
index 620519d403..4c0e05f111 100644
--- a/nova/scheduler/manager.py
+++ b/nova/scheduler/manager.py
@@ -23,6 +23,7 @@ import collections
 import copy
 import random
 
+import requests
 from keystoneauth1 import exceptions as ks_exc
 from oslo_log import log as logging
 import oslo_messaging as messaging
@@ -52,6 +53,8 @@ QUOTAS = quota.QUOTAS
 
 HOST_MAPPING_EXISTS_WARNING = False
 
+CORE_USAGE = {}
+
 
 class SchedulerManager(manager.Manager):
     """Chooses a host to run instances on.
@@ -702,6 +705,11 @@ class SchedulerManager(manager.Manager):
         scheduling constraints for the request spec object and have been sorted
         according to the weighers.
         """
+        # todo following url needs to be read from the configuration file.
+        # gc-emulation service tracks green core usage across nodes.
+        core_usages = requests.get(url='http://{GC_EMULATION_SERVICE_HOST}:{GC_EMULATION_SERVICE_PORT}/gc/core-usage').json()
+        global CORE_USAGE
+        CORE_USAGE['core_usage'] = core_usages
         filtered_hosts = self.host_manager.get_filtered_hosts(host_states,
             spec_obj, index)
 
diff --git a/nova/scheduler/weights/cpu.py b/nova/scheduler/weights/cpu.py
index 904a788b46..e2cdb6711c 100644
--- a/nova/scheduler/weights/cpu.py
+++ b/nova/scheduler/weights/cpu.py
@@ -21,10 +21,12 @@ stacking, you can set the 'cpu_weight_multiplier' option (by configuration
 or aggregate metadata) to a negative number and the weighing has the opposite
 effect of the default.
 """
+import math
 
 import nova.conf
 from nova.scheduler import utils
 from nova.scheduler import weights
+from ..manager import CORE_USAGE
 
 CONF = nova.conf.CONF
 
@@ -40,7 +42,51 @@ class CPUWeigher(weights.BaseHostWeigher):
 
     def _weigh_object(self, host_state, weight_properties):
         """Higher weights win.  We want spreading to be the default."""
-        vcpus_free = (
-            host_state.vcpus_total * host_state.cpu_allocation_ratio -
-            host_state.vcpus_used)
-        return vcpus_free
+        hints = weight_properties.scheduler_hints
+        if not (type in hints):
+            vcpus_free = (
+                    host_state.vcpus_total * host_state.cpu_allocation_ratio -
+                    host_state.vcpus_used)
+            return vcpus_free
+
+        # get criticality of the VM.
+        is_evct = hints['type'][0] == 'evictable'
+
+        # get green cores metrics.
+        host_ip = host_state.host_ip
+        core_usage = list(filter(lambda x: x['host-ip'] == str(host_ip), CORE_USAGE['core_usage']))
+        core_usage = core_usage[0]
+
+        # map host to the Euclidean space.
+        rcpus_avl = core_usage['reg-cores-avl']
+        rcpus_used = core_usage['reg-cores-usg']
+        gcpus_avl = core_usage['green-cores-avl']
+        gcpus_used = core_usage['green-cores-usg']
+        p_host = {
+            'deficit': abs(rcpus_avl - rcpus_used) / rcpus_avl,
+            'promise': abs(gcpus_avl - gcpus_used) / gcpus_avl
+        }
+
+        ''' Tuned reference values.
+        Values were evaluated from large-scale experiments, tuned to provide a balance between evictions and 
+        harvest.
+        '''
+        ref_vals = {
+            'p_ref_reg': {
+                'promise': 1.0,
+                'deficit': 0.0625
+            },
+            'p_ref_evct': {
+                'promise': 0.2,
+                'deficit': 0.0
+            }
+        }
+
+        # calculate the distance between the host and the reference point.
+        p_ref = ref_vals['p_ref_evct'] if is_evct else ref_vals['p_ref_reg']
+        distance = math.sqrt(
+            math.pow(p_host['deficit'] - p_ref['deficit'], 2)
+            + math.pow(p_host['promise'] - p_ref['promise'], 2)
+        )
+        final_weight = 1 - distance
+        return final_weight
diff --git a/nova/virt/hardware.py b/nova/virt/hardware.py
index 2727cffee5..75eb4f33ff 100644
--- a/nova/virt/hardware.py
+++ b/nova/virt/hardware.py
@@ -76,6 +76,36 @@ def get_cpu_dedicated_set():
     return cpu_ids
 
 
+def get_cpu_stable_set():
+    """Parse ``[compute] cpu_stable_set`` config.
+
+    :returns: A set of host CPU IDs that can be used for prioritized PCPU allocations.
+    """
+    if not CONF.compute.cpu_stable_set:
+        return None
+
+    cpu_ids = parse_cpu_spec(CONF.compute.cpu_stable_set)
+    if not cpu_ids:
+        msg = _("No CPUs available after parsing '[compute] "
+                "cpu_stable_set' config, %r")
+        raise exception.Invalid(msg % CONF.compute.cpu_stable_set)
+    return cpu_ids
+
+def get_cpu_dynamic_set():
+    """Parse ``[compute] cpu_dynamic_set`` config.
+
+    :returns: A set of host CPU IDs that can be used for prioritized PCPU allocations.
+    """
+    if not CONF.compute.cpu_dynamic_set:
+        return None
+
+    cpu_ids = parse_cpu_spec(CONF.compute.cpu_dynamic_set)
+    if not cpu_ids:
+        msg = _("No CPUs available after parsing '[compute] "
+                "cpu_dynamic_set' config, %r")
+        raise exception.Invalid(msg % CONF.compute.cpu_dynamic_set)
+    return cpu_ids
+
 def get_cpu_dedicated_set_nozero():
     """Return cpu_dedicated_set without CPU0, if present"""
     return (get_cpu_dedicated_set() or set()) - {0}
@@ -720,8 +750,29 @@ def _pack_instance_onto_cores(host_cell, instance_cell,
         #
         # For an instance_cores=[2, 3], usable_cores=[[0], [4]]
         # vcpus_pinning=[(2, 0), (3, 4)]
-        vcpus_pinning = list(zip(sorted(instance_cores),
-                                 itertools.chain(*usable_cores)))
+
+        def get_priority_weight(val, high_p_list):
+            if val in high_p_list:
+                return 0
+            return 1
+
+        cpu_stable_set = get_cpu_stable_set()
+        usable_cores_list = list(itertools.chain(*usable_cores))
+        if len(cpu_stable_set) > 1:
+            usable_cores_list = sorted(usable_cores_list)
+            usable_cores_list = sorted(usable_cores_list, key=lambda x: get_priority_weight(x, cpu_stable_set))
+            msg = ("Using priority core pinning: high priority cores: "
+                   "%(cpu_stable_set)s, priority ordered host cores: %(usable_cores_list)s")
+            msg_args = {
+                'cpu_stable_set': cpu_stable_set,
+                'usable_cores_list': usable_cores_list,
+            }
+            LOG.info(msg, msg_args)
+        vcpus_pinning = list(zip(
+            sorted(instance_cores),
+            usable_cores_list
+        ))
+
         msg = ("Computed NUMA topology CPU pinning: usable pCPUs: "
                "%(usable_cores)s, vCPUs mapping: %(vcpus_pinning)s")
         msg_args = {
diff --git a/nova/virt/libvirt/host.py b/nova/virt/libvirt/host.py
index b57751093e..6e296ce23f 100644
--- a/nova/virt/libvirt/host.py
+++ b/nova/virt/libvirt/host.py
@@ -38,6 +38,7 @@ import queue
 import socket
 import threading
 import typing as ty
+import requests as rq
 
 from eventlet import greenio
 from eventlet import greenthread
@@ -60,7 +61,7 @@ from nova.objects import fields
 from nova.pci import utils as pci_utils
 from nova import rpc
 from nova import utils
-from nova.virt import event as virtevent
+from nova.virt import event as virtevent, hardware
 from nova.virt.libvirt import config as vconfig
 from nova.virt.libvirt import event as libvirtevent
 from nova.virt.libvirt import guest as libvirt_guest
@@ -760,7 +761,29 @@ class Host(object):
             if cpu_map[cpu]:
                 online_cpus.add(cpu)
 
-        return online_cpus
+        asleep_cpus = self._get_sleeping_cpus()
+        os_online_cpus = online_cpus - asleep_cpus
+        if len(asleep_cpus) > 0:
+            LOG.info('Asleep cpus detected. %(libvirsh_online_cpus)s:%(asleep_cpus)s for '
+                     'os_online %(os_online_cpus)s',
+                     {'libvirsh_online_cpus': online_cpus, 'asleep_cpus': asleep_cpus,
+                      'os_online_cpus': os_online_cpus})
+
+        return os_online_cpus
+
+    def _get_sleeping_cpus(self):
+        """Get the ids of the cores that are sleeping.
+
+        :returns: list of ids that are asleep.
+        """
+        endpoint = CONF.compute.cpu_sleep_info_endpoint
+        r = rq.get(url=endpoint)
+        data = r.json()
+        is_awake = data['is-awake']
+        sleeping_cpus = set()
+        if not is_awake:
+            sleeping_cpus = set(hardware.get_cpu_dynamic_set())
+        return sleeping_cpus
 
     def get_cpu_model_names(self):
         """Get the cpu models based on host CPU arch