-
Notifications
You must be signed in to change notification settings - Fork 0
/
gc_nova_stable-2023.2.patch
312 lines (297 loc) · 11.8 KB
/
gc_nova_stable-2023.2.patch
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
diff --git a/nova/conf/compute.py b/nova/conf/compute.py
index de2743d850..9cf7b3499c 100644
--- a/nova/conf/compute.py
+++ b/nova/conf/compute.py
@@ -878,7 +878,56 @@ Related options:
where ``VCPU`` resources should be allocated from.
* ``vcpu_pin_set``: A legacy option that this option partially replaces.
"""),
- cfg.BoolOpt('live_migration_wait_for_vif_plug',
+cfg.StrOpt('cpu_dynamic_set',
+ help="""
+Mask of host CPUs that can undergone dynamic performance changes, such as being strictly low powered.
+
+Possible values:
+
+* A comma-separated list of physical CPU numbers that instance VCPUs can be
+ allocated from. Each element should be either a single CPU number, a range of
+ CPU numbers, or a caret followed by a CPU number to be excluded from a
+ previous range. For example::
+
+ cpu_dedicated_set = "4-12,^8,15"
+
+Related options:
+
+* ``[compute] cpu_stable_set``: This is the parent option for defining the superset
+ where ``VCPU`` resources should be allocated from. ``[compute] cpu_dynamic_set`` option complements
+ this option by enabling physical CPU prioritization.
+"""),
+ cfg.StrOpt('cpu_stable_set',
+ help="""
+Mask of host CPUs that can be used for prioritized ``PCPU`` resources.
+
+Possible values:
+
+* A comma-separated list of physical CPU numbers that instance VCPUs can be
+ allocated from. Each element should be either a single CPU number, a range of
+ CPU numbers, or a caret followed by a CPU number to be excluded from a
+ previous range. For example::
+
+ cpu_dedicated_set = "4-12,^8,15"
+
+Related options:
+
+* ``[compute] cpu_dedicated_set``: This is the parent option for defining the superset
+ where ``VCPU`` resources should be allocated from. ``[compute] cpu_stable_set`` option complements
+ this option by enabling physical CPU prioritization.
+"""),
+ cfg.StrOpt('cpu_sleep_info_endpoint',
+ help="""
+An endpoint which provides information cpu ids that are at the sleep mode. Such cpus are then considered offline in
+openstack.
+
+Possible values:
+
+* A url to which this node can make a HTTP GET request without authentication (a security established resource). For example::
+
+ cpu_sleep_info_endpoint = "http://localhost:3000/gc-controller/is-asleep"
+"""),
+ cfg.BoolOpt('live_migration_wait_for_vif_plug',
default=True,
help="""
Determine if the source compute host should wait for a ``network-vif-plugged``
diff --git a/nova/objects/numa.py b/nova/objects/numa.py
index 36f51201b0..d443581221 100644
--- a/nova/objects/numa.py
+++ b/nova/objects/numa.py
@@ -98,6 +98,14 @@ class NUMACell(base.NovaObject):
self.pinned_cpus |= cpus
def unpin_cpus(self, cpus):
+ # if the core is offline, below check can fail, although there are cpus that are not a part of available cpus.
+ # For openstack-gc, we disable this check, assuming its a core that went to sleep. When prepping for production,
+ # an additional check to verify that state (call external endpoint to see if the cpu is actually green and went
+ # asleep) can be included.
+ if (cpus - self.pcpuset) and ((self.pinned_cpus & cpus) != cpus):
+ self.pinned_cpus -= cpus
+ return
+
if cpus - self.pcpuset:
raise exception.CPUUnpinningUnknown(requested=list(cpus),
available=list(self.pcpuset))
diff --git a/nova/scheduler/manager.py b/nova/scheduler/manager.py
index 620519d403..4c0e05f111 100644
--- a/nova/scheduler/manager.py
+++ b/nova/scheduler/manager.py
@@ -23,6 +23,7 @@ import collections
import copy
import random
+import requests
from keystoneauth1 import exceptions as ks_exc
from oslo_log import log as logging
import oslo_messaging as messaging
@@ -52,6 +53,8 @@ QUOTAS = quota.QUOTAS
HOST_MAPPING_EXISTS_WARNING = False
+CORE_USAGE = {}
+
class SchedulerManager(manager.Manager):
"""Chooses a host to run instances on.
@@ -702,6 +705,11 @@ class SchedulerManager(manager.Manager):
scheduling constraints for the request spec object and have been sorted
according to the weighers.
"""
+ # todo following url needs to be read from the configuration file.
+ # gc-emulation service tracks green core usage across nodes.
+ core_usages = requests.get(url='http://{GC_EMULATION_SERVICE_HOST}:{GC_EMULATION_SERVICE_PORT}/gc/core-usage').json()
+ global CORE_USAGE
+ CORE_USAGE['core_usage'] = core_usages
filtered_hosts = self.host_manager.get_filtered_hosts(host_states,
spec_obj, index)
diff --git a/nova/scheduler/weights/cpu.py b/nova/scheduler/weights/cpu.py
index 904a788b46..e2cdb6711c 100644
--- a/nova/scheduler/weights/cpu.py
+++ b/nova/scheduler/weights/cpu.py
@@ -21,10 +21,12 @@ stacking, you can set the 'cpu_weight_multiplier' option (by configuration
or aggregate metadata) to a negative number and the weighing has the opposite
effect of the default.
"""
+import math
import nova.conf
from nova.scheduler import utils
from nova.scheduler import weights
+from ..manager import CORE_USAGE
CONF = nova.conf.CONF
@@ -40,7 +42,51 @@ class CPUWeigher(weights.BaseHostWeigher):
def _weigh_object(self, host_state, weight_properties):
"""Higher weights win. We want spreading to be the default."""
- vcpus_free = (
- host_state.vcpus_total * host_state.cpu_allocation_ratio -
- host_state.vcpus_used)
- return vcpus_free
+ hints = weight_properties.scheduler_hints
+ if not (type in hints):
+ vcpus_free = (
+ host_state.vcpus_total * host_state.cpu_allocation_ratio -
+ host_state.vcpus_used)
+ return vcpus_free
+
+ # get criticality of the VM.
+ is_evct = hints['type'][0] == 'evictable'
+
+ # get green cores metrics.
+ host_ip = host_state.host_ip
+ core_usage = list(filter(lambda x: x['host-ip'] == str(host_ip), CORE_USAGE['core_usage']))
+ core_usage = core_usage[0]
+
+ # map host to the Euclidean space.
+ rcpus_avl = core_usage['reg-cores-avl']
+ rcpus_used = core_usage['reg-cores-usg']
+ gcpus_avl = core_usage['green-cores-avl']
+ gcpus_used = core_usage['green-cores-usg']
+ p_host = {
+ 'deficit': abs(rcpus_avl - rcpus_used) / rcpus_avl,
+ 'promise': abs(gcpus_avl - gcpus_used) / gcpus_avl
+ }
+
+ ''' Tuned reference values.
+ Values were evaluated from large-scale experiments, tuned to provide a balance between evictions and
+ harvest.
+ '''
+ ref_vals = {
+ 'p_ref_reg': {
+ 'promise': 1.0,
+ 'deficit': 0.0625
+ },
+ 'p_ref_evct': {
+ 'promise': 0.2,
+ 'deficit': 0.0
+ }
+ }
+
+ # calculate the distance between the host and the reference point.
+ p_ref = ref_vals['p_ref_evct'] if is_evct else ref_vals['p_ref_reg']
+ distance = math.sqrt(
+ math.pow(p_host['deficit'] - p_ref['deficit'], 2)
+ + math.pow(p_host['promise'] - p_ref['promise'], 2)
+ )
+ final_weight = 1 - distance
+ return final_weight
diff --git a/nova/virt/hardware.py b/nova/virt/hardware.py
index 2727cffee5..75eb4f33ff 100644
--- a/nova/virt/hardware.py
+++ b/nova/virt/hardware.py
@@ -76,6 +76,36 @@ def get_cpu_dedicated_set():
return cpu_ids
+def get_cpu_stable_set():
+ """Parse ``[compute] cpu_stable_set`` config.
+
+ :returns: A set of host CPU IDs that can be used for prioritized PCPU allocations.
+ """
+ if not CONF.compute.cpu_stable_set:
+ return None
+
+ cpu_ids = parse_cpu_spec(CONF.compute.cpu_stable_set)
+ if not cpu_ids:
+ msg = _("No CPUs available after parsing '[compute] "
+ "cpu_stable_set' config, %r")
+ raise exception.Invalid(msg % CONF.compute.cpu_stable_set)
+ return cpu_ids
+
+def get_cpu_dynamic_set():
+ """Parse ``[compute] cpu_dynamic_set`` config.
+
+ :returns: A set of host CPU IDs that can be used for prioritized PCPU allocations.
+ """
+ if not CONF.compute.cpu_dynamic_set:
+ return None
+
+ cpu_ids = parse_cpu_spec(CONF.compute.cpu_dynamic_set)
+ if not cpu_ids:
+ msg = _("No CPUs available after parsing '[compute] "
+ "cpu_dynamic_set' config, %r")
+ raise exception.Invalid(msg % CONF.compute.cpu_dynamic_set)
+ return cpu_ids
+
def get_cpu_dedicated_set_nozero():
"""Return cpu_dedicated_set without CPU0, if present"""
return (get_cpu_dedicated_set() or set()) - {0}
@@ -720,8 +750,29 @@ def _pack_instance_onto_cores(host_cell, instance_cell,
#
# For an instance_cores=[2, 3], usable_cores=[[0], [4]]
# vcpus_pinning=[(2, 0), (3, 4)]
- vcpus_pinning = list(zip(sorted(instance_cores),
- itertools.chain(*usable_cores)))
+
+ def get_priority_weight(val, high_p_list):
+ if val in high_p_list:
+ return 0
+ return 1
+
+ cpu_stable_set = get_cpu_stable_set()
+ usable_cores_list = list(itertools.chain(*usable_cores))
+ if len(cpu_stable_set) > 1:
+ usable_cores_list = sorted(usable_cores_list)
+ usable_cores_list = sorted(usable_cores_list, key=lambda x: get_priority_weight(x, cpu_stable_set))
+ msg = ("Using priority core pinning: high priority cores: "
+ "%(cpu_stable_set)s, priority ordered host cores: %(usable_cores_list)s")
+ msg_args = {
+ 'cpu_stable_set': cpu_stable_set,
+ 'usable_cores_list': usable_cores_list,
+ }
+ LOG.info(msg, msg_args)
+ vcpus_pinning = list(zip(
+ sorted(instance_cores),
+ usable_cores_list
+ ))
+
msg = ("Computed NUMA topology CPU pinning: usable pCPUs: "
"%(usable_cores)s, vCPUs mapping: %(vcpus_pinning)s")
msg_args = {
diff --git a/nova/virt/libvirt/host.py b/nova/virt/libvirt/host.py
index b57751093e..6e296ce23f 100644
--- a/nova/virt/libvirt/host.py
+++ b/nova/virt/libvirt/host.py
@@ -38,6 +38,7 @@ import queue
import socket
import threading
import typing as ty
+import requests as rq
from eventlet import greenio
from eventlet import greenthread
@@ -60,7 +61,7 @@ from nova.objects import fields
from nova.pci import utils as pci_utils
from nova import rpc
from nova import utils
-from nova.virt import event as virtevent
+from nova.virt import event as virtevent, hardware
from nova.virt.libvirt import config as vconfig
from nova.virt.libvirt import event as libvirtevent
from nova.virt.libvirt import guest as libvirt_guest
@@ -760,7 +761,29 @@ class Host(object):
if cpu_map[cpu]:
online_cpus.add(cpu)
- return online_cpus
+ asleep_cpus = self._get_sleeping_cpus()
+ os_online_cpus = online_cpus - asleep_cpus
+ if len(asleep_cpus) > 0:
+ LOG.info('Asleep cpus detected. %(libvirsh_online_cpus)s:%(asleep_cpus)s for '
+ 'os_online %(os_online_cpus)s',
+ {'libvirsh_online_cpus': online_cpus, 'asleep_cpus': asleep_cpus,
+ 'os_online_cpus': os_online_cpus})
+
+ return os_online_cpus
+
+ def _get_sleeping_cpus(self):
+ """Get the ids of the cores that are sleeping.
+
+ :returns: list of ids that are asleep.
+ """
+ endpoint = CONF.compute.cpu_sleep_info_endpoint
+ r = rq.get(url=endpoint)
+ data = r.json()
+ is_awake = data['is-awake']
+ sleeping_cpus = set()
+ if not is_awake:
+ sleeping_cpus = set(hardware.get_cpu_dynamic_set())
+ return sleeping_cpus
def get_cpu_model_names(self):
"""Get the cpu models based on host CPU arch