diff --git a/etc/qmemman.conf b/etc/qmemman.conf index e86c158f5..0175ad97a 100644 --- a/etc/qmemman.conf +++ b/etc/qmemman.conf @@ -14,4 +14,4 @@ cache-margin-factor = 1.3 # log-level - Warning 30, Info 20, Debug 10 # Default: 30 -log-level = 30 +log-level = 20 diff --git a/qubes/tests/__init__.py b/qubes/tests/__init__.py index c20d0c073..305733701 100644 --- a/qubes/tests/__init__.py +++ b/qubes/tests/__init__.py @@ -750,6 +750,7 @@ def setUp(self): ha_syslog.setFormatter( logging.Formatter('%(name)s[%(process)d]: %(message)s')) logging.root.addHandler(ha_syslog) + logging.root.setLevel("DEBUG") self.log.critical('starting') diff --git a/qubes/tools/qmemmand.py b/qubes/tools/qmemmand.py index df61f172f..2c901c791 100644 --- a/qubes/tools/qmemmand.py +++ b/qubes/tools/qmemmand.py @@ -20,6 +20,7 @@ # pylint: disable=global-statement import configparser +import functools import socketserver import logging import logging.handlers @@ -41,15 +42,6 @@ system_state = qubes.qmemman.systemstate.SystemState() global_lock = threading.Lock() -# If XSWatcher will -# handle meminfo event before @introduceDomain, it will use -# incomplete domain list for that and may redistribute memory -# allocated to some VM, but not yet used (see #1389). -# To fix that, system_state should be updated (refresh domain -# list) before processing other changes, every time some process requested -# memory for a new VM, before releasing the lock. Then XS_Watcher will check -# this flag before processing other event. 
-force_refresh_domain_list = False def only_in_first_list(list1, list2): ret = [] @@ -94,11 +86,11 @@ def domain_list_changed(self, refresh_only=False): got_lock = False if not refresh_only: - self.log.debug('acquiring global_lock') + self.log.info('acquiring global_lock - list changed') # pylint: disable=consider-using-with global_lock.acquire() got_lock = True - self.log.debug('global_lock acquired') + self.log.info('global_lock acquired - list changed') try: curr = self.handle.ls('', '/local/domain') if curr is None: @@ -128,19 +120,18 @@ def domain_list_changed(self, refresh_only=False): self.watch_token_dict[i]) self.watch_token_dict.pop(i) system_state.del_domain(i) + + if not refresh_only: + try: + system_state.do_balance() + except: # pylint: disable=bare-except + self.log.exception('do_balance() failed') except: # pylint: disable=bare-except self.log.exception('Updating domain list failed') finally: if got_lock: global_lock.release() - self.log.debug('global_lock released') - - if not refresh_only: - try: - system_state.do_balance() - except: # pylint: disable=bare-except - self.log.exception('do_balance() failed') - + self.log.info('global_lock released - list changed') def meminfo_changed(self, domain_id): self.log.debug('meminfo_changed(domain_id={!r})'.format(domain_id)) @@ -149,14 +140,10 @@ def meminfo_changed(self, domain_id): if untrusted_meminfo_key is None or untrusted_meminfo_key == b'': return - self.log.debug('acquiring global_lock') + self.log.info('acquiring global_lock - meminfo changed') with global_lock: - self.log.debug('global_lock acquired') + self.log.info('global_lock acquired - meminfo changed') try: - global force_refresh_domain_list - if force_refresh_domain_list: - self.domain_list_changed(refresh_only=True) - force_refresh_domain_list = False if domain_id not in self.watch_token_dict: # domain just destroyed return @@ -164,7 +151,7 @@ def meminfo_changed(self, domain_id): system_state.refresh_meminfo(domain_id, 
untrusted_meminfo_key) except: # pylint: disable=bare-except self.log.exception('Updating meminfo for %s failed', domain_id) - self.log.debug('global_lock released') + self.log.info('global_lock released - meminfo changed') def watch_loop(self): self.log.debug('watch_loop()') @@ -184,6 +171,10 @@ class QMemmanReqHandler(socketserver.BaseRequestHandler): client. """ + def __init__(self, watcher: XSWatcher, request, client_address, server): + self.watcher = watcher + super().__init__(request, client_address, server) + def handle(self): self.log = logging.getLogger('qmemman.daemon.reqhandler') @@ -192,12 +183,14 @@ def handle(self): # self.request is the TCP socket connected to the client while True: self.data = self.request.recv(1024).strip() - self.log.debug('data=%r', self.data) + self.log.info('data=%r', self.data) if len(self.data) == 0: self.log.info('client disconnected, resuming membalance') if got_lock: - global force_refresh_domain_list - force_refresh_domain_list = True + # got_lock = True means some VM may have been just created + # ensure next watch or client request handler will use + # updated domain list + self.watcher.domain_list_changed(refresh_only=True) return # XXX something is wrong here: return without release? 
@@ -205,10 +198,10 @@ def handle(self): self.log.warning('Second request over qmemman.sock?') return - self.log.debug('acquiring global_lock') + self.log.info('acquiring global_lock - handle') # pylint: disable=consider-using-with global_lock.acquire() - self.log.debug('global_lock acquired') + self.log.info('global_lock acquired - handle') got_lock = True if (self.data.isdigit() and @@ -224,7 +217,7 @@ def handle(self): finally: if got_lock: global_lock.release() - self.log.debug('global_lock released') + self.log.info('global_lock released - handle') parser = qubes.tools.QubesArgumentParser(want_app=False) @@ -290,7 +283,11 @@ def main(): # Initialize the connection to Xen and to XenStore system_state.init() - server = socketserver.UnixStreamServer(SOCK_PATH, QMemmanReqHandler) + watcher = XSWatcher() + server = socketserver.UnixStreamServer( + SOCK_PATH, + functools.partial(QMemmanReqHandler, watcher) + ) os.umask(0o077) # notify systemd @@ -305,4 +302,4 @@ def main(): sock.close() threading.Thread(target=server.serve_forever).start() - XSWatcher().watch_loop() + watcher.watch_loop() diff --git a/qubes/vm/qubesvm.py b/qubes/vm/qubesvm.py index dd76c4a00..5001838e7 100644 --- a/qubes/vm/qubesvm.py +++ b/qubes/vm/qubesvm.py @@ -1222,8 +1222,13 @@ async def start(self, start_guid=True, notify_function=None, self._update_libvirt_domain() + free_mem = subprocess.check_output(["xl", "info", "free_memory"]).decode() + self.log.debug("free mem before start: %s", free_mem) + subprocess.run(["xl", "list"]) self.libvirt_domain.createWithFlags( libvirt.VIR_DOMAIN_START_PAUSED) + free_mem = subprocess.check_output(["xl", "info", "free_memory"]).decode() + self.log.debug("free mem after start: %s", free_mem) # the above allocates xid, lets announce that self.fire_event('property-reset:xid', name='xid') @@ -1239,6 +1244,10 @@ async def start(self, start_guid=True, notify_function=None, exc = qubes.exc.QubesException( 'Failed to start an HVM qube with PCI devices assigned ' '- 
hardware does not support IOMMU/VT-d/AMD-Vi') + free_mem = subprocess.check_output(["xl", "info", "free_memory"]).decode() + self.log.debug("free mem after failed start: %s", free_mem) + subprocess.run(["xl", "list"]) + subprocess.run(["xl", "info"]) self.log.error('Start failed: %s', str(exc)) await self.fire_event_async('domain-start-failed', reason=str(exc)) @@ -1757,16 +1766,19 @@ def request_memory(self, mem_required=None): initial_memory = self.memory mem_required = int(initial_memory + stubdom_mem) * 1024 * 1024 + self.log.debug(f"mem required: {mem_required}") qmemman_client = qubes.qmemman.client.QMemmanClient() try: mem_required_with_overhead = mem_required + MEM_OVERHEAD_BASE \ + self.vcpus * MEM_OVERHEAD_PER_VCPU - if self.use_memory_hotplug: - # extra overhead to account future hotplug memory - # 1 page per 1MB of RAM, see libxl__get_required_paging_memory() - mem_required_with_overhead += self.maxmem * 4096 + maxmem = self.maxmem if self.maxmem else self.memory + if self.virt_mode != "pv": + # extra overhead to account for (possibly future hotplug) memory + # 2 pages per 1MB of RAM, see libxl__get_required_paging_memory() + mem_required_with_overhead += maxmem * 8192 got_memory = qmemman_client.request_memory( mem_required_with_overhead) + self.log.debug(f"mem required with overhead: {mem_required_with_overhead}") except IOError as e: raise IOError('Failed to connect to qmemman: {!s}'.format(e))