Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions qubes/tests/integ/dispvm.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,9 @@ def cleanup_preload(self):
)
logger.info("deleting global feature")
del self.app.domains["dom0"].features["preload-dispvm-max"]
if "preload-dispvm-threshold" in self.app.domains["dom0"].features:
logger.info("deleting global threshold feature")
del self.app.domains["dom0"].features["preload-dispvm-threshold"]
logger.info("end")

async def no_preload(self):
Expand All @@ -327,9 +330,11 @@ def log_preload(self):
global_preload_max = None
if default_dispvm:
global_preload_max = default_dispvm.get_feat_global_preload_max()
threshold = self.adminvm.features.get("preload-dispvm-threshold", None)
preload_dict["global"] = {
"name": default_dispvm.name if default_dispvm else None,
"max": global_preload_max,
"threshold": threshold,
}
for qube in [self.disp_base, self.disp_base_alt]:
preload = qube.get_feat_preload()
Expand Down Expand Up @@ -739,6 +744,10 @@ async def _test_019_preload_refresh(self):
await self.wait_preload(preload_max)
old_preload = self.disp_base.get_feat_preload()
await qube.start()
# If services are still starting, it may delay shutdown longer than
# the default timeout. Because we can't just kill default
# templates, wait gracefully for system services to have started.
await qube.run_service_for_stdio("qubes.WaitForRunningSystem")
logger.info("shutdown '%s'", qube.name)
await qube.shutdown(wait=True)
await self.wait_preload(preload_max)
Expand Down
7 changes: 0 additions & 7 deletions qubes/tests/integ/dispvm_perf.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,13 +73,6 @@ def tearDown(self: qubes.tests.SystemTestCase):
self.vm2.shutdown(),
)
)
if not os.getenv("QUBES_TEST_SKIP_TEARDOWN_SLEEP"):
# Avoid previous test load interfering with new test.
if self._testMethodName.startswith("vm"):
delay = 5
else:
delay = 15
time.sleep(delay)

def run_test(self, name):
dvm = self.dvm.name
Expand Down
157 changes: 127 additions & 30 deletions tests/dispvm_perf.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,23 @@
import asyncio
import concurrent.futures
import dataclasses
import logging
import os
import subprocess
import time


import qubesadmin

# nose will duplicate this logger.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
handler = logging.StreamHandler()
formatter = logging.Formatter(
"%(asctime)s: %(levelname)s: %(funcName)s: %(message)s"
)
handler.setFormatter(formatter)
logger.addHandler(handler)


@dataclasses.dataclass
class TestConfig:
Expand All @@ -61,7 +71,7 @@ class TestConfig:
via wrappers such as qvm-run.
- vm-dispvm:
- qrexec-client-vm: scripts
- *-vm: Fast way to test if the tests itself are working.
- *-vm: Fast way to test if the tests are working.
-
GUI VS non-GUI:
- GUI tests workflows that uses disposables to open untrusted files or
Expand All @@ -81,7 +91,7 @@ class TestConfig:
the reverse is not true. Normal disposables are a control group for
preloaded disposables.
- Preloading more than 2 is not useful on sequential calls as long as
long as on the next call, there is a preload that has been completed.
on the next call, there is a preload that has been completed.
"""

name: str
Expand Down Expand Up @@ -170,10 +180,17 @@ class TestConfig:
preload_max=MAX_CONCURRENCY,
),
TestConfig("dispvm-api", admin_api=True),
TestConfig("dispvm-concurrent-api", concurrent=True, admin_api=True),
TestConfig(
"dispvm-concurrent-api",
concurrent=True,
admin_api=True,
),
TestConfig("dispvm-gui-api", gui=True, admin_api=True),
TestConfig(
"dispvm-gui-concurrent-api", gui=True, concurrent=True, admin_api=True
"dispvm-gui-concurrent-api",
gui=True,
concurrent=True,
admin_api=True,
),
TestConfig(
"dispvm-preload-more-api",
Expand All @@ -189,7 +206,7 @@ class TestConfig:
TestConfig(
"dispvm-preload-concurrent-api",
concurrent=True,
preload_max=MAX_PRELOAD,
preload_max=MAX_CONCURRENCY,
admin_api=True,
),
TestConfig(
Expand All @@ -202,12 +219,18 @@ class TestConfig:
"dispvm-preload-gui-concurrent-api",
gui=True,
concurrent=True,
preload_max=MAX_PRELOAD,
preload_max=MAX_CONCURRENCY,
admin_api=True,
),
]


def get_load() -> str:
    """Return the current system load averages as one stripped line.

    Reads ``/proc/loadavg`` (Linux-only) and strips the trailing
    newline; used by the tests purely for logging context.
    """
    with open("/proc/loadavg", "r", encoding="ascii") as loadavg:
        return loadavg.read().rstrip()


def get_time():
    """Return a monotonic timestamp in seconds.

    Uses ``CLOCK_MONOTONIC`` so elapsed-time measurements are immune
    to wall-clock adjustments (NTP steps, manual changes).
    """
    now = time.clock_gettime(time.CLOCK_MONOTONIC)
    return now

Expand All @@ -218,6 +241,8 @@ def __init__(self, dom0, dvm, vm1, vm2):
self.dvm = dvm
self.vm1 = vm1
self.vm2 = vm2
self.app = self.dom0.app
self.adminvm = self.dom0
self.iterations = ITERATIONS

async def wait_preload(
Expand All @@ -229,6 +254,7 @@ async def wait_preload(
timeout=60,
):
"""Waiting for completion avoids coroutine objects leaking."""
logger.info("preload_max: '%s'", preload_max)
if not appvm:
appvm = self.dvm
for _ in range(timeout):
Expand All @@ -241,14 +267,15 @@ async def wait_preload(
if fail_on_timeout:
raise Exception("didn't preload in time")
if not wait_completion:
logger.info("end")
return
preload_dispvm = appvm.features.get("preload-dispvm", "")
preload_dispvm = preload_dispvm.split(" ") or []
preload_unfinished = preload_dispvm
for _ in range(timeout):
for qube in preload_unfinished.copy():
self.dom0.app.domains.refresh_cache(force=True)
qube = self.dom0.app.domains[qube]
self.app.domains.refresh_cache(force=True)
qube = self.app.domains[qube]
completed = qube.features.get("preload-dispvm-completed")
if completed:
preload_unfinished.remove(qube)
Expand All @@ -259,12 +286,14 @@ async def wait_preload(
else:
if fail_on_timeout:
raise Exception("last preloaded didn't complete in time")
logger.info("end")

def wait_for_dispvm_destroy(self, dispvm_names):
logger.info("Waiting for destruction of disposables: %s", dispvm_names)
timeout = 60
while True:
self.dom0.app.domains.refresh_cache(force=True)
if set(dispvm_names).isdisjoint(self.dom0.app.domains):
self.app.domains.refresh_cache(force=True)
if set(dispvm_names).isdisjoint(self.app.domains):
break
time.sleep(1)
timeout -= 1
Expand Down Expand Up @@ -303,7 +332,8 @@ def run_latency_calls(self, test):
"set -eu --; "
f'max_concurrency="{MAX_CONCURRENCY}"; '
f"for i in $(seq {self.iterations}); do "
f" out=$({cmd}) {term}"
' echo "$i"; '
f" {cmd} {term}"
' pid="${!-}"; '
' if test -n "${pid}"; then '
' set -- "${@}" "${pid}"; '
Expand All @@ -318,7 +348,13 @@ def run_latency_calls(self, test):
start_time = get_time()
try:
if test.from_dom0:
subprocess.run(code, shell=True, check=True, timeout=timeout)
subprocess.run(
code,
shell=True,
check=True,
capture_output=True,
timeout=timeout,
)
else:
self.vm1.run(code, timeout=timeout)
except subprocess.CalledProcessError as e:
Expand All @@ -327,32 +363,46 @@ def run_latency_calls(self, test):
f" {e.stdout},"
f" {e.stderr}"
)
except subprocess.TimeoutExpired:
raise Exception(f"service '{cmd}' failed: timeout expired")
except subprocess.TimeoutExpired as e:
raise Exception(
f"service '{cmd}' failed: timeout expired:"
f" {e.stdout},"
f" {e.stderr}"
)
end_time = get_time()
return round(end_time - start_time, ROUND_PRECISION)

def call_api(self, test, service, qube):
start_time = get_time()
app = qubesadmin.Qubes()
domains = app.domains
appvm = domains[qube]
domain_time = get_time()
if test.non_dispvm:
target_qube = self.vm1
# Even though we already have the qube object passed from the
# class, assume we don't so we can calculate gathering.
target_qube = domains[self.vm1.name]
domain_time = get_time()
else:
appvm = domains[qube]
domain_time = get_time()
target_qube = qubesadmin.vm.DispVM.from_appvm(app, appvm)
name = target_qube.name
# A very small number, if it appears, it will show a bottleneck at
# DispVM.from_appvm.
target_time = get_time()
try:
target_qube.run_service_for_stdio(service, timeout=60)
except subprocess.CalledProcessError as e:
raise Exception(
f"service '{service}' failed ({e.returncode}):"
f"'{name}': service '{service}' failed ({e.returncode}):"
f" {e.stdout},"
f" {e.stderr}"
)
except subprocess.TimeoutExpired as e:
raise Exception(
f"'{name}': service '{service}' failed: timeout expired:"
f" {e.stdout},"
f" {e.stderr}"
)
except subprocess.TimeoutExpired:
raise Exception(f"service '{service}' failed: timeout expired")
run_service_time = get_time()
if not test.non_dispvm:
target_qube.cleanup()
Expand Down Expand Up @@ -505,35 +555,81 @@ def run_test(self, test: TestConfig):
f"{gui_prefix} {target} allow\n"
f"{nogui_prefix} {target} allow\n"
)
if test.preload_max:
orig_preload_max = self.dom0.features.get("preload-dispvm-max")
if orig_preload_max is not None:
del self.dom0.features["preload-dispvm-max"]
orig_preload_threshold = self.dom0.features.get(
"preload-dispvm-threshold"
)
orig_preload_max = self.dom0.features.get("preload-dispvm-max")
if orig_preload_threshold is not None:
logger.info("Deleting threshold feature")
del self.dom0.features["preload-dispvm-threshold"]
if orig_preload_max is not None:
logger.info("Deleting global max feature")
del self.dom0.features["preload-dispvm-max"]
try:
if test.preload_max:
preload_max = test.preload_max
logger.info("Setting local max feature: '%s'", preload_max)
self.dvm.features["preload-dispvm-max"] = str(preload_max)
asyncio.run(self.wait_preload(preload_max))
for qube in [self.vm1, self.vm2]:
if not qube:
# Might be an empty string.
continue
logger.info(
"Waiting for VM '%s' to finish startup",
qube.name,
)
qube.run_service_for_stdio("qubes.WaitForSession", timeout=60)
logger.info("Load before test: '%s'", get_load())
if test.admin_api:
result = self.run_latency_api_calls(test)
else:
result = self.run_latency_calls(test)
self.report_result(test, result)
except:
logger.error("Failed to run test: '%s'", test.name)
raise
finally:
if test.preload_max:
old_preload_max = int(
self.dvm.features.get("preload-dispvm-max", 0) or 0
)
logger.info(
"Waiting to preload the old test setting: '%s'",
old_preload_max,
)
asyncio.run(self.wait_preload(old_preload_max))
old_preload = self.dvm.features.get("preload-dispvm", "")
old_preload = old_preload.split(" ") or []
logger.info("Deleting local max feature")
del self.dvm.features["preload-dispvm-max"]
self.wait_for_dispvm_destroy(old_preload)
if orig_preload_max is not None:
self.dom0.features["preload-dispvm-max"] = orig_preload_max
if orig_preload_max != 0:
asyncio.run(self.wait_preload(orig_preload_max))
if orig_preload_threshold is not None:
logger.info(
"Setting the original threshold feature: '%s'",
orig_preload_threshold,
)
self.dom0.features["preload-dispvm-threshold"] = (
orig_preload_threshold
)
if orig_preload_max is not None:
logger.info(
"Setting the global max feature: '%s'", orig_preload_max
)
self.dom0.features["preload-dispvm-max"] = orig_preload_max
os.unlink(POLICY_FILE)
if not os.getenv("QUBES_TEST_SKIP_TEARDOWN_SLEEP"):
logger.info("Load before sleep: '%s'", get_load())
delay = 5
if not test.non_dispvm:
delay += 10
if test.gui:
delay += 2
if test.concurrent:
delay += 8
logger.info("Sleeping for '%d' seconds", delay)
time.sleep(delay)
logger.info("Load after sleep: '%s'", get_load())


def main():
Expand Down Expand Up @@ -567,7 +663,8 @@ def main():
if args.iterations:
run.iterations = args.iterations

for index, test in enumerate(tests):
for test in tests:
logger.info("Running test '%s'", test.name)
run.run_test(test)


Expand Down