diff --git a/src/winml/modelkit/telemetry/consent.py b/src/winml/modelkit/telemetry/consent.py index d484b930f..359c61b9e 100644 --- a/src/winml/modelkit/telemetry/consent.py +++ b/src/winml/modelkit/telemetry/consent.py @@ -3,7 +3,7 @@ # Licensed under the MIT License. # -------------------------------------------------------------------------- -r"""Consent decision for ModelKit telemetry. +r"""Consent decision for WinML CLI telemetry. A first-run interactive prompt collects user consent (default: accept) and persists it to ``%USERPROFILE%\.winml\config.json``. This module @@ -48,12 +48,12 @@ def _default_config_path() -> Path | None: _CONSENT_VERSION: int = 1 _PROMPT_TEXT = """\ -ModelKit can collect anonymous usage data to help improve the product. +WinML CLI can collect anonymous usage data to help improve the product. What is collected: - Command name, duration, success/failure - Target device/EP (when the command specifies them) - - OS, architecture, ModelKit version + - OS, architecture, WinML CLI version - Unhandled exception types, code locations, and scrubbed error messages (paths trimmed, length capped, PII patterns scrubbed) diff --git a/src/winml/modelkit/telemetry/library/exporter.py b/src/winml/modelkit/telemetry/library/exporter.py index 5630606e3..49e86c076 100644 --- a/src/winml/modelkit/telemetry/library/exporter.py +++ b/src/winml/modelkit/telemetry/library/exporter.py @@ -8,6 +8,7 @@ from __future__ import annotations import logging +import time from datetime import datetime, timezone from typing import TYPE_CHECKING @@ -15,7 +16,7 @@ from opentelemetry.sdk._logs.export import LogRecordExporter, LogRecordExportResult from .._cache import _PersistentCache -from .serialization import _build_envelope, _serialize_batch +from .serialization import _build_envelope, _envelope_ikey, _serialize_batch if TYPE_CHECKING: @@ -26,6 +27,9 @@ _LOGGER = logging.getLogger(__name__) _HTTP_TIMEOUT = 10.0 +# Truncate response body excerpts in DEBUG logs so a backend that decides +# to dump a large diagnostic payload doesn't flood the log. +_RESPONSE_BODY_LOG_LIMIT = 200 class OneCollectorLogExporter(LogRecordExporter): @@ -46,12 +50,26 @@ def __init__( raise ValueError("ikey must be non-empty") if not endpoint: raise ValueError("endpoint must be non-empty") - self._ikey = ikey + # OneCollector requires the envelope's iKey field to be + # ``o:`` (just the prefix portion of the full ikey), + # while the x-apikey HTTP header carries the full ikey. Compute + # the envelope form once and cache it; a malformed ikey raises + # ValueError, which Telemetry._try_init catches to disable + # telemetry rather than crash the CLI. + self._envelope_ikey = _envelope_ikey(ikey) + # Bare tenant_token (no "o:" prefix) is what the ``kill-tokens`` + # response header lists, so we keep it around for membership checks. + self._tenant_token = self._envelope_ikey.removeprefix("o:") self._endpoint = endpoint self._cache = cache if cache is not None else _PersistentCache() # First export() flushes the cache before sending the new batch; # subsequent exports go straight through. self._cache_flushed = False + # OneCollector's ``kill-tokens`` directive: when set, our tenant + # is on the backend's deny list and we must stop sending until + # this epoch second. In-memory only -- process restart re-hits + # the 401 once and re-records the kill before short-circuiting. + self._killed_until: float | None = None # _shutdown is read on the BatchLogRecordProcessor export thread and # written on the shutdown thread; bool assignment is atomic under the # CPython GIL, so no lock is needed. @@ -82,10 +100,22 @@ def export(self, batch: Sequence[ReadableLogRecord]) -> LogRecordExportResult: call. Persists the current batch to the cache on POST failure so the next process can retry — :class:`BatchLogRecordProcessor` only re-queues in memory and loses the queue on process exit. + + While the tenant is under ``kill-tokens``, this is a no-op that + returns ``SUCCESS`` to keep the BatchLogRecordProcessor from + re-queueing events the backend has explicitly told us to stop + sending; envelopes for that window are dropped, not cached. """ if self._shutdown or not batch: return LogRecordExportResult.SUCCESS + if self._is_killed(): + _LOGGER.debug( + "telemetry export skipped: tenant under kill-tokens for %.0fs more", + (self._killed_until or 0) - time.time(), + ) + return LogRecordExportResult.SUCCESS + try: envelopes = [self._to_envelope(ld) for ld in batch] except Exception: @@ -94,20 +124,37 @@ def export(self, batch: Sequence[ReadableLogRecord]) -> LogRecordExportResult: # First-call cache flush: try to send anything left over from a # previous run. Best-effort, single shot — don't loop if the - # backend is still down. + # backend is still down. If the failure is because we just got + # killed, drop the cached batch instead of looping it forever. if not self._cache_flushed: self._cache_flushed = True cached = self._cache.drain() - if cached and not self._post_envelopes(cached): + if cached and not self._post_envelopes(cached) and not self._is_killed(): self._cache.append(cached) + # If the cache-flush POST just killed us, skip the new-batch POST + # too -- another network round-trip would just re-confirm the + # kill. Drop the envelopes (don't cache, same rationale as the + # top-of-export guard) and return SUCCESS so the + # BatchLogRecordProcessor doesn't re-queue them. + if self._is_killed(): + return LogRecordExportResult.SUCCESS + if not self._post_envelopes(envelopes): - self._cache.append(envelopes) + if not self._is_killed(): + self._cache.append(envelopes) return LogRecordExportResult.FAILURE return LogRecordExportResult.SUCCESS def _post_envelopes(self, envelopes: list[dict]) -> bool: - """POST a list of envelopes; return True on 2xx, False otherwise.""" + """POST a list of envelopes; return True on 2xx, False otherwise. + + On non-2xx, parses ``kill-tokens`` / ``kill-duration`` to honor the + backend's tenant-level backoff, and emits a DEBUG log line that + captures the ``Collector-Error`` header and a body excerpt — the + two pieces of info the OneCollector backend uses to communicate + the actual rejection reason. + """ if not envelopes: return True try: @@ -126,9 +173,43 @@ def _post_envelopes(self, envelopes: list[dict]) -> bool: return False if 200 <= response.status_code < 300: return True - _LOGGER.debug("telemetry backend returned %s", response.status_code) + self._record_kill_if_present(response) + _LOGGER.debug( + "telemetry backend returned %s: error=%r body=%s", + response.status_code, + response.headers.get("Collector-Error"), + (response.text or "")[:_RESPONSE_BODY_LOG_LIMIT].replace("\n", " "), + ) return False + def _is_killed(self) -> bool: + """True iff our tenant is currently under a ``kill-tokens`` window.""" + return self._killed_until is not None and time.time() < self._killed_until + + def _record_kill_if_present(self, response: requests.Response) -> None: + """Honor an inbound ``kill-tokens`` directive that names our tenant. + + No-op for any other 4xx/5xx response. + """ + kill_header = response.headers.get("kill-tokens") + duration_header = response.headers.get("kill-duration") + if not kill_header or not duration_header: + return + try: + duration_s = int(duration_header) + except ValueError: + return + if duration_s <= 0: + return + if self._tenant_token not in _parse_kill_tokens(kill_header): + return + self._killed_until = time.time() + duration_s + _LOGGER.debug( + "telemetry tenant under kill-tokens for %ss (until epoch %.0f)", + duration_s, + self._killed_until, + ) + def force_flush(self, timeout_millis: int = 30_000) -> bool: """No-op: all exports are synchronous.""" return True @@ -150,13 +231,37 @@ def _to_envelope(self, ld: ReadableLogRecord) -> dict: ext = _resource_to_ext(ld.resource) return _build_envelope( name=str(record.body), - ikey=self._ikey, + ikey=self._envelope_ikey, timestamp=timestamp, data=data, ext=ext, ) +def _parse_kill_tokens(header_value: str) -> set[str]: + """Parse the OneCollector ``kill-tokens`` header into a set of tenant_token strings. + + The header is a comma-separated list. Each entry is in the form + ``o:`` optionally followed by ``:`` (e.g. + ``o:abc:all`` or ``o:abc:event_name``). We treat any entry naming + a tenant as a full kill for that tenant — per-event kills aren't + something we exploit today. + """ + if not header_value: + return set() + tokens: set[str] = set() + for raw in header_value.split(","): + entry = raw.strip() + if not entry.startswith("o:"): + continue + rest = entry[2:] + # Strip optional ":" suffix. + tenant = rest.split(":", 1)[0] + if tenant: + tokens.add(tenant) + return tokens + + def _ns_to_datetime(ts_ns: int) -> datetime: return datetime.fromtimestamp(ts_ns / 1_000_000_000, tz=timezone.utc) diff --git a/src/winml/modelkit/telemetry/library/serialization.py b/src/winml/modelkit/telemetry/library/serialization.py index 3dcc966e2..3fdcb91d0 100644 --- a/src/winml/modelkit/telemetry/library/serialization.py +++ b/src/winml/modelkit/telemetry/library/serialization.py @@ -15,6 +15,29 @@ from datetime import datetime +def _envelope_ikey(full_ikey: str) -> str: + """Derive the envelope ``iKey`` value from a OneCollector instrumentation key. + + OneCollector expects two distinct iKey forms on the wire: + + - ``x-apikey`` HTTP header: the full instrumentation key + (``--``). + - Envelope ``iKey`` field: ``o:``, where ``tenant_token`` + is the portion of the ikey before the first ``-``. + + Embedding the full ikey in the envelope's ``iKey`` field is what causes + the backend to reject the request with + ``Collector-Error: Invalid Tenant Token``. + """ + dash = full_ikey.find("-") + if dash <= 0: + raise ValueError( + "OneCollector instrumentation key must contain a non-empty " + "tenant_token portion before the first '-'" + ) + return f"o:{full_ikey[:dash]}" + + def _build_envelope( name: str, ikey: str, @@ -28,7 +51,9 @@ def _build_envelope( ver: schema version, always "4.0" name: event name (e.g. "ModelKitAction") time: ISO8601 UTC, millisecond precision, trailing Z - iKey: OneCollector InstrumentationKey (in "o:" form) + iKey: envelope iKey value -- caller is responsible for supplying + it in the ``o:`` form (use + :func:`_envelope_ikey` to derive it from the full ikey) data: event-specific flat payload ext: common context slots (os, app, device) """ diff --git a/tests/unit/telemetry/conftest.py b/tests/unit/telemetry/conftest.py index d73e8f4d3..d4d9e7b1c 100644 --- a/tests/unit/telemetry/conftest.py +++ b/tests/unit/telemetry/conftest.py @@ -46,7 +46,9 @@ def enabled_telemetry(monkeypatch, isolated_config, clean_env): ready instance should use :func:`running_telemetry`, or call ``Telemetry.get_or_init()`` from inside the test body. """ - monkeypatch.setattr("winml.modelkit.telemetry.constants.INSTRUMENTATION_KEY", "o:test-key") + monkeypatch.setattr( + "winml.modelkit.telemetry.constants.INSTRUMENTATION_KEY", "test-tenant-1234" + ) consent_mod._write_stored_consent("enabled") monkeypatch.setattr("sys.stdin.isatty", lambda: True) diff --git a/tests/unit/telemetry/library/test_exporter.py b/tests/unit/telemetry/library/test_exporter.py index bd1419c17..2765d7f3b 100644 --- a/tests/unit/telemetry/library/test_exporter.py +++ b/tests/unit/telemetry/library/test_exporter.py @@ -9,6 +9,8 @@ # LogRecordExportResult (the old names are deprecated aliases). Tests use the # current names throughout. +import logging +import time from unittest.mock import MagicMock, patch import pytest @@ -19,6 +21,7 @@ from opentelemetry.sdk.resources import Resource from opentelemetry.sdk.util.instrumentation import InstrumentationScope +from winml.modelkit.telemetry._cache import _PersistentCache from winml.modelkit.telemetry.library import OneCollectorLogExporter @@ -36,18 +39,29 @@ def _make_log_data(body: str, attrs: dict, resource: Resource | None = None) -> @pytest.fixture -def exporter(): - return OneCollectorLogExporter( - ikey="o:abc", +def exporter(tmp_path): + # Use a tmp_path-scoped cache so the test never reads or writes the + # real user-scoped persistent cache (which can be polluted by prior + # modelkit runs on the dev machine and would inject extra POSTs into + # the test). + # Yield + shutdown so the underlying ``requests.Session`` (and its + # connection pool) is closed at teardown rather than leaked across + # tests. + cache = _PersistentCache(path=tmp_path / "modelkit.cache") + exp = OneCollectorLogExporter( + ikey="abc-def", endpoint="https://example.invalid/OneCollector/1.0/", + cache=cache, ) + yield exp + exp.shutdown() @pytest.mark.parametrize( "ikey,endpoint", [ ("", "https://example.invalid/"), - ("o:abc", ""), + ("abc-def", ""), ("", ""), ], ) @@ -59,6 +73,20 @@ def test_constructor_rejects_empty_ikey_or_endpoint(ikey, endpoint): OneCollectorLogExporter(ikey=ikey, endpoint=endpoint) +@pytest.mark.parametrize( + "ikey", + [ + "noseparator", # no dash at all + "-leading-dash", # empty tenant_token portion + ], +) +def test_constructor_rejects_malformed_ikey(ikey): + """The full ikey must contain a non-empty tenant_token portion before + the first '-', otherwise the envelope iKey can't be derived.""" + with pytest.raises(ValueError): + OneCollectorLogExporter(ikey=ikey, endpoint="https://example.invalid/OneCollector/1.0/") + + def test_export_success_returns_success(exporter): ld = _make_log_data( body="ModelKitAction", @@ -72,16 +100,22 @@ def test_export_success_returns_success(exporter): mock_post.assert_called_once() # OneCollector /OneCollector/1.0/ ingest only accepts x-json-stream # (NDJSON) or bond-compact-binary; application/json is rejected with - # HTTP 415. Auth is via the x-apikey header, not the envelope iKey. + # HTTP 415. Auth is via the x-apikey header (full ikey), and the + # envelope iKey field carries the "o:" form -- the two + # values are intentionally different on the wire. headers = exporter._session.headers assert headers["Content-Type"] == "application/x-json-stream; charset=utf-8" - assert headers["x-apikey"] == "o:abc" + assert headers["x-apikey"] == "abc-def" # Body is NDJSON (one envelope per line, no enclosing array). _, kwargs = mock_post.call_args body = kwargs["data"] assert not body.startswith(b"[") assert b'"ModelKitAction"' in body + # Regression guard: envelope iKey is "o:", NOT the full + # ikey. Sending the full ikey here triggers + # ``Collector-Error: Invalid Tenant Token`` from OneCollector. assert b'"iKey":"o:abc"' in body + assert b'"iKey":"abc-def"' not in body def test_export_connection_error_returns_failure(exporter): @@ -220,3 +254,212 @@ def test_resource_to_ext_unknown_attributes_are_ignored(): # Attributes outside the mapping table do not leak into ext. ext = _resource_to_ext(Resource.create({"os.name": "Windows", "custom.attr": "x"})) assert ext == {"os": {"name": "Windows"}} + + +# --- _parse_kill_tokens direct unit tests --- + + +from winml.modelkit.telemetry.library.exporter import _parse_kill_tokens # noqa: E402 + + +@pytest.mark.parametrize( + "header,expected", + [ + ("", set()), + ("o:abc:all", {"abc"}), + ("o:abc", {"abc"}), # no reason suffix + ("o:abc:all,o:def:event_x", {"abc", "def"}), + (" o:abc:all , o:def ", {"abc", "def"}), # whitespace tolerant + ("garbage,o:abc:all", {"abc"}), # entries without "o:" prefix are skipped + ("o::all", set()), # empty tenant_token portion is rejected + ], +) +def test_parse_kill_tokens(header, expected): + assert _parse_kill_tokens(header) == expected + + +# --- kill-tokens / DEBUG-log behavior on failed POSTs --- + + +def _killed_response(tenant: str = "abc", duration: int = 86_400): + """Build a 401 response that mimics OneCollector's tenant-killed reply.""" + resp = MagicMock(status_code=401) + resp.headers = { + "Collector-Error": "Invalid Tenant Token.", + "kill-tokens": f"o:{tenant}:all", + "kill-duration": str(duration), + } + resp.text = '{"acc":0,"efi":{"InvalidTenantToken":"all"}}' + return resp + + +def test_kill_tokens_recorded_on_failure(exporter): + """On a 401 with kill-tokens for our tenant, exporter records the + kill window and ``_is_killed()`` returns True until it expires.""" + ld = _make_log_data("ModelKitHeartbeat", {}) + assert exporter._is_killed() is False + + with patch.object(exporter._session, "post", return_value=_killed_response()): + exporter.export([ld]) + + assert exporter._is_killed() is True + assert exporter._killed_until is not None + assert exporter._killed_until > time.time() + + +def test_kill_tokens_for_other_tenant_is_ignored(exporter): + """If kill-tokens names a different tenant, our exporter is unaffected.""" + ld = _make_log_data("ModelKitHeartbeat", {}) + other = _killed_response(tenant="not-our-tenant") + with patch.object(exporter._session, "post", return_value=other): + exporter.export([ld]) + assert exporter._is_killed() is False + + +def test_export_skipped_during_kill_window(exporter): + """While killed, export() is a no-op that returns SUCCESS without + even touching the HTTP session.""" + ld = _make_log_data("ModelKitHeartbeat", {}) + + # First export: triggers the kill window. + with patch.object(exporter._session, "post", return_value=_killed_response()) as p1: + exporter.export([ld]) + assert p1.call_count == 1 + assert exporter._is_killed() + + # Second export: must not POST at all. + with patch.object(exporter._session, "post") as p2: + result = exporter.export([ld]) + assert result == LogRecordExportResult.SUCCESS + p2.assert_not_called() + + +def test_kill_drops_envelopes_instead_of_caching(exporter, tmp_path): + """A failed POST that triggered a kill must NOT enqueue the batch in + the persistent cache — caching it just guarantees forever-failure on + future startups within the kill window.""" + ld = _make_log_data("ModelKitHeartbeat", {}) + cache_path = tmp_path / "modelkit.cache" + with patch.object(exporter._session, "post", return_value=_killed_response()): + exporter.export([ld]) + assert exporter._is_killed() + assert not cache_path.exists(), "kill-induced failures must not persist to cache" + + +def test_cache_flush_kill_skips_new_batch_post(tmp_path): + """If the cache-flush POST triggers a kill, the new-batch POST in + the same export() call must be skipped — otherwise we waste a + network round-trip just to re-confirm the kill, and the new + envelopes would either be dropped or wrongly re-cached.""" + cache_path = tmp_path / "modelkit.cache" + cache = _PersistentCache(path=cache_path) + cache.append([{"name": "ModelKitHeartbeat", "iKey": "o:abc"}]) + + exp = OneCollectorLogExporter( + ikey="abc-def", + endpoint="https://example.invalid/OneCollector/1.0/", + cache=cache, + ) + try: + ld = _make_log_data("ModelKitHeartbeat", {}) + with patch.object(exp._session, "post", return_value=_killed_response()) as p: + result = exp.export([ld]) + + # Exactly one POST: the cache flush. The new-batch POST is + # short-circuited by the mid-export kill check. + assert p.call_count == 1 + assert exp._is_killed() + # Returned SUCCESS so BatchLogRecordProcessor doesn't re-queue. + assert result == LogRecordExportResult.SUCCESS + finally: + exp.shutdown() + + +def test_kill_window_expiry_re_enables_post(exporter): + """Past the kill window, export() resumes normal POST behavior.""" + ld = _make_log_data("ModelKitHeartbeat", {}) + + # Use a short window so we can fast-forward past it via monkeypatching. + short_kill = _killed_response(duration=10) + with patch.object(exporter._session, "post", return_value=short_kill): + exporter.export([ld]) + assert exporter._is_killed() + + # Fast-forward: pretend we're 11s past the kill window. + fake_now = (exporter._killed_until or 0) + 1 + with ( + patch("winml.modelkit.telemetry.library.exporter.time.time", return_value=fake_now), + patch.object(exporter._session, "post", return_value=MagicMock(status_code=200)) as p, + ): + result = exporter.export([ld]) + + assert result == LogRecordExportResult.SUCCESS + p.assert_called_once() + + +@pytest.mark.parametrize( + "kill_duration_value", + [ + None, # header absent entirely + "", # header present but empty + "0", # non-positive + "abc", # non-numeric + ], +) +def test_kill_tokens_with_unusable_duration_is_ignored(exporter, kill_duration_value): + """``kill-tokens`` is meaningless without a positive integer + ``kill-duration``. Any of: absent, empty, non-positive, or non-numeric + must leave the exporter unkilled.""" + ld = _make_log_data("ModelKitHeartbeat", {}) + resp = MagicMock(status_code=401) + headers = {"kill-tokens": "o:abc:all"} + if kill_duration_value is not None: + headers["kill-duration"] = kill_duration_value + resp.headers = headers + resp.text = "" + with patch.object(exporter._session, "post", return_value=resp): + exporter.export([ld]) + assert exporter._is_killed() is False + + +def test_post_failure_logs_collector_error_and_body_excerpt(exporter, caplog): + """The DEBUG log on non-2xx must capture both the ``Collector-Error`` + header and a body excerpt — the two pieces OneCollector uses to + communicate the actual rejection reason. Without these in the log, + diagnosing tenant/format misconfigurations requires a live probe.""" + ld = _make_log_data("ModelKitHeartbeat", {}) + resp = MagicMock(status_code=401) + resp.headers = {"Collector-Error": "Invalid Tenant Token."} + resp.text = '{"acc":0,"rej":1,"efi":{"InvalidTenantToken":[0]}}' + + caplog.set_level(logging.DEBUG, logger="winml.modelkit.telemetry.library.exporter") + with patch.object(exporter._session, "post", return_value=resp): + exporter.export([ld]) + + backend_logs = [ + r.getMessage() for r in caplog.records if "telemetry backend returned" in r.getMessage() + ] + assert backend_logs, "expected a DEBUG log line for the 401" + msg = backend_logs[0] + assert "401" in msg + assert "Invalid Tenant Token." in msg + assert "InvalidTenantToken" in msg + + +def test_post_failure_log_truncates_long_body(exporter, caplog): + """A backend that returns a huge body shouldn't flood the DEBUG log.""" + ld = _make_log_data("ModelKitHeartbeat", {}) + resp = MagicMock(status_code=500) + resp.headers = {} + resp.text = "x" * 10_000 + + caplog.set_level(logging.DEBUG, logger="winml.modelkit.telemetry.library.exporter") + with patch.object(exporter._session, "post", return_value=resp): + exporter.export([ld]) + + msg = next( + r.getMessage() for r in caplog.records if "telemetry backend returned" in r.getMessage() + ) + # The truncation cap is _RESPONSE_BODY_LOG_LIMIT (200) bytes. + assert "x" * 200 in msg + assert "x" * 1_000 not in msg diff --git a/tests/unit/telemetry/library/test_factory.py b/tests/unit/telemetry/library/test_factory.py index 727759058..2ace2fccc 100644 --- a/tests/unit/telemetry/library/test_factory.py +++ b/tests/unit/telemetry/library/test_factory.py @@ -43,14 +43,14 @@ def test_default_endpoint_points_at_one_collector(): def test_create_logger_provider_returns_configured_provider(make_provider): resource = Resource.create({"app_version": "0.0.1"}) - provider = make_provider(ikey="o:test", resource=resource) + provider = make_provider(ikey="test-tenant-1234", resource=resource) assert isinstance(provider, LoggerProvider) assert provider.resource.attributes.get("app_version") == "0.0.1" def test_create_logger_provider_with_custom_endpoint(make_provider): provider = make_provider( - ikey="o:test", + ikey="test-tenant-1234", endpoint="https://example.invalid/OneCollector/1.0/", ) assert isinstance(provider, LoggerProvider) diff --git a/tests/unit/telemetry/library/test_serialization.py b/tests/unit/telemetry/library/test_serialization.py index 85ed984f6..acc358a30 100644 --- a/tests/unit/telemetry/library/test_serialization.py +++ b/tests/unit/telemetry/library/test_serialization.py @@ -7,7 +7,11 @@ import pytest -from winml.modelkit.telemetry.library.serialization import _build_envelope, _serialize_batch +from winml.modelkit.telemetry.library.serialization import ( + _build_envelope, + _envelope_ikey, + _serialize_batch, +) def test_build_envelope_basic_shape(): @@ -74,3 +78,35 @@ def test_timestamp_millisecond_precision(microsecond, expected_ms): ts = datetime(2026, 4, 17, 10, 30, 0, microsecond, tzinfo=timezone.utc) envelope = _build_envelope("X", "o:k", ts, {}, {}) assert envelope["time"] == f"2026-04-17T10:30:00.{expected_ms}Z" + + +@pytest.mark.parametrize( + "full_ikey,expected", + [ + # Realistic OneCollector iKey shape: <32hex>--. + ( + "abc123abc123abc123abc123abc12345-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee-1234", + "o:abc123abc123abc123abc123abc12345", + ), + # Minimal valid form: anything non-empty before the first dash. + ("abc-def", "o:abc"), + ("token-rest-of-key", "o:token"), + ], +) +def test_envelope_ikey_extracts_tenant_token_and_prefixes(full_ikey, expected): + """The envelope iKey is ``o:``; the suffix + (ingestion token + GUID) only goes in the ``x-apikey`` header.""" + assert _envelope_ikey(full_ikey) == expected + + +@pytest.mark.parametrize( + "bad_ikey", + [ + "noseparator", # no dash at all + "-leading-dash", # empty tenant_token portion + "", # empty (defense in depth; exporter rejects this earlier) + ], +) +def test_envelope_ikey_rejects_malformed(bad_ikey): + with pytest.raises(ValueError, match="tenant_token"): + _envelope_ikey(bad_ikey) diff --git a/tests/unit/telemetry/test_cache_integration.py b/tests/unit/telemetry/test_cache_integration.py index 2d7fdc861..f3a8ed11b 100644 --- a/tests/unit/telemetry/test_cache_integration.py +++ b/tests/unit/telemetry/test_cache_integration.py @@ -53,7 +53,7 @@ def test_network_failure_persists_envelopes_to_cache(cache, cache_path): """Process 1: net is down. The exporter must write the envelope to disk so process 2 can recover it.""" exporter = OneCollectorLogExporter( - ikey="o:abc", + ikey="abc-def", endpoint="https://example.invalid/", cache=cache, ) @@ -81,7 +81,7 @@ def test_next_process_flushes_cached_envelopes_on_first_export(cache, cache_path assert cache_path.exists() exporter = OneCollectorLogExporter( - ikey="o:abc", + ikey="abc-def", endpoint="https://example.invalid/", cache=cache, ) @@ -110,7 +110,7 @@ def test_cache_flush_only_runs_on_first_export(cache): cache.append([{"name": "stale", "iKey": "o:abc"}]) exporter = OneCollectorLogExporter( - ikey="o:abc", + ikey="abc-def", endpoint="https://example.invalid/", cache=cache, ) @@ -133,7 +133,7 @@ def test_cached_envelopes_re_persisted_on_recovery_failure(cache, cache_path): cache.append(seeded) exporter = OneCollectorLogExporter( - ikey="o:abc", + ikey="abc-def", endpoint="https://example.invalid/", cache=cache, ) diff --git a/tests/unit/telemetry/test_telemetry_init.py b/tests/unit/telemetry/test_telemetry_init.py index 91eca6497..3e8b1a6af 100644 --- a/tests/unit/telemetry/test_telemetry_init.py +++ b/tests/unit/telemetry/test_telemetry_init.py @@ -24,7 +24,9 @@ def test_empty_ikey_makes_telemetry_disabled(clean_env, isolated_config, monkeyp def test_consent_disabled_makes_telemetry_disabled(clean_env, isolated_config, monkeypatch): - monkeypatch.setattr("winml.modelkit.telemetry.constants.INSTRUMENTATION_KEY", "o:test-key") + monkeypatch.setattr( + "winml.modelkit.telemetry.constants.INSTRUMENTATION_KEY", "test-tenant-1234" + ) consent_mod._write_stored_consent("disabled") monkeypatch.setattr("sys.stdin.isatty", lambda: True) t = Telemetry.get_or_init() @@ -70,7 +72,9 @@ def test_init_swallows_resource_build_errors(clean_env, isolated_config, monkeyp rather than raise. Without this guard a registry permission error or transient OS failure would crash every CLI invocation. """ - monkeypatch.setattr("winml.modelkit.telemetry.constants.INSTRUMENTATION_KEY", "o:test-key") + monkeypatch.setattr( + "winml.modelkit.telemetry.constants.INSTRUMENTATION_KEY", "test-tenant-1234" + ) consent_mod._write_stored_consent("enabled") monkeypatch.setattr("sys.stdin.isatty", lambda: True)