From 2a29328a585ccafad2c7a98ad212c071c4534409 Mon Sep 17 00:00:00 2001 From: Zhipeng Wang Date: Fri, 15 May 2026 15:33:09 +0800 Subject: [PATCH] fix(telemetry): drop undocumented ext.os.release / ext.app.initTs OneCollector rejected every batch with `{"acc":0,"efi":{"InvalidEventFormat":"all"}}` because the envelope carried two fields that aren't part of the documented CS 4.0 extension slots: - ext.os.release (the documented ext.os keys are name, ver, bootId) - ext.app.initTs (the documented ext.app keys are name, ver, id, iid, expId, userId, sesId, env, asId, locale, tz) A live probe against the production ingest confirmed the diagnosis: removing either field flips a 400 back to acc:1; either field on its own is enough to fail the whole batch. `_resource_to_ext()` no longer maps these attributes, and `_build_resource()` no longer puts them in the Resource. The `_init_ts` instance attribute is dropped too (unused after the mapping removal). Unit tests are updated to assert the trimmed envelope, plus a regression test that fails fast if anyone re-introduces either mapping. Closes #635 --- .../modelkit/telemetry/library/exporter.py | 11 ++-- src/winml/modelkit/telemetry/telemetry.py | 3 -- tests/unit/telemetry/library/test_exporter.py | 50 ++++++++++++++++--- 3 files changed, 49 insertions(+), 15 deletions(-) diff --git a/src/winml/modelkit/telemetry/library/exporter.py b/src/winml/modelkit/telemetry/library/exporter.py index 49e86c076..7a8600931 100644 --- a/src/winml/modelkit/telemetry/library/exporter.py +++ b/src/winml/modelkit/telemetry/library/exporter.py @@ -275,10 +275,13 @@ def _resource_to_ext(resource) -> dict: os.arch → ext.device.deviceClass os.name → ext.os.name os.version → ext.os.ver - os.release → ext.os.release app_version → ext.app.ver app_instance_id → ext.app.sesId - initTs → ext.app.initTs + + `os.release` and `initTs` are intentionally NOT mapped: neither field + exists in the documented CS 4.0 `ext.os` / `ext.app` slots, and + including them causes OneCollector to reject the entire batch with + ``{"acc":0,"efi":{"InvalidEventFormat":"all"}}``. """ if resource is None: return {} @@ -298,14 +301,10 @@ def _resource_to_ext(resource) -> dict: os_["name"] = attrs["os.name"] if "os.version" in attrs: os_["ver"] = attrs["os.version"] - if "os.release" in attrs: - os_["release"] = attrs["os.release"] if "app_version" in attrs: app["ver"] = attrs["app_version"] if "app_instance_id" in attrs: app["sesId"] = attrs["app_instance_id"] - if "initTs" in attrs: - app["initTs"] = attrs["initTs"] if device: ext["device"] = device diff --git a/src/winml/modelkit/telemetry/telemetry.py b/src/winml/modelkit/telemetry/telemetry.py index 565b77926..ddc74eb8b 100644 --- a/src/winml/modelkit/telemetry/telemetry.py +++ b/src/winml/modelkit/telemetry/telemetry.py @@ -91,7 +91,6 @@ def __init__(self) -> None: self._logger = None # set when enabled; None when disabled self._provider = None self._disabled = True # set to False only after successful init - self._init_ts = time.time() self._app_instance_id = str(uuid.uuid4()) # Kept in the event schema for forward-compat: today # `consent.resolve_consent()` returns "disabled" for non-TTY, so @@ -163,11 +162,9 @@ def _build_resource(self): "id_status": id_status, "os.name": uname.system, "os.version": uname.version, - "os.release": uname.release, "os.arch": uname.machine, "app_version": app_version, "app_instance_id": self._app_instance_id, - "initTs": self._init_ts, } ) diff --git a/tests/unit/telemetry/library/test_exporter.py b/tests/unit/telemetry/library/test_exporter.py index 6d0209118..db263569b 100644 --- a/tests/unit/telemetry/library/test_exporter.py +++ b/tests/unit/telemetry/library/test_exporter.py @@ -141,6 +141,12 @@ def test_export_4xx_5xx_returns_failure(exporter): def test_export_translates_resource_to_ext(exporter): + # Resource carries the legacy `os.release` and `initTs` attributes that + # an older version of the exporter mapped into the envelope. The + # OneCollector CS 4.0 backend rejects any envelope that includes those + # fields (they are not part of the documented `ext.os` / `ext.app` + # slots) with `InvalidEventFormat: all`. The exporter must NOT translate + # them, even if they appear in the Resource. resource = Resource.create( { "device_id": "hash-abc", @@ -176,12 +182,44 @@ def fake_post(url, data=None, headers=None, timeout=None): "authId": "EXISTING", "deviceClass": "AMD64", } - assert ext["os"] == {"name": "Windows", "ver": "10.0.26200", "release": "11"} - assert ext["app"] == { - "ver": "0.0.1", - "sesId": "sesid-xyz", - "initTs": 1712678400.0, - } + assert ext["os"] == {"name": "Windows", "ver": "10.0.26200"} + assert ext["app"] == {"ver": "0.0.1", "sesId": "sesid-xyz"} + + +def test_export_omits_undocumented_cs40_ext_fields(exporter): + """Regression guard for https://github.com/microsoft/winml-cli/issues/635. + + `ext.os.release` and `ext.app.initTs` are not part of the documented + CS 4.0 `os` / `app` extension slots. Including them causes the + OneCollector backend to reject the entire batch with + ``{"acc":0,"efi":{"InvalidEventFormat":"all"}}``. This test fails + fast if anyone re-introduces either mapping in + :func:`_resource_to_ext`. + """ + resource = Resource.create( + { + "os.name": "Windows", + "os.release": "11", + "app_version": "0.0.1", + "initTs": 1712678400.0, + } + ) + ld = _make_log_data("WinMLCLIHeartbeat", {}, resource=resource) + + captured = {} + + def fake_post(url, data=None, **_kw): + captured["data"] = data + return MagicMock(status_code=200) + + with patch.object(exporter._session, "post", side_effect=fake_post): + exporter.export([ld]) + + import json + + envelope = json.loads(captured["data"].splitlines()[0]) + assert "release" not in envelope["ext"].get("os", {}) + assert "initTs" not in envelope["ext"].get("app", {}) def test_export_serializes_multiple_envelopes_as_ndjson(exporter):