Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
817d36f
chore: add opentelemetry-sdk dependency for telemetry phase 1
timenick Apr 17, 2026
5a802bc
feat(telemetry): add Common Schema 4.0 envelope serializer
timenick Apr 17, 2026
350b29f
refactor(telemetry): mark envelope helpers as private + restore unico…
timenick Apr 17, 2026
bb5bd62
feat(telemetry): add OneCollectorLogExporter with CS 4.0 serialization
timenick Apr 17, 2026
209ec3d
refactor(telemetry): address exporter review findings (tests, dedup h…
timenick Apr 17, 2026
8e13888
feat(telemetry): expose create_logger_provider factory from library/
timenick Apr 17, 2026
89e5510
feat(telemetry): add cross-platform _store with registry + HOME fallback
timenick Apr 17, 2026
71402b0
refactor(telemetry): narrow registry error swallow + atomic file writ…
timenick Apr 17, 2026
7c05727
feat(telemetry): add device ID generation with EXISTING/NEW/FAILED st…
timenick Apr 17, 2026
4175c4a
feat(telemetry): expose get_or_create_device_id from deviceid/
timenick Apr 17, 2026
87acfb9
Merge remote-tracking branch 'origin/main' into zhiwang/telemetry-phase1
timenick Apr 17, 2026
a5b83a6
refactor(telemetry): address final review findings (public import, ik…
timenick Apr 17, 2026
ee37386
chore(telemetry): add explanatory comments to empty except clauses (C…
timenick Apr 17, 2026
c88999c
refactor(telemetry): drop non-Windows parity code from deviceid store
timenick Apr 17, 2026
b8c884e
refactor(telemetry): address PR 367 review feedback
timenick Apr 17, 2026
db86cc7
Merge branch 'main' into zhiwang/telemetry-phase1
timenick Apr 20, 2026
24dbadc
Merge branch 'main' into zhiwang/telemetry-phase1
timenick Apr 21, 2026
cba3b9f
test(telemetry): shut down LoggerProvider in factory tests
timenick Apr 21, 2026
f954e00
Merge remote-tracking branch 'origin/main' into zhiwang/telemetry-phase1
timenick Apr 21, 2026
a4d7de2
Merge remote-tracking branch 'origin/zhiwang/telemetry-phase1' into z…
timenick Apr 21, 2026
b4202b1
refactor(telemetry): introduce IdStatus enum and drop unused base-dir…
timenick Apr 21, 2026
8b426ad
Merge remote-tracking branch 'origin/main' into zhiwang/telemetry-phase1
timenick Apr 21, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ dependencies = [
"onnx-ir>=0.1",
"onnxruntime-windowsml>=1.23.2,<1.24.0",
"onnxscript>=0.2",
"opentelemetry-sdk>=1.39.1",
"optimum>=2",
"optimum-onnx>=0.1",
"plotext>=5.3.2",
Expand Down
4 changes: 4 additions & 0 deletions src/winml/modelkit/telemetry/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
11 changes: 11 additions & 0 deletions src/winml/modelkit/telemetry/deviceid/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------

"""Device ID: a stable SHA256-hashed UUID4 persisted per user."""

from .deviceid import IdStatus, get_or_create_device_id


__all__ = ["IdStatus", "get_or_create_device_id"]
52 changes: 52 additions & 0 deletions src/winml/modelkit/telemetry/deviceid/_store.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------

r"""Windows-registry-backed storage for telemetry's small key-value entries.

Location: ``HKCU\SOFTWARE\Microsoft\DeveloperTools\.modelkit``. Values are
``REG_SZ`` strings; non-string registry types are treated as absent on
read.
"""

from __future__ import annotations


_REGISTRY_KEY = r"SOFTWARE\Microsoft\DeveloperTools\.modelkit"


def read_key(name: str) -> str | None:
"""Return the stored ``REG_SZ`` value, or ``None`` if absent or non-string."""
import winreg

try:
with winreg.OpenKey(winreg.HKEY_CURRENT_USER, _REGISTRY_KEY) as key:
Comment thread
timenick marked this conversation as resolved.
value, value_type = winreg.QueryValueEx(key, name)
except FileNotFoundError:
return None
if value_type != winreg.REG_SZ:
return None
return value # already str for REG_SZ


def write_key(name: str, value: str) -> None:
"""Persist ``value`` as a ``REG_SZ`` under ``name``."""
import winreg

with winreg.CreateKeyEx(winreg.HKEY_CURRENT_USER, _REGISTRY_KEY) as key:
winreg.SetValueEx(key, name, 0, winreg.REG_SZ, value)


def delete_key(name: str) -> None:
"""Remove the stored value for ``name``. Idempotent."""
import winreg

try:
with winreg.OpenKey(
winreg.HKEY_CURRENT_USER, _REGISTRY_KEY, 0, winreg.KEY_SET_VALUE
) as key:
winreg.DeleteValue(key, name)
except FileNotFoundError:
Comment thread
github-advanced-security[bot] marked this conversation as resolved.
Fixed
# Value or key already absent — delete is idempotent.
return
66 changes: 66 additions & 0 deletions src/winml/modelkit/telemetry/deviceid/deviceid.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------

"""Stable-per-device ID used as the telemetry device_id.

Derived from a random UUID4, hashed with SHA256, and persisted so the same
machine reports the same id across sessions. Users can reset by removing the
stored value (registry on Windows, state file elsewhere).
"""

from __future__ import annotations

import hashlib
import logging
import uuid
from enum import Enum

from . import _store


_LOGGER = logging.getLogger(__name__)
_STORAGE_KEY = "deviceid"


class IdStatus(str, Enum):
"""Outcome of :func:`get_or_create_device_id`.

Subclassing ``str`` keeps the enum serialization-compatible with
OpenTelemetry resource attributes (which require str values) and
with the CS 4.0 ``ext.device.authId`` slot on the wire.
"""

EXISTING = "EXISTING"
NEW = "NEW"
FAILED = "FAILED"


def get_or_create_device_id() -> tuple[str, IdStatus]:
"""Return ``(device_id, status)``.

- :attr:`IdStatus.EXISTING`: read from persistent storage
- :attr:`IdStatus.NEW`: freshly generated and persisted
- :attr:`IdStatus.FAILED`: storage unavailable; caller should proceed with empty id
"""
try:
existing = _store.read_key(_STORAGE_KEY)
except Exception: # defensive: any storage error means we treat as fresh
_LOGGER.debug("deviceid read failed", exc_info=True)
existing = None

if existing:
return existing, IdStatus.EXISTING

new_id = _hash_uuid(uuid.uuid4())
try:
_store.write_key(_STORAGE_KEY, new_id)
except Exception:
_LOGGER.debug("deviceid write failed", exc_info=True)
return "", IdStatus.FAILED
return new_id, IdStatus.NEW


def _hash_uuid(value: uuid.UUID) -> str:
return hashlib.sha256(str(value).encode("utf-8")).hexdigest()
40 changes: 40 additions & 0 deletions src/winml/modelkit/telemetry/library/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------

"""Public surface for the OneCollector client.

Consumers (the rest of the telemetry module) only import from this file.
Everything else in `library/` is private implementation detail.
"""

from __future__ import annotations

from opentelemetry.sdk._logs import LoggerProvider
from opentelemetry.sdk._logs.export import BatchLogRecordProcessor
from opentelemetry.sdk.resources import Resource

from .exporter import OneCollectorLogExporter


__all__ = ["DEFAULT_ENDPOINT", "OneCollectorLogExporter", "create_logger_provider"]

DEFAULT_ENDPOINT = "https://mobile.events.data.microsoft.com/OneCollector/1.0/"


def create_logger_provider(
ikey: str,
endpoint: str = DEFAULT_ENDPOINT,
resource: Resource | None = None,
) -> LoggerProvider:
"""Create a LoggerProvider wired to a OneCollectorLogExporter.

The returned provider's `get_logger(name)` produces loggers whose
`.emit(LogRecord(...))` calls are batched and sent to the OneCollector
endpoint. Shutdown the provider to flush pending events.
"""
provider = LoggerProvider(resource=resource or Resource.get_empty())
exporter = OneCollectorLogExporter(ikey=ikey, endpoint=endpoint)
provider.add_log_record_processor(BatchLogRecordProcessor(exporter))
return provider
174 changes: 174 additions & 0 deletions src/winml/modelkit/telemetry/library/exporter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------

"""OneCollector log exporter: serializes OTel log records as CS 4.0 envelopes and POSTs them."""

from __future__ import annotations

import logging
from datetime import datetime, timezone
from typing import TYPE_CHECKING

import requests
from opentelemetry.sdk._logs.export import LogRecordExporter, LogRecordExportResult

from .serialization import _build_envelope, _serialize_batch


if TYPE_CHECKING:
from collections.abc import Sequence

from opentelemetry.sdk._logs import ReadableLogRecord

_LOGGER = logging.getLogger(__name__)

_HTTP_TIMEOUT = 10.0


class OneCollectorLogExporter(LogRecordExporter):
"""Post Common Schema 4.0 event envelopes to the OneCollector endpoint."""

def __init__(self, ikey: str, endpoint: str) -> None:
# Fail loudly rather than silently POST ``{"iKey": ""}`` to the
# endpoint. In dev installs ``constants.INSTRUMENTATION_KEY`` is
# empty; the Telemetry singleton guards against that, and this
# second guard keeps the invariant a property of the library
# itself (defense in depth).
if not ikey:
raise ValueError("ikey must be non-empty")
if not endpoint:
raise ValueError("endpoint must be non-empty")
self._ikey = ikey
self._endpoint = endpoint
# _shutdown is read on the BatchLogRecordProcessor export thread and
# written on the shutdown thread; bool assignment is atomic under the
# CPython GIL, so no lock is needed.
self._shutdown = False
# Close the session if post-creation setup fails, so we never leak a
# Session object (and its connection pool) when init raises.
session = requests.Session()
try:
session.headers.update(
{
"Content-Type": "application/json; charset=utf-8",
}
)
except Exception:
session.close()
raise
self._session = session

def export(self, batch: Sequence[ReadableLogRecord]) -> LogRecordExportResult:
"""Serialize *batch* and POST to the configured OneCollector endpoint.

Retries are **not** implemented at this layer; the upstream
``BatchLogRecordProcessor`` re-queues batches for which ``export``
returns ``FAILURE``.
"""
if self._shutdown or not batch:
return LogRecordExportResult.SUCCESS

try:
envelopes = [self._to_envelope(ld) for ld in batch]
body = _serialize_batch(envelopes)
except Exception:
_LOGGER.debug("telemetry serialization failed", exc_info=True)
return LogRecordExportResult.FAILURE

try:
response = self._session.post(
self._endpoint,
data=body,
timeout=_HTTP_TIMEOUT,
)
except (requests.ConnectionError, requests.Timeout):
_LOGGER.debug("telemetry network failure", exc_info=True)
return LogRecordExportResult.FAILURE

if 200 <= response.status_code < 300:
return LogRecordExportResult.SUCCESS
_LOGGER.debug("telemetry backend returned %s", response.status_code)
return LogRecordExportResult.FAILURE

def force_flush(self, timeout_millis: int = 30_000) -> bool:
"""No-op: all exports are synchronous."""
return True

def shutdown(self) -> None:
"""Mark exporter as shut down and close the underlying HTTP session."""
self._shutdown = True
try:
self._session.close()
except Exception:
_LOGGER.debug("session close failed", exc_info=True)

# --- internal ---

def _to_envelope(self, ld: ReadableLogRecord) -> dict:
record = ld.log_record
timestamp = _ns_to_datetime(record.timestamp)
data = dict(record.attributes or {})
ext = _resource_to_ext(ld.resource)
return _build_envelope(
name=str(record.body),
ikey=self._ikey,
timestamp=timestamp,
data=data,
ext=ext,
)


def _ns_to_datetime(ts_ns: int) -> datetime:
return datetime.fromtimestamp(ts_ns / 1_000_000_000, tz=timezone.utc)


def _resource_to_ext(resource) -> dict:
"""Translate OpenTelemetry Resource attributes to CS 4.0 ext.* slots.

Attribute name → CS slot mapping:
device_id → ext.device.localId
id_status → ext.device.authId
os.arch → ext.device.deviceClass
os.name → ext.os.name
os.version → ext.os.ver
os.release → ext.os.release
app_version → ext.app.ver
app_instance_id → ext.app.sesId
initTs → ext.app.initTs
"""
if resource is None:
return {}
attrs = dict(resource.attributes or {})
ext: dict[str, dict] = {}
device: dict = {}
os_: dict = {}
app: dict = {}

if "device_id" in attrs:
device["localId"] = attrs["device_id"]
if "id_status" in attrs:
device["authId"] = attrs["id_status"]
if "os.arch" in attrs:
device["deviceClass"] = attrs["os.arch"]
if "os.name" in attrs:
os_["name"] = attrs["os.name"]
if "os.version" in attrs:
os_["ver"] = attrs["os.version"]
if "os.release" in attrs:
os_["release"] = attrs["os.release"]
if "app_version" in attrs:
app["ver"] = attrs["app_version"]
if "app_instance_id" in attrs:
app["sesId"] = attrs["app_instance_id"]
if "initTs" in attrs:
app["initTs"] = attrs["initTs"]

if device:
ext["device"] = device
if os_:
ext["os"] = os_
if app:
ext["app"] = app
return ext
Loading
Loading