Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add tracing with OpenTelemetry #479

Draft
wants to merge 8 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions scripts/run_local_otel_collector.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#!/usr/bin/env bash
#
# Start a local OpenTelemetry Collector in a Docker container and tail its logs.
#
# Requirements:
# - Docker, and Application Default Credentials fetched with
#   `gcloud auth login --update-adc`.
# - `traces.yaml` in the current working directory (it is bind-mounted as the
#   Collector configuration).
# - `OTEL_BACKEND_PROJECT_ID` defined prior to running this script, for example
#   in `.env`, next to the other variables.
set -x
set -eo pipefail

# If OpenTelemetry Collector containers are already running, kill them.
RUNNING_OTEL_COLLECTOR_CONTAINER=$(docker ps --filter 'name=otel_collector' --format '{{.ID}}')
if [[ -n ${RUNNING_OTEL_COLLECTOR_CONTAINER} ]]; then
  # Intentionally unquoted: `docker ps` prints one ID per line, and each ID must
  # reach `docker kill` as a separate argument when several containers match.
  # shellcheck disable=SC2086
  docker kill ${RUNNING_OTEL_COLLECTOR_CONTAINER}
fi

# Run the OpenTelemetry Collector in a new container.
# The `--mount` and `--env` options mount the local Application Default Credentials fetched after
# `gcloud auth login --update-adc` to ensure the OpenTelemetry Collector is able to authenticate to
# GCP from inside the Docker container. The `--user` option overrides the ID of the default user
# running the processes inside the container, to ensure `/etc/gcp/creds.json` is readable. Otherwise
# the default user of the container almost certainly has a different ID than the local user on the
# Docker host, and `~/.config/gcloud/application_default_credentials.json` can only be read by the
# current local user (600 permissions = rw-------). Finally, the `OTEL_BACKEND_PROJECT_ID` environment
# variable, used in `config.yaml`, tells the OpenTelemetry Collector which GCP project to export to.
# Published ports:
# - 4317 = gRPC
# - 4318 = HTTP
#
# `id -u` is the POSIX spelling; the GNU-only `id --user` fails on BSD/macOS.
# Expansions are quoted so paths containing spaces do not split into several arguments.
# `docker run --detach` prints the new container ID, which is captured to follow the logs of
# exactly this container, even if it exits right after starting.
OTEL_COLLECTOR_CONTAINER=$(
  docker run \
    --mount "type=bind,source=${HOME}/.config/gcloud/application_default_credentials.json,target=/etc/gcp/creds.json,readonly" \
    --env GOOGLE_APPLICATION_CREDENTIALS=/etc/gcp/creds.json \
    --user "$(id -u)" \
    --mount "type=bind,source=$(pwd)/traces.yaml,target=/etc/otelcol-contrib/config.yaml,readonly" \
    --env OTEL_BACKEND_PROJECT_ID \
    --publish 4317:4317 \
    --publish 4318:4318 \
    --name "otel_collector_$(date '+%s')" \
    --detach \
    otel/opentelemetry-collector-contrib
)

# Configure the SLO Generator to send traces to the OpenTelemetry Collector...
# NOTE: an `export` made by an executed script does not reach the calling shell. Also set
# `SEND_TRACES_TO_OTLP_EXPORTER=1` in the shell (or `.env`) that runs the SLO Generator.
export SEND_TRACES_TO_OTLP_EXPORTER=1

# ... and tail the OpenTelemetry Collector logs.
docker logs --follow "${OTEL_COLLECTOR_CONTAINER}"
26 changes: 26 additions & 0 deletions scripts/traces.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# OpenTelemetry Collector configuration: receive traces over OTLP and export
# them to Google Cloud.
receivers:
  otlp:
    protocols:
      # Listen on all interfaces. Recent Collector releases default to
      # `localhost`, which is not reachable through ports published by Docker.
      grpc:
        endpoint: 0.0.0.0:4317
      http:
        endpoint: 0.0.0.0:4318

processors:
  resourcedetection:
    detectors: [env, gcp]
    timeout: 2s
    override: false

exporters:
  googlecloud:
    # Target GCP project, read from the Collector's environment.
    project: ${env:OTEL_BACKEND_PROJECT_ID}

extensions:
  health_check:

service:
  extensions: [health_check]
  pipelines:
    traces:
      receivers: [otlp]
      processors: [resourcedetection]
      exporters: [googlecloud]
3 changes: 3 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ install_requires =
click
setuptools >=65.5.1 # https://pyup.io/v/52495/f17 (reported by `safety check`)
certifi >=2023.07.22 # avoid CVE-2023-37920 (reported by `safety check`)
opentelemetry-api==1.24.0 # required to initialize OpenTelemetry
opentelemetry-sdk==1.24.0 # required to instrument the code
opentelemetry-exporter-otlp==1.24.0 # required to export to OpenTelemetry Collectors

[options.packages.find]
exclude =
Expand Down
6 changes: 6 additions & 0 deletions slo_generator/api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

import requests
from flask import jsonify, make_response
from opentelemetry import trace

from slo_generator.compute import compute, export
from slo_generator.utils import get_exporters, load_config, setup_logging
Expand All @@ -36,7 +37,10 @@
API_SIGNATURE_TYPE = os.environ["GOOGLE_FUNCTION_SIGNATURE_TYPE"]
setup_logging()

tracer = trace.get_tracer(__name__)


@tracer.start_as_current_span("run_compute")
def run_compute(request):
"""Run slo-generator compute function. Can be configured to export data as
well, using the `exporters` key of the SLO config.
Expand Down Expand Up @@ -79,6 +83,7 @@ def run_compute(request):
return reports


@tracer.start_as_current_span("run_export")
def run_export(request):
"""Run slo-generator export function. Get the SLO report data from a request
object.
Expand Down Expand Up @@ -142,6 +147,7 @@ def run_export(request):
return errors


@tracer.start_as_current_span("process_req")
def process_req(request):
"""Process incoming request.

Expand Down
30 changes: 27 additions & 3 deletions slo_generator/backends/cloud_monitoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,14 @@
from collections import OrderedDict

from google.cloud import monitoring_v3
from opentelemetry import trace

from slo_generator.constants import NO_DATA

LOGGER = logging.getLogger(__name__)

tracer = trace.get_tracer(__name__)


class CloudMonitoringBackend:
"""Backend for querying metrics from Cloud Monitoring.
Expand All @@ -38,12 +41,15 @@ class CloudMonitoringBackend:
if omitted.
"""

@tracer.start_as_current_span("CloudMonitoringBackend")
def __init__(self, project_id, client=None):
    """Set up the Cloud Monitoring client and the project path.

    Args:
        project_id: Project identifier passed to the client's
            `common_project_path` to build `self.parent`.
        client: Optional pre-built client. When `None`, a new
            `monitoring_v3.MetricServiceClient` is instantiated.
    """
    # Resolve the client exactly once, inside its own span, so that the
    # cost of instantiating it shows up separately in the trace.
    with tracer.start_as_current_span("Instantiate client"):
        self.client = client
        if client is None:
            self.client = monitoring_v3.MetricServiceClient()
    self.parent = self.client.common_project_path(project_id)

@tracer.start_as_current_span("good_bad_ratio")
def good_bad_ratio(self, timestamp, window, slo_config):
"""Query two timeseries, one containing 'good' events, one containing
'bad' events.
Expand Down Expand Up @@ -94,8 +100,17 @@ def good_bad_ratio(self, timestamp, window, slo_config):
f"Good events: {good_event_count} | " f"Bad events: {bad_event_count}"
)

trace.get_current_span().add_event(
"count",
{
"good": good_event_count,
"bad": bad_event_count,
},
)

return good_event_count, bad_event_count

@tracer.start_as_current_span("distribution_cut")
def distribution_cut(self, timestamp, window, slo_config):
"""Query one timeseries of type 'exponential'.

Expand Down Expand Up @@ -158,8 +173,16 @@ def distribution_cut(self, timestamp, window, slo_config):
good_event_count = upper_events_count
bad_event_count = lower_events_count

trace.get_current_span().add_event(
"count",
{
"good": good_event_count,
"bad": bad_event_count,
},
)
return good_event_count, bad_event_count

@tracer.start_as_current_span("exponential_distribution_cut")
def exponential_distribution_cut(self, *args, **kwargs):
"""Alias for `distribution_cut` method to allow for backwards
compatibility.
Expand All @@ -172,6 +195,7 @@ def exponential_distribution_cut(self, *args, **kwargs):
)
return self.distribution_cut(*args, **kwargs)

@tracer.start_as_current_span("query")
def query( # noqa: PLR0913
self,
timestamp,
Expand Down
28 changes: 27 additions & 1 deletion slo_generator/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,43 @@
from pathlib import Path

import click
from opentelemetry import trace
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from pkg_resources import get_distribution

from slo_generator import utils
from slo_generator.compute import compute as _compute
from slo_generator.constants import LATEST_MAJOR_VERSION
from slo_generator.constants import LATEST_MAJOR_VERSION, SEND_TRACES_TO_OTLP_EXPORTER
from slo_generator.migrations import migrator

sys.path.append(os.getcwd()) # dynamic backend loading

# logging.basicConfig(level=logging.INFO)
LOGGER = logging.getLogger(__name__)

# --- Tracing with OpenTelemetry ---
# TODO Refactor to `utils.setup_tracing()`, similar to `utils.setup_logging()`?

# Resource attributes identify this service in the exported traces.
resource = Resource(attributes={"service.name": "slo-generator"})

# The provider is always installed globally (last statement below); spans are
# only exported when the user opts in through `SEND_TRACES_TO_OTLP_EXPORTER`.
provider = TracerProvider(resource=resource)

if SEND_TRACES_TO_OTLP_EXPORTER:
    # Ended spans are batched before being pushed to the OTLP endpoint.
    processor = BatchSpanProcessor(
        OTLPSpanExporter(
            endpoint="http://localhost:4317",
            insecure=True,
        )
    )
    provider.add_span_processor(processor)

# Make this provider the global default tracer provider.
trace.set_tracer_provider(provider)


@click.group(invoke_without_command=True)
@click.option(
Expand Down
6 changes: 6 additions & 0 deletions slo_generator/compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,18 @@
import time
from typing import Optional

from opentelemetry import trace

from slo_generator import constants, utils
from slo_generator.migrations.migrator import report_v2tov1
from slo_generator.report import SLOReport

LOGGER = logging.getLogger(__name__)

tracer = trace.get_tracer(__name__)


@tracer.start_as_current_span("compute")
def compute( # noqa: PLR0913
slo_config: dict,
config: dict,
Expand Down Expand Up @@ -95,6 +100,7 @@ def compute( # noqa: PLR0913
return reports


@tracer.start_as_current_span("export")
def export(data: dict, exporters: list, raise_on_error: bool = False) -> list:
"""Export data using selected exporters.

Expand Down
3 changes: 3 additions & 0 deletions slo_generator/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@
COLORED_OUTPUT: int = int(os.environ.get("COLORED_OUTPUT", "0"))
DRY_RUN: bool = bool(int(os.environ.get("DRY_RUN", "0")))
DEBUG: int = int(os.environ.get("DEBUG", "0"))
SEND_TRACES_TO_OTLP_EXPORTER: bool = bool(
int(os.environ.get("SEND_TRACES_TO_OTLP_EXPORTER", "0"))
)

# Exporters supporting v2 SLO report format
V2_EXPORTERS: tuple[str, ...] = ("Pubsub", "Cloudevent")
Expand Down
7 changes: 7 additions & 0 deletions slo_generator/exporters/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,12 @@
import warnings
from abc import ABCMeta, abstractmethod

from opentelemetry import trace

LOGGER = logging.getLogger(__name__)

tracer = trace.get_tracer(__name__)

# Default metric labels exported by all metrics exporters
DEFAULT_METRIC_LABELS = [
"error_budget_policy_step_name",
Expand Down Expand Up @@ -67,6 +71,7 @@ class MetricsExporter: # pytype: disable=ignored-metaclass

__metaclass__ = ABCMeta # pytype: disable=ignored-metaclass

@tracer.start_as_current_span("export")
def export(self, data, **config):
"""Export metric data. Loops through metrics config and calls the child
class `export_metric` method.
Expand Down Expand Up @@ -104,6 +109,7 @@ class `export_metric` method.
metric = self.build_metric(data, metric)
self.export_metric(metric)

@tracer.start_as_current_span("build_metric")
def build_metric(self, data, metric):
"""Build a metric from current data and metric configuration.
Set the metric value labels and eventual alias.
Expand Down Expand Up @@ -140,6 +146,7 @@ def build_metric(self, data, metric):
metric["description"] = metric.get("description", "")
return metric

@tracer.start_as_current_span("build_data_labels")
@staticmethod
def build_data_labels(data, labels):
"""Build data labels. Also handle nested labels (depth=1).
Expand Down
6 changes: 6 additions & 0 deletions slo_generator/exporters/cloud_monitoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,21 +19,26 @@
import logging

from google.cloud import monitoring_v3
from opentelemetry import trace

from .base import MetricsExporter

LOGGER = logging.getLogger(__name__)

tracer = trace.get_tracer(__name__)


class CloudMonitoringExporter(MetricsExporter):
"""Cloud Monitoring exporter class."""

METRIC_PREFIX = "custom.googleapis.com/"
REQUIRED_FIELDS = ["project_id"]

@tracer.start_as_current_span("CloudMonitoringExporter")
def __init__(self):
    """Instantiate the `MetricServiceClient` and store it on `self.client`."""
    self.client = monitoring_v3.MetricServiceClient()

@tracer.start_as_current_span("export_metric")
def export_metric(self, data: dict):
"""Export metric to Cloud Monitoring.

Expand All @@ -45,6 +50,7 @@ def export_metric(self, data: dict):
"""
self.create_timeseries(data)

@tracer.start_as_current_span("create_timeseries")
def create_timeseries(self, data: dict):
"""Create Cloud Monitoring timeseries.

Expand Down
Loading
Loading