Enabling and Using Metrics Functionality
Introduction
On this page, we will enable and use the OpenTelemetry Metrics functionality. Once again, we will use the UIP VSCode Plugin to test our changes, for now.
It is assumed that the OpenTelemetry Collector is properly configured to accept metric data and expose it as a Prometheus endpoint.
Step 1 - Enabling Metrics
Go ahead and open configurations.yml and edit the properties block as shown:
# configurations.yml — nesting reconstructed; TODO confirm against your plugin's file
properties:
  agent:
    log_level: Info
    netname: UIP-DBG-01
  otel:
    enable_tracing: true
    # Enables exporting of OpenTelemetry metrics (the change made in this step).
    export_metrics: true
    trace_endpoint: http://192.168.56.11:4318
    # OTLP endpoint of the OpenTelemetry Collector — adjust for your setup.
    metrics_endpoint: http://192.168.56.11:4318
    service_name: vscode-uip-debugger
    uip_service_name: uip/${extension_name}
api:
  extension_start:
    - name: es1
      log_level: Inherited
      runtime_dir: /home/shrey/dev/extensions/test/OtelDemoTest
      fields:
        src_folder: /tmp/test_src
        dst_folder: /tmp/test_dst
        file_type:
          - txt
The export_metrics property was set to true and the metrics_endpoint was changed to the OpenTelemetry Collector URL (it will need to be changed according to your setup).
Enabling in UA
Similar properties exist in uags.conf and omss.conf that can be used to enable metrics export in the Agent. See OTEL_EXPORT_METRICS - UAG configuration option and OTEL_EXPORT_METRICS - OMS configuration option.
Step 2 – Adding Custom Metrics
Let's add some custom metrics. Go ahead and update extension.py as follows:
from __future__ import print_function
from universal_extension import UniversalExtension
from universal_extension import ExtensionResult
from universal_extension import ui
from universal_extension import logger
from universal_extension import utility
from universal_extension import otel
import time
import shutil
import os
import random
import json
if otel.is_compatible:
from opentelemetry import trace
from opentelemetry import metrics
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.view import (
ExplicitBucketHistogramAggregation,
View
)
from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
from opentelemetry.exporter.otlp.proto.http.metric_exporter import (
OTLPMetricExporter,
)
class Extension(UniversalExtension):
    """Demo Universal Extension that copies files from a source folder to a
    destination folder while emitting OpenTelemetry traces and two custom
    metrics (a transfer counter and a transfer-duration histogram)."""

    def __init__(self):
        """Initializes an instance of the 'Extension' class"""
        # Call the base class initializer
        super(Extension, self).__init__()
        # Set by extension_cancel() to stop the transfer loop cooperatively.
        self.stop = False
        self.setup_tracer()
        self.setup_metrics()

    def setup_tracer(self):
        """Create the tracer, or a no-op stand-in when OTel is unavailable."""
        if otel.is_compatible:
            self.tracer = trace.get_tracer(__name__)
        else:
            # NoOp absorbs any method call, so callers never need to branch.
            self.tracer = utility.NoOp()

    def setup_metrics(self):
        """Create the meter and the two custom instruments used below."""
        if otel.is_compatible:
            self.meter = metrics.get_meter(__name__)
        else:
            # No-op meter: create_counter/create_histogram become harmless.
            self.meter = utility.NoOp()
        # Monotonic counter of successfully transferred files, labeled by
        # file extension in transfer_file().
        self.num_files_transferred_cntr = self.meter.create_counter(
            name="num.files.transferred",
            description="Number of files transferred",
        )
        # Histogram of per-file transfer duration in seconds; bucket
        # boundaries are customized in extension_new().
        self.file_transfer_duration = self.meter.create_histogram(
            name="file.transfer.duration",
            description="How long the file took to transfer",
            unit="s",
        )

    @classmethod
    def extension_new(cls, fields):
        """Customize provider initialization (API Level 1.5.0 hook).

        Overrides the default MeterProvider to (a) give the duration
        histogram explicit bucket boundaries suited to 0-2 s transfers and
        (b) shorten the export interval from the 60 s default to 1 s.
        """
        if not otel.is_compatible:
            # No OTel support: fall back to the default configuration.
            return cls.ExtensionConfig()
        return cls.ExtensionConfig(
            meter_provider=MeterProvider(
                views=[
                    View(
                        instrument_name="file.transfer.duration",
                        aggregation=ExplicitBucketHistogramAggregation(
                            (0, 0.5, 1, 1.5, 2, 10)
                        ),
                    )
                ],
                metric_readers=[
                    PeriodicExportingMetricReader(
                        OTLPMetricExporter(), export_interval_millis=1000
                    )
                ],
            )
        )

    def transfer_file(self, src_path, dst_path, span):
        """Copy one file and record metrics; return True on success.

        Returns False (and marks the span as an error) when the file is
        already present in the destination.
        """
        start_time = time.time()
        span.set_attributes({"src_file": src_path, "dst_folder": dst_path})
        # Ensure destination directory exists
        if not os.path.exists(dst_path):
            raise FileNotFoundError(
                "Destination directory ({0}) does not exist".format(dst_path)
            )
        # Ensure the source file is not already present in the destination
        # directory (unless overwrite is selected)
        if os.path.exists(os.path.join(dst_path, os.path.basename(src_path))):
            logger.info(
                "'{0}' already exists in '{1}'".format(
                    os.path.basename(src_path), dst_path
                )
            )
            if otel.is_compatible:
                # Surface the skip as an ERROR status on the current span.
                span.set_status(
                    trace.Status(
                        status_code=trace.StatusCode.ERROR,
                        description="'{0}' already exists in '{1}'".format(
                            os.path.basename(src_path), dst_path
                        ),
                    )
                )
            return False
        shutil.copy(src_path, dst_path)
        # Artificial delay so the demo produces varied histogram samples.
        time.sleep(random.uniform(0, 2))
        # Count the transfer, labeled by file extension (e.g. ".txt").
        self.num_files_transferred_cntr.add(
            1, {"file_type": os.path.splitext(src_path)[1]}
        )
        # Duration includes the copy plus the simulated delay above.
        self.file_transfer_duration.record(time.time() - start_time)
        return True

    def extension_start(self, fields):
        """Required method that serves as the starting point for work performed
        for a task instance.

        Parameters
        ----------
        fields : dict
            populated with field values from the associated task instance
            launched in the Controller

        Returns
        -------
        ExtensionResult
            once the work is done, an instance of ExtensionResult must be
            returned. See the documentation for a full list of parameters that
            can be passed to the ExtensionResult class constructor
        """
        files_transferred = []
        src = fields["src_folder"]
        dst = fields["dst_folder"]
        # Normalize user-supplied extensions to the ".ext" lowercase form.
        file_types = [
            ft.lower() if ft.startswith(".") else "." + ft.lower()
            for ft in fields["file_type"]
        ]
        if not os.path.exists(src):
            raise FileNotFoundError("'{0}' does not exist".format(src))
        all_file_list = os.listdir(src)
        # filter the files
        file_list = []
        for f in all_file_list:
            file_path = os.path.join(src, f)
            file_type = os.path.splitext(file_path)[1]
            if os.path.isfile(file_path) and file_type in file_types:
                file_list.append(file_path)
        logger.info(
            "Found {0} files that can be transferred".format(len(file_list))
        )
        for f in file_list:
            # Honor a cancel request between files.
            if self.stop:
                break
            # Wrap each transfer in a span; use a no-op context manager when
            # OTel is unavailable so the `with` block works either way.
            span_ctx = (
                utility.noop_context()
                if not otel.is_compatible
                else self.tracer.start_as_current_span("transferring file")
            )
            with span_ctx as span:
                if self.transfer_file(f, dst, span):
                    files_transferred.append(f)
                    ui.update_progress(
                        int(len(files_transferred) / len(file_list) * 100)
                    )
                    logger.info("Transferred '{0}' to '{1}'".format(f, dst))
        # rc 0 only when every eligible file was transferred.
        return ExtensionResult(
            rc=0 if len(file_list) - len(files_transferred) == 0 else 1,
            unv_output="The following files were transferred: \n {0}".format(
                json.dumps(files_transferred)
            ),
            message="{0} files found and {1} files transferred".format(
                len(file_list), len(files_transferred)
            ),
        )

    def extension_cancel(self):
        """Cancel hook: request a graceful stop of the transfer loop."""
        self.stop = True
- Lines 18-27 import all the necessary metrics-related modules from the OpenTelemetry library.
- Lines 46-60 create a method called `setup_metrics()`, which sets up the meter and uses it to create two metrics. The first metric will be used to count the number of files transferred, and the second to capture the transfer duration.
- Lines 62-83 implement the new `extension_new()` method introduced in API Level 1.5.0. It allows developers to control the initialization of the `MeterProvider` and `TracerProvider` (see UniversalExtension Class (1.5.0) for details). Within `extension_new()`, we customize the `MeterProvider` to change the bucket boundaries of the transfer duration histogram; the default boundaries are not suitable for our data. Additionally, we modify the export interval from the default 60 seconds to 1 second.
- Lines 85-123 update the metrics:
- Line 86 captures the start time at the beginning of the method
  - Lines 118-120 update the `num.files.transferred` counter.
  - Line 121 records how long it took to transfer the file.
Step 3 - Verifying Metrics
Now, let's verify our changes. Go ahead and delete all the files inside /tmp/test_dst. Once deleted, press F5 to start the debugging session. Upon completion, navigate to the Prometheus endpoint (http://192.168.56.11:8000/metrics in my case – this will vary) and you should see:
# HELP file_transfer_duration_seconds How long the file took to transfer
# TYPE file_transfer_duration_seconds histogram
file_transfer_duration_seconds_bucket{agent_id="UIP-DBG-01",instance="sa-u18:5678",job="Stonebranch.UAC/uip/OtelDemo",security_business_services="",task_name="es1",le="0"} 0
file_transfer_duration_seconds_bucket{agent_id="UIP-DBG-01",instance="sa-u18:5678",job="Stonebranch.UAC/uip/OtelDemo",security_business_services="",task_name="es1",le="0.5"} 1
file_transfer_duration_seconds_bucket{agent_id="UIP-DBG-01",instance="sa-u18:5678",job="Stonebranch.UAC/uip/OtelDemo",security_business_services="",task_name="es1",le="1"} 3
file_transfer_duration_seconds_bucket{agent_id="UIP-DBG-01",instance="sa-u18:5678",job="Stonebranch.UAC/uip/OtelDemo",security_business_services="",task_name="es1",le="1.5"} 3
file_transfer_duration_seconds_bucket{agent_id="UIP-DBG-01",instance="sa-u18:5678",job="Stonebranch.UAC/uip/OtelDemo",security_business_services="",task_name="es1",le="2"} 4
file_transfer_duration_seconds_bucket{agent_id="UIP-DBG-01",instance="sa-u18:5678",job="Stonebranch.UAC/uip/OtelDemo",security_business_services="",task_name="es1",le="10"} 4
file_transfer_duration_seconds_bucket{agent_id="UIP-DBG-01",instance="sa-u18:5678",job="Stonebranch.UAC/uip/OtelDemo",security_business_services="",task_name="es1",le="+Inf"} 4
file_transfer_duration_seconds_sum{agent_id="UIP-DBG-01",instance="sa-u18:5678",job="Stonebranch.UAC/uip/OtelDemo",security_business_services="",task_name="es1"} 3.421269655227661
file_transfer_duration_seconds_count{agent_id="UIP-DBG-01",instance="sa-u18:5678",job="Stonebranch.UAC/uip/OtelDemo",security_business_services="",task_name="es1"} 4
# HELP num_files_transferred_total Number of files transferred
# TYPE num_files_transferred_total counter
num_files_transferred_total{agent_id="UIP-DBG-01",file_type=".json",instance="sa-u18:5678",job="Stonebranch.UAC/uip/OtelDemo",security_business_services="",task_name="es1"} 1
num_files_transferred_total{agent_id="UIP-DBG-01",file_type=".txt",instance="sa-u18:5678",job="Stonebranch.UAC/uip/OtelDemo",security_business_services="",task_name="es1"} 2
num_files_transferred_total{agent_id="UIP-DBG-01",file_type=".zip",instance="sa-u18:5678",job="Stonebranch.UAC/uip/OtelDemo",security_business_services="",task_name="es1"} 1
As you can see, a total of 4 files were transferred: 2 of them were .txt, 1 was .json, and the other was .zip. Additionally, we can see the transfer duration. Tools like Grafana can be used to visualize the metrics in a meaningful manner.