Enabling and Using Metrics Functionality
Introduction
On this page, we will enable and use the OpenTelemetry Metrics functionality. Once again, we will use the UIP VSCode Plugin to test our changes for now.
It is assumed that the OpenTelemetry Collector is properly configured to accept metric data and expose it as a Prometheus endpoint.
Step 1 - Enabling Metrics
Go ahead and open configurations.yml and edit the properties block as shown:

```yaml
properties:
  agent:
    log_level: Info
    netname: UIP-DBG-01
  otel:
    enable_tracing: true
    export_metrics: true
    trace_endpoint: http://192.168.56.11:4318
    metrics_endpoint: http://192.168.56.11:4318
    service_name: vscode-uip-debugger
    uip_service_name: uip/${extension_name}
api:
  extension_start:
    - name: es1
      log_level: Inherited
      runtime_dir: /home/shrey/dev/extensions/test/OtelDemoTest
      fields:
        src_folder: /tmp/test_src
        dst_folder: /tmp/test_dst
        file_type:
          - txt
```
The export_metrics property was set to true, and the metrics_endpoint was changed to the OpenTelemetry Collector URL (it will need to be changed according to your setup).
Enabling in UA
Similar properties exist in uags.conf and omss.conf that can be used to enable metrics export in the Agent. See OTEL_EXPORT_METRICS - UAG configuration option and OTEL_EXPORT_METRICS - OMS configuration option.
Step 2 - Adding Custom Metrics
Let's add some custom metrics. Go ahead and update extension.py as follows:
```python
from __future__ import print_function
from universal_extension import UniversalExtension
from universal_extension import ExtensionResult
from universal_extension import ui
from universal_extension import logger
from universal_extension import utility
from universal_extension import otel
import time
import shutil
import os
import random
import json

if otel.is_compatible:
    from opentelemetry import trace
    from opentelemetry import metrics
    from opentelemetry.sdk.metrics import MeterProvider
    from opentelemetry.sdk.metrics.view import (
        ExplicitBucketHistogramAggregation,
        View
    )
    from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
    from opentelemetry.exporter.otlp.proto.http.metric_exporter import (
        OTLPMetricExporter,
    )


class Extension(UniversalExtension):
    def __init__(self):
        """Initializes an instance of the 'Extension' class"""
        # Call the base class initializer
        super(Extension, self).__init__()
        self.stop = False
        self.setup_tracer()
        self.setup_metrics()

    def setup_tracer(self):
        if otel.is_compatible:
            self.tracer = trace.get_tracer(__name__)
        else:
            self.tracer = utility.NoOp()

    def setup_metrics(self):
        if otel.is_compatible:
            self.meter = metrics.get_meter(__name__)
        else:
            self.meter = utility.NoOp()

        self.num_files_transferred_cntr = self.meter.create_counter(
            name="num.files.transferred",
            description="Number of files transferred",
        )

        self.file_transfer_duration = self.meter.create_histogram(
            name="file.transfer.duration",
            description="How long the file took to transfer",
            unit="s",
        )

    @classmethod
    def extension_new(cls, fields):
        if not otel.is_compatible:
            return cls.ExtensionConfig()

        return cls.ExtensionConfig(
            meter_provider=MeterProvider(
                views=[
                    View(
                        instrument_name="file.transfer.duration",
                        aggregation=ExplicitBucketHistogramAggregation(
                            (0, 0.5, 1, 1.5, 2, 10)
                        ),
                    )
                ],
                metric_readers=[
                    PeriodicExportingMetricReader(
                        OTLPMetricExporter(), export_interval_millis=1000
                    )
                ],
            )
        )

    def transfer_file(self, src_path, dst_path, span):
        start_time = time.time()
        span.set_attributes({"src_file": src_path, "dst_folder": dst_path})

        # Ensure destination directory exists
        if not os.path.exists(dst_path):
            raise FileNotFoundError(
                "Destination directory ({0}) does not exist".format(dst_path)
            )

        # Ensure the source file is not already present in the destination
        # directory (unless overwrite is selected)
        if os.path.exists(os.path.join(dst_path, os.path.basename(src_path))):
            logger.info(
                "'{0}' already exists in '{1}'".format(
                    os.path.basename(src_path), dst_path
                )
            )
            if otel.is_compatible:
                span.set_status(
                    trace.Status(
                        status_code=trace.StatusCode.ERROR,
                        description="'{0}' already exists in '{1}'".format(
                            os.path.basename(src_path), dst_path
                        ),
                    )
                )
            return False

        shutil.copy(src_path, dst_path)
        time.sleep(random.uniform(0, 2))

        self.num_files_transferred_cntr.add(
            1, {"file_type": os.path.splitext(src_path)[1]}
        )
        self.file_transfer_duration.record(time.time() - start_time)

        return True

    def extension_start(self, fields):
        """Required method that serves as the starting point for
        work performed for a task instance.

        Parameters
        ----------
        fields : dict
            populated with field values from the associated task instance
            launched in the Controller

        Returns
        -------
        ExtensionResult
            once the work is done, an instance of ExtensionResult must be
            returned. See the documentation for a full list of parameters
            that can be passed to the ExtensionResult class constructor
        """
        files_transferred = []
        src = fields["src_folder"]
        dst = fields["dst_folder"]
        file_types = [
            ft.lower() if ft.startswith(".") else "." + ft.lower()
            for ft in fields["file_type"]
        ]

        if not os.path.exists(src):
            raise FileNotFoundError("'{0}' does not exist".format(src))

        all_file_list = os.listdir(src)

        # filter the files
        file_list = []
        for f in all_file_list:
            file_path = os.path.join(src, f)
            file_type = os.path.splitext(file_path)[1]
            if os.path.isfile(file_path) and file_type in file_types:
                file_list.append(file_path)

        logger.info(
            "Found {0} files that can be transferred".format(len(file_list))
        )

        for f in file_list:
            if self.stop:
                break

            span_ctx = (
                utility.noop_context()
                if not otel.is_compatible
                else self.tracer.start_as_current_span("transferring file")
            )
            with span_ctx as span:
                if self.transfer_file(f, dst, span):
                    files_transferred.append(f)
                    ui.update_progress(
                        int(len(files_transferred) / len(file_list) * 100)
                    )
                    logger.info("Transferred '{0}' to '{1}'".format(f, dst))

        return ExtensionResult(
            rc=0 if len(file_list) - len(files_transferred) == 0 else 1,
            unv_output="The following files were transferred: \n {0}".format(
                json.dumps(files_transferred)
            ),
            message="{0} files found and {1} files transferred".format(
                len(file_list), len(files_transferred)
            ),
        )

    def extension_cancel(self):
        self.stop = True
```
- The imports inside the otel.is_compatible block pull in all of the necessary metrics-related modules from the OpenTelemetry library.
- The new setup_metrics() method sets up the meter and uses it to create two metrics. The first metric is used to count the number of files transferred, and the second captures the transfer duration.
- The new extension_new() method, introduced in API Level 1.5.0, allows developers to control the initialization of the MeterProvider and TracerProvider (see UniversalExtension Class (1.5.0) for details). Within extension_new(), we customize the MeterProvider to change the bucket boundaries of the transfer duration histogram, as the default boundaries are not suitable for our data. Additionally, we shorten the export interval from the default 60 seconds to 1 second. A standalone sketch of this customization follows this list.
- The transfer_file() method updates the metrics:
  - It captures the start time at the beginning of the method.
  - It updates the num.files.transferred counter after a successful copy.
  - It records how long it took to transfer the file in the file.transfer.duration histogram.
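To make the View-based customization easier to see in isolation, here is a minimal, self-contained sketch that uses the same OpenTelemetry SDK classes outside of the extension. The endpoint URL and the final record() call are illustrative assumptions; in the extension itself, the provider is handed to the framework through cls.ExtensionConfig() rather than installed globally.

```python
# Minimal sketch (assumptions: collector reachable at http://192.168.56.11:4318,
# instrument named "file.transfer.duration" as in the extension above).
from opentelemetry import metrics
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.view import ExplicitBucketHistogramAggregation, View
from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter

# Remap the histogram's bucket boundaries and export every second instead of every 60 s.
provider = MeterProvider(
    views=[
        View(
            instrument_name="file.transfer.duration",
            aggregation=ExplicitBucketHistogramAggregation((0, 0.5, 1, 1.5, 2, 10)),
        )
    ],
    metric_readers=[
        PeriodicExportingMetricReader(
            OTLPMetricExporter(endpoint="http://192.168.56.11:4318/v1/metrics"),
            export_interval_millis=1000,
        )
    ],
)
metrics.set_meter_provider(provider)  # install globally (the extension skips this step)

meter = metrics.get_meter(__name__)
duration_histogram = meter.create_histogram(
    name="file.transfer.duration",
    description="How long the file took to transfer",
    unit="s",
)
duration_histogram.record(0.7, {"file_type": ".txt"})  # lands in the 0.5-1 bucket
```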
Step 3 - Verifying Metrics
Now, let's verify our changes. Go ahead and delete all the files inside /tmp/test_dst. Once deleted, press F5 to start the debugging session. Upon completion, navigate to the Prometheus endpoint (http://192.168.56.11:8000/metrics in my case; this will vary) and you should see:
```
# HELP file_transfer_duration_seconds How long the file took to transfer
# TYPE file_transfer_duration_seconds histogram
file_transfer_duration_seconds_bucket{agent_id="UIP-DBG-01",instance="sa-u18:5678",job="Stonebranch.UAC/uip/OtelDemo",security_business_services="",task_name="es1",le="0"} 0
file_transfer_duration_seconds_bucket{agent_id="UIP-DBG-01",instance="sa-u18:5678",job="Stonebranch.UAC/uip/OtelDemo",security_business_services="",task_name="es1",le="0.5"} 1
file_transfer_duration_seconds_bucket{agent_id="UIP-DBG-01",instance="sa-u18:5678",job="Stonebranch.UAC/uip/OtelDemo",security_business_services="",task_name="es1",le="1"} 3
file_transfer_duration_seconds_bucket{agent_id="UIP-DBG-01",instance="sa-u18:5678",job="Stonebranch.UAC/uip/OtelDemo",security_business_services="",task_name="es1",le="1.5"} 3
file_transfer_duration_seconds_bucket{agent_id="UIP-DBG-01",instance="sa-u18:5678",job="Stonebranch.UAC/uip/OtelDemo",security_business_services="",task_name="es1",le="2"} 4
file_transfer_duration_seconds_bucket{agent_id="UIP-DBG-01",instance="sa-u18:5678",job="Stonebranch.UAC/uip/OtelDemo",security_business_services="",task_name="es1",le="10"} 4
file_transfer_duration_seconds_bucket{agent_id="UIP-DBG-01",instance="sa-u18:5678",job="Stonebranch.UAC/uip/OtelDemo",security_business_services="",task_name="es1",le="+Inf"} 4
file_transfer_duration_seconds_sum{agent_id="UIP-DBG-01",instance="sa-u18:5678",job="Stonebranch.UAC/uip/OtelDemo",security_business_services="",task_name="es1"} 3.421269655227661
file_transfer_duration_seconds_count{agent_id="UIP-DBG-01",instance="sa-u18:5678",job="Stonebranch.UAC/uip/OtelDemo",security_business_services="",task_name="es1"} 4
# HELP num_files_transferred_total Number of files transferred
# TYPE num_files_transferred_total counter
num_files_transferred_total{agent_id="UIP-DBG-01",file_type=".json",instance="sa-u18:5678",job="Stonebranch.UAC/uip/OtelDemo",security_business_services="",task_name="es1"} 1
num_files_transferred_total{agent_id="UIP-DBG-01",file_type=".txt",instance="sa-u18:5678",job="Stonebranch.UAC/uip/OtelDemo",security_business_services="",task_name="es1"} 2
num_files_transferred_total{agent_id="UIP-DBG-01",file_type=".zip",instance="sa-u18:5678",job="Stonebranch.UAC/uip/OtelDemo",security_business_services="",task_name="es1"} 1
```
As you can see, a total of 4 files were transferred: 2 of them were .txt, 1 was .json, and 1 was .zip. Additionally, we can see the transfer durations distributed across the histogram buckets. Tools like Grafana can be used to visualize the metrics in a meaningful manner.
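If you would rather check the endpoint from a script than a browser, a quick fetch can confirm that the new metrics are being exposed. This is a minimal sketch; it assumes the Prometheus endpoint URL used above (http://192.168.56.11:8000/metrics), which will differ in your setup.

```python
# Minimal sketch: fetch the Prometheus endpoint and confirm the custom metrics
# are present. The URL is an assumption based on the setup used on this page.
import urllib.request

METRICS_URL = "http://192.168.56.11:8000/metrics"  # adjust for your setup

with urllib.request.urlopen(METRICS_URL, timeout=10) as response:
    body = response.read().decode("utf-8")

for metric in ("num_files_transferred_total", "file_transfer_duration_seconds_bucket"):
    series = [line for line in body.splitlines() if line.startswith(metric)]
    print("{0}: {1} series found".format(metric, len(series)))
```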