diff --git a/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/audio_wrappers.py b/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/audio_wrappers.py
new file mode 100644
index 0000000000..978a1b42db
--- /dev/null
+++ b/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/audio_wrappers.py
@@ -0,0 +1,247 @@
+import logging
+import time
+
+from opentelemetry import context as context_api
+from opentelemetry.instrumentation.openai.shared import (
+    _set_client_attributes,
+    _set_request_attributes,
+    _set_response_attributes,
+    _set_span_attribute,
+    metric_shared_attributes,
+    model_as_dict,
+)
+from opentelemetry.instrumentation.openai.utils import (
+    _with_audio_telemetry_wrapper,
+    dont_throw,
+    is_openai_v1,
+    start_as_current_span_async,
+)
+from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY
+from opentelemetry.metrics import Counter, Histogram
+from opentelemetry.semconv.attributes.error_attributes import ERROR_TYPE
+from opentelemetry.semconv_ai import (
+    SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY,
+    SpanAttributes,
+)
+from opentelemetry.trace import SpanKind, Status, StatusCode
+
+SPAN_NAME = "openai.audio.transcriptions"
+
+logger = logging.getLogger(__name__)
+
+
+def _get_audio_duration(file):
+    """
+    Extract audio duration from file object.
+    Returns duration in seconds, or None if unable to determine.
+    """
+    try:
+        # Resolve a filesystem path for the audio; mutagen is fed a path.
+        # A file-like object usually carries its path in the `name` attribute.
+        if hasattr(file, "name"):
+            file_path = file.name
+        elif isinstance(file, (str, bytes)):
+            # Raw path string / bytes payloads are not probed here.
+            return None
+        else:
+            # File-like object without a name: no cheap way to get duration.
+            return None
+
+        # Try mutagen (optional dependency; supports many formats)
+        try:
+            from mutagen import File as MutagenFile
+
+            audio = MutagenFile(file_path)
+            if audio and hasattr(audio.info, "length"):
+                return audio.info.length
+        except Exception:  # mutagen missing or file not parsable
+            pass
+
+    except Exception as e:
+        logger.debug(f"Unable to extract audio duration: {e}")
+
+    return None
+
+
+@_with_audio_telemetry_wrapper
+def transcription_wrapper(
+    tracer,
+    duration_histogram: Histogram,
+    exception_counter: Counter,
+    wrapped,
+    instance,
+    args,
+    kwargs,
+):
+    if context_api.get_value(_SUPPRESS_INSTRUMENTATION_KEY) or context_api.get_value(
+        SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY
+    ):
+        return wrapped(*args, **kwargs)
+
+    with tracer.start_as_current_span(
+        name=SPAN_NAME,
+        kind=SpanKind.CLIENT,
+    ) as span:
+        _handle_request(span, kwargs, instance)
+
+        # Start the clock before the call so `duration` is always defined.
+        start_time = time.time()
+        try:
+            response = wrapped(*args, **kwargs)
+            end_time = time.time()
+        except Exception as e:  # pylint: disable=broad-except
+            end_time = time.time()
+            duration = end_time - start_time
+            attributes = {
+                "error.type": e.__class__.__name__,
+            }
+
+            # only record a meaningful (positive) duration
+            if duration > 0 and duration_histogram:
+                duration_histogram.record(duration, attributes=attributes)
+            if exception_counter:
+                exception_counter.add(1, attributes=attributes)
+
+            span.set_attribute(ERROR_TYPE, e.__class__.__name__)
+            span.record_exception(e)
+            span.set_status(Status(StatusCode.ERROR, str(e)))
+            # span is closed by the context manager; do not end it here
+
+            raise
+
+        duration = end_time - start_time
+
+        _handle_response(
+            response,
+            span,
+            instance,
+            duration_histogram,
+            duration,
+        )
+
+        return response
+
+
+@_with_audio_telemetry_wrapper
+async def atranscription_wrapper(
+    tracer,
+    duration_histogram: Histogram,
+    exception_counter: Counter,
+    wrapped,
+    instance,
+    args,
+    kwargs,
+):
+    if context_api.get_value(_SUPPRESS_INSTRUMENTATION_KEY) or context_api.get_value(
+        SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY
+    ):
+        return await wrapped(*args, **kwargs)
+
+    async with start_as_current_span_async(
+        tracer=tracer,
+        name=SPAN_NAME,
+        kind=SpanKind.CLIENT,
+    ) as span:
+        _handle_request(span, kwargs, instance)
+
+        # Start the clock before the call so `duration` is always defined.
+        start_time = time.time()
+        try:
+            response = await wrapped(*args, **kwargs)
+            end_time = time.time()
+        except Exception as e:  # pylint: disable=broad-except
+            end_time = time.time()
+            duration = end_time - start_time
+            attributes = {
+                "error.type": e.__class__.__name__,
+            }
+
+            # only record a meaningful (positive) duration
+            if duration > 0 and duration_histogram:
+                duration_histogram.record(duration, attributes=attributes)
+            if exception_counter:
+                exception_counter.add(1, attributes=attributes)
+
+            span.set_attribute(ERROR_TYPE, e.__class__.__name__)
+            span.record_exception(e)
+            span.set_status(Status(StatusCode.ERROR, str(e)))
+            # span is closed by the context manager; do not end it here
+
+            raise
+
+        duration = end_time - start_time
+
+        _handle_response(
+            response,
+            span,
+            instance,
+            duration_histogram,
+            duration,
+        )
+
+        return response
+
+
+@dont_throw
+def _handle_request(span, kwargs, instance):
+    _set_request_attributes(span, kwargs, instance)
+    _set_client_attributes(span, instance)
+
+    # Extract and set audio duration
+    file_param = kwargs.get("file")
+    if file_param:
+        audio_duration = _get_audio_duration(file_param)
+        if audio_duration is not None:
+            # TODO(Ata): use SpanAttributes.LLM_OPENAI_AUDIO_INPUT_DURATION_SECONDS
+            # once that semconv constant is published.
+            _set_span_attribute(
+                span, 'gen_ai.openai.audio.input.duration_seconds', audio_duration
+            )
+        else:
+            # Duration unavailable (mutagen missing or unsupported format);
+            # the span simply omits the attribute.
+            logger.debug("Could not determine audio input duration")
+
+
+@dont_throw
+def _handle_response(
+    response,
+    span,
+    instance=None,
+    duration_histogram=None,
+    duration=None,
+):
+    if is_openai_v1():
+        response_dict = model_as_dict(response)
+    else:
+        response_dict = response
+
+    # metrics record
+    _set_transcription_metrics(
+        instance,
+        duration_histogram,
+        response_dict,
+        duration,
+    )
+
+    # span attributes
+    _set_response_attributes(span, response_dict)
+
+
+def _set_transcription_metrics(
+    instance,
+    duration_histogram,
+    response_dict,
+    duration,
+):
+    from opentelemetry.instrumentation.openai.shared import _get_openai_base_url
+
+    shared_attributes = metric_shared_attributes(
+        response_model=response_dict.get("model") or None,
+        operation="audio.transcriptions",
+        server_address=_get_openai_base_url(instance),
+    )
+
+    # duration metrics
+    if duration and isinstance(duration, (float, int)) and duration_histogram:
+        duration_histogram.record(duration, attributes=shared_attributes)
diff --git a/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/utils.py b/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/utils.py
index 1b16fa3fec..b951f801f6 100644
--- a/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/utils.py
+++ b/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/utils.py
@@ -83,6 +83,29 @@ def wrapper(wrapped, instance, args, kwargs):
     return _with_embeddings_telemetry
 
 
+def _with_audio_telemetry_wrapper(func):
+    """Wrapper to convert the audio wrapper function into the expected format for wrapt."""
+    def _with_audio_telemetry(
+        tracer,
+        duration_histogram,
+        exception_counter,
+    ):
+        def wrapper(wrapped, instance, args, kwargs):
+            return func(
+                tracer,
+                duration_histogram,
+                exception_counter,
+                wrapped,
+                instance,
+                args,
+                kwargs,
+            )
+
+        return wrapper
+
+    return _with_audio_telemetry
+
+
 def _with_chat_telemetry_wrapper(func):
     def _with_chat_telemetry(
         tracer,
diff --git a/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/v1/__init__.py b/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/v1/__init__.py
index 7511a16d6a..e6e9b49bd6 100644
--- a/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/v1/__init__.py
+++ b/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/v1/__init__.py
@@ -18,6 +18,10 @@
 from opentelemetry.instrumentation.openai.shared.image_gen_wrappers import (
     image_gen_metrics_wrapper,
 )
+from opentelemetry.instrumentation.openai.shared.audio_wrappers import (
+    atranscription_wrapper,
+    transcription_wrapper,
+)
 from opentelemetry.instrumentation.openai.utils import is_metrics_enabled
 from opentelemetry.instrumentation.openai.v1.assistant_wrappers import (
     assistants_create_wrapper,
@@ -247,6 +251,36 @@ def _instrument(self, **kwargs):
             image_gen_metrics_wrapper(duration_histogram, image_gen_exception_counter),
         )
 
+        if is_metrics_enabled():
+            audio_transcription_exception_counter = meter.create_counter(
+                # name=Meters.LLM_AUDIO_TRANSCRIPTIONS_EXCEPTIONS,  # TODO(Ata): come back here later when semconv is published
+                name='llm.openai.audio.transcriptions.exceptions',
+                unit="time",
+                description="Number of exceptions occurred during audio transcriptions operation",
+            )
+        else:
+            audio_transcription_exception_counter = None
+
+        wrap_function_wrapper(
+            "openai.resources.audio.transcriptions",
+            "Transcriptions.create",
+            transcription_wrapper(
+                tracer,
+                duration_histogram,
+                audio_transcription_exception_counter,
+            ),
+        )
+
+        wrap_function_wrapper(
+            "openai.resources.audio.transcriptions",
+            "AsyncTranscriptions.create",
+            atranscription_wrapper(
+                tracer,
+                duration_histogram,
+                audio_transcription_exception_counter,
+            ),
+        )
+
         # Beta APIs may not be available consistently in all versions
         self._try_wrap(
             "openai.resources.beta.assistants",
@@ -338,6 +372,8 @@ def _uninstrument(self, **kwargs):
         unwrap("openai.resources.completions", "AsyncCompletions.create")
         unwrap("openai.resources.embeddings", "AsyncEmbeddings.create")
         unwrap("openai.resources.images", "Images.generate")
+        unwrap("openai.resources.audio.transcriptions", "Transcriptions.create")
+        unwrap("openai.resources.audio.transcriptions", "AsyncTranscriptions.create")
 
         # Beta APIs may not be available consistently in all versions
         try:
diff --git a/packages/opentelemetry-instrumentation-openai/pyproject.toml b/packages/opentelemetry-instrumentation-openai/pyproject.toml
index 3abf68f228..0a3076d05b 100644
--- a/packages/opentelemetry-instrumentation-openai/pyproject.toml
+++ b/packages/opentelemetry-instrumentation-openai/pyproject.toml
@@ -28,6 +28,7 @@ opentelemetry-api = "^1.38.0"
 opentelemetry-instrumentation = ">=0.59b0"
 opentelemetry-semantic-conventions = ">=0.59b0"
 opentelemetry-semantic-conventions-ai = "^0.4.13"
+mutagen = { version = "^1.47.0", optional = true }
 
 [tool.poetry.group.dev.dependencies]
 autopep8 = "^2.2.0"
@@ -49,6 +50,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry.extras]
 instruments = ["openai"]
+audio = ["mutagen"]
 
 [tool.poetry.plugins."opentelemetry_instrumentor"]
 openai = "opentelemetry.instrumentation.openai:OpenAIInstrumentor"
diff --git a/packages/opentelemetry-instrumentation-openai/tests/traces/test_audio_transcription.py b/packages/opentelemetry-instrumentation-openai/tests/traces/test_audio_transcription.py
new file mode 100644
index 0000000000..bacd82a2bb
--- /dev/null
+++ b/packages/opentelemetry-instrumentation-openai/tests/traces/test_audio_transcription.py
@@ -0,0 +1,60 @@
+import io
+import pytest
+from opentelemetry.semconv._incubating.attributes import (
+    gen_ai_attributes as GenAIAttributes,
+)
+from opentelemetry.semconv_ai import SpanAttributes
+
+
+@pytest.mark.vcr
+def test_audio_transcription(instrument_legacy, span_exporter, openai_client):
+    # Create a mock audio file (in real test, use VCR cassette)
+    audio_file = io.BytesIO(b"fake audio content")
+    audio_file.name = "test_audio.mp3"
+
+    openai_client.audio.transcriptions.create(
+        file=audio_file,
+        model="whisper-1",
+    )
+
+    spans = span_exporter.get_finished_spans()
+    assert len(spans) == 1
+    assert spans[0].name == "openai.audio.transcriptions"
+
+    transcription_span = spans[0]
+    assert (
+        transcription_span.attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL]
+        == "whisper-1"
+    )
+    assert (
+        transcription_span.attributes[SpanAttributes.LLM_OPENAI_API_BASE]
+        == "https://api.openai.com/v1/"
+    )
+
+
+@pytest.mark.vcr
+async def test_audio_transcription_async(
+    instrument_legacy, span_exporter, async_openai_client
+):
+    # Create a mock audio file
+    audio_file = io.BytesIO(b"fake audio content")
+    audio_file.name = "test_audio.mp3"
+
+    await async_openai_client.audio.transcriptions.create(
+        file=audio_file,
+        model="whisper-1",
+    )
+
+    spans = span_exporter.get_finished_spans()
+    assert len(spans) == 1
+    assert spans[0].name == "openai.audio.transcriptions"
+
+    transcription_span = spans[0]
+    assert (
+        transcription_span.attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL]
+        == "whisper-1"
+    )
+    assert (
+        transcription_span.attributes[SpanAttributes.LLM_OPENAI_API_BASE]
+        == "https://api.openai.com/v1/"
+    )