Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,247 @@
import logging
import time

from opentelemetry import context as context_api
from opentelemetry.instrumentation.openai.shared import (
_set_client_attributes,
_set_request_attributes,
_set_response_attributes,
_set_span_attribute,
metric_shared_attributes,
model_as_dict,
)
from opentelemetry.instrumentation.openai.utils import (
_with_audio_telemetry_wrapper,
dont_throw,
is_openai_v1,
start_as_current_span_async,
)
from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY
from opentelemetry.metrics import Counter, Histogram
from opentelemetry.semconv.attributes.error_attributes import ERROR_TYPE
from opentelemetry.semconv_ai import (
SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY,
SpanAttributes,
)
Comment on lines +22 to +25
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

# First, locate and read the audio_wrappers.py file
find . -type f -name "audio_wrappers.py" | head -20

Repository: traceloop/openllmetry

Length of output: 174


🏁 Script executed:

cat -n ./packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/audio_wrappers.py

Repository: traceloop/openllmetry

Length of output: 8977


Remove unused SpanAttributes import to satisfy Flake8

SpanAttributes is imported but never used in this file—it only appears in commented-out code (line 196), which will trigger a Flake8 F401 violation. Remove it from the import:

 from opentelemetry.semconv_ai import (
     SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY,
-    SpanAttributes,
 )

Additionally, remove the debug print statement on line 203:

-            print("REMOVE ME : ATA-DBG : COULD NOT READ AUDIO FILE WITH MUTAGEN")

Re-introduce SpanAttributes once the audio duration attribute is published in semconv_ai.

📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
from opentelemetry.semconv_ai import (
SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY,
SpanAttributes,
)
from opentelemetry.semconv_ai import (
SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY,
)
🧰 Tools
🪛 Flake8 (7.3.0)

[error] 22-22: 'opentelemetry.semconv_ai.SpanAttributes' imported but unused

(F401)

🤖 Prompt for AI Agents
In
packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/audio_wrappers.py
around lines 22–25, remove the unused SpanAttributes import from
opentelemetry.semconv_ai to fix the Flake8 F401 violation and delete the debug
print statement at line 203; keep the rest of the imports intact and re-add
SpanAttributes later if/when the audio duration attribute is published in
semconv_ai.

from opentelemetry.trace import SpanKind, Status, StatusCode

SPAN_NAME = "openai.audio.transcriptions"

logger = logging.getLogger(__name__)


def _get_audio_duration(file):
    """
    Extract the audio duration from a transcription ``file`` argument.

    The duration is read with the optional ``mutagen`` package, which is
    imported lazily so that the instrumentation keeps working without it.

    Args:
        file: The value passed as ``file`` to the transcription call. Path-like
            objects and file-like objects exposing a ``name`` are inspected;
            ``str``/``bytes`` values and anything else yield ``None``.

    Returns:
        The duration in seconds as reported by mutagen, or ``None`` when it
        cannot be determined. This function never raises.
    """
    import os  # function-scope import: only needed to resolve path-like inputs

    try:
        # Bare strings and raw bytes are not treated as paths.
        if isinstance(file, (str, bytes)):
            return None

        if isinstance(file, os.PathLike):
            # A path object's ``name`` may be only the final path component
            # (as with pathlib.Path), so resolve the complete path instead.
            file_path = os.fspath(file)
        else:
            file_path = getattr(file, "name", None)
            # File-like objects may expose a non-path name (e.g. an integer
            # file descriptor) or no name at all; nothing to open then.
            if not isinstance(file_path, (str, bytes, os.PathLike)):
                return None

        try:
            from mutagen import File as MutagenFile
        except ImportError:
            # mutagen is an optional dependency; its absence is not an error.
            return None

        audio = MutagenFile(file_path)
        # Compare against None explicitly: the object mutagen returns is
        # dict-like, so an audio file without tags would fail a truthiness
        # test even though its stream info is available.
        if audio is None:
            return None
        return getattr(getattr(audio, "info", None), "length", None)

    except Exception as e:
        logger.debug("Unable to extract audio duration: %s", e)

    return None


@_with_audio_telemetry_wrapper
def transcription_wrapper(
    tracer,
    duration_histogram: Histogram,
    exception_counter: Counter,
    wrapped,
    instance,
    args,
    kwargs,
):
    """Trace a synchronous audio transcription call and record its metrics.

    Args:
        tracer: Tracer used to start the ``SPAN_NAME`` client span.
        duration_histogram: Histogram receiving the call duration in seconds;
            skipped when falsy.
        exception_counter: Counter incremented once per failed call; skipped
            when falsy.
        wrapped: The original callable being instrumented.
        instance: The object ``wrapped`` is bound to.
        args: Positional arguments forwarded to ``wrapped``.
        kwargs: Keyword arguments forwarded to ``wrapped``.

    Returns:
        Whatever ``wrapped`` returns.

    Raises:
        Exception: Any exception raised by ``wrapped`` is re-raised after it
            has been recorded on the span and in the metrics.
    """
    if context_api.get_value(_SUPPRESS_INSTRUMENTATION_KEY) or context_api.get_value(
        SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY
    ):
        return wrapped(*args, **kwargs)

    with tracer.start_as_current_span(
        name=SPAN_NAME,
        kind=SpanKind.CLIENT,
    ) as span:
        _handle_request(span, kwargs, instance)

        # Captured before the try block so the exception handler can always
        # compute a duration without checking `"start_time" in locals()`.
        start_time = time.time()
        try:
            response = wrapped(*args, **kwargs)
        except Exception as e:  # pylint: disable=broad-except
            duration = time.time() - start_time
            attributes = {
                "error.type": e.__class__.__name__,
            }

            # only record a strictly positive duration
            if duration > 0 and duration_histogram:
                duration_histogram.record(duration, attributes=attributes)
            if exception_counter:
                exception_counter.add(1, attributes=attributes)

            span.set_attribute(ERROR_TYPE, e.__class__.__name__)
            span.record_exception(e)
            span.set_status(Status(StatusCode.ERROR, str(e)))
            # NOTE(review): the span is ended explicitly here even though the
            # enclosing context manager presumably ends it as well - confirm
            # this is intentional.
            span.end()

            raise

        duration = time.time() - start_time

        _handle_response(
            response,
            span,
            instance,
            duration_histogram,
            duration,
        )

        return response


@_with_audio_telemetry_wrapper
async def atranscription_wrapper(
    tracer,
    duration_histogram: Histogram,
    exception_counter: Counter,
    wrapped,
    instance,
    args,
    kwargs,
):
    """Trace an asynchronous audio transcription call and record its metrics.

    Args:
        tracer: Tracer used to start the ``SPAN_NAME`` client span.
        duration_histogram: Histogram receiving the call duration in seconds;
            skipped when falsy.
        exception_counter: Counter incremented once per failed call; skipped
            when falsy.
        wrapped: The original coroutine function being instrumented.
        instance: The object ``wrapped`` is bound to.
        args: Positional arguments forwarded to ``wrapped``.
        kwargs: Keyword arguments forwarded to ``wrapped``.

    Returns:
        Whatever awaiting ``wrapped`` returns.

    Raises:
        Exception: Any exception raised by ``wrapped`` is re-raised after it
            has been recorded on the span and in the metrics.
    """
    if context_api.get_value(_SUPPRESS_INSTRUMENTATION_KEY) or context_api.get_value(
        SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY
    ):
        return await wrapped(*args, **kwargs)

    async with start_as_current_span_async(
        tracer=tracer,
        name=SPAN_NAME,
        kind=SpanKind.CLIENT,
    ) as span:
        _handle_request(span, kwargs, instance)

        # Captured before the try block so the exception handler can always
        # compute a duration without checking `"start_time" in locals()`.
        start_time = time.time()
        try:
            response = await wrapped(*args, **kwargs)
        except Exception as e:  # pylint: disable=broad-except
            duration = time.time() - start_time
            attributes = {
                "error.type": e.__class__.__name__,
            }

            # only record a strictly positive duration
            if duration > 0 and duration_histogram:
                duration_histogram.record(duration, attributes=attributes)
            if exception_counter:
                exception_counter.add(1, attributes=attributes)

            span.set_attribute(ERROR_TYPE, e.__class__.__name__)
            span.record_exception(e)
            span.set_status(Status(StatusCode.ERROR, str(e)))
            # NOTE(review): the span is ended explicitly here even though the
            # enclosing context manager presumably ends it as well - confirm
            # this is intentional.
            span.end()

            raise

        duration = time.time() - start_time

        _handle_response(
            response,
            span,
            instance,
            duration_histogram,
            duration,
        )

        return response


@dont_throw
def _handle_request(span, kwargs, instance):
    """Set request-side span attributes for an audio transcription call.

    Besides the shared request and client attributes, the duration of the
    submitted audio is attached to the span when it can be determined.

    Args:
        span: The span describing the transcription call.
        kwargs: Keyword arguments of the instrumented call; ``kwargs["file"]``
            is inspected for the audio input.
        instance: The client resource object the call was made on.
    """
    _set_request_attributes(span, kwargs, instance)
    _set_client_attributes(span, instance)

    file_param = kwargs.get("file")
    if not file_param:
        return

    audio_duration = _get_audio_duration(file_param)
    if audio_duration is None:
        # Not being able to read the duration is expected for many inputs
        # (in-memory content, optional dependency missing), so keep it quiet.
        logger.debug("Could not determine audio input duration")
        return

    # TODO(Ata): switch to SpanAttributes.LLM_OPENAI_AUDIO_INPUT_DURATION_SECONDS
    # once the semconv release that publishes it is available.
    _set_span_attribute(
        span, "gen_ai.openai.audio.input.duration_seconds", audio_duration
    )
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove the debug print statement and use the logger at an appropriate level instead.

Suggested change
print("REMOVE ME : ATA-DBG : COULD NOT READ AUDIO FILE WITH MUTAGEN")
logger.debug("COULD NOT READ AUDIO FILE WITH MUTAGEN")


Comment on lines +185 to +204
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

cat -n packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/audio_wrappers.py | sed -n '185,210p'

Repository: traceloop/openllmetry

Length of output: 1110


🏁 Script executed:

# Check if the file exists and explore structure
fd audio_wrappers.py

Repository: traceloop/openllmetry

Length of output: 172


🏁 Script executed:

# Search for SpanAttributes to understand naming conventions
rg "SpanAttributes" packages/opentelemetry-instrumentation-openai --type py -A 3 -B 1

Repository: traceloop/openllmetry

Length of output: 50378


Remove debug print statement and refactor audio duration attribute naming

Two issues require attention:

  1. Debug output on line 203

The print("REMOVE ME : ATA-DBG : COULD NOT READ AUDIO FILE WITH MUTAGEN") statement will spam stdout when audio duration cannot be read. Remove it entirely or replace with a structured log:

         else:
-            print("REMOVE ME : ATA-DBG : COULD NOT READ AUDIO FILE WITH MUTAGEN")
  2. Hardcoded attribute key on line 200

Replace the hardcoded string 'gen_ai.openai.audio.input.duration_seconds' with a module-level constant. This will simplify the migration to SpanAttributes.LLM_OPENAI_AUDIO_INPUT_DURATION_SECONDS once it is published:

# At module top
AUDIO_INPUT_DURATION_ATTR = "gen_ai.openai.audio.input.duration_seconds"

# Then use
_set_span_attribute(span, AUDIO_INPUT_DURATION_ATTR, audio_duration)

This approach avoids hardcoding magic strings and makes future refactoring to the official semconv attribute straightforward.

🤖 Prompt for AI Agents
In
packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/audio_wrappers.py
around lines 185 to 204, remove the debug print statement that spams stdout when
mutagen cannot read the audio file and instead either drop the message or
replace it with a structured logger call; also introduce a module-level constant
(e.g., AUDIO_INPUT_DURATION_ATTR) at the top of the file for the attribute key
and use that constant in the _set_span_attribute call instead of the hardcoded
string so future migration to
SpanAttributes.LLM_OPENAI_AUDIO_INPUT_DURATION_SECONDS is simple.


@dont_throw
def _handle_response(
    response,
    span,
    instance=None,
    duration_histogram=None,
    duration=None,
):
    """Record metrics and span attributes for a transcription response.

    Args:
        response: The value returned by the instrumented call.
        span: The span describing the transcription call.
        instance: The client resource object the call was made on.
        duration_histogram: Histogram for the call duration, if any.
        duration: Elapsed time of the call in seconds, if measured.
    """
    # With the v1 client the response is converted through model_as_dict;
    # otherwise it is used as-is.
    payload = model_as_dict(response) if is_openai_v1() else response

    # Metrics first, then span attributes.
    _set_transcription_metrics(instance, duration_histogram, payload, duration)
    _set_response_attributes(span, payload)


def _set_transcription_metrics(
    instance,
    duration_histogram,
    response_dict,
    duration,
):
    """Record the duration metric for a completed transcription call.

    Args:
        instance: The client resource object; used to derive the server address.
        duration_histogram: Histogram receiving the duration; skipped when falsy.
        response_dict: The response payload. Its ``"model"`` entry, when
            present, is reported as the response model.
        duration: Elapsed time of the call in seconds; recorded only when it
            is a non-zero ``int`` or ``float``.
    """
    from opentelemetry.instrumentation.openai.shared import _get_openai_base_url

    # The payload is not guaranteed to be a mapping.
    # NOTE(review): plain-text response formats presumably yield a str - confirm.
    # Without this guard a missing ``.get`` would raise and the duration
    # metric below would never be recorded.
    response_model = (
        response_dict.get("model") if hasattr(response_dict, "get") else None
    )

    shared_attributes = metric_shared_attributes(
        response_model=response_model or None,
        operation="audio.transcriptions",
        server_address=_get_openai_base_url(instance),
    )

    # duration metrics
    if duration and isinstance(duration, (float, int)) and duration_histogram:
        duration_histogram.record(duration, attributes=shared_attributes)
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,29 @@ def wrapper(wrapped, instance, args, kwargs):
return _with_embeddings_telemetry


def _with_audio_telemetry_wrapper(func):
    """Adapt an audio wrapper function to the call signature wrapt expects.

    ``func`` takes the telemetry objects followed by wrapt's four arguments.
    The returned factory binds the telemetry objects and yields a callable
    taking only ``(wrapped, instance, args, kwargs)``.
    """

    def _with_audio_telemetry(tracer, duration_histogram, exception_counter):
        # Bind the telemetry objects once; they are reused for every call.
        telemetry = (tracer, duration_histogram, exception_counter)

        def wrapper(wrapped, instance, args, kwargs):
            return func(*telemetry, wrapped, instance, args, kwargs)

        return wrapper

    return _with_audio_telemetry


def _with_chat_telemetry_wrapper(func):
def _with_chat_telemetry(
tracer,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@
from opentelemetry.instrumentation.openai.shared.image_gen_wrappers import (
image_gen_metrics_wrapper,
)
from opentelemetry.instrumentation.openai.shared.audio_wrappers import (
atranscription_wrapper,
transcription_wrapper,
)
from opentelemetry.instrumentation.openai.utils import is_metrics_enabled
from opentelemetry.instrumentation.openai.v1.assistant_wrappers import (
assistants_create_wrapper,
Expand Down Expand Up @@ -247,6 +251,36 @@ def _instrument(self, **kwargs):
image_gen_metrics_wrapper(duration_histogram, image_gen_exception_counter),
)

if is_metrics_enabled():
audio_transcription_exception_counter = meter.create_counter(
# name=Meters.LLM_AUDIO_TRANSCRIPTIONS_EXCEPTIONS, # TODO(Ata): come back here later when semconv is published
name='llm.openai.audio.transcriptions.exceptions',
unit="time",
description="Number of exceptions occurred during audio transcriptions operation",
)
else:
audio_transcription_exception_counter = None

wrap_function_wrapper(
"openai.resources.audio.transcriptions",
"Transcriptions.create",
transcription_wrapper(
tracer,
duration_histogram,
audio_transcription_exception_counter,
),
)

wrap_function_wrapper(
"openai.resources.audio.transcriptions",
"AsyncTranscriptions.create",
atranscription_wrapper(
tracer,
duration_histogram,
audio_transcription_exception_counter,
),
)

# Beta APIs may not be available consistently in all versions
self._try_wrap(
"openai.resources.beta.assistants",
Expand Down Expand Up @@ -338,6 +372,8 @@ def _uninstrument(self, **kwargs):
unwrap("openai.resources.completions", "AsyncCompletions.create")
unwrap("openai.resources.embeddings", "AsyncEmbeddings.create")
unwrap("openai.resources.images", "Images.generate")
unwrap("openai.resources.audio.transcriptions", "Transcriptions.create")
unwrap("openai.resources.audio.transcriptions", "AsyncTranscriptions.create")

# Beta APIs may not be available consistently in all versions
try:
Expand Down
2 changes: 2 additions & 0 deletions packages/opentelemetry-instrumentation-openai/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ opentelemetry-api = "^1.38.0"
opentelemetry-instrumentation = ">=0.59b0"
opentelemetry-semantic-conventions = ">=0.59b0"
opentelemetry-semantic-conventions-ai = "^0.4.13"
mutagen = { version = "^1.47.0", optional = true }

[tool.poetry.group.dev.dependencies]
autopep8 = "^2.2.0"
Expand All @@ -49,6 +50,7 @@ build-backend = "poetry.core.masonry.api"

[tool.poetry.extras]
instruments = ["openai"]
audio = ["mutagen"]

[tool.poetry.plugins."opentelemetry_instrumentor"]
openai = "opentelemetry.instrumentation.openai:OpenAIInstrumentor"
Loading
Loading