Skip to content

Commit cb9b6f1

Browse files
fix(openai, anthropic): fix finish_reasons OTel semconv compliance gaps (#3916)
1 parent 21d89e4 commit cb9b6f1

File tree

167 files changed

+53931
-11843
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

167 files changed

+53931
-11843
lines changed

packages/opentelemetry-instrumentation-anthropic/opentelemetry/instrumentation/anthropic/span_utils.py

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
def _map_finish_reason(anthropic_reason):
2929
"""Map an Anthropic stop_reason to the OTel GenAI FinishReason enum value."""
3030
if not anthropic_reason:
31-
return anthropic_reason
31+
return ""
3232
return _FINISH_REASON_MAP.get(anthropic_reason, anthropic_reason)
3333

3434

@@ -256,9 +256,7 @@ def _build_output_messages_from_content(response):
256256
"role": response.get("role", "assistant"),
257257
"parts": [{"type": "text", "content": response.get("completion")}],
258258
}
259-
mapped = _map_finish_reason(response.get("stop_reason"))
260-
if mapped:
261-
msg["finish_reason"] = mapped
259+
msg["finish_reason"] = _map_finish_reason(response.get("stop_reason"))
262260
return [msg]
263261

264262
if not response.get("content"):
@@ -295,9 +293,7 @@ def _build_output_messages_from_content(response):
295293
"role": response.get("role", "assistant"),
296294
"parts": parts,
297295
}
298-
mapped = _map_finish_reason(response.get("stop_reason"))
299-
if mapped:
300-
msg["finish_reason"] = mapped
296+
msg["finish_reason"] = _map_finish_reason(response.get("stop_reason"))
301297
return [msg]
302298

303299

@@ -449,8 +445,7 @@ def set_streaming_response_attributes(span, complete_response_events):
449445
"role": "assistant",
450446
"parts": parts,
451447
}
452-
if finish_reasons:
453-
msg["finish_reason"] = finish_reasons[-1]
448+
msg["finish_reason"] = finish_reasons[-1] if finish_reasons else ""
454449
output_messages = [msg]
455450
set_span_attribute(
456451
span,

packages/opentelemetry-instrumentation-anthropic/tests/test_semconv_span_attrs.py

Lines changed: 61 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -375,6 +375,34 @@ def test_streaming_finish_reasons_set_when_content_tracing_disabled():
375375
assert GenAIAttributes.GEN_AI_OUTPUT_MESSAGES not in span.attributes
376376

377377

378+
def test_finish_reason_empty_string_when_none():
379+
"""finish_reason must be '' (not omitted) when stop_reason is None (Bedrock convention)."""
380+
span = make_span()
381+
response = _make_response([_make_text_block("Hello")], stop_reason=None)
382+
set_response_attributes(span, response)
383+
384+
output = json.loads(span.attributes[GenAIAttributes.GEN_AI_OUTPUT_MESSAGES])
385+
assert len(output) == 1
386+
assert "finish_reason" in output[0], "finish_reason key must always be present"
387+
assert output[0]["finish_reason"] == "", (
388+
f"Expected '' for missing stop_reason, got '{output[0]['finish_reason']}'"
389+
)
390+
391+
392+
def test_streaming_finish_reason_empty_string_when_none():
393+
"""Streaming: finish_reason must be '' when no finish_reason in events."""
394+
span = make_span()
395+
events = [{"type": "text", "text": "Hello", "index": 0}]
396+
set_streaming_response_attributes(span, events)
397+
398+
raw = span.attributes.get(GenAIAttributes.GEN_AI_OUTPUT_MESSAGES)
399+
if raw:
400+
output = json.loads(raw)
401+
assert output[0]["finish_reason"] == "", (
402+
f"Expected '' for missing streaming finish_reason, got '{output[0]['finish_reason']}'"
403+
)
404+
405+
378406
def test_output_messages_tool_use_response():
379407
"""Tool use in the response should appear as tool_call parts."""
380408
span = make_span()
@@ -1009,8 +1037,8 @@ def test_image_without_upload_produces_blob_part():
10091037

10101038

10111039
def test_streaming_finish_reason_null_omitted_from_json():
1012-
"""When no finish_reason is available, the key must be omitted from
1013-
gen_ai.output.messages JSON — NOT serialized as null."""
1040+
"""When no finish_reason is available, the key must be present with empty
1041+
string value — NOT serialized as null, NOT omitted (Bedrock convention)."""
10141042
span = make_span()
10151043
# Event with no finish_reason key at all
10161044
events = [{"type": "text", "text": "Hello world", "index": 0}]
@@ -1020,8 +1048,8 @@ def test_streaming_finish_reason_null_omitted_from_json():
10201048
assert len(output) == 1
10211049
assert output[0]["role"] == "assistant"
10221050
assert output[0]["parts"] == [{"type": "text", "content": "Hello world"}]
1023-
# finish_reason key must be absent, not null
1024-
assert "finish_reason" not in output[0]
1051+
# finish_reason key must be present with empty string fallback
1052+
assert output[0]["finish_reason"] == ""
10251053

10261054

10271055
def test_streaming_finish_reason_none_does_not_set_span_attr():
@@ -1536,3 +1564,32 @@ def test_event_attributes_uses_provider_name_not_system():
15361564
assert EVENT_ATTRIBUTES[GenAIAttributes.GEN_AI_PROVIDER_NAME] == "anthropic"
15371565
assert GenAIAttributes.GEN_AI_SYSTEM not in EVENT_ATTRIBUTES, \
15381566
"Deprecated GEN_AI_SYSTEM should not be in EVENT_ATTRIBUTES"
1567+
1568+
1569+
# ---------------------------------------------------------------------------
1570+
# _map_finish_reason must return "" for falsy input, mapped value for known
1571+
# reasons, and the original string as-is for unknown reasons.
1572+
# ---------------------------------------------------------------------------
1573+
1574+
class TestMapFinishReason:
1575+
from opentelemetry.instrumentation.anthropic.span_utils import _map_finish_reason
1576+
_map_finish_reason = staticmethod(_map_finish_reason)
1577+
1578+
@pytest.mark.parametrize("falsy_input", [None, "", 0, False])
1579+
def test_returns_empty_string_for_falsy(self, falsy_input):
1580+
assert self._map_finish_reason(falsy_input) == ""
1581+
1582+
def test_maps_end_turn_to_stop(self):
1583+
assert self._map_finish_reason("end_turn") == "stop"
1584+
1585+
def test_maps_tool_use_to_tool_call(self):
1586+
assert self._map_finish_reason("tool_use") == "tool_call"
1587+
1588+
def test_maps_max_tokens_to_length(self):
1589+
assert self._map_finish_reason("max_tokens") == "length"
1590+
1591+
def test_maps_stop_sequence_to_stop(self):
1592+
assert self._map_finish_reason("stop_sequence") == "stop"
1593+
1594+
def test_passes_through_unknown_reason(self):
1595+
assert self._map_finish_reason("some_new_reason") == "some_new_reason"

packages/opentelemetry-instrumentation-langchain/opentelemetry/instrumentation/langchain/span_utils.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -369,9 +369,7 @@ def set_chat_response(span: Span, response: LLMResult) -> None:
369369
if tool_calls and isinstance(tool_calls, list):
370370
parts.extend(_tool_calls_to_parts(tool_calls))
371371

372-
msg_obj = {"role": role, "parts": parts}
373-
if fr:
374-
msg_obj["finish_reason"] = fr
372+
msg_obj = {"role": role, "parts": parts, "finish_reason": fr if fr else ""}
375373
output_messages.append(msg_obj)
376374

377375
if output_messages:

packages/opentelemetry-instrumentation-langchain/tests/test_finish_reasons.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ def test_no_finish_reason_omits_attribute(self, mock_span, monkeypatch):
111111
assert GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS not in mock_span.attributes
112112

113113
output = json.loads(mock_span.attributes[GenAIAttributes.GEN_AI_OUTPUT_MESSAGES])
114-
assert "finish_reason" not in output[0]
114+
assert output[0]["finish_reason"] == ""
115115

116116
def test_empty_generation_info_omits_attribute(self, mock_span, monkeypatch):
117117
monkeypatch.setattr(

packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/chat_wrappers.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -556,7 +556,7 @@ def _set_completions(span, choices):
556556

557557
def _map_finish_reason(reason):
558558
if not reason:
559-
return None
559+
return ""
560560
return OPENAI_FINISH_REASON_MAP.get(reason, reason)
561561

562562
def _set_output_messages(span, choices):
@@ -603,11 +603,12 @@ def _set_output_messages(span, choices):
603603
"name": fc_name,
604604
"arguments": _parse_arguments(fc_args),
605605
})
606-
fr = _map_finish_reason(choice.get("finish_reason")) or "stop"
606+
fr = _map_finish_reason(choice.get("finish_reason"))
607607
entry = {"role": "assistant", "parts": parts, "finish_reason": fr}
608608
if content_filter_results:
609609
entry["content_filter_results"] = content_filter_results
610610
messages.append(entry)
611+
611612
_set_span_attribute(span, GenAIAttributes.GEN_AI_OUTPUT_MESSAGES, json.dumps(messages))
612613

613614
@dont_throw

packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/completion_wrappers.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,13 +190,14 @@ def _set_output_messages(span, choices):
190190

191191
messages = []
192192
for choice in choices:
193-
fr = _map_finish_reason(choice.get("finish_reason")) or "stop"
193+
fr = _map_finish_reason(choice.get("finish_reason"))
194194
entry = {
195195
"role": "assistant",
196196
"parts": [{"content": choice.get("text"), "type": "text"}],
197197
"finish_reason": fr,
198198
}
199199
messages.append(entry)
200+
200201
_set_span_attribute(
201202
span,
202203
GenAIAttributes.GEN_AI_OUTPUT_MESSAGES,

packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/v1/responses_wrappers.py

Lines changed: 68 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,11 @@ class TracedData(pydantic.BaseModel):
183183
request_service_tier: Optional[str] = pydantic.Field(default=None)
184184
response_service_tier: Optional[str] = pydantic.Field(default=None)
185185

186+
# Response status from Responses API ("completed", "incomplete", "failed", etc.)
187+
response_status: Optional[str] = pydantic.Field(default=None)
188+
# Reason from incomplete_details when status is "incomplete"
189+
incomplete_reason: Optional[str] = pydantic.Field(default=None)
190+
186191
# Trace context - to maintain trace continuity across async operations
187192
trace_context: Any = pydantic.Field(default=None)
188193

@@ -193,6 +198,41 @@ class Config:
193198
responses: dict[str, TracedData] = {}
194199

195200

201+
def _derive_finish_reason(traced_data: TracedData) -> str:
202+
"""Derive finish_reason from response.status instead of fabricating from block types.
203+
204+
Mapping:
205+
completed + tool calls → "tool_call"
206+
completed + no tool calls → "stop"
207+
incomplete + max_output_tokens → "length"
208+
incomplete + content_filter → "content_filter"
209+
incomplete + other → "length"
210+
failed → "error"
211+
None/unknown → ""
212+
"""
213+
status = traced_data.response_status
214+
if not status:
215+
return ""
216+
if status == "completed":
217+
if traced_data.output_blocks:
218+
for block in traced_data.output_blocks.values():
219+
block_dict = model_as_dict(block)
220+
if block_dict.get("type") in (
221+
"function_call", "file_search_call", "web_search_call",
222+
"computer_call", "code_interpreter_call",
223+
):
224+
return "tool_call"
225+
return "stop"
226+
if status == "incomplete":
227+
reason = traced_data.incomplete_reason
228+
if reason == "content_filter":
229+
return "content_filter"
230+
return "length"
231+
if status in ("failed", "cancelled"):
232+
return "error"
233+
return ""
234+
235+
196236
def parse_response(response: Union[LegacyAPIResponse, Response]) -> Response:
197237
if isinstance(response, LegacyAPIResponse):
198238
return response.parse()
@@ -256,7 +296,11 @@ def prepare_kwargs_for_shared_attributes(kwargs):
256296

257297

258298
def _set_responses_json_messages(traced_response: TracedData, span: Span):
259-
"""Set gen_ai.input.messages and gen_ai.output.messages as JSON."""
299+
"""Set gen_ai.input.messages and gen_ai.output.messages as JSON.
300+
301+
finish_reason is derived from response.status via _derive_finish_reason(),
302+
not fabricated from output block types.
303+
"""
260304
# Build input messages
261305
input_messages = []
262306
if traced_response.instructions:
@@ -352,12 +396,10 @@ def _set_responses_json_messages(traced_response: TracedData, span: Span):
352396
else:
353397
parts.append({"type": "reasoning", "content": summary})
354398
if parts:
355-
has_tool_call = any(p.get("type") == "tool_call" for p in parts)
356-
finish_reason = "tool_call" if has_tool_call else "stop"
357399
output_messages.append({
358400
"role": "assistant",
359401
"parts": parts,
360-
"finish_reason": finish_reason,
402+
"finish_reason": _derive_finish_reason(traced_response),
361403
})
362404

363405
_set_span_attribute(span, GenAIAttributes.GEN_AI_OUTPUT_MESSAGES, json.dumps(output_messages))
@@ -419,26 +461,13 @@ def set_data_attributes(traced_response: TracedData, span: Span):
419461
traced_response.response_reasoning_effort,
420462
)
421463

422-
# P1-2: Derive finish_reasons from output blocks
423-
if traced_response.output_blocks:
424-
finish_reasons = []
425-
has_tool_call = False
426-
for block in traced_response.output_blocks.values():
427-
block_dict = model_as_dict(block)
428-
block_type = block_dict.get("type")
429-
if block_type == "message":
430-
finish_reasons.append("stop")
431-
elif block_type in ("function_call", "file_search_call", "web_search_call",
432-
"computer_call", "code_interpreter_call"):
433-
has_tool_call = True
434-
if has_tool_call:
435-
finish_reasons.append("tool_call")
436-
if finish_reasons:
437-
_set_span_attribute(
438-
span,
439-
GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS,
440-
tuple(finish_reasons),
441-
)
464+
finish_reason = _derive_finish_reason(traced_response)
465+
if finish_reason:
466+
_set_span_attribute(
467+
span,
468+
GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS,
469+
(finish_reason,),
470+
)
442471

443472
if should_send_prompts():
444473
_set_responses_json_messages(traced_response, span)
@@ -585,6 +614,11 @@ def responses_get_or_create_wrapper(tracer: Tracer, wrapped, instance, args, kwa
585614
response_reasoning_effort=non_sentinel_kwargs.get("reasoning", {}).get("effort"),
586615
request_service_tier=existing_data.get("request_service_tier", non_sentinel_kwargs.get("service_tier")),
587616
response_service_tier=existing_data.get("response_service_tier", parsed_response.service_tier),
617+
response_status=parsed_response.status,
618+
incomplete_reason=(
619+
getattr(parsed_response.incomplete_details, "reason", None)
620+
if getattr(parsed_response, "incomplete_details", None) else None
621+
),
588622
# Capture trace context to maintain continuity across async operations
589623
trace_context=existing_data.get("trace_context", context_api.get_current()),
590624
)
@@ -748,6 +782,11 @@ async def async_responses_get_or_create_wrapper(
748782
response_reasoning_effort=non_sentinel_kwargs.get("reasoning", {}).get("effort"),
749783
request_service_tier=existing_data.get("request_service_tier", non_sentinel_kwargs.get("service_tier")),
750784
response_service_tier=existing_data.get("response_service_tier", parsed_response.service_tier),
785+
response_status=parsed_response.status,
786+
incomplete_reason=(
787+
getattr(parsed_response.incomplete_details, "reason", None)
788+
if getattr(parsed_response, "incomplete_details", None) else None
789+
),
751790
# Capture trace context to maintain continuity across async operations
752791
trace_context=existing_data.get("trace_context", context_api.get_current()),
753792
)
@@ -1008,6 +1047,11 @@ def _process_complete_response(self):
10081047
self._traced_data.response_id = parsed_response.id
10091048
self._traced_data.response_model = parsed_response.model
10101049
self._traced_data.output_text = self._output_text
1050+
self._traced_data.response_status = parsed_response.status
1051+
self._traced_data.incomplete_reason = (
1052+
getattr(parsed_response.incomplete_details, "reason", None)
1053+
if getattr(parsed_response, "incomplete_details", None) else None
1054+
)
10111055

10121056
if parsed_response.usage:
10131057
self._traced_data.usage = parsed_response.usage

0 commit comments

Comments (0)