mirror of
https://github.com/chatwoot/chatwoot.git
synced 2026-06-04 21:02:35 +08:00
# Pull Request Template

## Description

We need to pass trace-level attributes down to the spans inside each trace (tool calls, observations, etc.). This way, we can filter observations based on trace-level attributes.

## Type of change

- [x] Bug fix (non-breaking change which fixes an issue)

## How Has This Been Tested?

Please describe the tests that you ran to verify your changes. Provide instructions so we can reproduce. Please also list any relevant details for your test configuration.

Attributes added to observation metadata for easy filtering:

<img width="1327" height="708" alt="image" src="https://github.com/user-attachments/assets/8f1d1bf8-cde4-481d-a2c2-7920ad2fc52e" />

Added a `generation_stage` to differentiate llm_calls that call tools from those that generate a `final_response`:

<img width="1806" height="968" alt="CleanShot 2026-06-03 at 15 11 09@2x" src="https://github.com/user-attachments/assets/db1fa8e0-7f2d-404b-a719-27a16d400442" />

Propagated attributes to tool calls for future use:

<img width="903" height="517" alt="image" src="https://github.com/user-attachments/assets/edc61ce8-93db-465c-a66e-043138e2dc15" />

## Checklist:

- [x] My code follows the style guidelines of this project
- [x] I have performed a self-review of my code
- [x] I have commented on my code, particularly in hard-to-understand areas
- [ ] I have made corresponding changes to the documentation
- [x] My changes generate no new warnings
- [x] I have added tests that prove my fix is effective or that my feature works
- [x] New and existing unit tests pass locally with my changes
- [x] Any dependent changes have been merged and published in downstream modules
343 lines
16 KiB
Ruby
343 lines
16 KiB
Ruby
require 'rails_helper'
|
|
|
|
RSpec.describe Integrations::LlmInstrumentation do
|
|
# Anonymous host class that mixes in the instrumentation module under test.
let(:test_class) { Class.new { include Integrations::LlmInstrumentation } }

let(:instance) { test_class.new }

# Created eagerly (let!) so an OTEL_PROVIDER row exists before each example;
# the value 'langfuse' is only assigned when the row is created here.
let!(:otel_config) do
  InstallationConfig.find_or_create_by(name: 'OTEL_PROVIDER') { |config| config.value = 'langfuse' }
end

# Baseline arguments handed to the instrumentation helpers by the examples.
let(:params) do
  {
    span_name: 'llm.test',
    account_id: 123,
    conversation_id: 456,
    feature_name: 'reply_suggestion',
    model: 'gpt-4o-mini',
    messages: [{ 'role' => 'user', 'content' => 'Hello' }],
    temperature: 0.7
  }
end

# Ensure a LANGFUSE_SECRET_KEY row exists for every example.
before do
  InstallationConfig.find_or_create_by(name: 'LANGFUSE_SECRET_KEY') { |config| config.value = 'test-secret-key' }
end
|
|
|
|
describe '#instrument_llm_call' do
|
|
context 'when OTEL provider is not configured' do
  # Blank out the provider value so the examples in this group run without a provider set.
  before do
    otel_config.update(value: '')
  end

  it 'executes the block without tracing' do
    expect(instance.instrument_llm_call(params) { 'my_result' }).to eq('my_result')
  end
end
|
|
|
|
context 'when OTEL provider is configured' do
  # Shared doubles for the whole group. The baseline is a tracer whose in_span
  # yields mock_span, and a span that accepts set_attribute and status=.
  # Individual examples only override the stubs that differ from this baseline.
  let(:mock_span) { instance_double(OpenTelemetry::Trace::Span) }
  let(:mock_tracer) { instance_double(OpenTelemetry::Trace::Tracer) }

  before do
    allow(mock_span).to receive(:set_attribute)
    allow(mock_span).to receive(:status=)
    allow(instance).to receive(:tracer).and_return(mock_tracer)
    allow(mock_tracer).to receive(:in_span).and_yield(mock_span)
  end

  it 'executes the block and returns the result' do
    result = instance.instrument_llm_call(params) { 'my_result' }

    expect(result).to eq('my_result')
  end

  it 'creates a tracing span with the provided span name' do
    instance.instrument_llm_call(params) { 'result' }

    expect(mock_tracer).to have_received(:in_span).with('llm.test')
  end

  it 'returns the block result even if instrumentation has errors' do
    # Replace the baseline stub: opening the span itself blows up.
    allow(mock_tracer).to receive(:in_span).and_raise(StandardError.new('Instrumentation failed'))

    result = instance.instrument_llm_call(params) { 'my_result' }

    expect(result).to eq('my_result')
  end

  it 'handles errors gracefully and captures exceptions' do
    # Replace the baseline stub: every attribute write on the span fails.
    allow(mock_span).to receive(:set_attribute).and_raise(StandardError.new('Span error'))
    allow(ChatwootExceptionTracker).to receive(:new).and_call_original

    result = instance.instrument_llm_call(params) { 'my_result' }

    expect(result).to eq('my_result')
    expect(ChatwootExceptionTracker).to have_received(:new)
  end

  it 'sets correct request attributes on the span' do
    instance.instrument_llm_call(params) { 'result' }

    expect(mock_span).to have_received(:set_attribute).with('gen_ai.provider.name', 'openai')
    expect(mock_span).to have_received(:set_attribute).with('gen_ai.request.model', 'gpt-4o-mini')
    expect(mock_span).to have_received(:set_attribute).with('gen_ai.request.temperature', 0.7)
  end

  it 'sets correct prompt message attributes' do
    custom_params = params.merge(
      messages: [
        { 'role' => 'system', 'content' => 'You are a helpful assistant' },
        { 'role' => 'user', 'content' => 'Hello' }
      ]
    )

    instance.instrument_llm_call(custom_params) { 'result' }

    expect(mock_span).to have_received(:set_attribute).with('gen_ai.prompt.0.role', 'system')
    expect(mock_span).to have_received(:set_attribute).with('gen_ai.prompt.0.content', 'You are a helpful assistant')
    expect(mock_span).to have_received(:set_attribute).with('gen_ai.prompt.1.role', 'user')
    expect(mock_span).to have_received(:set_attribute).with('gen_ai.prompt.1.content', 'Hello')
  end

  it 'sets correct metadata attributes' do
    instance.instrument_llm_call(params) { 'result' }

    expect(mock_span).to have_received(:set_attribute).with('langfuse.user.id', '123')
    expect(mock_span).to have_received(:set_attribute).with('langfuse.session.id', '123_456')
    expect(mock_span).to have_received(:set_attribute).with('langfuse.trace.tags', ['reply_suggestion'])
    expect(mock_span).to have_received(:set_attribute).with('langfuse.observation.metadata.user_id', '123')
    expect(mock_span).to have_received(:set_attribute).with('langfuse.observation.metadata.session_id', '123_456')
    expect(mock_span).to have_received(:set_attribute).with('langfuse.observation.metadata.feature_name', 'reply_suggestion')
  end

  it 'sets completion message attributes when result contains message' do
    result = instance.instrument_llm_call(params) do
      { message: 'AI response here' }
    end

    expect(result).to eq({ message: 'AI response here' })
    expect(mock_span).to have_received(:set_attribute).with('gen_ai.completion.0.role', 'assistant')
    expect(mock_span).to have_received(:set_attribute).with('gen_ai.completion.0.content', 'AI response here')
  end

  it 'sets usage metrics when result contains usage data' do
    result = instance.instrument_llm_call(params) do
      {
        usage: {
          'prompt_tokens' => 150,
          'completion_tokens' => 200,
          'total_tokens' => 350
        }
      }
    end

    expect(result[:usage]['prompt_tokens']).to eq(150)
    expect(mock_span).to have_received(:set_attribute).with('gen_ai.usage.input_tokens', 150)
    expect(mock_span).to have_received(:set_attribute).with('gen_ai.usage.output_tokens', 200)
    expect(mock_span).to have_received(:set_attribute).with('gen_ai.usage.total_tokens', 350)
  end

  it 'sets error attributes when result contains error' do
    # Stub the status factory so the exact object assigned to the span can be asserted.
    mock_status = instance_double(OpenTelemetry::Trace::Status)
    allow(OpenTelemetry::Trace::Status).to receive(:error).and_return(mock_status)

    result = instance.instrument_llm_call(params) do
      {
        error: 'API rate limit exceeded'
      }
    end

    expect(result[:error]).to eq('API rate limit exceeded')
    expect(mock_span).to have_received(:set_attribute)
      .with('gen_ai.response.error', '"API rate limit exceeded"')
    expect(mock_span).to have_received(:status=).with(mock_status)
    expect(OpenTelemetry::Trace::Status).to have_received(:error).with('API rate limit exceeded')
  end
end
|
|
|
|
describe '#instrument_agent_session' do
|
|
context 'when OTEL provider is not configured' do
  # Blank out the provider value so the examples in this group run without a provider set.
  before do
    otel_config.update(value: '')
  end

  it 'executes the block without tracing' do
    returned = instance.instrument_agent_session(params) { 'my_result' }

    expect(returned).to eq('my_result')
  end
end
|
|
|
|
context 'when OTEL provider is configured' do
  let(:mock_span) { instance_double(OpenTelemetry::Trace::Span) }
  let(:mock_tracer) { instance_double(OpenTelemetry::Trace::Tracer) }

  # Baseline for the group: the instance uses mock_tracer, and any in_span call
  # yields mock_span unless an example registers a more specific stub.
  before do
    allow(mock_span).to receive(:set_attribute)
    allow(instance).to receive(:tracer).and_return(mock_tracer)
    allow(mock_tracer).to receive(:in_span).and_yield(mock_span)
  end

  # Routes the session span ('llm.test') and one nested span to dedicated doubles,
  # so attributes written to the nested span can be asserted separately from the root.
  #
  # @param nested_instance [Object] object making the nested call; its tracer is stubbed to mock_tracer
  # @param nested_span_name [String] span name whose in_span call yields the returned double
  # @return [RSpec::Mocks::InstanceVerifyingDouble] the double yielded for the nested span
  def stub_nested_span(nested_instance, nested_span_name)
    root_span = instance_double(OpenTelemetry::Trace::Span)
    nested_span = instance_double(OpenTelemetry::Trace::Span)
    allow(root_span).to receive(:set_attribute)
    allow(nested_span).to receive(:set_attribute)
    allow(nested_instance).to receive(:tracer).and_return(mock_tracer)
    allow(mock_tracer).to receive(:in_span).with('llm.test').and_yield(root_span)
    allow(mock_tracer).to receive(:in_span).with(nested_span_name).and_yield(nested_span)
    nested_span
  end

  it 'executes the block and returns the result' do
    result = instance.instrument_agent_session(params) { 'my_result' }
    expect(result).to eq('my_result')
  end

  it 'returns the block result even if instrumentation has errors' do
    # Replace the baseline stub: opening the span itself blows up.
    allow(mock_tracer).to receive(:in_span).and_raise(StandardError.new('Instrumentation failed'))

    result = instance.instrument_agent_session(params) { 'my_result' }

    expect(result).to eq('my_result')
  end

  it 'sets trace input and output attributes' do
    result_data = { content: 'AI response' }
    instance.instrument_agent_session(params) { result_data }

    expect(mock_span).to have_received(:set_attribute).with('langfuse.observation.input', params[:messages].to_json)
    expect(mock_span).to have_received(:set_attribute).with('langfuse.observation.output', result_data.to_json)
  end

  it 'propagates trace attributes as observation metadata to child tool spans' do
    tool_instance = test_class.new
    tool_span = stub_nested_span(tool_instance, 'tool.search')

    instance.instrument_agent_session(params) do
      tool_instance.instrument_tool_call('search', { query: 'test' }) { 'tool result' }
    end

    expect(tool_span).to have_received(:set_attribute).with('langfuse.observation.metadata.user_id', '123')
    expect(tool_span).to have_received(:set_attribute).with('langfuse.observation.metadata.session_id', '123_456')
    expect(tool_span).to have_received(:set_attribute).with('langfuse.observation.metadata.feature_name', 'reply_suggestion')
  end

  it 'keeps inherited session metadata for nested service spans with their own feature tag' do
    nested_instance = test_class.new
    # The nested call passes no conversation_id and its own feature_name.
    nested_params = params.merge(span_name: 'llm.translate_query', conversation_id: nil, feature_name: 'translate_query')
    nested_span = stub_nested_span(nested_instance, 'llm.translate_query')

    instance.instrument_agent_session(params) do
      nested_instance.instrument_llm_call(nested_params) { 'translated query' }
    end

    expect(nested_span).to have_received(:set_attribute).with('langfuse.session.id', '123_456')
    expect(nested_span).to have_received(:set_attribute).with('langfuse.trace.tags', ['translate_query'])
    expect(nested_span).to have_received(:set_attribute).with('langfuse.observation.metadata.session_id', '123_456')
    expect(nested_span).to have_received(:set_attribute).with('langfuse.observation.metadata.feature_name', 'translate_query')
  end

  it 'propagates session metadata to nested embedding spans' do
    embedding_instance = test_class.new
    embedding_params = {
      span_name: 'llm.captain.embedding',
      account_id: 123,
      feature_name: 'embedding',
      model: 'text-embedding-3-small',
      input: 'search result'
    }
    embedding_span = stub_nested_span(embedding_instance, 'llm.captain.embedding')

    instance.instrument_agent_session(params) do
      embedding_instance.instrument_embedding_call(embedding_params) { [0.1, 0.2, 0.3] }
    end

    expect(embedding_span).to have_received(:set_attribute).with('langfuse.session.id', '123_456')
    expect(embedding_span).to have_received(:set_attribute).with('langfuse.trace.tags', ['embedding'])
    expect(embedding_span).to have_received(:set_attribute).with('langfuse.observation.metadata.session_id', '123_456')
    expect(embedding_span).to have_received(:set_attribute).with('langfuse.observation.metadata.feature_name', 'embedding')
  end

  # Regression test for Langfuse double-counting bug.
  # Setting gen_ai.request.model on parent spans causes Langfuse to classify them as
  # GENERATIONs instead of SPANs, resulting in cost being counted multiple times
  # (once for the parent, once for each child GENERATION).
  # See: https://github.com/langfuse/langfuse/issues/7549
  it 'does NOT set gen_ai.request.model to avoid being classified as a GENERATION' do
    instance.instrument_agent_session(params) { 'result' }

    expect(mock_span).not_to have_received(:set_attribute).with('gen_ai.request.model', anything)
  end
end
|
|
end
|
|
end
|
|
end
|