# llm_with_tracing.py
# Goal: Manufacturing-ready LLM name wrapper with full observability.
# All calls are traced in Langfuse: inputs, outputs, tokens, prices, and latency.
#
# Stipulations:
# pip set up langfuse anthropic python-dotenv
#
# setting:
# 1. Create a free account at https://cloud.langfuse.com.
# 2.[設定]>[API キー]Get the important thing from
# 3. Create a .env file with the next variables
#
# Run:
# Python llm_with_tracing.py
import OS
import time
from Dotenfu import load_dotenv
import human
from lang fuse import langfuse
# Load surroundings variables from .env file
load_dotenv()
# Setting variables required in .env:
# LANGFUSE_PUBLIC_KEY=pk-lf-…
# LANGFUSE_SECRET_KEY=sk-lf-…
# LANGFUSE_HOST=https://cloud.langfuse.com (or self-hosted URL)
# ANTHROPIC_API_KEY=sk-ant-…
# Initialize the consumer
langfuse_client = langfuse() # Robotically learn keys from surroundings
anthropic_client = human.human() # Learn ANTHROPIC_API_KEY from surroundings
# ── Configuration ──── Configuration ────────────────
# Retailer the immediate right here as an alternative of inline with the API name.
# This permits for model management and unbiased testing.
SYSTEM_PROMPT = “”“You’re a pleasant buyer assist assistant.
Please reply the questions clearly and concisely.
If you happen to do not perceive one thing, do not make assumptions, simply say it straight. ”“”
mannequin = “Claude Sonnet-4-20250514”
# Anthropic costs as of mid-2026 (up to date if costs change)
# Used to calculate value per name for value monitoring
COST_PER_INPUT_TOKEN = 3.00 / 1_000_000 # $3.00 per million enter tokens
COST_PER_OUTPUT_TOKEN = 15.00 / 1_000_000 # $15.00 per million output tokens
certainly call_llm_with_tracing(
Consumer message: str,
Session ID: str = “Default session”,
Consumer ID: str = “Nameless”
) -> str:
“””
Executes a traced LLM name. Each name creates a Langfuse hint much like the next:
– Full enter/output
– Token utilization (enter, output, complete)
– Value calculated in USD
– Delay in milliseconds
– Mannequin used and session context
Parameters:
user_message : Message from the consumer
session_id : Teams associated calls into one dialog in Langfuse.
user_id : associates a name with a particular consumer for evaluation
Return worth:
LLM response as string
”“”
# Create a top-level hint of this consumer interplay
# Traces are displayed within the Langfuse dashboard as a single unit of labor
hint = langfuse_client.hint(
identify=“Buyer assist name”,
Session ID=Session ID,
Consumer ID=Consumer ID,
enter={“Consumer message”: Consumer message, “System Immediate”: SYSTEM_PROMPT}
)
# create a technology span within the hint
# This retrieves model-specific particulars resembling mannequin identify, token, and price.
technology = hint.technology(
identify=“Claude Full”,
mannequin=mannequin,
enter={
“system”: SYSTEM_PROMPT,
“message”: [{“role”: “user”, “content”: user_message}]
}
)
Begin time = time.time()
strive:
# Make an API name
response = anthropic_client.message.create(
mannequin=mannequin,
max_tokens=1024,
system=SYSTEM_PROMPT,
message=[{“role”: “user”, “content”: user_message}]
)
latency_ms = integer((time.time() – Begin time) * 1000)
# extract response textual content
response textual content = response.content material[0].sentence
# Extract token utilization from response
enter token = response.Utilization.enter token
output token = response.Utilization.output token
total_tokens = enter token + output_token
# Calculate the price of this name
Cost_USD = (
enter token * COST_PER_INPUT_TOKEN +
Output token * COST_PER_OUTPUT_token
)
# Replace the technology span utilizing the end result
# This knowledge shall be entered into the Langfuse value and token dashboard
technology.finish(
output=response textual content,
Utilization={
“enter”: enter token,
“output”: output token,
“complete”: total_tokens,
“unit”: “token”
},
metadata={
“Latency_ms”: latency_ms,
“Cost_USD”: spherical(Cost_USD, 6),
“mannequin”: mannequin
}
)
# replace hint with ultimate output
hint.replace(
output={“response”: response textual content},
metadata={“Complete cost_USD”: spherical(Cost_USD, 6)}
)
# Print a abstract to straightforward output for native viewing
print(f“n{‘─’ * 60}”)
print(f“Consumer: {user_message}”)
print(f“Claude: {response_text}”)
print(f“Tokens: {input_tokens} enter / {output_tokens} output / complete {total_tokens}”)
print(f“Value: ${cost_usd:.6f}”)
print(f“Latency: {latency_ms}ms”)
print(f“Hint: {langfuse_client.base_url}/hint/{hint.id}”)
print(f“{‘─’ * 60}n”)
return response textual content
exclude exception as e:
# Log the error in a hint so it is going to be seen in Langfuse
technology.finish(
output=none,
metadata={“error”: str(e), “Latency_ms”: integer((time.time() – Begin time) * 1000)}
)
hint.replace(output={“error”: str(e)})
# All the time flush earlier than firing — ensures error traces are despatched
langfuse_client.flash()
enhance
Lastly:
# Flush sends all buffered occasions to Langfuse
# Langfuse routinely flushes long-running providers.
# The script requires a handbook flush earlier than the method exits.
langfuse_client.flash()
# ── Run the demo ──── ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ Run the demo
if __name__ == “__Major__”:
# Simulate a two-turn buyer assist dialog
take a look at message = [
“What is your return policy for electronics?”,
“Can I return an item I bought 45 days ago?”
]
session = “Demo session-001”
for I, message in enumerate(take a look at message):
print(f“nCalling {i + 1}/{len(test_messages)}”)
strive:
call_llm_with_tracing(
Consumer message=message,
Session ID=session,
Consumer ID=“Check user-42”
)
exclude exception as e:
print(f“Invocation error {i + 1}: {e}”)

