These patterns show how to combine multiple ABV features for production-grade AI applications.
Validated Chat Pipeline
Combines tracing, input/output guardrails, and scoring.
Python
from abvdev import ABV, observe
from dataclasses import dataclass
from typing import Optional

abv = ABV(api_key="sk-abv-...")

@dataclass
class ChatResult:
    response: Optional[str]
    blocked: bool
    reason: Optional[str]

@observe()
def validated_chat(user_id: str, session_id: str, message: str) -> ChatResult:
    # Set trace context
    abv.update_current_trace(
        user_id=user_id,
        session_id=session_id,
        tags=["chat", "production"]
    )

    # Input guardrail
    with abv.start_as_current_observation(as_type="guardrail", name="input-check") as guard:
        input_check = abv.guardrails.validate_toxic_language(message, {"sensitivity": "medium"})
        guard.update(output={"status": input_check.status})

    if input_check.status == "FAIL":
        abv.score_current_trace(name="blocked", value=1.0, data_type="BOOLEAN")
        return ChatResult(response=None, blocked=True, reason="Input blocked")

    # Generate response
    with abv.start_as_current_generation(name="llm-call", model="gpt-4o-mini") as gen:
        response = abv.gateway.complete_chat(
            provider="openai",
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": message}
            ]
        )
        output = response.choices[0].message.content
        gen.update(
            output=output,
            usage_details={
                "prompt_tokens": response.usage.prompt_tokens,
                "completion_tokens": response.usage.completion_tokens
            }
        )

    # Output guardrail
    with abv.start_as_current_observation(as_type="guardrail", name="output-check") as guard:
        output_check = abv.guardrails.validate_toxic_language(output, {"sensitivity": "high"})
        guard.update(output={"status": output_check.status})

    if output_check.status == "FAIL":
        abv.score_current_trace(name="output_filtered", value=1.0, data_type="BOOLEAN")
        return ChatResult(response=None, blocked=True, reason="Output filtered")

    # Score successful interaction
    abv.score_current_trace(name="completed", value=1.0, data_type="BOOLEAN")
    abv.score_current_trace(name="response_length", value=len(output), data_type="NUMERIC")

    return ChatResult(response=output, blocked=False, reason=None)

result = validated_chat("user-123", "session-456", "What is machine learning?")
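If you add more input checks over time, the per-check with blocks in validated_chat can be factored into a small helper. The sketch below is illustrative, not part of the SDK: it assumes the same abv client and only the validate_toxic_language validator shown above; extend the check list with whatever validators you actually run.

from abvdev import ABV
from typing import Any, Callable, List, Tuple

abv = ABV(api_key="sk-abv-...")

def run_guardrails(text: str, checks: List[Tuple[str, Callable[[str], Any]]]) -> Tuple[bool, List[str]]:
    """Run each named check in its own guardrail observation and collect failures."""
    failed: List[str] = []
    for name, check in checks:
        with abv.start_as_current_observation(as_type="guardrail", name=name) as guard:
            result = check(text)
            guard.update(output={"status": result.status})
            if result.status == "FAIL":
                failed.append(name)
    return len(failed) == 0, failed

# Example: the input-side check from validated_chat, expressed as a list entry
ok, failed = run_guardrails("What is machine learning?", [
    ("toxicity", lambda t: abv.guardrails.validate_toxic_language(t, {"sensitivity": "medium"})),
])
if not ok:
    abv.score_current_trace(name="blocked", value=1.0, data_type="BOOLEAN")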
JavaScript
import {
  startActiveObservation,
  startObservation,
  updateActiveTrace,
} from "@abvdev/tracing";

// Assumes an initialized ABV client (`abv`) configured with your API key,
// as in the Python example above.

interface ChatResult {
  response: string | null;
  blocked: boolean;
  reason: string | null;
}

async function validatedChat(
  userId: string,
  sessionId: string,
  message: string
): Promise<ChatResult> {
  return startActiveObservation("validated-chat", async () => {
    updateActiveTrace({
      userId,
      sessionId,
      tags: ["chat", "production"],
    });

    // Input guardrail
    const inputGuard = startObservation(
      "input-check",
      { input: message },
      { asType: "guardrail" }
    );
    const inputCheck = await abv.guardrails.validators.toxicLanguage.validate(
      message,
      { sensitivity: "MEDIUM" }
    );
    inputGuard.update({ output: { status: inputCheck.status } }).end();

    if (inputCheck.status === "FAIL") {
      abv.score.activeTrace({ name: "blocked", value: 1.0 });
      return { response: null, blocked: true, reason: "Input blocked" };
    }

    // Generate response
    const gen = startObservation(
      "llm-call",
      {
        model: "gpt-4o-mini",
        input: [{ role: "user", content: message }],
      },
      { asType: "generation" }
    );
    const response = await abv.gateway.chat.completions.create({
      provider: "openai",
      model: "gpt-4o-mini",
      messages: [
        { role: "system", content: "You are a helpful assistant." },
        { role: "user", content: message },
      ],
    });
    const output = response.choices[0].message.content;
    gen
      .update({
        output: { content: output },
        usageDetails: {
          input: response.usage?.prompt_tokens,
          output: response.usage?.completion_tokens,
        },
      })
      .end();

    // Output guardrail
    const outputGuard = startObservation(
      "output-check",
      { input: output },
      { asType: "guardrail" }
    );
    const outputCheck = await abv.guardrails.validators.toxicLanguage.validate(
      output,
      { sensitivity: "HIGH" }
    );
    outputGuard.update({ output: { status: outputCheck.status } }).end();

    if (outputCheck.status === "FAIL") {
      abv.score.activeTrace({ name: "output_filtered", value: 1.0 });
      return { response: null, blocked: true, reason: "Output filtered" };
    }

    // Score success
    abv.score.activeTrace({ name: "completed", value: 1.0 });
    abv.score.activeTrace({ name: "response_length", value: output.length });

    return { response: output, blocked: false, reason: null };
  });
}

const result = await validatedChat(
  "user-123",
  "session-456",
  "What is machine learning?"
);
await abv.score.flush();
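In production the pipeline usually sits behind an HTTP endpoint rather than being called directly. The sketch below (Python, matching the first tab) shows one way to surface guardrail blocks to clients; FastAPI is an example framework choice, and the route, payload model, and status codes are illustrative. It assumes validated_chat from the Python example above is importable.

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

# from my_app.chat import validated_chat  # the function defined above

app = FastAPI()

class ChatRequest(BaseModel):
    user_id: str
    session_id: str
    message: str

@app.post("/chat")
def chat(req: ChatRequest):
    result = validated_chat(req.user_id, req.session_id, req.message)
    if result.blocked:
        # Surface guardrail blocks as a client error rather than a 500
        raise HTTPException(status_code=422, detail=result.reason)
    return {"response": result.response}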
Cost-Tracked Generation with Fallback
Combines gateway, cost tracking, and provider fallback.
Python
from abvdev import ABV, observe
from typing import List, Dict

abv = ABV(api_key="sk-abv-...")

PROVIDERS = [
    {"provider": "openai", "model": "gpt-4o-mini", "cost_per_1k_input": 0.00015, "cost_per_1k_output": 0.0006},
    {"provider": "anthropic", "model": "claude-sonnet-4-20250514", "cost_per_1k_input": 0.003, "cost_per_1k_output": 0.015},
]

@observe()
def cost_aware_generation(messages: List[Dict], max_cost: float = 0.10) -> dict:
    abv.update_current_trace(metadata={"max_cost_budget": max_cost})

    for config in PROVIDERS:
        with abv.start_as_current_generation(
            name=f"{config['provider']}-call",
            model=config["model"]
        ) as gen:
            try:
                response = abv.gateway.complete_chat(
                    provider=config["provider"],
                    model=config["model"],
                    messages=messages
                )

                # Calculate cost
                input_tokens = response.usage.prompt_tokens
                output_tokens = response.usage.completion_tokens
                cost = (
                    (input_tokens / 1000) * config["cost_per_1k_input"] +
                    (output_tokens / 1000) * config["cost_per_1k_output"]
                )

                gen.update(
                    output=response.choices[0].message.content,
                    usage_details={
                        "prompt_tokens": input_tokens,
                        "completion_tokens": output_tokens
                    },
                    cost_details={"total_cost": cost, "currency": "USD"}
                )

                # Score cost metrics
                abv.score_current_trace(name="cost_usd", value=cost, data_type="NUMERIC")
                abv.score_current_trace(
                    name="under_budget",
                    value=1.0 if cost <= max_cost else 0.0,
                    data_type="BOOLEAN"
                )

                return {
                    "response": response.choices[0].message.content,
                    "provider": config["provider"],
                    "cost": cost
                }
            except Exception as e:
                gen.update(level="WARNING", status_message=str(e))
                continue

    raise Exception("All providers failed")
JavaScript
const PROVIDERS = [
  {
    provider: "openai",
    model: "gpt-4o-mini",
    costPer1kInput: 0.00015,
    costPer1kOutput: 0.0006,
  },
  {
    provider: "anthropic",
    model: "claude-sonnet-4-20250514",
    costPer1kInput: 0.003,
    costPer1kOutput: 0.015,
  },
];

async function costAwareGeneration(
  messages: any[],
  maxCost: number = 0.1
) {
  return startActiveObservation("cost-aware-generation", async () => {
    updateActiveTrace({ metadata: { max_cost_budget: maxCost } });

    for (const config of PROVIDERS) {
      const gen = startObservation(
        `${config.provider}-call`,
        { model: config.model, input: messages },
        { asType: "generation" }
      );
      try {
        const response = await abv.gateway.chat.completions.create({
          provider: config.provider as any,
          model: config.model,
          messages,
        });

        const inputTokens = response.usage?.prompt_tokens ?? 0;
        const outputTokens = response.usage?.completion_tokens ?? 0;
        const cost =
          (inputTokens / 1000) * config.costPer1kInput +
          (outputTokens / 1000) * config.costPer1kOutput;

        gen
          .update({
            output: { content: response.choices[0].message.content },
            usageDetails: { input: inputTokens, output: outputTokens },
            costDetails: { totalCost: cost, currency: "USD" },
          })
          .end();

        abv.score.activeTrace({ name: "cost_usd", value: cost });
        abv.score.activeTrace({
          name: "under_budget",
          value: cost <= maxCost ? 1.0 : 0.0,
        });

        return {
          response: response.choices[0].message.content,
          provider: config.provider,
          cost,
        };
      } catch (error) {
        gen
          .update({
            level: "WARNING",
            statusMessage: error instanceof Error ? error.message : String(error),
          })
          .end();
        continue;
      }
    }

    throw new Error("All providers failed");
  });
}
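Falling straight through to the next provider treats every error as fatal for that provider. For transient failures (timeouts, rate limits) it is often cheaper to retry the same provider a few times first. A minimal sketch (Python, matching the first tab), assuming the same abv client as above; which exceptions are actually retryable depends on your gateway, so the broad except Exception here is a placeholder.

import time
from typing import Dict, List

def try_provider_with_retries(config: Dict, messages: List[Dict], attempts: int = 3):
    """Call one provider with exponential backoff before giving up on it."""
    delay = 1.0
    for attempt in range(attempts):
        try:
            return abv.gateway.complete_chat(
                provider=config["provider"],
                model=config["model"],
                messages=messages,
            )
        except Exception:
            if attempt == attempts - 1:
                raise  # let the fallback loop move on to the next provider
            time.sleep(delay)
            delay *= 2  # backoff: 1s, 2s, 4s, ...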
Prompt + Guardrail + Evaluation
Fetches a managed prompt, validates the output, and auto-scores it with an LLM judge.
Python
from abvdev import ABV, observe
import json

abv = ABV(api_key="sk-abv-...")

@observe()
def managed_prompt_pipeline(query: str, user_id: str) -> dict:
    abv.update_current_trace(user_id=user_id, tags=["managed-prompt"])

    # Fetch managed prompt
    prompt = abv.get_prompt("qa-assistant-v2")

    # Compile prompt with variables
    messages = prompt.compile(query=query, context="General knowledge")

    # Generate with prompt config
    with abv.start_as_current_generation(
        name="managed-generation",
        model=prompt.config.get("model", "gpt-4o-mini"),
        metadata={"prompt_name": prompt.name, "prompt_version": prompt.version}
    ) as gen:
        response = abv.gateway.complete_chat(
            provider="openai",
            model=prompt.config.get("model", "gpt-4o-mini"),
            messages=messages,
            temperature=prompt.config.get("temperature", 0.7)
        )
        output = response.choices[0].message.content
        gen.update(output=output)

    # Validate output structure if JSON expected
    if prompt.config.get("expects_json"):
        validation = abv.guardrails.validate_json(output, {"strictMode": True})
        abv.score_current_span(
            name="valid_json",
            value=1.0 if validation.status == "PASS" else 0.0,
            data_type="BOOLEAN"
        )

    # Auto-evaluate with LLM judge
    with abv.start_as_current_observation(as_type="evaluator", name="auto-judge") as judge:
        judge_response = abv.gateway.complete_chat(
            provider="openai",
            model="gpt-4o",
            messages=[{
                "role": "user",
                "content": f"""Rate this response 1-10 for helpfulness.
Query: {query}
Response: {output}
Return JSON: {{"score": N, "reason": "..."}}"""
            }],
            temperature=0
        )
        scores = json.loads(judge_response.choices[0].message.content)
        judge.update(output=scores)

    abv.score_current_trace(
        name="helpfulness",
        value=scores["score"] / 10,
        data_type="NUMERIC",
        comment=scores["reason"]
    )

    return {"response": output, "helpfulness": scores["score"]}
JavaScript
async function managedPromptPipeline(query: string, userId: string) {
  return startActiveObservation("managed-prompt-pipeline", async () => {
    updateActiveTrace({ userId, tags: ["managed-prompt"] });

    // Fetch managed prompt
    const prompt = await abv.prompt.get("qa-assistant-v2");

    // Compile prompt
    const messages = prompt.compile({ query, context: "General knowledge" });

    // Generate
    const gen = startObservation(
      "managed-generation",
      {
        model: prompt.config?.model ?? "gpt-4o-mini",
        input: messages,
        metadata: { prompt_name: prompt.name, prompt_version: prompt.version },
      },
      { asType: "generation" }
    );
    const response = await abv.gateway.chat.completions.create({
      provider: "openai",
      model: prompt.config?.model ?? "gpt-4o-mini",
      messages,
      temperature: prompt.config?.temperature ?? 0.7,
    });
    const output = response.choices[0].message.content;
    gen.update({ output: { content: output } }).end();

    // Validate JSON if expected
    if (prompt.config?.expects_json) {
      const validation = await abv.guardrails.validators.validJson.validate(
        output,
        { strictMode: true }
      );
      abv.score.activeObservation({
        name: "valid_json",
        value: validation.status === "PASS" ? 1.0 : 0.0,
      });
    }

    // Auto-judge
    const judge = startObservation(
      "auto-judge",
      { input: { query, response: output } },
      { asType: "evaluator" }
    );
    const judgeResponse = await abv.gateway.chat.completions.create({
      provider: "openai",
      model: "gpt-4o",
      messages: [
        {
          role: "user",
          content: `Rate this response 1-10 for helpfulness.
Query: ${query}
Response: ${output}
Return JSON: {"score": N, "reason": "..."}`,
        },
      ],
      temperature: 0,
    });
    const scores = JSON.parse(judgeResponse.choices[0].message.content);
    judge.update({ output: scores }).end();

    abv.score.activeTrace({
      name: "helpfulness",
      value: scores.score / 10,
      comment: scores.reason,
    });

    return { response: output, helpfulness: scores.score };
  });
}
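Fetching the managed prompt on every request adds a network round trip to each call. Check whether your SDK version already caches prompt fetches; if it does not, a small TTL cache is enough. The sketch below (Python, matching the first tab) is generic and assumes the abv client from the example above; the 60-second TTL and cache key scheme are arbitrary.

import time
from typing import Any, Dict, Tuple

_prompt_cache: Dict[str, Tuple[float, Any]] = {}

def get_prompt_cached(name: str, ttl_seconds: float = 60.0):
    """Return a cached prompt if it is fresh, otherwise fetch and cache it."""
    now = time.time()
    cached = _prompt_cache.get(name)
    if cached and now - cached[0] < ttl_seconds:
        return cached[1]
    prompt = abv.get_prompt(name)  # same call as in managed_prompt_pipeline
    _prompt_cache[name] = (now, prompt)
    return prompt

prompt = get_prompt_cached("qa-assistant-v2")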
Session-Based Conversation with Memory
Tracks multi-turn conversations with session context.
Python
from abvdev import ABV, observe
from typing import List, Dict

abv = ABV(api_key="sk-abv-...")

# In-memory store (use Redis/DB in production)
conversation_store: Dict[str, List[Dict]] = {}

@observe()
def chat_with_memory(user_id: str, session_id: str, message: str) -> str:
    abv.update_current_trace(
        user_id=user_id,
        session_id=session_id,
        metadata={"conversation_turn": len(conversation_store.get(session_id, [])) + 1}
    )

    # Get or create conversation history
    if session_id not in conversation_store:
        conversation_store[session_id] = [
            {"role": "system", "content": "You are a helpful assistant. Be concise."}
        ]
    history = conversation_store[session_id]

    # Add user message
    history.append({"role": "user", "content": message})

    # Generate response
    with abv.start_as_current_generation(name="chat-turn", model="gpt-4o-mini") as gen:
        response = abv.gateway.complete_chat(
            provider="openai",
            model="gpt-4o-mini",
            messages=history
        )
        assistant_message = response.choices[0].message.content
        gen.update(
            input=history,
            output=assistant_message,
            metadata={"history_length": len(history)}
        )

    # Store assistant response
    history.append({"role": "assistant", "content": assistant_message})

    # Score conversation metrics
    abv.score_current_trace(
        name="conversation_length",
        value=len(history),
        data_type="NUMERIC"
    )

    return assistant_message

# Multi-turn conversation
chat_with_memory("user-1", "session-abc", "Hi, I'm learning Python")
chat_with_memory("user-1", "session-abc", "What's a good first project?")
chat_with_memory("user-1", "session-abc", "Can you give me more details?")
JavaScript
const conversationStore = new Map<string, any[]>();

async function chatWithMemory(
  userId: string,
  sessionId: string,
  message: string
): Promise<string> {
  return startActiveObservation("chat-with-memory", async () => {
    const turn = (conversationStore.get(sessionId)?.length ?? 0) + 1;
    updateActiveTrace({
      userId,
      sessionId,
      metadata: { conversation_turn: turn },
    });

    // Get or create history
    if (!conversationStore.has(sessionId)) {
      conversationStore.set(sessionId, [
        {
          role: "system",
          content: "You are a helpful assistant. Be concise.",
        },
      ]);
    }
    const history = conversationStore.get(sessionId)!;
    history.push({ role: "user", content: message });

    // Generate
    const gen = startObservation(
      "chat-turn",
      { model: "gpt-4o-mini", input: history },
      { asType: "generation" }
    );
    const response = await abv.gateway.chat.completions.create({
      provider: "openai",
      model: "gpt-4o-mini",
      messages: history,
    });
    const assistantMessage = response.choices[0].message.content;
    gen
      .update({
        output: { content: assistantMessage },
        metadata: { history_length: history.length },
      })
      .end();

    history.push({ role: "assistant", content: assistantMessage });

    abv.score.activeTrace({
      name: "conversation_length",
      value: history.length,
    });

    return assistantMessage;
  });
}

// Multi-turn
await chatWithMemory("user-1", "session-abc", "Hi, I'm learning Python");
await chatWithMemory("user-1", "session-abc", "What's a good first project?");
await chatWithMemory("user-1", "session-abc", "Can you give me more details?");
Quality Gate Pipeline
Blocks deployment if evaluation scores fall below a threshold.
Python
from abvdev import ABV, observe
from typing import List

abv = ABV(api_key="sk-abv-...")

@observe()
def quality_gate_evaluation(
    dataset_name: str,
    min_accuracy: float = 0.8,
    min_helpfulness: float = 0.7
) -> dict:
    """Run evaluation and determine if quality gate passes."""
    dataset = abv.get_dataset(dataset_name)
    scores = {"accuracy": [], "helpfulness": []}

    for item in dataset.items:
        with item.run(run_name="quality-gate", run_description="Pre-deploy check") as span:
            # Generate response
            response = abv.gateway.complete_chat(
                provider="openai",
                model="gpt-4o-mini",
                messages=[{"role": "user", "content": item.input["query"]}]
            )
            output = response.choices[0].message.content

            # Check accuracy (substring match; swap in semantic similarity if needed)
            accuracy = 1.0 if item.expected_output.lower() in output.lower() else 0.0
            scores["accuracy"].append(accuracy)
            span.score(name="accuracy", value=accuracy, data_type="NUMERIC")

            # LLM judge for helpfulness
            judge_resp = abv.gateway.complete_chat(
                provider="openai",
                model="gpt-4o",
                messages=[{
                    "role": "user",
                    "content": f"Rate 0-1: Is this helpful? Query: {item.input['query']} Response: {output}. Reply with just a number."
                }],
                temperature=0
            )
            helpfulness = float(judge_resp.choices[0].message.content.strip())
            scores["helpfulness"].append(helpfulness)
            span.score(name="helpfulness", value=helpfulness, data_type="NUMERIC")

    # Calculate averages
    avg_accuracy = sum(scores["accuracy"]) / len(scores["accuracy"])
    avg_helpfulness = sum(scores["helpfulness"]) / len(scores["helpfulness"])

    # Determine gate status
    gate_passed = avg_accuracy >= min_accuracy and avg_helpfulness >= min_helpfulness

    # Score the overall run
    abv.score_current_trace(name="avg_accuracy", value=avg_accuracy, data_type="NUMERIC")
    abv.score_current_trace(name="avg_helpfulness", value=avg_helpfulness, data_type="NUMERIC")
    abv.score_current_trace(name="gate_passed", value=1.0 if gate_passed else 0.0, data_type="BOOLEAN")

    return {
        "gate_passed": gate_passed,
        "avg_accuracy": avg_accuracy,
        "avg_helpfulness": avg_helpfulness,
        "thresholds": {"accuracy": min_accuracy, "helpfulness": min_helpfulness}
    }

# Run in CI/CD
result = quality_gate_evaluation("production-test-cases", min_accuracy=0.85)
if not result["gate_passed"]:
    raise Exception(f"Quality gate failed: {result}")
JavaScript
async function qualityGateEvaluation(
  datasetName: string,
  minAccuracy: number = 0.8,
  minHelpfulness: number = 0.7
) {
  return startActiveObservation("quality-gate-evaluation", async () => {
    const dataset = await abv.dataset.get(datasetName);
    const scores = { accuracy: [] as number[], helpfulness: [] as number[] };

    for (const item of dataset.items) {
      await startActiveObservation(`eval-${item.id}`, async (span) => {
        const response = await abv.gateway.chat.completions.create({
          provider: "openai",
          model: "gpt-4o-mini",
          messages: [{ role: "user", content: item.input.query }],
        });
        const output = response.choices[0].message.content;

        // Accuracy check: does the output contain the expected answer?
        const accuracy = output
          .toLowerCase()
          .includes(item.expectedOutput.toLowerCase())
          ? 1.0
          : 0.0;
        scores.accuracy.push(accuracy);
        abv.score.activeObservation({ name: "accuracy", value: accuracy });

        // LLM judge
        const judgeResp = await abv.gateway.chat.completions.create({
          provider: "openai",
          model: "gpt-4o",
          messages: [
            {
              role: "user",
              content: `Rate 0-1: Is this helpful? Query: ${item.input.query} Response: ${output}. Reply with just a number.`,
            },
          ],
          temperature: 0,
        });
        const helpfulness = parseFloat(
          judgeResp.choices[0].message.content.trim()
        );
        scores.helpfulness.push(helpfulness);
        abv.score.activeObservation({
          name: "helpfulness",
          value: helpfulness,
        });

        await item.link({ otelSpan: span }, "quality-gate", {
          description: "Pre-deploy check",
        });
      });
    }

    const avgAccuracy =
      scores.accuracy.reduce((a, b) => a + b, 0) / scores.accuracy.length;
    const avgHelpfulness =
      scores.helpfulness.reduce((a, b) => a + b, 0) / scores.helpfulness.length;
    const gatePassed =
      avgAccuracy >= minAccuracy && avgHelpfulness >= minHelpfulness;

    abv.score.activeTrace({ name: "avg_accuracy", value: avgAccuracy });
    abv.score.activeTrace({ name: "avg_helpfulness", value: avgHelpfulness });
    abv.score.activeTrace({ name: "gate_passed", value: gatePassed ? 1.0 : 0.0 });

    return {
      gatePassed,
      avgAccuracy,
      avgHelpfulness,
      thresholds: { accuracy: minAccuracy, helpfulness: minHelpfulness },
    };
  });
}

// In CI/CD
const result = await qualityGateEvaluation("production-test-cases", 0.85);
if (!result.gatePassed) {
  throw new Error(`Quality gate failed: ${JSON.stringify(result)}`);
}
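For pipelines that are not test-runner based, a small command-line entry point works just as well: parse the thresholds from flags and exit non-zero when the gate fails, so the CI step fails with it. A sketch in Python (matching the first tab), assuming quality_gate_evaluation from the Python example above; the flag names and defaults are illustrative.

import argparse
import sys

# from my_app.eval import quality_gate_evaluation  # the function defined above

def main() -> int:
    parser = argparse.ArgumentParser(description="Run the pre-deploy quality gate")
    parser.add_argument("--dataset", default="production-test-cases")
    parser.add_argument("--min-accuracy", type=float, default=0.85)
    parser.add_argument("--min-helpfulness", type=float, default=0.7)
    args = parser.parse_args()

    result = quality_gate_evaluation(
        args.dataset,
        min_accuracy=args.min_accuracy,
        min_helpfulness=args.min_helpfulness,
    )
    print(f"accuracy={result['avg_accuracy']:.2f} helpfulness={result['avg_helpfulness']:.2f}")
    return 0 if result["gate_passed"] else 1

if __name__ == "__main__":
    sys.exit(main())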