These patterns show how to combine multiple ABV features for production-grade AI applications.

Validated Chat Pipeline

Combines tracing, input/output guardrails, and scoring.
from abvdev import ABV, observe
from dataclasses import dataclass
from typing import Optional

abv = ABV(api_key="sk-abv-...")

@dataclass
class ChatResult:
    response: Optional[str]
    blocked: bool
    reason: Optional[str]

@observe()
def validated_chat(user_id: str, session_id: str, message: str) -> ChatResult:
    # Set trace context
    abv.update_current_trace(
        user_id=user_id,
        session_id=session_id,
        tags=["chat", "production"]
    )

    # Input guardrail
    with abv.start_as_current_observation(as_type="guardrail", name="input-check") as guard:
        input_check = abv.guardrails.validate_toxic_language(message, {"sensitivity": "medium"})
        guard.update(output={"status": input_check.status})

        if input_check.status == "FAIL":
            abv.score_current_trace(name="blocked", value=1.0, data_type="BOOLEAN")
            return ChatResult(response=None, blocked=True, reason="Input blocked")

    # Generate response
    with abv.start_as_current_generation(name="llm-call", model="gpt-4o-mini") as gen:
        response = abv.gateway.complete_chat(
            provider="openai",
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": message}
            ]
        )
        output = response.choices[0].message.content
        gen.update(
            output=output,
            usage_details={
                "prompt_tokens": response.usage.prompt_tokens,
                "completion_tokens": response.usage.completion_tokens
            }
        )

    # Output guardrail
    with abv.start_as_current_observation(as_type="guardrail", name="output-check") as guard:
        output_check = abv.guardrails.validate_toxic_language(output, {"sensitivity": "high"})
        guard.update(output={"status": output_check.status})

        if output_check.status == "FAIL":
            abv.score_current_trace(name="output_filtered", value=1.0, data_type="BOOLEAN")
            return ChatResult(response=None, blocked=True, reason="Output filtered")

    # Score successful interaction
    abv.score_current_trace(name="completed", value=1.0, data_type="BOOLEAN")
    abv.score_current_trace(name="response_length", value=len(output), data_type="NUMERIC")

    return ChatResult(response=output, blocked=False, reason=None)

result = validated_chat("user-123", "session-456", "What is machine learning?")
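
# Handle the result (illustrative)
if result.blocked:
    print(f"Blocked: {result.reason}")
else:
    print(result.response)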

Cost-Tracked Generation with Fallback

Combines the gateway, cost tracking, and automatic provider fallback.
from abvdev import ABV, observe
from typing import List, Dict

abv = ABV(api_key="sk-abv-...")

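# Per-1K-token prices (illustrative; keep them in sync with current provider pricing)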
PROVIDERS = [
    {"provider": "openai", "model": "gpt-4o-mini", "cost_per_1k_input": 0.00015, "cost_per_1k_output": 0.0006},
    {"provider": "anthropic", "model": "claude-sonnet-4-20250514", "cost_per_1k_input": 0.003, "cost_per_1k_output": 0.015},
]

@observe()
def cost_aware_generation(messages: List[Dict], max_cost: float = 0.10) -> dict:
    abv.update_current_trace(metadata={"max_cost_budget": max_cost})

    for config in PROVIDERS:
        with abv.start_as_current_generation(
            name=f"{config['provider']}-call",
            model=config["model"]
        ) as gen:
            try:
                response = abv.gateway.complete_chat(
                    provider=config["provider"],
                    model=config["model"],
                    messages=messages
                )

                # Calculate cost
                input_tokens = response.usage.prompt_tokens
                output_tokens = response.usage.completion_tokens
                cost = (
                    (input_tokens / 1000) * config["cost_per_1k_input"] +
                    (output_tokens / 1000) * config["cost_per_1k_output"]
                )

                gen.update(
                    output=response.choices[0].message.content,
                    usage_details={
                        "prompt_tokens": input_tokens,
                        "completion_tokens": output_tokens
                    },
                    cost_details={"total_cost": cost, "currency": "USD"}
                )

                # Score cost metrics
                abv.score_current_trace(name="cost_usd", value=cost, data_type="NUMERIC")
                abv.score_current_trace(
                    name="under_budget",
                    value=1.0 if cost <= max_cost else 0.0,
                    data_type="BOOLEAN"
                )

                return {
                    "response": response.choices[0].message.content,
                    "provider": config["provider"],
                    "cost": cost
                }

            except Exception as e:
                gen.update(level="WARNING", status_message=str(e))
                continue

    raise Exception("All providers failed")
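
# Example call (message content and budget are illustrative)
result = cost_aware_generation(
    [{"role": "user", "content": "Summarize the main idea of transfer learning."}],
    max_cost=0.05
)
print(f"{result['provider']} responded for ${result['cost']:.5f}")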

Prompt + Guardrail + Evaluation

Fetches a managed prompt, validates the output, and auto-scores it with an LLM judge.
from abvdev import ABV, observe
import json

abv = ABV(api_key="sk-abv-...")

@observe()
def managed_prompt_pipeline(query: str, user_id: str) -> dict:
    abv.update_current_trace(user_id=user_id, tags=["managed-prompt"])

    # Fetch managed prompt
    prompt = abv.get_prompt("qa-assistant-v2")

    # Compile prompt with variables
    messages = prompt.compile(query=query, context="General knowledge")

    # Generate with prompt config
    with abv.start_as_current_generation(
        name="managed-generation",
        model=prompt.config.get("model", "gpt-4o-mini"),
        metadata={"prompt_name": prompt.name, "prompt_version": prompt.version}
    ) as gen:
        response = abv.gateway.complete_chat(
            provider="openai",
            model=prompt.config.get("model", "gpt-4o-mini"),
            messages=messages,
            temperature=prompt.config.get("temperature", 0.7)
        )
        output = response.choices[0].message.content
        gen.update(output=output)

    # Validate output structure if JSON expected
    if prompt.config.get("expects_json"):
        validation = abv.guardrails.validate_json(output, {"strictMode": True})
        abv.score_current_span(
            name="valid_json",
            value=1.0 if validation.status == "PASS" else 0.0,
            data_type="BOOLEAN"
        )

    # Auto-evaluate with LLM judge
    with abv.start_as_current_observation(as_type="evaluator", name="auto-judge") as judge:
        judge_response = abv.gateway.complete_chat(
            provider="openai",
            model="gpt-4o",
            messages=[{
                "role": "user",
                "content": (
                    f"Rate this response 1-10 for helpfulness.\n"
                    f"Query: {query}\n"
                    f"Response: {output}\n"
                    'Return JSON: {"score": N, "reason": "..."}'
                )
            }],
            temperature=0
        )
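        # Parse the judge's verdict (assumes well-formed JSON; add error handling in production)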
        scores = json.loads(judge_response.choices[0].message.content)
        judge.update(output=scores)

        abv.score_current_trace(
            name="helpfulness",
            value=scores["score"] / 10,
            data_type="NUMERIC",
            comment=scores["reason"]
        )

    return {"response": output, "helpfulness": scores["score"]}
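
# Example call (the "qa-assistant-v2" prompt must already exist; query and user id are illustrative)
result = managed_prompt_pipeline("What causes ocean tides?", user_id="user-123")
print(result["helpfulness"])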

Session-Based Conversation with Memory

Tracks multi-turn conversations with session context.
from abvdev import ABV, observe
from typing import List, Dict

abv = ABV(api_key="sk-abv-...")

# In-memory store (use Redis/DB in production; see the Redis sketch after this example)
conversation_store: Dict[str, List[Dict]] = {}

@observe()
def chat_with_memory(user_id: str, session_id: str, message: str) -> str:
    abv.update_current_trace(
        user_id=user_id,
        session_id=session_id,
        metadata={"conversation_turn": len(conversation_store.get(session_id, [])) + 1}
    )

    # Get or create conversation history
    if session_id not in conversation_store:
        conversation_store[session_id] = [
            {"role": "system", "content": "You are a helpful assistant. Be concise."}
        ]

    history = conversation_store[session_id]

    # Add user message
    history.append({"role": "user", "content": message})

    # Generate response
    with abv.start_as_current_generation(name="chat-turn", model="gpt-4o-mini") as gen:
        response = abv.gateway.complete_chat(
            provider="openai",
            model="gpt-4o-mini",
            messages=history
        )
        assistant_message = response.choices[0].message.content

        gen.update(
            input=history,
            output=assistant_message,
            metadata={"history_length": len(history)}
        )

    # Store assistant response
    history.append({"role": "assistant", "content": assistant_message})

    # Score conversation metrics
    abv.score_current_trace(
        name="conversation_length",
        value=len(history),
        data_type="NUMERIC"
    )

    return assistant_message

# Multi-turn conversation
chat_with_memory("user-1", "session-abc", "Hi, I'm learning Python")
chat_with_memory("user-1", "session-abc", "What's a good first project?")
chat_with_memory("user-1", "session-abc", "Can you give me more details?")

Quality Gate Pipeline

Blocks deployment when evaluation scores fall below threshold.
from abvdev import ABV, observe

abv = ABV(api_key="sk-abv-...")

@observe()
def quality_gate_evaluation(
    dataset_name: str,
    min_accuracy: float = 0.8,
    min_helpfulness: float = 0.7
) -> dict:
    """Run evaluation and determine if quality gate passes."""

    dataset = abv.get_dataset(dataset_name)
    scores = {"accuracy": [], "helpfulness": []}

    for item in dataset.items:
        with item.run(run_name="quality-gate", run_description="Pre-deploy check") as span:
            # Generate response
            response = abv.gateway.complete_chat(
                provider="openai",
                model="gpt-4o-mini",
                messages=[{"role": "user", "content": item.input["query"]}]
            )
            output = response.choices[0].message.content

            # Check accuracy via simple substring match (swap in semantic similarity for paraphrase tolerance)
            accuracy = 1.0 if item.expected_output.lower() in output.lower() else 0.0
            scores["accuracy"].append(accuracy)
            span.score(name="accuracy", value=accuracy, data_type="NUMERIC")

            # LLM judge for helpfulness
            judge_resp = abv.gateway.complete_chat(
                provider="openai",
                model="gpt-4o",
                messages=[{
                    "role": "user",
                    "content": f"Rate 0-1: Is this helpful? Query: {item.input['query']} Response: {output}. Reply with just a number."
                }],
                temperature=0
            )
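            # Parse the numeric score (assumes the judge replies with a bare number)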
            helpfulness = float(judge_resp.choices[0].message.content.strip())
            scores["helpfulness"].append(helpfulness)
            span.score(name="helpfulness", value=helpfulness, data_type="NUMERIC")

    # Calculate averages
    avg_accuracy = sum(scores["accuracy"]) / len(scores["accuracy"])
    avg_helpfulness = sum(scores["helpfulness"]) / len(scores["helpfulness"])

    # Determine gate status
    gate_passed = avg_accuracy >= min_accuracy and avg_helpfulness >= min_helpfulness

    # Score the overall run
    abv.score_current_trace(name="avg_accuracy", value=avg_accuracy, data_type="NUMERIC")
    abv.score_current_trace(name="avg_helpfulness", value=avg_helpfulness, data_type="NUMERIC")
    abv.score_current_trace(name="gate_passed", value=1.0 if gate_passed else 0.0, data_type="BOOLEAN")

    return {
        "gate_passed": gate_passed,
        "avg_accuracy": avg_accuracy,
        "avg_helpfulness": avg_helpfulness,
        "thresholds": {"accuracy": min_accuracy, "helpfulness": min_helpfulness}
    }

# Run in CI/CD
result = quality_gate_evaluation("production-test-cases", min_accuracy=0.85)
if not result["gate_passed"]:
    raise Exception(f"Quality gate failed: {result}")