
Agent Integration: Advanced Patterns

For: Developers building LLM-powered workflows
Level: Advanced
Time to read: 35 minutes
Patterns: 8+ production patterns

This guide covers advanced LLM integration patterns: keeping agent calls deterministic, optimizing cost, and composing complex multi-agent workflows.


Agent Architecture Review

Agent as Activity Pattern

┌─────────────────────────────────────┐
│              Workflow               │
└──────────────┬──────────────────────┘
        ┌──────▼──────┐
        │ Task State  │
        └──────┬──────┘
        ┌──────▼──────┐
        │  Activity   │  ← Deterministic
        │  (Agent)    │    Retryable
        │             │    Idempotent
        │ • Calls LLM │
        │ • Processes │
        │ • Returns   │
        └──────┬──────┘
        ┌──────▼──────┐
        │   Result    │
        └─────────────┘

Why Activities?

  • Durable: retried on failure without replaying the whole workflow
  • Deterministic: the same input yields the same output on replay
  • Idempotent: safe to retry without duplicating side effects
  • Testable: isolated from workflow logic
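One practical way to get the idempotency property is to derive a stable key from the activity input, so a retried call can be detected and deduplicated. A minimal sketch; the `IdempotencyKey` helper is hypothetical, not part of any cascade API:

```go
package main

import (
	"crypto/sha256"
	"encoding/hex"
	"encoding/json"
)

// IdempotencyKey derives a stable key from an activity name and its input.
// The same input always yields the same key, so a retry can be detected
// and served from a previously stored result instead of re-running.
func IdempotencyKey(activity string, input any) (string, error) {
	// json.Marshal emits struct fields in declaration order and sorts
	// map keys, so equal inputs serialize identically.
	b, err := json.Marshal(input)
	if err != nil {
		return "", err
	}
	sum := sha256.Sum256(append([]byte(activity+":"), b...))
	return hex.EncodeToString(sum[:]), nil
}
```

Before executing, the activity can look the key up in durable storage and return the stored result on a hit.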

Determinism Strategies

Problem: Non-Deterministic LLM

// ❌ Problem: Non-deterministic output
func AnalyzeDocument(ctx context.Context, input *AnalysisInput) (*AnalysisOutput, error) {
    agent := agents.New("gpt-4")
    response, _ := agent.Execute(ctx, agents.Request{
        Prompt: input.DocumentContent, // No temperature control
    })
    return &AnalysisOutput{Result: response}, nil
}
// Issue: a retry produces a different result → workflow replay fails

Solution 1: Temperature Control

// ✅ Solution: low temperature → near-deterministic
func AnalyzeDocument(ctx context.Context, input *AnalysisInput) (*AnalysisOutput, error) {
    agent := agents.New("gpt-4")
    response, err := agent.Execute(ctx, agents.Request{
        Prompt:      input.DocumentContent,
        Temperature: 0.1, // Close to 0 → minimal sampling variance
        TopP:        1.0, // No nucleus-sampling variance
        MaxTokens:   500,
    })
    if err != nil {
        return nil, err
    }
    return &AnalysisOutput{Result: response}, nil
}
// Behavior: the same input almost always produces the same output,
// making retries far safer (true bit-for-bit determinism is not guaranteed)

Solution 2: Structured Output

// ✅ Use structured output (JSON schema)
func ExtractEntities(ctx context.Context, input *ExtractInput) (*Entities, error) {
    agent := agents.New("gpt-4")
    response, err := agent.Execute(ctx, agents.Request{
        Prompt:         input.Document,
        ResponseFormat: agents.ResponseFormatJSON,
        ResponseSchema: `{
            "type": "object",
            "properties": {
                "entities": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "name":       {"type": "string"},
                            "type":       {"type": "string", "enum": ["person", "org", "place"]},
                            "confidence": {"type": "number"}
                        }
                    }
                }
            }
        }`,
        Temperature: 0.1,
    })
    if err != nil {
        return nil, err
    }
    // Parse JSON → deterministic structure
    var entities Entities
    if err := json.Unmarshal([]byte(response), &entities); err != nil {
        return nil, err
    }
    return &entities, nil
}
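The `Entities` type the example unmarshals into is not shown; a plausible shape matching the JSON schema above (the Go field names are assumptions):

```go
package main

import "encoding/json"

// Entities mirrors the JSON schema requested from the model.
type Entities struct {
	Entities []Entity `json:"entities"`
}

type Entity struct {
	Name       string  `json:"name"`
	Type       string  `json:"type"` // "person", "org", or "place"
	Confidence float64 `json:"confidence"`
}

// ParseEntities decodes a structured-output response into Entities.
func ParseEntities(raw []byte) (*Entities, error) {
	var e Entities
	if err := json.Unmarshal(raw, &e); err != nil {
		return nil, err
	}
	return &e, nil
}
```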

Solution 3: Seed Control

// ✅ Use a fixed seed for reproducibility
func AnalyzeSentiment(ctx context.Context, input *SentimentInput) (*SentimentOutput, error) {
    agent := agents.New("gpt-4")
    response, err := agent.Execute(ctx, agents.Request{
        Prompt:      input.Text,
        Temperature: 0.1,
        Seed:        42, // Fixed seed → reproducible sampling
        MaxTokens:   100,
    })
    if err != nil {
        return nil, err
    }
    return &SentimentOutput{Sentiment: response}, nil
}
// Same input + same seed → deterministic output (where the provider supports seeds)

Cost Optimization

Cost-Aware Agent Design

type AgentRequest struct {
    MaxCost   float64 // Budget in USD
    Model     string  // gpt-4, gpt-3.5-turbo
    MaxTokens int
}

func SelectModel(budget float64) string {
    // Budget < $0.001  → gpt-3.5-turbo ($0.0005 per 1K tokens)
    // Budget >= $0.001 → gpt-4 ($0.003 per 1K tokens)
    if budget < 0.001 {
        return "gpt-3.5-turbo"
    }
    return "gpt-4"
}

func ProcessDocument(ctx context.Context, input *DocumentInput) (*Result, error) {
    budget := 0.005 // $0.005 budget
    model := SelectModel(budget)

    agent := agents.New(model)
    response, cost, err := agent.Execute(ctx, agents.Request{
        Prompt:    input.Content,
        MaxTokens: 500,
    })
    if err != nil {
        return nil, err
    }
    return &Result{
        Output:          response,
        CostUSD:         cost,
        EstimatedBudget: budget,
    }, nil
}

Token Optimization

// ❌ Bad: wasteful prompt
prompt := fmt.Sprintf(`You are an expert at analyzing documents.
Please analyze the following document in great detail, considering all
aspects, including the background, the context, the purpose, the audience,
and the key insights.

Here is the document: %s`, document)

// ✅ Good: concise prompt
prompt := fmt.Sprintf(`Analyze document: %s

Output format:
- Summary: [1 sentence]
- Key points: [3 bullets]
- Action: [1 item]`, document)

// Token usage comparison:
//   Bad:  ~500 tokens
//   Good: ~100 tokens
//   Savings: ~80% cost reduction
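To compare prompts before sending them, a rough estimate is usually enough. A common heuristic is ~4 characters per token for English text; an exact count requires the model's tokenizer, so treat this sketch as budgeting only:

```go
package main

import "unicode/utf8"

// EstimateTokens approximates token count using the common
// ~4 characters-per-token heuristic for English text.
// Use the model's real tokenizer when exact counts matter.
func EstimateTokens(prompt string) int {
	n := utf8.RuneCountInString(prompt)
	return (n + 3) / 4 // round up
}

// SavingsPercent reports how much cheaper prompt b is than prompt a.
func SavingsPercent(a, b string) float64 {
	ta, tb := EstimateTokens(a), EstimateTokens(b)
	if ta == 0 {
		return 0
	}
	return 100 * float64(ta-tb) / float64(ta)
}
```

Running both prompt variants through such an estimator before deployment makes the cost tradeoff concrete.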

Caching Strategy

func GetAgentResponse(ctx context.Context, query string) (string, error) {
    c := cache.FromContext(ctx)

    // Check the cache first (free!)
    cacheKey := fmt.Sprintf("agent:%s", hashQuery(query))
    if cached, _ := c.Get(ctx, cacheKey); cached != nil {
        return string(cached), nil
    }

    // Cache miss: call the agent (~$0.001 per call)
    agent := agents.New("gpt-4")
    response, err := agent.Execute(ctx, agents.Request{Prompt: query})
    if err != nil {
        return "", err
    }

    // Cache for 24 hours
    c.Set(ctx, cacheKey, []byte(response), 24*time.Hour)
    return response, nil
}
// Savings: with repetitive queries, the large majority of agent calls
// can be served from cache
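The `hashQuery` helper above is left undefined; a minimal sketch using SHA-256, so that equal queries map to the same fixed-length cache key:

```go
package main

import (
	"crypto/sha256"
	"encoding/hex"
	"fmt"
)

// hashQuery produces a fixed-length, collision-resistant cache key
// from an arbitrary query string.
func hashQuery(query string) string {
	sum := sha256.Sum256([]byte(query))
	return hex.EncodeToString(sum[:])
}

// cacheKey namespaces the hash, matching the "agent:%s" format above.
func cacheKey(query string) string {
	return fmt.Sprintf("agent:%s", hashQuery(query))
}
```

Hashing keeps keys short and avoids putting raw prompt text (which may contain sensitive data) into cache key space.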

Multi-Agent Patterns

Sequential Agents (Specialized)

# Workflow: document processing
states:
  - name: ExtractText
    type: Task
    resource: urn:cascade:activity:extract_agent   # Agent: simple extraction
    next: AnalyzeContent

  - name: AnalyzeContent
    type: Task
    resource: urn:cascade:activity:analysis_agent  # Agent: deep analysis
    next: GenerateSummary

  - name: GenerateSummary
    type: Task
    resource: urn:cascade:activity:summary_agent   # Agent: summary generation
    end: true

Parallel Agents (Multi-Perspective)

states:
  - name: AnalyzeMultiPerspective
    type: Parallel
    branches:
      - name: LegalAnalysis
        type: Task
        resource: urn:cascade:activity:legal_agent
        result: $.legal
      - name: TechnicalAnalysis
        type: Task
        resource: urn:cascade:activity:technical_agent
        result: $.technical
      - name: BusinessAnalysis
        type: Task
        resource: urn:cascade:activity:business_agent
        result: $.business
    completion_strategy: ALL
    next: SynthesizeResults
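Once all branches complete, the synthesis step receives the merged result object. A sketch of decoding it in Go; the struct fields follow the `$.legal`, `$.technical`, and `$.business` result paths declared above:

```go
package main

import "encoding/json"

// BranchResults mirrors the merged output of the Parallel state:
// each branch writes its result under the path declared in the workflow.
type BranchResults struct {
	Legal     string `json:"legal"`
	Technical string `json:"technical"`
	Business  string `json:"business"`
}

// DecodeBranchResults unpacks the merged branch output for synthesis.
func DecodeBranchResults(raw []byte) (*BranchResults, error) {
	var r BranchResults
	if err := json.Unmarshal(raw, &r); err != nil {
		return nil, err
	}
	return &r, nil
}
```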

Fallback Pattern

// Primary agent fails → fall back to secondary
func AnalyzeWithFallback(ctx context.Context, input *AnalysisInput) (*AnalysisOutput, error) {
    // Try the primary agent (better, more expensive)
    primaryAgent := agents.New("gpt-4")
    result, err := primaryAgent.Execute(ctx, agents.Request{Prompt: input.Content})
    if err == nil {
        return &AnalysisOutput{Result: result}, nil
    }

    // Fall back to the secondary agent (faster, cheaper)
    secondaryAgent := agents.New("gpt-3.5-turbo")
    result, err = secondaryAgent.Execute(ctx, agents.Request{Prompt: input.Content})
    if err != nil {
        return nil, err
    }
    return &AnalysisOutput{
        Result:   result,
        Fallback: true,
    }, nil
}

Memory Management

Conversation Memory

type ConversationMemory struct {
    ExecutionID string
    Messages    []*Message
}

func ChatWithMemory(ctx context.Context, userMessage string) (string, error) {
    memory := getMemory(ctx)

    // Append the user message
    memory.Messages = append(memory.Messages, &Message{
        Role:    "user",
        Content: userMessage,
    })

    // Send to the agent with the full history
    agent := agents.New("gpt-4")
    response, err := agent.Execute(ctx, agents.Request{
        Messages: memory.Messages,
    })
    if err != nil {
        return "", err
    }

    // Store the agent response
    memory.Messages = append(memory.Messages, &Message{
        Role:    "assistant",
        Content: response,
    })

    // Persist memory (for the next execution)
    saveMemory(ctx, memory)
    return response, nil
}

Memory Pruning

// Truncate old messages to manage context size
func PruneMemory(memory *ConversationMemory, maxMessages int) {
    if len(memory.Messages) > maxMessages {
        // Keep the system prompt plus the last N messages
        keep := []*Message{memory.Messages[0]} // System prompt
        keep = append(keep, memory.Messages[len(memory.Messages)-maxMessages:]...)
        memory.Messages = keep
    }
}

// Before each agent call:
PruneMemory(memory, 10) // Keep system + last 10 messages

Error Handling

Validation

func ValidateAgentOutput(response string, schema interface{}) error {
    // 1. Check format
    if !isValidJSON(response) {
        return fmt.Errorf("invalid JSON format")
    }

    // 2. Validate against the schema
    if err := validateSchema(response, schema); err != nil {
        return fmt.Errorf("schema validation failed: %w", err)
    }

    // 3. Semantic checks
    data := parseJSON(response)
    if data.Score < 0 || data.Score > 100 {
        return fmt.Errorf("score out of range")
    }
    return nil
}

Retry Logic

func CallAgentWithRetry(ctx context.Context, agent *agents.Agent, req agents.Request) (string, error) {
    const retries = 3
    backoff := 1 * time.Second

    for i := 0; i < retries; i++ {
        response, err := agent.Execute(ctx, req)
        if err == nil && isValidResponse(response) {
            return response, nil
        }
        if i < retries-1 {
            time.Sleep(backoff)
            backoff *= 2 // Exponential: 1s, 2s, 4s
        }
    }
    return "", fmt.Errorf("agent call failed after %d retries", retries)
}

Monitoring Agents

Agent Metrics

cascade_agent_calls_total{model="gpt-4", status="success"}
cascade_agent_cost_usd{model="gpt-4", workflow="ProcessDocument"}
cascade_agent_latency_seconds{model="gpt-4"}
cascade_agent_tokens_used{model="gpt-4", type="prompt"}
cascade_agent_validation_errors{workflow="Analysis"}

Cost Tracking

func TrackAgentCost(ctx context.Context, model string, tokens int) {
    // Cost per 1K tokens (as of 2024; matches the rates in SelectModel)
    costPer1K := map[string]float64{
        "gpt-4":         0.003,
        "gpt-3.5-turbo": 0.0005,
    }
    cost := (float64(tokens) / 1000) * costPer1K[model]
    metrics.RecordAgentCost(model, cost)

    // Track per workflow
    workflowID := workflow.GetID(ctx)
    logger.Info("Agent cost",
        zap.String("workflow", workflowID),
        zap.String("model", model),
        zap.Float64("cost_usd", cost),
    )
}

Best Practices

DO:

  • Use low temperature for determinism
  • Structure outputs
  • Implement cost budgets
  • Cache responses
  • Validate outputs
  • Monitor costs
  • Use fallbacks
  • Log all calls

DON’T:

  • Use high temperature
  • Ignore costs
  • Retry without tracking
  • Store sensitive data in memory
  • Forget error handling
  • Skip testing

Updated: October 29, 2025
Version: 1.0