Agent Integration: Advanced Patterns
For: Developers building LLM-powered workflows
Level: Advanced
Time to read: 35 minutes
Patterns: 8+ production patterns
This guide covers advanced LLM integration, ensuring determinism, optimizing costs, and handling complex agent patterns.
Agent Architecture Review
Agent as Activity Pattern
┌─────────────────────────────────────┐
│ Workflow │
└──────────────┬──────────────────────┘
│
┌──────▼──────┐
│ Task State │
└──────┬──────┘
│
┌──────▼──────┐
│ Activity │
│ (Agent) │ ← Deterministic
│ │ Retryable
│ • Calls LLM │ Idempotent
│ • Processes │
│ • Returns │
└──────┬──────┘
│
┌──────▼──────┐
│ Result │
└─────────────┘
Why Activities?
- Durable: Retryable without replay
- Deterministic: Same input = same output
- Idempotent: Safe to retry
- Testable: Isolated from workflow
Determinism Strategies
Problem: Non-Deterministic LLM
// ❌ Problem: Non-deterministic output
// AnalyzeDocument (anti-example) forwards the document to the model and
// wraps the raw reply. Because no Temperature/TopP/Seed is set, the same
// input can produce different text on retry — which breaks workflow replay.
func AnalyzeDocument(ctx context.Context, input *AnalysisInput) (*AnalysisOutput, error) {
	llm := agents.New("gpt-4")
	reply, _ := llm.Execute(ctx, agents.Request{
		Prompt: input.DocumentContent,
		// No temperature control — output is non-deterministic.
	})
	return &AnalysisOutput{Result: reply}, nil
}
// Issue: Retry produces different result → Workflow replay fails
Solution 1: Temperature Control
// ✅ Solution: Low temperature = deterministic
// AnalyzeDocument analyzes a document with near-deterministic sampling
// settings so that activity retries reproduce the same output: temperature
// close to 0 plus TopP 1.0 removes almost all sampling variance, and
// MaxTokens bounds cost.
//
// Fix over the original snippet: the Execute error is now propagated
// instead of being silently discarded.
func AnalyzeDocument(ctx context.Context, input *AnalysisInput) (*AnalysisOutput, error) {
	agent := agents.New("gpt-4")
	response, err := agent.Execute(ctx, agents.Request{
		Prompt:      input.DocumentContent,
		Temperature: 0.1, // Deterministic (close to 0)
		TopP:        1.0, // No sampling variance
		MaxTokens:   500,
	})
	if err != nil {
		return nil, fmt.Errorf("analyzing document: %w", err)
	}
	return &AnalysisOutput{Result: response}, nil
}
// Behavior: Same input always produces same output (safe for retries)
Solution 2: Structured Output
// ✅ Use structured output (JSON schema)
// ExtractEntities asks the model for named entities in the document and
// decodes the reply into a typed Entities value. The JSON response format
// plus an explicit schema constrains the output to a deterministic
// structure, and Temperature 0.1 keeps retries reproducible.
//
// Fix over the original snippet: both the Execute error and the
// json.Unmarshal error are now propagated instead of being ignored.
func ExtractEntities(ctx context.Context, input *ExtractInput) (*Entities, error) {
	agent := agents.New("gpt-4")
	response, err := agent.Execute(ctx, agents.Request{
		Prompt:         input.Document,
		ResponseFormat: agents.ResponseFormatJSON,
		ResponseSchema: `{
"type": "object",
"properties": {
"entities": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {"type": "string"},
"type": {"type": "string", "enum": ["person", "org", "place"]},
"confidence": {"type": "number"}
}
}
}
}
}`,
		Temperature: 0.1,
	})
	if err != nil {
		return nil, fmt.Errorf("extracting entities: %w", err)
	}
	// Parse the schema-constrained JSON into the typed result.
	var entities Entities
	if err := json.Unmarshal(response, &entities); err != nil {
		return nil, fmt.Errorf("decoding entity JSON: %w", err)
	}
	return &entities, nil
}
Solution 3: Seed Control
// ✅ Use fixed seed for reproducibility
// AnalyzeSentiment classifies the sentiment of the input text. A low
// temperature combined with a fixed seed makes the call reproducible:
// the same input always yields the same output across retries.
//
// Fix over the original snippet: the Execute error is now propagated
// instead of being silently discarded.
func AnalyzeSentiment(ctx context.Context, input *SentimentInput) (*SentimentOutput, error) {
	agent := agents.New("gpt-4")
	response, err := agent.Execute(ctx, agents.Request{
		Prompt:      input.Text,
		Temperature: 0.1,
		Seed:        42, // Fixed seed = reproducible
		MaxTokens:   100,
	})
	if err != nil {
		return nil, fmt.Errorf("analyzing sentiment: %w", err)
	}
	return &SentimentOutput{Sentiment: response}, nil
}
// Same input + seed = deterministic output
Cost Optimization
Cost-Aware Agent Design
// AgentRequest describes a cost-aware model call: a USD budget for the
// call, the model to use, and a hard cap on generated tokens.
type AgentRequest struct {
MaxCost float64 // Budget in USD
Model string // gpt-4, gpt-3.5-turbo
MaxTokens int // Hard cap on generated tokens
}
// SelectModel maps a per-call budget (USD) to a model name: budgets below
// $0.001 get the cheaper gpt-3.5-turbo, anything at or above the threshold
// gets gpt-4.
func SelectModel(cost float64) string {
	switch {
	case cost < 0.001:
		return "gpt-3.5-turbo"
	default:
		return "gpt-4"
	}
}
// ProcessDocument runs a document through a model chosen to fit a fixed
// per-call budget and reports the actual cost alongside the output.
//
// Fix over the original snippet: the Execute error is now propagated
// instead of being silently discarded.
func ProcessDocument(ctx context.Context, input *DocumentInput) (*Result, error) {
	budget := 0.005 // USD budget for this call
	model := SelectModel(budget)
	agent := agents.New(model)
	response, cost, err := agent.Execute(ctx, agents.Request{
		Prompt:    input.Content,
		MaxTokens: 500,
	})
	if err != nil {
		return nil, fmt.Errorf("processing document with %s: %w", model, err)
	}
	return &Result{
		Output:          response,
		CostUSD:         cost,
		EstimatedBudget: budget,
	}, nil
}
Token Optimization
// ❌ Bad: Wasteful prompt
prompt := `
You are an expert at analyzing documents. Please analyze the following document
in great detail, considering all aspects, including the background, the context,
the purpose, the audience, and the key insights. Here is the document:
${document}
`
// ✅ Good: Concise prompt
prompt := fmt.Sprintf(`Analyze document: %s
Output format:
- Summary: [1 sentence]
- Key points: [3 bullets]
- Action: [1 item]`, document)
// Token usage comparison:
// Bad: ~500 tokens
// Good: ~100 tokens
// Savings: 80% cost reduction
Caching Strategy
// GetAgentResponse answers a query via the agent, memoizing responses for
// 24 hours so repeated identical queries cost nothing.
//
// Cache reads/writes stay best-effort (a cold or failing cache only costs
// money, not correctness); fix over the original snippet: the agent error
// is now propagated instead of being discarded.
func GetAgentResponse(ctx context.Context, query string) (string, error) {
	cache := cache.FromContext(ctx)
	// Cache hit: free.
	cacheKey := fmt.Sprintf("agent:%s", hashQuery(query))
	if cached, _ := cache.Get(ctx, cacheKey); cached != nil {
		return string(cached), nil
	}
	// Cache miss: pay for one model call.
	agent := agents.New("gpt-4")
	response, err := agent.Execute(ctx, agents.Request{Prompt: query})
	if err != nil {
		return "", fmt.Errorf("querying agent: %w", err)
	}
	// Best-effort write; entries live for 24 hours.
	cache.Set(ctx, cacheKey, []byte(response), 24*time.Hour)
	return response, nil
}
// Savings: ~90% of agent calls avoided via caching
Multi-Agent Patterns
Sequential Agents (Specialized)
# Sequential pipeline: three specialized agents run one after another,
# each as its own Task state (extract -> analyze -> summarize), so every
# step is independently retryable.
# Workflow: Document Processing
states:
- name: ExtractText
type: Task
resource: urn:cascade:activity:extract_agent
# Agent: Simple extraction
next: AnalyzeContent
- name: AnalyzeContent
type: Task
resource: urn:cascade:activity:analysis_agent
# Agent: Deep analysis
next: GenerateSummary
- name: GenerateSummary
type: Task
resource: urn:cascade:activity:summary_agent
# Agent: Summary generation
end: true
Parallel Agents (Multi-Perspective)
# Fan-out/fan-in: three specialist agents analyze the same input in
# parallel branches; ALL must complete before results are synthesized.
states:
- name: AnalyzeMultiPerspective
type: Parallel
branches:
- name: LegalAnalysis
type: Task
resource: urn:cascade:activity:legal_agent
result: $.legal
- name: TechnicalAnalysis
type: Task
resource: urn:cascade:activity:technical_agent
result: $.technical
- name: BusinessAnalysis
type: Task
resource: urn:cascade:activity:business_agent
result: $.business
# Wait for every branch (vs. ANY / N_OF_M strategies)
completion_strategy: ALL
next: SynthesizeResults
Fallback Pattern
// Primary agent fails → fallback to secondary
// AnalyzeWithFallback tries the primary (better, more expensive) model
// first and degrades to a cheaper model if that call fails; the Fallback
// flag tells the caller which path produced the result.
//
// Fixes over the original snippet: the second Execute used `:=`, which
// does not compile (no new variables on the left side), and its error was
// discarded — if both models fail, the error is now returned.
func AnalyzeWithFallback(ctx context.Context, input *AnalysisInput) (*AnalysisOutput, error) {
	// Primary: better quality, higher cost.
	primaryAgent := agents.New("gpt-4")
	result, err := primaryAgent.Execute(ctx, agents.Request{Prompt: input.Content})
	if err == nil {
		return &AnalysisOutput{Result: result}, nil
	}
	// Fallback: faster and cheaper.
	secondaryAgent := agents.New("gpt-3.5-turbo")
	result, err = secondaryAgent.Execute(ctx, agents.Request{Prompt: input.Content})
	if err != nil {
		return nil, fmt.Errorf("both primary and fallback agents failed: %w", err)
	}
	return &AnalysisOutput{
		Result:   result,
		Fallback: true,
	}, nil
}
Memory Management
Conversation Memory
// ConversationMemory is the per-execution chat transcript: the workflow
// execution it belongs to plus the ordered message history sent to the model.
type ConversationMemory struct {
ExecutionID string
Messages []*Message
}
// ChatWithMemory appends the user's message to the persisted transcript,
// sends the full history to the model, records the reply, and persists the
// updated transcript for the next execution.
//
// Fix over the original snippet: the Execute error is now propagated, and
// on failure no dangling assistant entry is appended or persisted.
func ChatWithMemory(ctx context.Context, userMessage string) (string, error) {
	memory := getMemory(ctx)
	// Record the user's turn.
	memory.Messages = append(memory.Messages, &Message{
		Role:    "user",
		Content: userMessage,
	})
	// The model sees the entire conversation so far.
	agent := agents.New("gpt-4")
	response, err := agent.Execute(ctx, agents.Request{
		Messages: memory.Messages,
	})
	if err != nil {
		return "", fmt.Errorf("chatting with agent: %w", err)
	}
	// Record the model's turn.
	memory.Messages = append(memory.Messages, &Message{
		Role:    "assistant",
		Content: response,
	})
	// Persist for the next execution.
	saveMemory(ctx, memory)
	return response, nil
}
Memory Pruning
// Truncate old messages to manage context size
// PruneMemory caps the transcript at the system prompt plus the most
// recent maxMessages entries so the model's context window stays bounded.
// Messages[0] is assumed to be the system prompt — TODO confirm with callers.
//
// Fix over the original snippet: the kept slice was typed []Message while
// ConversationMemory.Messages is []*Message, which does not compile.
func PruneMemory(memory *ConversationMemory, maxMessages int) {
	if len(memory.Messages) > maxMessages {
		// Keep the system prompt (index 0) plus the last maxMessages entries.
		keep := []*Message{memory.Messages[0]}
		keep = append(keep, memory.Messages[len(memory.Messages)-maxMessages:]...)
		memory.Messages = keep
	}
}
// Before agent call:
PruneMemory(memory, 10) // Keep system + last 10 messages
Error Handling
Validation
// ValidateAgentOutput checks a model reply in three layers before it is
// trusted: well-formed JSON, conformance to the expected schema, and a
// domain sanity check (score must be within 0-100).
// NOTE(review): isValidJSON/validateSchema/parseJSON are helpers defined
// elsewhere in the project; Score semantics inferred from the range check.
func ValidateAgentOutput(response string, schema interface{}) error {
// 1. Check format
if !isValidJSON(response) {
return fmt.Errorf("invalid JSON format")
}
// 2. Validate against schema
if err := validateSchema(response, schema); err != nil {
return fmt.Errorf("schema validation failed: %w", err)
}
// 3. Semantic checks
data := parseJSON(response)
if data.Score < 0 || data.Score > 100 {
return fmt.Errorf("score out of range")
}
return nil
}
Retry Logic
// CallAgentWithRetry calls the agent up to three times, treating an
// invalid response the same as a transport error, with exponential
// backoff (1s, 2s) between attempts.
//
// Fixes over the original snippet: the backoff now respects context
// cancellation instead of sleeping unconditionally, and the final error
// wraps the last observed failure rather than discarding it.
func CallAgentWithRetry(ctx context.Context, req *Request) (string, error) {
	const retries = 3
	backoff := 1 * time.Second
	var lastErr error
	for attempt := 0; attempt < retries; attempt++ {
		response, err := agent.Execute(ctx, req)
		if err == nil && isValidResponse(response) {
			return response, nil
		}
		if err != nil {
			lastErr = err
		} else {
			lastErr = fmt.Errorf("invalid agent response")
		}
		if attempt < retries-1 {
			// Wait out the backoff, or bail immediately on cancellation.
			select {
			case <-ctx.Done():
				return "", ctx.Err()
			case <-time.After(backoff):
			}
			backoff *= 2
		}
	}
	return "", fmt.Errorf("agent call failed after %d attempts: %w", retries, lastErr)
}
Monitoring Agents
Agent Metrics
cascade_agent_calls_total{model="gpt-4", status="success"}
cascade_agent_cost_usd{model="gpt-4", workflow="ProcessDocument"}
cascade_agent_latency_seconds{model="gpt-4"}
cascade_agent_tokens_used{model="gpt-4", type="prompt"}
cascade_agent_validation_errors{workflow="Analysis"}
Cost Tracking
// TrackAgentCost converts a token count into a dollar cost for the given
// model, records it as a metric, and logs it against the current workflow.
//
// Fix over the original snippet: the rate table holds per-1K-token prices
// (gpt-4 $0.03/1K, gpt-3.5-turbo $0.0005/1K — matching the rates quoted
// elsewhere in this guide) but the math divided by 1,000,000, which
// under-reported cost by 1000x; the divisor is now 1,000 and the map is
// named accordingly.
func TrackAgentCost(ctx context.Context, model string, tokens int) {
	// USD per 1K tokens (as of 2024).
	costPer1K := map[string]float64{
		"gpt-4":         0.03,
		"gpt-3.5-turbo": 0.0005,
	}
	// Unknown models fall through with a zero rate, so cost records as 0.
	cost := (float64(tokens) / 1_000) * costPer1K[model]
	metrics.RecordAgentCost(model, cost)
	// Attribute spend to the workflow that triggered the call.
	workflowID := workflow.GetID(ctx)
	logger.Info("Agent cost",
		zap.String("workflow", workflowID),
		zap.String("model", model),
		zap.Float64("cost_usd", cost),
	)
}
Best Practices
✅ DO:
- Use low temperature for determinism
- Structure outputs
- Implement cost budgets
- Cache responses
- Validate outputs
- Monitor costs
- Use fallbacks
- Log all calls
❌ DON’T:
- Use high temperature
- Ignore costs
- Retry without tracking
- Store sensitive data in memory
- Forget error handling
- Skip testing
Updated: October 29, 2025
Version: 1.0
Patterns: 8+ production patterns
Last updated on