Performance Tuning Guide
For: Developers and operators optimizing Cascade deployments
Level: Advanced
Time to read: 35 minutes
Real-world examples: 10+
This guide covers optimization techniques for workflows, activities, and infrastructure to achieve production-grade performance.
Performance Fundamentals
The Performance Pyramid
              ╱ Scalability ╲
             ╱───────────────╲
            ╱    Throughput   ╲
           ╱───────────────────╲
          ╱       Latency       ╲
         ╱───────────────────────╲
        ╱     Resource Usage      ╲
       ╱───────────────────────────╲
      ╱   Code Quality & Design     ╲
     ╱───────────────────────────────╲

Layers (bottom-up):
- Code Quality & Design - Foundation: algorithms, data structures
- Resource Usage - Memory, CPU, network efficiency
- Latency - Single workflow execution time
- Throughput - Workflows per second
- Scalability - How throughput scales with resources
Activity Optimization
Profile Activities
# View activity performance metrics
cascade metrics activity {activity_name} \
  --period 24h \
  --format json
# Output
{
  "name": "validate_order",
  "executions": 15234,
  "avg_duration": "12ms",
  "p50": "8ms",
  "p95": "45ms",
  "p99": "200ms",
  "error_rate": "0.02%"
}
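The percentile fields are latency percentiles over the reporting period: p50 is the typical case, while p95 and p99 expose tail latency. For intuition, here is an illustrative sketch (not part of the Cascade CLI; the percentile helper is hypothetical) of how such values are derived from raw durations:

package main

import (
	"fmt"
	"sort"
	"time"
)

// percentile returns the value at percentile p (0-100) using the
// nearest-rank method over a copy of the recorded durations.
func percentile(durations []time.Duration, p float64) time.Duration {
	if len(durations) == 0 {
		return 0
	}
	sorted := append([]time.Duration(nil), durations...)
	sort.Slice(sorted, func(i, j int) bool { return sorted[i] < sorted[j] })
	rank := int(float64(len(sorted))*p/100.0+0.5) - 1
	if rank < 0 {
		rank = 0
	}
	if rank >= len(sorted) {
		rank = len(sorted) - 1
	}
	return sorted[rank]
}

func main() {
	samples := []time.Duration{8 * time.Millisecond, 9 * time.Millisecond,
		12 * time.Millisecond, 45 * time.Millisecond, 200 * time.Millisecond}
	fmt.Println("p50:", percentile(samples, 50)) // median latency
	fmt.Println("p95:", percentile(samples, 95)) // tail latency
	fmt.Println("p99:", percentile(samples, 99)) // worst-case tail
}

A widening gap between p50 and p99 usually points to an intermittent bottleneck (GC pauses, lock contention, a slow dependency) rather than uniformly slow code.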
Latency Breakdown

// Instrument activity to find bottlenecks
func ProcessOrder(ctx context.Context, input *OrderInput) (*OrderOutput, error) {
	logger := activity.GetLogger(ctx)

	// Database query
	start := time.Now()
	customer, err := db.GetCustomer(ctx, input.CustomerID)
	logger.Info("DB query", "duration_ms", time.Since(start).Milliseconds())
	if err != nil {
		return nil, err
	}
	// Expected: <10ms

	// External API call
	start = time.Now()
	result, err := externalAPI.Validate(ctx, customer)
	logger.Info("API call", "duration_ms", time.Since(start).Milliseconds())
	if err != nil {
		return nil, err
	}
	// Expected: <50ms (external latency)

	// Processing
	start = time.Now()
	output := computeResult(customer, result)
	logger.Info("Processing", "duration_ms", time.Since(start).Milliseconds())
	// Expected: <5ms

	return output, nil
}

Common Optimizations
1. Connection Pooling
// ❌ Bad: New connection per call
db, _ := sql.Open("postgres", connString)
defer db.Close()
row := db.QueryRow(...)

// ✅ Good: Reuse connection pool
var db *sql.DB // Package-level, reused

func init() {
	db, _ = sql.Open("postgres", connString)
	db.SetMaxOpenConns(10)
	db.SetMaxIdleConns(5)
}

func Activity(ctx context.Context, input *Input) (*Output, error) {
	row := db.QueryRowContext(ctx, ...)
}

2. Database Query Optimization
// ❌ Bad: N+1 queries
orders := getOrders()
for _, order := range orders {
	items := getItems(order.ID) // Called N times!
	process(order, items)
}

// ✅ Good: Single batch query
orders := getOrders()
orderIDs := make([]string, 0, len(orders))
for _, order := range orders {
	orderIDs = append(orderIDs, order.ID)
}
itemMap := getItemsForOrders(orderIDs) // One query
for _, order := range orders {
	items := itemMap[order.ID]
	process(order, items)
}
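getItemsForOrders is referenced above but not shown. A minimal sketch of what it could look like, assuming PostgreSQL via database/sql with github.com/lib/pq, with ctx and the db handle passed explicitly (the items columns are illustrative):

import (
	"context"
	"database/sql"

	"github.com/lib/pq"
)

// Item mirrors the columns selected below; field names are illustrative.
type Item struct {
	ID       string
	SKU      string
	Quantity int
}

// getItemsForOrders fetches items for every order in one round trip and
// groups them by order ID, replacing the N+1 pattern shown above.
func getItemsForOrders(ctx context.Context, db *sql.DB, orderIDs []string) (map[string][]Item, error) {
	rows, err := db.QueryContext(ctx,
		`SELECT order_id, id, sku, quantity FROM items WHERE order_id = ANY($1)`,
		pq.Array(orderIDs))
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	itemMap := make(map[string][]Item, len(orderIDs))
	for rows.Next() {
		var orderID string
		var it Item
		if err := rows.Scan(&orderID, &it.ID, &it.SKU, &it.Quantity); err != nil {
			return nil, err
		}
		itemMap[orderID] = append(itemMap[orderID], it)
	}
	return itemMap, rows.Err()
}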
3. Caching Frequently Accessed Data

func GetProduct(ctx context.Context, input *GetProductInput) (*Product, error) {
	cache := cache.FromContext(ctx)
	cacheKey := fmt.Sprintf("product:%s", input.SKU)

	// Try cache first (sub-1ms)
	if cached, _ := cache.Get(ctx, cacheKey); cached != nil {
		var product Product
		json.Unmarshal(cached, &product)
		return &product, nil
	}

	// Cache miss: fetch from DB (50ms typical)
	product := db.GetProduct(ctx, input.SKU)

	// Cache for 5 minutes
	data, _ := json.Marshal(product)
	cache.Set(ctx, cacheKey, data, 5*time.Minute)
	return product, nil
}

Workflow Optimization
Reduce Workflow Duration
# ❌ Bad: Sequential activities (total: 30s)
states:
  - name: VerifyIdentity
    type: Task
    timeout: 10s
    next: CheckCredit
  - name: CheckCredit
    type: Task
    timeout: 10s
    next: ValidateIncome
  - name: ValidateIncome
    type: Task
    timeout: 10s
    end: true
# Total workflow duration: ~30s (sequential)

# ✅ Good: Parallel activities (total: 10s)
states:
  - name: ParallelVerification
    type: Parallel
    branches:
      - name: VerifyIdentity
        type: Task
        timeout: 10s
        result: $.identity
      - name: CheckCredit
        type: Task
        timeout: 10s
        result: $.credit
      - name: ValidateIncome
        type: Task
        timeout: 10s
        result: $.income
    completion_strategy: ALL
    next: CombineResults
  - name: CombineResults
    type: Task
    end: true
# Total workflow duration: ~10s (parallel)

Early Validation
# ❌ Bad: Expensive work after invalid input
states:
  - name: ProcessOrder
    type: Task        # Calls API, updates DB, sends email
    timeout: 30s
    next: Done

# ✅ Good: Validate first
states:
  - name: ValidateInput
    type: Task
    timeout: 1s       # Quick validation
    next: CheckValidation
  - name: CheckValidation
    type: Choice
    choices:
      - condition: "{{ $.valid == false }}"
        next: RejectOrder
    default: ProcessOrder
  - name: ProcessOrder
    type: Task        # Only reached if valid
    timeout: 30s
    next: Done
  - name: RejectOrder
    type: Task
    end: true

Minimize Context Data
# ❌ Bad: Large payloads (10MB context)
parameters:
  large_dataset: "{{ $.full_customer_history }}"   # 5MB
  all_products: "{{ $.product_catalog }}"          # 4MB
  extra_info: "{{ $.metadata }}"                   # 1MB

# ✅ Good: Only necessary fields
parameters:
  customer_id: "{{ $.customer_id }}"
  product_ids: "{{ $.cart_items[*].id }}"
  flags: "{{ $.flags }}"

Database Performance
Indexing Strategy
-- Identify slow queries
SELECT query, calls, total_time, mean_time
FROM pg_stat_statements
WHERE mean_time > 10  -- Queries averaging >10ms
ORDER BY total_time DESC;

-- Add indexes for common queries
CREATE INDEX idx_orders_customer_id ON orders(customer_id);
CREATE INDEX idx_orders_status ON orders(status);
CREATE INDEX idx_orders_created_at ON orders(created_at DESC);

Query Optimization
// ❌ Bad: N queries for N orders
orders := getOrders()
for _, order := range orders {
	items := db.Query("SELECT * FROM items WHERE order_id = ?", order.ID)
	process(order, items)
}

// ✅ Good: Single query with JOIN
query := `
	SELECT o.*, i.* FROM orders o
	LEFT JOIN items i ON o.id = i.order_id
	WHERE o.status = 'pending'
	ORDER BY o.created_at DESC
	LIMIT ?
`
rows := db.Query(query, limit)
// Process in single loop

Connection Pool Tuning
# cascade.yaml
database:
  max_connections: 20       # Adjust based on concurrency
  idle_timeout: 30s         # Close idle connections
  max_lifetime: 5m          # Recycle long-lived connections
  validation_interval: 30s  # Validate idle connections
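If an activity manages its own database/sql pool instead of relying on the platform settings above, roughly equivalent knobs can be set in code. A minimal sketch, assuming the lib/pq Postgres driver; note that database/sql has no direct counterpart to validation_interval:

import (
	"database/sql"
	"time"

	_ "github.com/lib/pq" // Postgres driver; an assumption for this sketch
)

func newPool(connString string) (*sql.DB, error) {
	db, err := sql.Open("postgres", connString)
	if err != nil {
		return nil, err
	}
	db.SetMaxOpenConns(20)                  // ~ max_connections
	db.SetMaxIdleConns(10)                  // keep some connections warm
	db.SetConnMaxIdleTime(30 * time.Second) // ~ idle_timeout
	db.SetConnMaxLifetime(5 * time.Minute)  // ~ max_lifetime
	return db, db.Ping()                    // fail fast if unreachable
}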
Caching Strategy

Cache Layers

Request → L1 Cache (Memory) → L2 Cache (Redis) → Database
               (<1ms)             (1-5ms)        (50ms+)

Cache Tiers:
// L1: Local memory cache
localCache := cache.NewInMemory()

// L2: Distributed Redis cache
redisCache := cache.NewRedis(redisClient)

// L3: Database
db := database.FromContext(ctx)

func GetProduct(ctx context.Context, sku string) (*Product, error) {
	// L1
	if p, err := localCache.Get(sku); err == nil {
		return p, nil
	}
	// L2
	if p, err := redisCache.Get(ctx, sku); err == nil {
		localCache.Set(sku, p)
		return p, nil
	}
	// L3
	p := db.GetProduct(sku)
	redisCache.Set(ctx, sku, p, 1*time.Hour)
	localCache.Set(sku, p)
	return p, nil
}

Cache Invalidation
// Invalidate on write
func UpdateProduct(ctx context.Context, product *Product) error {
	err := db.Update(product)
	if err == nil {
		cache.Delete(ctx, fmt.Sprintf("product:%s", product.SKU))
	}
	return err
}

// TTL-based expiration (preferred)
cache.Set(ctx, key, value, 5*time.Minute) // Auto-expire

Infrastructure Optimization
Resource Limits
platform:
  resources:
    requests:
      cpu: 500m       # Guaranteed
      memory: 512Mi
    limits:
      cpu: 2000m      # Maximum
      memory: 2Gi

Activity Worker Tuning
temporal:
  activity_workers: 10             # Parallel activities
  max_concurrent_activities: 100
  activity_heartbeat_timeout: 30s
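If activity workers are run with the Temporal Go SDK directly, the closest equivalents live in worker.Options. A sketch, assuming a task queue named "cascade-activities" and that the config keys above map onto pollers and execution slots as commented:

import (
	"go.temporal.io/sdk/client"
	"go.temporal.io/sdk/worker"
)

func startWorker(c client.Client) error {
	w := worker.New(c, "cascade-activities", worker.Options{
		// Upper bound on activities executing in parallel on this worker,
		// comparable to max_concurrent_activities above.
		MaxConcurrentActivityExecutionSize: 100,
		// Goroutines polling the task queue for activity tasks,
		// comparable to activity_workers above.
		MaxConcurrentActivityTaskPollers: 10,
	})
	// Register workflows and activities here before running.
	return w.Run(worker.InterruptCh())
}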
Database Optimization

-- Monitor connection usage
SELECT count(*) FROM pg_stat_activity;

-- Monitor cache hit ratio (target: >99%)
SELECT
  sum(heap_blks_hit)::float / nullif(sum(heap_blks_hit) + sum(heap_blks_read), 0) AS cache_hit_ratio
FROM pg_statio_user_tables;

Benchmarking
Benchmark Activity Performance
func BenchmarkValidateOrder(b *testing.B) {
	db := setupBenchmarkDB()
	input := &ValidateOrderInput{OrderID: "test-123"}
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		ValidateOrder(context.Background(), input)
	}
	// Run: go test -bench=BenchmarkValidateOrder -benchmem
	// Output: BenchmarkValidateOrder-8  10000  123456 ns/op  1024 B/op  15 allocs/op
}
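To see how per-worker throughput behaves under concurrency (relevant to the per-pod target below), the same benchmark can be run in parallel with b.RunParallel. A sketch reusing the ValidateOrder activity and setup helper from above:

func BenchmarkValidateOrderParallel(b *testing.B) {
	setupBenchmarkDB()
	input := &ValidateOrderInput{OrderID: "test-123"}
	b.ResetTimer()

	// b.N iterations are split across goroutines; -cpu controls parallelism.
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			ValidateOrder(context.Background(), input)
		}
	})
	// Run: go test -bench=BenchmarkValidateOrderParallel -benchmem -cpu=1,4,8
}

Comparing ns/op across the -cpu values shows whether the activity scales with cores or serializes on a shared resource such as the connection pool.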
Load Testing

# Test 1000 workflows in parallel
for i in {1..1000}; do
  cascade process start \
    --app myapp \
    --workflow TestWorkflow \
    --input "{\"test_id\":\"$i\"}" &
done
wait

# Measure: response time, throughput, errors
cascade metrics workflow \
  --period 1m \
  --percentiles p50,p95,p99
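Forking 1000 background shell jobs at once can saturate the client machine before it stresses the server. As an alternative sketch (not a Cascade tool), a small Go driver can cap in-flight starts with a semaphore while invoking the same CLI command:

package main

import (
	"fmt"
	"log"
	"os/exec"
	"sync"
)

func main() {
	const total = 1000
	const maxInFlight = 50 // bound client-side concurrency

	sem := make(chan struct{}, maxInFlight)
	var wg sync.WaitGroup

	for i := 1; i <= total; i++ {
		wg.Add(1)
		sem <- struct{}{} // acquire a slot
		go func(id int) {
			defer wg.Done()
			defer func() { <-sem }() // release the slot

			input := fmt.Sprintf(`{"test_id":"%d"}`, id)
			cmd := exec.Command("cascade", "process", "start",
				"--app", "myapp",
				"--workflow", "TestWorkflow",
				"--input", input)
			if out, err := cmd.CombinedOutput(); err != nil {
				log.Printf("start %d failed: %v: %s", id, err, out)
			}
		}(i)
	}
	wg.Wait()
}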
Performance Targets

| Component | Target | Notes |
|---|---|---|
| Activity execution | <100ms | Typical business logic |
| Workflow latency | <5s | Simple workflows |
| Database query | <50ms | Well-indexed |
| API calls | <500ms | External dependency |
| Cache hit | <1ms | L1 memory |
| Throughput | 100+ workflows/sec | Per pod |
Best Practices
✅ DO:
- Profile before optimizing
- Cache aggressively
- Use parallel execution
- Index database queries
- Monitor continuously
- Test at scale
❌ DON’T:
- Optimize prematurely
- Cache without TTL
- Create massive contexts
- Trust external APIs to always be fast or available
- Skip load testing
Updated: October 29, 2025
Version: 1.0
Examples: 10+ real-world optimizations