Distributed Caching Strategies for High-Scale Systems

Caching is a cornerstone of high-performance distributed systems. While ORM-level caching provides basic performance improvements, systems handling millions of requests require strategies that balance performance, consistency, and operational complexity.

Multi-Tier Caching Architecture

Modern systems employ multiple caching layers, each optimized for specific access patterns:

@Configuration
public class CachingArchitecture {
    
    // L1: Application-level cache (microseconds)
    @Bean
    public CaffeineCacheManager applicationCache() {
        CaffeineCacheManager cacheManager = new CaffeineCacheManager();
        cacheManager.setCaffeine(Caffeine.newBuilder()
            .maximumSize(10_000)
            .expireAfterWrite(Duration.ofMinutes(5))
            .recordStats());
        return cacheManager;
    }
    
    // L2: Distributed cache (milliseconds)
    @Bean
    public RedisTemplate<String, Object> distributedCache() {
        RedisTemplate<String, Object> template = new RedisTemplate<>();
        template.setConnectionFactory(jedisConnectionFactory());
        template.setDefaultSerializer(new GenericJackson2JsonRedisSerializer());
        return template;
    }

    @Bean
    public JedisConnectionFactory jedisConnectionFactory() {
        // Defaults to localhost:6379; point this at your Redis cluster in production
        return new JedisConnectionFactory();
    }
    
    // L3: CDN/Edge cache (global distribution)
    // (CloudFrontConfiguration is illustrative; edge TTLs usually live in the
    // CDN's own configuration rather than in application code)
    @Bean
    public CloudFrontConfiguration edgeCache() {
        return CloudFrontConfiguration.builder()
            .distributionId("E1QXVPK2EXAMPLE")
            .defaultTTL(Duration.ofHours(24))
            .build();
    }
}

// Implementing cache-aside pattern with fallback
@Service
public class TieredCacheService {
    @Autowired private CacheManager l1Cache;
    @Autowired private RedisTemplate<String, Object> l2Cache;
    @Autowired private DataRepository dataRepo;
    @Autowired private MeterRegistry metrics;
    
    public Product getProduct(String productId) {
        // L1 Cache check
        Product product = l1Cache.getCache("products").get(productId, Product.class);
        if (product != null) {
            metrics.counter("cache.hit", "level", "L1").increment();
            return product;
        }
        
        // L2 cache check (RedisTemplate returns Object, so a cast is needed)
        product = (Product) l2Cache.opsForValue().get("product:" + productId);
        if (product != null) {
            metrics.counter("cache.hit", "level", "L2").increment();
            l1Cache.getCache("products").put(productId, product);
            return product;
        }
        
        // Database fallback
        metrics.counter("cache.miss").increment();
        Product loaded = dataRepo.findById(productId).orElseThrow();
        
        // Populate caches off the request path ('loaded' is effectively final,
        // so the lambda can capture it)
        CompletableFuture.runAsync(() -> {
            l2Cache.opsForValue().set("product:" + productId, loaded, Duration.ofHours(1));
            l1Cache.getCache("products").put(productId, loaded);
        });
        
        return loaded;
    }
}

Advanced Caching Patterns

1. Write-Through with Write-Behind Buffering

@Component
public class WriteOptimizedCache {
    private static final Logger log = LoggerFactory.getLogger(WriteOptimizedCache.class);
    private final BlockingQueue<CacheUpdate> writeBuffer = new LinkedBlockingQueue<>(10000);
    @Autowired private RedisTemplate<String, Object> cache;
    @Autowired private DataRepository repository;
    
    @PostConstruct
    public void startWriteBehindProcessor() {
        // Batch writer thread
        Thread writerThread = new Thread(() -> {
            List<CacheUpdate> batch = new ArrayList<>();
            while (!Thread.currentThread().isInterrupted()) {
                try {
                    // Collect updates for batch processing
                    CacheUpdate update = writeBuffer.poll(100, TimeUnit.MILLISECONDS);
                    if (update != null) {
                        batch.add(update);
                    }
                    
                    // Process batch when size or time threshold reached
                    if (batch.size() >= 100 || 
                        (batch.size() > 0 && System.currentTimeMillis() - batch.get(0).timestamp > 1000)) {
                        processBatch(batch);
                        batch.clear();
                    }
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                    break;
                }
            }
        });
        writerThread.setName("cache-write-behind");
        writerThread.start();
    }
    
    public void update(String key, Object value) {
        // Immediate cache update
        cache.opsForValue().set(key, value);
        
        // Queue for eventual persistence. Note: offer() returns false when the
        // buffer is full; production code should treat that as back-pressure
        writeBuffer.offer(new CacheUpdate(key, value, System.currentTimeMillis()));
    }
    
    private void processBatch(List<CacheUpdate> batch) {
        try {
            // Batch database update
            repository.batchUpdate(batch);
        } catch (Exception e) {
            // Handle failure - could implement retry or dead letter queue
            log.error("Failed to persist batch", e);
            batch.forEach(update -> 
                cache.delete(update.key) // Invalidate cache on failure
            );
        }
    }
}
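
The write-behind example above assumes a small CacheUpdate value object. A minimal sketch with public final fields, matching the update.key and update.timestamp field accesses used by the batch processor:

// Minimal sketch of the CacheUpdate type assumed by WriteOptimizedCache
public class CacheUpdate {
    public final String key;
    public final Object value;
    public final long timestamp;

    public CacheUpdate(String key, Object value, long timestamp) {
        this.key = key;
        this.value = value;
        this.timestamp = timestamp;
    }
}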

2. Cache Warming and Preloading

@Component
public class CacheWarmingStrategy {
    @Autowired private PredictiveAnalytics analytics;
    @Autowired private CacheLoader cacheLoader;
    @Autowired private TaskScheduler scheduler;
    @Autowired private ProductRepository productRepo;
    @Autowired private Cache cache; // destination cache for warmed entries
    
    @EventListener(ApplicationReadyEvent.class)
    public void warmCriticalCaches() {
        // Warm critical data on startup
        CompletableFuture.allOf(
            warmProductCache(),
            warmUserCache(),
            warmConfigCache()
        ).join();
    }
    
    @Scheduled(cron = "0 0 3 * * *") // 3 AM daily
    public void predictiveWarming() {
        // Use ML to predict tomorrow's hot data
        Set<String> predictedHotKeys = analytics.predictHotKeys(
            LocalDate.now().plusDays(1)
        );
        
        // Warm cache with predicted data
        predictedHotKeys.parallelStream()
            .forEach(key -> cacheLoader.preload(key));
    }
    
    private CompletableFuture<Void> warmProductCache() {
        return CompletableFuture.runAsync(() -> {
            // Load top 1000 products
            List<Product> topProducts = productRepo.findTopProducts(1000);
            topProducts.forEach(product -> 
                cache.put("product:" + product.getId(), product)
            );
        });
    }
    
    private CompletableFuture<Void> warmUserCache() {
        // Analogous to warmProductCache(); omitted for brevity
        return CompletableFuture.completedFuture(null);
    }
    
    private CompletableFuture<Void> warmConfigCache() {
        // Analogous to warmProductCache(); omitted for brevity
        return CompletableFuture.completedFuture(null);
    }
}

3. Distributed Cache Coherence

@Configuration
public class CacheCoherenceConfig {
    
    @Bean
    public RedisMessageListenerContainer cacheInvalidationListener(
            RedisConnectionFactory connectionFactory,
            CoherentCacheService.CacheInvalidationListener listener) {
        RedisMessageListenerContainer container = new RedisMessageListenerContainer();
        container.setConnectionFactory(connectionFactory);
        
        // Subscribe to cache invalidation events published by other nodes
        container.addMessageListener(listener, new PatternTopic("cache.invalidate.*"));
        
        return container;
    }
}

@Component
public class CoherentCacheService {
    @Autowired private RedisTemplate<String, Object> redis;
    @Autowired private CacheManager localCache;
    
    public void updateWithInvalidation(String key, Object value) {
        // Update distributed cache
        redis.opsForValue().set(key, value);
        
        // Publish invalidation event; extractRegion() and getNodeId() are
        // assumed application helpers (e.g. key prefix and host name)
        CacheInvalidationEvent event =
            new CacheInvalidationEvent(extractRegion(key), key, getNodeId());
        redis.convertAndSend("cache.invalidate." + extractRegion(key), event);
    }
    
    // Static nested class so Spring can instantiate it as a bean of its own;
    // deserialize() and getNodeId() are assumed application helpers
    @Component
    public static class CacheInvalidationListener implements MessageListener {
        @Autowired private CacheManager localCache;
        
        @Override
        public void onMessage(Message message, byte[] pattern) {
            CacheInvalidationEvent event = deserialize(message);
            
            // Skip if this node published the event
            if (!event.getSourceNode().equals(getNodeId())) {
                // Invalidate the local (L1) copy
                localCache.getCache(event.getCacheName())
                    .evict(event.getKey());
            }
        }
    }
}
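
The event class itself only needs to carry the cache name, key, and publishing node. A minimal sketch of the CacheInvalidationEvent assumed above; the no-arg constructor and getters keep it compatible with the JSON serializer configured earlier:

// Minimal sketch of the invalidation event assumed by CoherentCacheService
public class CacheInvalidationEvent {
    private String cacheName;
    private String key;
    private String sourceNode;

    public CacheInvalidationEvent() {} // for JSON deserialization

    public CacheInvalidationEvent(String cacheName, String key, String sourceNode) {
        this.cacheName = cacheName;
        this.key = key;
        this.sourceNode = sourceNode;
    }

    public String getCacheName() { return cacheName; }
    public String getKey() { return key; }
    public String getSourceNode() { return sourceNode; }
}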

Performance Optimization Strategies

1. Cache Partitioning and Sharding

@Configuration
public class ShardedCacheConfig {
    
    @Bean
    public ShardedRedisTemplate shardedCache() {
        List<RedisNode> nodes = Arrays.asList(
            new RedisNode("cache1.example.com", 6379),
            new RedisNode("cache2.example.com", 6379),
            new RedisNode("cache3.example.com", 6379),
            new RedisNode("cache4.example.com", 6379)
        );
        
        // ShardedRedisTemplate is illustrative; in practice Redis Cluster or a
        // client-side router typically plays this role
        return new ShardedRedisTemplate(nodes, new ConsistentHashStrategy(nodes));
    }
}

public class ConsistentHashStrategy implements ShardingStrategy {
    private final NavigableMap<Long, RedisNode> ring = new TreeMap<>();
    // Virtual nodes smooth the key distribution across physical nodes
    private final int virtualNodes = 150;
    
    public ConsistentHashStrategy(List<RedisNode> nodes) {
        for (RedisNode node : nodes) {
            addNode(node);
        }
    }
    
    public RedisNode getNode(String key) {
        long hash = hash(key);
        Map.Entry<Long, RedisNode> entry = ring.ceilingEntry(hash);
        return entry != null ? entry.getValue() : ring.firstEntry().getValue();
    }
    
    private void addNode(RedisNode node) {
        for (int i = 0; i < virtualNodes; i++) {
            long hash = hash(node.getId() + ":" + i);
            ring.put(hash, node);
        }
    }
    
    private long hash(String key) {
        return Hashing.murmur3_128().hashString(key, StandardCharsets.UTF_8).asLong();
    }
}
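
A brief usage sketch, assuming RedisNode exposes the getId() used above: the same key always routes to the same shard, and removing a node remaps only the keys that hashed to its virtual nodes.

// Hypothetical usage of ConsistentHashStrategy
List<RedisNode> nodes = Arrays.asList(
    new RedisNode("cache1.example.com", 6379),
    new RedisNode("cache2.example.com", 6379)
);
ConsistentHashStrategy strategy = new ConsistentHashStrategy(nodes);

// Deterministic routing: repeated lookups hit the same shard
RedisNode shard = strategy.getNode("product:12345");
assert shard.equals(strategy.getNode("product:12345"));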

2. Adaptive TTL and Eviction

@Component
public class AdaptiveCacheManager {
    @Autowired private CacheMetrics metrics;
    @Autowired private RedisTemplate<String, Object> cache;
    
    public void setWithAdaptiveTTL(String key, Object value) {
        // Calculate TTL based on access patterns
        Duration ttl = calculateOptimalTTL(key);
        cache.opsForValue().set(key, value, ttl);
    }
    
    private Duration calculateOptimalTTL(String key) {
        AccessPattern pattern = metrics.getAccessPattern(key);
        
        // High-frequency access: longer TTL
        if (pattern.getAccessRate() > 1000) {
            return Duration.ofHours(24);
        }
        
        // Predictable access pattern: align the TTL with 1.5x the observed period
        if (pattern.isPeriodic()) {
            return Duration.ofSeconds((long) (pattern.getPeriod() * 1.5));
        }
        
        // Default adaptive TTL based on hit ratio
        double hitRatio = pattern.getHitRatio();
        if (hitRatio > 0.9) {
            return Duration.ofHours(12);
        } else if (hitRatio > 0.7) {
            return Duration.ofHours(4);
        } else {
            return Duration.ofHours(1);
        }
    }
    
    @Scheduled(fixedDelay = 300000) // Every 5 minutes
    public void optimizeCache() {
        // evictLeastValuableEntries(), isNearExpiry() and refreshEntry() are
        // application-specific helpers, omitted for brevity
        CacheStats stats = metrics.getStats();
        
        // Adjust cache size based on memory pressure
        if (stats.getMemoryUsage() > 0.9) {
            evictLeastValuableEntries(0.1); // Free 10%
        }
        
        // Preemptively refresh high-value entries
        stats.getHighValueKeys().stream()
            .filter(key -> isNearExpiry(key))
            .forEach(this::refreshEntry);
    }
}

3. Cache Stampede Prevention

@Component
public class StampedePreventionCache {
    private final Map<String, CompletableFuture<Object>> inFlightRequests = new ConcurrentHashMap<>();
    @Autowired private RedisTemplate<String, Object> cache;
    @Autowired private DataLoader dataLoader;
    
    public Object get(String key) {
        // Check cache first
        Object cached = cache.opsForValue().get(key);
        if (cached != null) {
            return cached;
        }
        
        // Prevent stampede using request coalescing
        CompletableFuture<Object> future = inFlightRequests.computeIfAbsent(key, k -> 
            CompletableFuture.supplyAsync(() -> {
                try {
                    // Double-check cache (another thread might have loaded it)
                    Object value = cache.opsForValue().get(key);
                    if (value != null) {
                        return value;
                    }
                    
                    // Load from source
                    value = dataLoader.load(key);
                    
                    // Cache with jittered TTL to prevent synchronized expiration
                    Duration ttl = Duration.ofHours(1).plus(
                        Duration.ofMinutes(ThreadLocalRandom.current().nextInt(0, 10))
                    );
                    cache.opsForValue().set(key, value, ttl);
                    
                    return value;
                } finally {
                    // Clean up in-flight tracking
                    inFlightRequests.remove(key);
                }
            })
        );
        
        return future.join();
    }
}

Monitoring and Observability

@Component
public class CacheObservability {
    private final MeterRegistry metrics;
    private final AlertingService alerting; // assumed application alerting facade
    
    public CacheObservability(MeterRegistry metrics, AlertingService alerting) {
        this.metrics = metrics;
        this.alerting = alerting;
    }
    
    @EventListener
    public void recordCacheMetrics(CacheEvent event) {
        // Record hit/miss ratios
        metrics.counter("cache.access", 
            "result", event.isHit() ? "hit" : "miss",
            "cache", event.getCacheName()
        ).increment();
        
        // Record latency
        metrics.timer("cache.latency",
            "operation", event.getOperation(),
            "cache", event.getCacheName()
        ).record(event.getDuration());
        
        // Track cache size. Caution: Micrometer gauges sample the object they
        // were registered with, so production code should register one gauge
        // per cache up front rather than one per event
        metrics.gauge("cache.size",
            Tags.of("cache", event.getCacheName()),
            event.getSize()
        );
        
        // Alert on anomalies
        if (event.getHitRatio() < 0.5) {
            alerting.sendAlert(new CacheAlert(
                "Low hit ratio detected",
                event.getCacheName(),
                event.getHitRatio()
            ));
        }
    }
}
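
CacheEvent is not a standard Spring or Micrometer type; a minimal sketch of the shape this listener assumes:

// Minimal sketch of the CacheEvent consumed by CacheObservability
public class CacheEvent {
    private final String cacheName;
    private final String operation;  // e.g. "get" or "put"
    private final boolean hit;
    private final Duration duration;
    private final long size;
    private final double hitRatio;

    public CacheEvent(String cacheName, String operation, boolean hit,
                      Duration duration, long size, double hitRatio) {
        this.cacheName = cacheName;
        this.operation = operation;
        this.hit = hit;
        this.duration = duration;
        this.size = size;
        this.hitRatio = hitRatio;
    }

    public String getCacheName() { return cacheName; }
    public String getOperation() { return operation; }
    public boolean isHit() { return hit; }
    public Duration getDuration() { return duration; }
    public long getSize() { return size; }
    public double getHitRatio() { return hitRatio; }
}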

Key Architectural Decisions

  1. Cache Topology: Centralized vs. distributed vs. hierarchical
  2. Consistency Model: Strong vs. eventual consistency
  3. Eviction Policy: LRU, LFU, or custom based on business value
  4. Invalidation Strategy: TTL-based vs. event-driven (see the sketch below)
  5. Monitoring: Real-time metrics for cache efficiency
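
As a concrete illustration of decisions 3 and 4, a local tier can express both in a few lines of Caffeine configuration. This is a sketch, not a prescription; the removal listener simply marks where an event-driven invalidation hook for other tiers would attach:

// Eviction (decision 3): bounded size, handled by Caffeine's W-TinyLFU policy
// Invalidation (decision 4): TTL-based via expireAfterWrite, with the removal
// listener as a hook for event-driven invalidation of other tiers
Cache<String, Product> localTier = Caffeine.newBuilder()
    .maximumSize(10_000)
    .expireAfterWrite(Duration.ofMinutes(5))
    .removalListener((String key, Product value, RemovalCause cause) ->
        System.out.println("evicted " + key + " (" + cause + ")"))
    .build();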

Conclusion

Effective caching at scale requires more than adding a cache layer. It demands careful consideration of consistency requirements, access patterns, and operational complexity. The key to success lies in choosing the right caching strategy for each use case, implementing proper monitoring, and continuously optimizing based on real-world usage patterns. Remember that caching is not just about performance; it is about building resilient systems that handle failures gracefully while maintaining data consistency.