Files
aether/nats_eventbus.go
Hugo Nijhuis 71c6011f49
All checks were successful
CI / build (pull_request) Successful in 38s
Add EventBroadcaster metrics for observability and debugging
Implement comprehensive metrics tracking for EventBroadcaster implementations:

- Add BroadcasterMetrics interface for reading metrics per namespace
- Add MetricsCollector interface and DefaultMetricsCollector implementation
- Track events_published and events_received counters per namespace
- Track active_subscriptions gauge per namespace
- Track publish_errors, subscribe_errors, and dropped_events counters
- Add MetricsProvider interface for EventBroadcaster implementations
- Integrate metrics tracking into EventBus and NATSEventBus
- Add optional Prometheus integration via PrometheusMetricsAdapter

Closes #22

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-10 19:23:00 +01:00

167 lines
4.7 KiB
Go

package aether
import (
"context"
"encoding/json"
"fmt"
"log"
"sync"
"github.com/google/uuid"
"github.com/nats-io/nats.go"
)
// NATSEventBus is an EventBus that broadcasts events across all cluster nodes using NATS.
//
// Local delivery is handled by the embedded EventBus; NATS is used only to
// relay events between nodes. Subscribe, Unsubscribe and Stop hold mutex while
// they touch subscriptions and namespaceSubscribers.
type NATSEventBus struct {
	// Embedded base EventBus; performs the local subscribe/unsubscribe/publish.
	*EventBus // Embed base EventBus for local subscriptions
	// Connection used to subscribe to and publish on "aether.events.<namespace>" subjects.
	nc *nats.Conn // NATS connection
	// NATS subscriptions created by Subscribe; all of them are unsubscribed in Stop.
	subscriptions []*nats.Subscription
	// Incremented by Subscribe and decremented by Unsubscribe; the entry is
	// deleted when it reaches zero.
	namespaceSubscribers map[string]int // Track number of subscribers per namespace
	// Stamped on every outgoing message so that handleNATSEvent can skip
	// messages this node published itself.
	nodeID string // Unique ID for this node
	// Guards subscriptions and namespaceSubscribers.
	mutex sync.Mutex
	// Created in NewNATSEventBus; not read by any method visible in this file.
	ctx context.Context
	// Cancels ctx; called by Stop.
	cancel context.CancelFunc
}
// eventMessage is the wire format for events sent over NATS.
// It is JSON-encoded by Publish and decoded by handleNATSEvent.
type eventMessage struct {
	// NodeID is the nodeID of the publishing bus; receivers compare it with
	// their own nodeID to drop messages they sent themselves.
	NodeID string `json:"node_id"`
	// NamespaceID is the namespace the event was published to; the receiver
	// republishes the event locally under this namespace.
	NamespaceID string `json:"namespace_id"`
	// Event is the payload handed to local subscribers on the receiving node.
	Event *Event `json:"event"`
}
// NewNATSEventBus builds a NATS-backed event bus on top of the given
// connection.
//
// The returned bus wraps a fresh local EventBus, gets a newly generated UUID as
// its node ID, and starts with no NATS subscriptions and no tracked
// subscribers. The returned error is always nil in the current implementation.
func NewNATSEventBus(nc *nats.Conn) (*NATSEventBus, error) {
	bus := new(NATSEventBus)
	bus.ctx, bus.cancel = context.WithCancel(context.Background())
	bus.EventBus = NewEventBus()
	bus.nc = nc
	bus.nodeID = uuid.New().String()
	bus.subscriptions = make([]*nats.Subscription, 0)
	bus.namespaceSubscribers = make(map[string]int)
	return bus, nil
}
// Subscribe registers a local subscriber for namespaceID and returns the
// channel created by the embedded EventBus for it.
//
// The local subscription is always created and counted. A NATS subscription on
// "aether.events.<namespaceID>" is created only when this node does not already
// hold one for that subject, so a namespace never ends up with two NATS
// subscriptions (which would deliver every remote event twice).
//
// If the NATS subscription cannot be created, the error is logged and recorded
// as a subscribe error, and the local channel is still returned. Because the
// decision is based on the subscriptions actually held rather than on the
// subscriber count, the next Subscribe call for the namespace retries instead
// of leaving the namespace permanently cut off from other nodes.
func (neb *NATSEventBus) Subscribe(namespaceID string) <-chan *Event {
	neb.mutex.Lock()
	defer neb.mutex.Unlock()

	// Create local subscription first so the caller always gets a channel,
	// even when NATS is unavailable.
	ch := neb.EventBus.Subscribe(namespaceID)
	neb.namespaceSubscribers[namespaceID]++

	subject := fmt.Sprintf("aether.events.%s", namespaceID)

	// Reuse an existing NATS subscription for this subject if there is one.
	for _, existing := range neb.subscriptions {
		if existing.Subject == subject {
			return ch
		}
	}

	sub, err := neb.nc.Subscribe(subject, neb.handleNATSEvent)
	if err != nil {
		log.Printf("[NATSEventBus] Failed to subscribe to NATS subject %s: %v", subject, err)
		// Record subscription error
		neb.metrics.RecordSubscribeError(namespaceID)
		return ch
	}

	neb.subscriptions = append(neb.subscriptions, sub)
	log.Printf("[NATSEventBus] Node %s subscribed to %s", neb.nodeID, subject)
	return ch
}
// Unsubscribe removes a local subscription and, when it was the last one
// tracked for namespaceID, tears down the NATS subscription for that namespace.
//
// The channel is always removed from the embedded EventBus. The per-namespace
// counter is decremented only if it is positive. When it reaches zero the
// counter entry is deleted and every held NATS subscription whose subject is
// "aether.events.<namespaceID>" is unsubscribed and dropped from
// neb.subscriptions. Without that teardown the subscription would stay active
// and a later Subscribe for the same namespace would add a second one, causing
// duplicate delivery of remote events.
//
// A failure to unsubscribe from NATS is logged; the subscription is dropped
// from the tracked list regardless.
func (neb *NATSEventBus) Unsubscribe(namespaceID string, ch <-chan *Event) {
	neb.mutex.Lock()
	defer neb.mutex.Unlock()

	neb.EventBus.Unsubscribe(namespaceID, ch)

	count := neb.namespaceSubscribers[namespaceID]
	if count <= 0 {
		// Nothing tracked for this namespace; leave NATS state untouched.
		return
	}
	count--
	if count > 0 {
		neb.namespaceSubscribers[namespaceID] = count
		return
	}

	delete(neb.namespaceSubscribers, namespaceID)
	log.Printf("[NATSEventBus] No more subscribers for namespace %s on node %s", namespaceID, neb.nodeID)

	// Filter the tracked subscriptions in place, unsubscribing the ones that
	// belong to this namespace.
	subject := fmt.Sprintf("aether.events.%s", namespaceID)
	kept := neb.subscriptions[:0]
	for _, sub := range neb.subscriptions {
		if sub.Subject != subject {
			kept = append(kept, sub)
			continue
		}
		if err := sub.Unsubscribe(); err != nil {
			log.Printf("[NATSEventBus] Error unsubscribing from %s: %v", subject, err)
		}
	}
	// Clear the now-unused tail so removed subscriptions can be collected.
	for i := len(kept); i < len(neb.subscriptions); i++ {
		neb.subscriptions[i] = nil
	}
	neb.subscriptions = kept
}
// handleNATSEvent is the callback invoked for each message arriving on a
// subscribed NATS subject.
//
// The payload is decoded as an eventMessage. Undecodable payloads are logged
// and dropped. Messages stamped with this node's own ID are ignored, since
// Publish already delivered them locally. Anything else is counted as a
// received event and republished on the embedded EventBus under the namespace
// carried in the message.
func (neb *NATSEventBus) handleNATSEvent(msg *nats.Msg) {
	incoming := eventMessage{}
	err := json.Unmarshal(msg.Data, &incoming)

	switch {
	case err != nil:
		log.Printf("[NATSEventBus] Failed to unmarshal event: %v", err)
	case incoming.NodeID == neb.nodeID:
		// Our own message echoed back by NATS; nothing to do.
	default:
		// Cross-node event: record it, then hand it to local subscribers.
		neb.metrics.RecordReceive(incoming.NamespaceID)
		neb.EventBus.Publish(incoming.NamespaceID, incoming.Event)
	}
}
// Publish delivers event to local subscribers of namespaceID and then relays
// it to other nodes over NATS.
//
// Local delivery through the embedded EventBus always happens first. The event
// is then wrapped in an eventMessage carrying this node's ID, JSON-encoded and
// published on "aether.events.<namespaceID>". A marshal or NATS publish failure
// is logged and recorded as a publish error; neither is reported to the caller.
func (neb *NATSEventBus) Publish(namespaceID string, event *Event) {
	// NOTE(review): the original comment states this call also records
	// metrics; EventBus.Publish is not visible here, so confirm there.
	neb.EventBus.Publish(namespaceID, event)

	payload, err := json.Marshal(eventMessage{
		NodeID:      neb.nodeID,
		NamespaceID: namespaceID,
		Event:       event,
	})
	if err != nil {
		log.Printf("[NATSEventBus] Failed to marshal event for NATS: %v", err)
		neb.metrics.RecordPublishError(namespaceID)
		return
	}

	target := fmt.Sprintf("aether.events.%s", namespaceID)
	if err = neb.nc.Publish(target, payload); err != nil {
		log.Printf("[NATSEventBus] Failed to publish event to NATS: %v", err)
		neb.metrics.RecordPublishError(namespaceID)
	}
}
// Stop shuts the bus down: it cancels the bus context, unsubscribes every
// tracked NATS subscription (logging any failure), clears the tracked list,
// stops the embedded EventBus and logs that the node has stopped.
// The whole sequence runs with the bus mutex held.
func (neb *NATSEventBus) Stop() {
	neb.mutex.Lock()
	defer neb.mutex.Unlock()

	neb.cancel()

	// Detach the list up front; the mutex is held, so nothing else observes
	// the intermediate state.
	active := neb.subscriptions
	neb.subscriptions = nil
	for i := range active {
		err := active[i].Unsubscribe()
		if err == nil {
			continue
		}
		log.Printf("[NATSEventBus] Error unsubscribing: %v", err)
	}

	neb.EventBus.Stop()
	log.Printf("[NATSEventBus] Node %s stopped", neb.nodeID)
}