Initial aether repository structure

Distributed actor system with event sourcing for Go:
- event.go - Event, ActorSnapshot, EventStore interface
- eventbus.go - EventBus, EventBroadcaster for pub/sub
- nats_eventbus.go - NATS-backed cross-node event broadcasting
- store/ - InMemoryEventStore (testing), JetStreamEventStore (production)
- cluster/ - Node discovery, leader election, shard distribution
- model/ - EventStorming model types

Extracted from arcadia as an open-source infrastructure component.

Co-Authored-By: Claude <noreply@anthropic.com>
2026-01-08 19:30:02 +01:00
commit e9e50c021f
22 changed files with 2588 additions and 0 deletions

cluster/cluster.go Normal file

@@ -0,0 +1,48 @@
// Package cluster provides distributed computing capabilities for the Aether VM runtime.
//
// This package implements a distributed actor system using NATS for coordination,
// featuring consistent hashing for shard distribution, leader election for
// coordination, and fault-tolerant actor migration between nodes.
//
// Key Components:
//
// - ConsistentHashRing: Distributes actors across cluster nodes using consistent hashing
// - LeaderElection: NATS-based leader election with lease-based coordination
// - ClusterManager: Coordinates distributed operations and shard rebalancing
// - NodeDiscovery: Manages cluster membership and node health monitoring
// - ShardManager: Handles actor placement and distribution across shards
// - DistributedVM: Main entry point for distributed VM cluster operations
//
// Usage:
//
// // Create a distributed VM node
// distributedVM, err := cluster.NewDistributedVM("node-1", []string{"nats://localhost:4222"}, localRuntime)
// if err != nil {
// log.Fatal(err)
// }
//
// // Start the cluster node
// if err := distributedVM.Start(); err != nil {
// log.Fatal(err)
// }
//
// // Load a model across the cluster
// if err := distributedVM.LoadModel(eventStormingModel); err != nil {
// log.Fatal(err)
// }
//
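// The localRuntime argument must satisfy the Runtime interface declared in
// distributed.go (Start, LoadModel, SendMessage). A minimal stub, shown here
// only as an illustrative sketch (it is not part of this package):
//
// type noopRuntime struct{}
//
// func (r *noopRuntime) Start() error { return nil }
// func (r *noopRuntime) LoadModel(model interface{}) error { return nil }
// func (r *noopRuntime) SendMessage(message interface{}) error { return nil }
//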
// Architecture:
//
// The cluster package implements a distributed actor system where each node
// runs a local VM runtime and coordinates with other nodes through NATS.
// Actors are sharded across nodes using consistent hashing, and the system
// supports dynamic rebalancing when nodes join or leave the cluster.
//
// Fault Tolerance:
//
// - Automatic node failure detection through heartbeat monitoring
// - Leader election ensures coordination continues despite node failures
// - Actor migration allows rebalancing when cluster topology changes
// - Graceful shutdown with proper resource cleanup
//
package cluster

cluster/discovery.go Normal file

@@ -0,0 +1,118 @@
package cluster
import (
"context"
"encoding/json"
"sync"
"time"
"github.com/nats-io/nats.go"
)
// NodeDiscovery manages cluster membership using NATS
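//
// Nodes announce themselves on the "aether.discovery" subject as JSON-encoded
// NodeUpdate messages (see types.go); an abbreviated, illustrative payload:
//
// {"type":"joined","node":{"id":"node-1","status":"active","capacity":1000,"load":0}}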
type NodeDiscovery struct {
nodeID string
mu sync.Mutex // guards nodeInfo, which is mutated by UpdateLoad/UpdateVMCount and read by announceNode
nodeInfo *NodeInfo
natsConn *nats.Conn
heartbeat time.Duration
timeout time.Duration
updates chan NodeUpdate
ctx context.Context
}
// NewNodeDiscovery creates a node discovery service
func NewNodeDiscovery(nodeID string, natsConn *nats.Conn, ctx context.Context) *NodeDiscovery {
nodeInfo := &NodeInfo{
ID: nodeID,
Status: NodeStatusActive,
Capacity: 1000, // Default capacity
Load: 0,
LastSeen: time.Now(),
Metadata: make(map[string]string),
}
return &NodeDiscovery{
nodeID: nodeID,
nodeInfo: nodeInfo,
natsConn: natsConn,
heartbeat: 30 * time.Second,
timeout: 90 * time.Second,
updates: make(chan NodeUpdate, 100),
ctx: ctx,
}
}
// Start begins node discovery and heartbeating
func (nd *NodeDiscovery) Start() {
// Announce this node joining
nd.announceNode(NodeJoined)
// Start heartbeat
ticker := time.NewTicker(nd.heartbeat)
defer ticker.Stop()
// Subscribe to node announcements
nd.natsConn.Subscribe("aether.discovery", func(msg *nats.Msg) {
var update NodeUpdate
if err := json.Unmarshal(msg.Data, &update); err != nil {
return
}
select {
case nd.updates <- update:
case <-nd.ctx.Done():
}
})
for {
select {
case <-ticker.C:
nd.announceNode(NodeUpdated)
case <-nd.ctx.Done():
nd.announceNode(NodeLeft)
return
}
}
}
// GetUpdates returns the channel for receiving node updates
func (nd *NodeDiscovery) GetUpdates() <-chan NodeUpdate {
return nd.updates
}
// GetNodeInfo returns the current node information
func (nd *NodeDiscovery) GetNodeInfo() *NodeInfo {
return nd.nodeInfo
}
// UpdateLoad updates the node's current load
func (nd *NodeDiscovery) UpdateLoad(load float64) {
nd.mu.Lock()
defer nd.mu.Unlock()
nd.nodeInfo.Load = load
}
// UpdateVMCount updates the number of VMs on this node
func (nd *NodeDiscovery) UpdateVMCount(count int) {
nd.mu.Lock()
defer nd.mu.Unlock()
nd.nodeInfo.VMCount = count
}
// announceNode publishes node status to the cluster
func (nd *NodeDiscovery) announceNode(updateType NodeUpdateType) {
nd.mu.Lock()
nd.nodeInfo.LastSeen = time.Now()
update := NodeUpdate{
Type: updateType,
Node: nd.nodeInfo,
}
data, err := json.Marshal(update)
nd.mu.Unlock()
if err != nil {
return
}
nd.natsConn.Publish("aether.discovery", data)
}
// Stop gracefully stops the node discovery service
func (nd *NodeDiscovery) Stop() {
nd.announceNode(NodeLeft)
}

cluster/distributed.go Normal file

@@ -0,0 +1,221 @@
package cluster
import (
"context"
"encoding/json"
"fmt"
"github.com/nats-io/nats.go"
)
// DistributedVM manages a cluster of runtime nodes with VM-per-instance architecture
type DistributedVM struct {
nodeID string
cluster *ClusterManager
localRuntime Runtime // Interface to avoid import cycles
sharding *ShardManager
discovery *NodeDiscovery
natsConn *nats.Conn
ctx context.Context
cancel context.CancelFunc
}
// Runtime interface to avoid import cycles with main aether package
type Runtime interface {
Start() error
LoadModel(model interface{}) error
SendMessage(message interface{}) error
}
// DistributedVMRegistry implements VMRegistry using DistributedVM's local runtime and sharding
type DistributedVMRegistry struct {
runtime interface{} // Runtime interface to avoid import cycles
sharding *ShardManager
}
// NewDistributedVM creates a distributed VM runtime cluster node
func NewDistributedVM(nodeID string, natsURLs []string, localRuntime Runtime) (*DistributedVM, error) {
ctx, cancel := context.WithCancel(context.Background())
// Connect to NATS cluster
if len(natsURLs) == 0 {
cancel()
return nil, fmt.Errorf("at least one NATS URL is required")
}
natsURL := natsURLs[0] // Use first URL for simplicity
natsConn, err := nats.Connect(natsURL,
nats.Name(fmt.Sprintf("aether-runtime-%s", nodeID)))
if err != nil {
cancel()
return nil, fmt.Errorf("failed to connect to NATS: %w", err)
}
// Create cluster components
discovery := NewNodeDiscovery(nodeID, natsConn, ctx)
sharding := NewShardManager(NumShards, 3) // 1024 shards (NumShards), 3 replicas
cluster, err := NewClusterManager(nodeID, natsConn, ctx)
if err != nil {
cancel()
natsConn.Close()
return nil, fmt.Errorf("failed to create cluster manager: %w", err)
}
dvm := &DistributedVM{
nodeID: nodeID,
cluster: cluster,
localRuntime: localRuntime,
sharding: sharding,
discovery: discovery,
natsConn: natsConn,
ctx: ctx,
cancel: cancel,
}
// Create VM registry and connect it to cluster manager
vmRegistry := &DistributedVMRegistry{
runtime: localRuntime,
sharding: sharding,
}
cluster.SetVMRegistry(vmRegistry)
return dvm, nil
}
// Start begins the distributed VM cluster node
func (dvm *DistributedVM) Start() error {
// Start local runtime
if err := dvm.localRuntime.Start(); err != nil {
return fmt.Errorf("failed to start local runtime: %w", err)
}
// Start cluster services
go dvm.discovery.Start()
go dvm.cluster.Start()
// Start message routing
go dvm.startMessageRouting()
return nil
}
// Stop gracefully shuts down the distributed VM node
func (dvm *DistributedVM) Stop() {
dvm.cancel()
dvm.cluster.Stop()
dvm.discovery.Stop()
dvm.natsConn.Close()
}
// LoadModel distributes EventStorming model across the cluster with VM templates
func (dvm *DistributedVM) LoadModel(model interface{}) error {
// Load model locally first
if err := dvm.localRuntime.LoadModel(model); err != nil {
return fmt.Errorf("failed to load model locally: %w", err)
}
// Broadcast model to other cluster nodes
msg := ClusterMessage{
Type: "load_model",
From: dvm.nodeID,
To: "broadcast",
Payload: model,
}
return dvm.publishClusterMessage(msg)
}
// SendMessage routes messages across the distributed cluster
func (dvm *DistributedVM) SendMessage(message interface{}) error {
// This is a simplified implementation
// In practice, this would determine the target node based on sharding
// and route the message appropriately
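// A possible shape, sketched here as a comment only (extractActorID is a
// hypothetical helper that is not defined in this package):
//
// if actorID, ok := extractActorID(message); ok && !dvm.IsLocalActor(actorID) {
// return dvm.publishClusterMessage(ClusterMessage{
// Type: "route_message",
// From: dvm.nodeID,
// To: dvm.GetActorNode(actorID),
// Payload: message,
// })
// }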
return dvm.localRuntime.SendMessage(message)
}
// GetActorNode determines which node should handle a specific actor
func (dvm *DistributedVM) GetActorNode(actorID string) string {
// Use consistent hashing to determine the target node
return dvm.cluster.hashRing.GetNode(actorID)
}
// IsLocalActor checks if an actor should be handled by this node
func (dvm *DistributedVM) IsLocalActor(actorID string) bool {
targetNode := dvm.GetActorNode(actorID)
return targetNode == dvm.nodeID
}
// GetActorsInShard returns actors that belong to a specific shard on this node
func (dvm *DistributedVM) GetActorsInShard(shardID int) []string {
return dvm.cluster.GetActorsInShard(shardID)
}
// startMessageRouting begins routing messages between cluster nodes
func (dvm *DistributedVM) startMessageRouting() {
// Subscribe to cluster messages
dvm.natsConn.Subscribe("aether.distributed.*", dvm.handleClusterMessage)
}
// handleClusterMessage processes incoming cluster coordination messages
func (dvm *DistributedVM) handleClusterMessage(msg *nats.Msg) {
var clusterMsg ClusterMessage
if err := json.Unmarshal(msg.Data, &clusterMsg); err != nil {
return
}
// Skip our own broadcasts: this node subscribes to the same
// "aether.distributed.*" subjects it publishes on.
if clusterMsg.From == dvm.nodeID {
return
}
switch clusterMsg.Type {
case "load_model":
// Handle model loading from other nodes
if model := clusterMsg.Payload; model != nil {
dvm.localRuntime.LoadModel(model)
}
case "route_message":
// Handle message routing from other nodes
if message := clusterMsg.Payload; message != nil {
dvm.localRuntime.SendMessage(message)
}
case "rebalance":
// Handle shard rebalancing requests
dvm.handleRebalanceRequest(clusterMsg)
}
}
// handleRebalanceRequest processes shard rebalancing requests
func (dvm *DistributedVM) handleRebalanceRequest(msg ClusterMessage) {
// Simplified rebalancing logic
// In practice, this would implement complex actor migration
}
// publishClusterMessage sends a message to other cluster nodes
func (dvm *DistributedVM) publishClusterMessage(msg ClusterMessage) error {
data, err := json.Marshal(msg)
if err != nil {
return err
}
subject := fmt.Sprintf("aether.distributed.%s", msg.Type)
return dvm.natsConn.Publish(subject, data)
}
// GetClusterInfo returns information about the cluster state
func (dvm *DistributedVM) GetClusterInfo() map[string]interface{} {
nodes := dvm.cluster.GetNodes()
return map[string]interface{}{
"nodeId": dvm.nodeID,
"isLeader": dvm.cluster.IsLeader(),
"leader": dvm.cluster.GetLeader(),
"nodeCount": len(nodes),
"nodes": nodes,
}
}
// GetActiveVMs returns a map of active VMs (implementation depends on runtime)
func (dvr *DistributedVMRegistry) GetActiveVMs() map[string]interface{} {
// This would need to access the actual runtime's VM registry
// For now, return empty map to avoid import cycles
return make(map[string]interface{})
}
// GetShard returns the shard number for the given actor ID
func (dvr *DistributedVMRegistry) GetShard(actorID string) int {
return dvr.sharding.GetShard(actorID)
}

cluster/hashring.go Normal file

@@ -0,0 +1,105 @@
package cluster
import (
"crypto/sha256"
"encoding/binary"
"fmt"
"sort"
)
// ConsistentHashRing implements a consistent hash ring for shard distribution
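//
// The ring itself performs no locking; callers are expected to synchronize
// concurrent access. Typical use:
//
// ring := NewConsistentHashRing()
// ring.AddNode("node-1")
// ring.AddNode("node-2")
// owner := ring.GetNode("actor-42") // stable owner while membership is unchanged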
type ConsistentHashRing struct {
ring map[uint32]string // hash -> node ID
sortedHashes []uint32 // sorted hash keys
nodes map[string]bool // active nodes
}
// NewConsistentHashRing creates a new consistent hash ring
func NewConsistentHashRing() *ConsistentHashRing {
return &ConsistentHashRing{
ring: make(map[uint32]string),
nodes: make(map[string]bool),
}
}
// AddNode adds a node to the hash ring
func (chr *ConsistentHashRing) AddNode(nodeID string) {
if chr.nodes[nodeID] {
return // Node already exists
}
chr.nodes[nodeID] = true
// Add virtual nodes for better distribution
for i := 0; i < VirtualNodes; i++ {
virtualKey := fmt.Sprintf("%s:%d", nodeID, i)
hash := chr.hash(virtualKey)
chr.ring[hash] = nodeID
chr.sortedHashes = append(chr.sortedHashes, hash)
}
sort.Slice(chr.sortedHashes, func(i, j int) bool {
return chr.sortedHashes[i] < chr.sortedHashes[j]
})
}
// RemoveNode removes a node from the hash ring
func (chr *ConsistentHashRing) RemoveNode(nodeID string) {
if !chr.nodes[nodeID] {
return // Node doesn't exist
}
delete(chr.nodes, nodeID)
// Remove all virtual nodes for this physical node
newHashes := make([]uint32, 0)
for _, hash := range chr.sortedHashes {
if chr.ring[hash] != nodeID {
newHashes = append(newHashes, hash)
} else {
delete(chr.ring, hash)
}
}
chr.sortedHashes = newHashes
}
// GetNode returns the node responsible for a given key
func (chr *ConsistentHashRing) GetNode(key string) string {
if len(chr.sortedHashes) == 0 {
return ""
}
hash := chr.hash(key)
// Find the first node with hash >= key hash (clockwise)
idx := sort.Search(len(chr.sortedHashes), func(i int) bool {
return chr.sortedHashes[i] >= hash
})
// Wrap around to the first node if we've gone past the end
if idx == len(chr.sortedHashes) {
idx = 0
}
return chr.ring[chr.sortedHashes[idx]]
}
// hash computes a hash for the given key
func (chr *ConsistentHashRing) hash(key string) uint32 {
h := sha256.Sum256([]byte(key))
return binary.BigEndian.Uint32(h[:4])
}
// GetNodes returns all active nodes in the ring
func (chr *ConsistentHashRing) GetNodes() []string {
nodes := make([]string, 0, len(chr.nodes))
for nodeID := range chr.nodes {
nodes = append(nodes, nodeID)
}
return nodes
}
// IsEmpty returns true if the ring has no nodes
func (chr *ConsistentHashRing) IsEmpty() bool {
return len(chr.nodes) == 0
}

cluster/leader.go Normal file

@@ -0,0 +1,414 @@
package cluster
import (
"context"
"encoding/json"
"fmt"
"log"
"os"
"sync"
"time"
"github.com/nats-io/nats.go"
)
// LeaderElection manages NATS-based leader election using lease-based coordination
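//
// The ClusterManager wires this up in manager.go; standalone use looks roughly
// like the sketch below (callback bodies are placeholders):
//
// le, err := NewLeaderElection("node-1", natsConn, LeaderElectionCallbacks{
// OnBecameLeader: func() { /* begin coordinator duties */ },
// OnLostLeader: func() { /* stop coordinator duties */ },
// })
// if err != nil {
// log.Fatal(err)
// }
// le.Start()
// defer le.Stop()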
type LeaderElection struct {
nodeID string
natsConn *nats.Conn
js nats.JetStreamContext
kv nats.KeyValue
isLeader bool
currentLeader string
leaderTerm uint64
ctx context.Context
cancel context.CancelFunc
mutex sync.RWMutex
logger *log.Logger
callbacks LeaderElectionCallbacks
}
// NewLeaderElection creates a new NATS-based leader election system
func NewLeaderElection(nodeID string, natsConn *nats.Conn, callbacks LeaderElectionCallbacks) (*LeaderElection, error) {
ctx, cancel := context.WithCancel(context.Background())
// Create JetStream context
js, err := natsConn.JetStream()
if err != nil {
cancel()
return nil, fmt.Errorf("failed to create JetStream context: %w", err)
}
// Create or get KV store for leader election
kv, err := js.CreateKeyValue(&nats.KeyValueConfig{
Bucket: "aether-leader-election",
Description: "Aether cluster leader election coordination",
TTL: LeaderLeaseTimeout * 2, // Auto-cleanup expired leases
MaxBytes: 1024 * 1024, // 1MB max
Replicas: 1, // Single replica for simplicity
})
if err != nil {
// Try to get existing KV store
kv, err = js.KeyValue("aether-leader-election")
if err != nil {
cancel()
return nil, fmt.Errorf("failed to create/get KV store: %w", err)
}
}
return &LeaderElection{
nodeID: nodeID,
natsConn: natsConn,
js: js,
kv: kv,
ctx: ctx,
cancel: cancel,
logger: log.New(os.Stdout, fmt.Sprintf("[Leader %s] ", nodeID), log.LstdFlags),
callbacks: callbacks,
}, nil
}
// Start begins the leader election process
func (le *LeaderElection) Start() {
le.logger.Printf("🗳️ Starting leader election")
// Start election loop in background
go le.electionLoop()
// Start lease renewal loop in background
go le.leaseRenewalLoop()
// Start leader monitoring
go le.monitorLeadership()
}
// Stop stops the leader election process
func (le *LeaderElection) Stop() {
le.logger.Printf("🛑 Stopping leader election")
le.cancel()
// If we're the leader, resign gracefully
if le.IsLeader() {
le.resignLeadership()
}
}
// IsLeader returns whether this node is currently the leader
func (le *LeaderElection) IsLeader() bool {
le.mutex.RLock()
defer le.mutex.RUnlock()
return le.isLeader
}
// GetLeader returns the current leader ID
func (le *LeaderElection) GetLeader() string {
le.mutex.RLock()
defer le.mutex.RUnlock()
return le.currentLeader
}
// GetTerm returns the current leadership term
func (le *LeaderElection) GetTerm() uint64 {
le.mutex.RLock()
defer le.mutex.RUnlock()
return le.leaderTerm
}
// electionLoop runs the main election process
func (le *LeaderElection) electionLoop() {
ticker := time.NewTicker(ElectionTimeout)
defer ticker.Stop()
// Try to become leader immediately
le.tryBecomeLeader()
for {
select {
case <-le.ctx.Done():
return
case <-ticker.C:
// Periodically check if we should try to become leader
if !le.IsLeader() && le.shouldTryElection() {
le.tryBecomeLeader()
}
}
}
}
// leaseRenewalLoop renews the leadership lease if we're the leader
func (le *LeaderElection) leaseRenewalLoop() {
ticker := time.NewTicker(HeartbeatInterval)
defer ticker.Stop()
for {
select {
case <-le.ctx.Done():
return
case <-ticker.C:
if le.IsLeader() {
if err := le.renewLease(); err != nil {
le.logger.Printf("❌ Failed to renew leadership lease: %v", err)
le.loseLeadership()
}
}
}
}
}
// monitorLeadership watches for leadership changes
func (le *LeaderElection) monitorLeadership() {
watcher, err := le.kv.Watch("leader")
if err != nil {
le.logger.Printf("❌ Failed to watch leadership: %v", err)
return
}
defer watcher.Stop()
for {
select {
case <-le.ctx.Done():
return
case entry := <-watcher.Updates():
if entry == nil {
continue
}
le.handleLeadershipUpdate(entry)
}
}
}
// tryBecomeLeader attempts to acquire leadership
func (le *LeaderElection) tryBecomeLeader() {
le.logger.Printf("🗳️ Attempting to become leader")
now := time.Now()
newLease := LeadershipLease{
LeaderID: le.nodeID,
Term: le.GetTerm() + 1, // read the current term under the lock
ExpiresAt: now.Add(LeaderLeaseTimeout),
StartedAt: now,
}
leaseData, err := json.Marshal(newLease)
if err != nil {
le.logger.Printf("❌ Failed to marshal lease: %v", err)
return
}
// Try to create the leader key (atomic operation)
_, err = le.kv.Create("leader", leaseData)
if err != nil {
// Leader key exists, check if it's expired
if le.tryClaimExpiredLease() {
return // Successfully claimed expired lease
}
// Another node is leader
return
}
// Successfully became leader!
le.becomeLeader(newLease.Term)
}
// tryClaimExpiredLease attempts to claim an expired leadership lease
func (le *LeaderElection) tryClaimExpiredLease() bool {
entry, err := le.kv.Get("leader")
if err != nil {
return false
}
var currentLease LeadershipLease
if err := json.Unmarshal(entry.Value(), &currentLease); err != nil {
return false
}
// Check if lease is expired
if time.Now().Before(currentLease.ExpiresAt) {
// Lease is still valid
le.updateCurrentLeader(currentLease.LeaderID, currentLease.Term)
return false
}
// Lease is expired, try to claim it
le.logger.Printf("🕐 Attempting to claim expired lease from %s", currentLease.LeaderID)
now := time.Now()
newLease := LeadershipLease{
LeaderID: le.nodeID,
Term: currentLease.Term + 1,
ExpiresAt: now.Add(LeaderLeaseTimeout),
StartedAt: now,
}
leaseData, err := json.Marshal(newLease)
if err != nil {
return false
}
// Atomically update the lease
_, err = le.kv.Update("leader", leaseData, entry.Revision())
if err != nil {
return false
}
// Successfully claimed expired lease!
le.becomeLeader(newLease.Term)
return true
}
// renewLease renews the current leadership lease
func (le *LeaderElection) renewLease() error {
entry, err := le.kv.Get("leader")
if err != nil {
return err
}
var currentLease LeadershipLease
if err := json.Unmarshal(entry.Value(), &currentLease); err != nil {
return err
}
// Verify we're still the leader
if currentLease.LeaderID != le.nodeID {
return fmt.Errorf("no longer leader, current leader is %s", currentLease.LeaderID)
}
// Renew the lease
renewedLease := currentLease
renewedLease.ExpiresAt = time.Now().Add(LeaderLeaseTimeout)
leaseData, err := json.Marshal(renewedLease)
if err != nil {
return err
}
_, err = le.kv.Update("leader", leaseData, entry.Revision())
if err != nil {
return fmt.Errorf("failed to renew lease: %w", err)
}
le.logger.Printf("💓 Renewed leadership lease until %s", renewedLease.ExpiresAt.Format(time.RFC3339))
return nil
}
// becomeLeader handles becoming the cluster leader
func (le *LeaderElection) becomeLeader(term uint64) {
le.mutex.Lock()
le.isLeader = true
le.currentLeader = le.nodeID
le.leaderTerm = term
le.mutex.Unlock()
le.logger.Printf("👑 Became cluster leader (term %d)", term)
if le.callbacks.OnBecameLeader != nil {
le.callbacks.OnBecameLeader()
}
}
// loseLeadership handles losing leadership
func (le *LeaderElection) loseLeadership() {
le.mutex.Lock()
wasLeader := le.isLeader
le.isLeader = false
le.mutex.Unlock()
if wasLeader {
le.logger.Printf("📉 Lost cluster leadership")
if le.callbacks.OnLostLeader != nil {
le.callbacks.OnLostLeader()
}
}
}
// resignLeadership gracefully resigns from leadership
func (le *LeaderElection) resignLeadership() {
if !le.IsLeader() {
return
}
le.logger.Printf("👋 Resigning from cluster leadership")
// Delete the leadership key
err := le.kv.Delete("leader")
if err != nil {
le.logger.Printf("⚠️ Failed to delete leadership key: %v", err)
}
le.loseLeadership()
}
// shouldTryElection determines if this node should attempt to become leader
func (le *LeaderElection) shouldTryElection() bool {
// Always try if no current leader
if le.GetLeader() == "" {
return true
}
// Check if current lease is expired
entry, err := le.kv.Get("leader")
if err != nil {
// Can't read lease, try to become leader
return true
}
var currentLease LeadershipLease
if err := json.Unmarshal(entry.Value(), &currentLease); err != nil {
// Invalid lease, try to become leader
return true
}
// Try if lease is expired
return time.Now().After(currentLease.ExpiresAt)
}
// handleLeadershipUpdate processes leadership change notifications
func (le *LeaderElection) handleLeadershipUpdate(entry nats.KeyValueEntry) {
if entry.Operation() == nats.KeyValueDelete {
// Leadership was vacated
le.updateCurrentLeader("", 0)
return
}
var lease LeadershipLease
if err := json.Unmarshal(entry.Value(), &lease); err != nil {
le.logger.Printf("⚠️ Invalid leadership lease: %v", err)
return
}
le.updateCurrentLeader(lease.LeaderID, lease.Term)
}
// updateCurrentLeader updates the current leader information
func (le *LeaderElection) updateCurrentLeader(leaderID string, term uint64) {
le.mutex.Lock()
oldLeader := le.currentLeader
le.currentLeader = leaderID
le.leaderTerm = term
// Update our leadership status
if leaderID == le.nodeID {
le.isLeader = true
} else {
if le.isLeader {
le.isLeader = false
le.mutex.Unlock()
if le.callbacks.OnLostLeader != nil {
le.callbacks.OnLostLeader()
}
le.mutex.Lock()
} else {
le.isLeader = false
}
}
le.mutex.Unlock()
// Notify of leader change
if oldLeader != leaderID && leaderID != "" && leaderID != le.nodeID {
le.logger.Printf("🔄 New cluster leader: %s (term %d)", leaderID, term)
if le.callbacks.OnNewLeader != nil {
le.callbacks.OnNewLeader(leaderID)
}
}
}

cluster/manager.go Normal file

@@ -0,0 +1,331 @@
package cluster
import (
"context"
"encoding/json"
"fmt"
"log"
"os"
"sync"
"time"
"github.com/nats-io/nats.go"
)
// VMRegistry provides access to local VM information for cluster operations
type VMRegistry interface {
GetActiveVMs() map[string]interface{} // VirtualMachine interface to avoid import cycles
GetShard(actorID string) int
}
// ClusterManager coordinates distributed VM operations across the cluster
type ClusterManager struct {
nodeID string
nodes map[string]*NodeInfo
nodeUpdates chan NodeUpdate
shardMap *ShardMap
hashRing *ConsistentHashRing
election *LeaderElection
natsConn *nats.Conn
ctx context.Context
mutex sync.RWMutex
logger *log.Logger
vmRegistry VMRegistry // Interface to access local VMs
}
// NewClusterManager creates a cluster coordination manager
func NewClusterManager(nodeID string, natsConn *nats.Conn, ctx context.Context) (*ClusterManager, error) {
cm := &ClusterManager{
nodeID: nodeID,
nodes: make(map[string]*NodeInfo),
nodeUpdates: make(chan NodeUpdate, 100),
shardMap: &ShardMap{Shards: make(map[int][]string), Nodes: make(map[string]NodeInfo)},
hashRing: NewConsistentHashRing(),
natsConn: natsConn,
ctx: ctx,
logger: log.New(os.Stdout, fmt.Sprintf("[ClusterMgr %s] ", nodeID), log.LstdFlags),
vmRegistry: nil, // Will be set later via SetVMRegistry
}
// Create leadership election with callbacks
callbacks := LeaderElectionCallbacks{
OnBecameLeader: func() {
cm.logger.Printf("👑 This node became the cluster leader - can initiate rebalancing")
},
OnLostLeader: func() {
cm.logger.Printf("📉 This node lost cluster leadership")
},
OnNewLeader: func(leaderID string) {
cm.logger.Printf("🔄 Cluster leadership changed to: %s", leaderID)
},
}
election, err := NewLeaderElection(nodeID, natsConn, callbacks)
if err != nil {
return nil, fmt.Errorf("failed to create leader election: %w", err)
}
cm.election = election
return cm, nil
}
// Start begins cluster management operations
func (cm *ClusterManager) Start() {
cm.logger.Printf("🚀 Starting cluster manager")
// Start leader election
cm.election.Start()
// Subscribe to cluster messages
cm.natsConn.Subscribe("aether.cluster.*", cm.handleClusterMessage)
// Start node monitoring
go cm.monitorNodes()
// Start shard rebalancing (only if leader)
go cm.rebalanceLoop()
}
// Stop gracefully stops the cluster manager
func (cm *ClusterManager) Stop() {
cm.logger.Printf("🛑 Stopping cluster manager")
if cm.election != nil {
cm.election.Stop()
}
}
// IsLeader returns whether this node is the cluster leader
func (cm *ClusterManager) IsLeader() bool {
if cm.election == nil {
return false
}
return cm.election.IsLeader()
}
// GetLeader returns the current cluster leader ID
func (cm *ClusterManager) GetLeader() string {
if cm.election == nil {
return ""
}
return cm.election.GetLeader()
}
// SetVMRegistry sets the VM registry for accessing local VM information
func (cm *ClusterManager) SetVMRegistry(registry VMRegistry) {
cm.vmRegistry = registry
}
// GetActorsInShard returns actors that belong to a specific shard on this node
func (cm *ClusterManager) GetActorsInShard(shardID int) []string {
if cm.vmRegistry == nil {
return []string{}
}
activeVMs := cm.vmRegistry.GetActiveVMs()
var actors []string
for actorID := range activeVMs {
if cm.vmRegistry.GetShard(actorID) == shardID {
actors = append(actors, actorID)
}
}
return actors
}
// handleClusterMessage processes incoming cluster coordination messages
func (cm *ClusterManager) handleClusterMessage(msg *nats.Msg) {
var clusterMsg ClusterMessage
if err := json.Unmarshal(msg.Data, &clusterMsg); err != nil {
cm.logger.Printf("⚠️ Invalid cluster message: %v", err)
return
}
switch clusterMsg.Type {
case "rebalance":
cm.handleRebalanceRequest(clusterMsg)
case "migrate":
cm.handleMigrationRequest(clusterMsg)
case "node_update":
if update, ok := clusterMsg.Payload.(NodeUpdate); ok {
cm.handleNodeUpdate(update)
}
default:
cm.logger.Printf("⚠️ Unknown cluster message type: %s", clusterMsg.Type)
}
}
// handleNodeUpdate processes node status updates
func (cm *ClusterManager) handleNodeUpdate(update NodeUpdate) {
cm.mutex.Lock()
defer cm.mutex.Unlock()
switch update.Type {
case NodeJoined:
cm.nodes[update.Node.ID] = update.Node
cm.hashRing.AddNode(update.Node.ID)
cm.logger.Printf(" Node joined: %s", update.Node.ID)
case NodeLeft:
delete(cm.nodes, update.Node.ID)
cm.hashRing.RemoveNode(update.Node.ID)
cm.logger.Printf(" Node left: %s", update.Node.ID)
case NodeUpdated:
if node, exists := cm.nodes[update.Node.ID]; exists {
// Update existing node info
*node = *update.Node
} else {
// New node
cm.nodes[update.Node.ID] = update.Node
cm.hashRing.AddNode(update.Node.ID)
}
}
// Check for failed nodes and mark them
now := time.Now()
for _, node := range cm.nodes {
if now.Sub(node.LastSeen) > 90*time.Second && node.Status != NodeStatusFailed {
node.Status = NodeStatusFailed
cm.logger.Printf("❌ Node marked as failed: %s (last seen: %s)",
node.ID, node.LastSeen.Format(time.RFC3339))
}
}
// Trigger rebalancing if we're the leader and there are significant changes
if cm.IsLeader() {
activeNodeCount := 0
for _, node := range cm.nodes {
if node.Status == NodeStatusActive {
activeNodeCount++
}
}
// Simple trigger: rebalance if we have different number of active nodes
// than shards assigned (this is a simplified logic)
if activeNodeCount > 0 {
cm.triggerShardRebalancing("node topology changed")
}
}
}
// handleRebalanceRequest processes cluster rebalancing requests
func (cm *ClusterManager) handleRebalanceRequest(msg ClusterMessage) {
cm.logger.Printf("🔄 Handling rebalance request from %s", msg.From)
// Implementation would handle the specific rebalancing logic
// This is a simplified version
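// One plausible outline, sketched as a comment only (decodeRebalanceRequest is
// a hypothetical helper): decode msg.Payload into a RebalanceRequest from
// types.go, then apply each requested ActorMigration, e.g.:
//
// req, err := decodeRebalanceRequest(msg.Payload)
// if err != nil {
// return
// }
// for _, m := range req.Migrations {
// cm.logger.Printf("migrating %s: %s -> %s (shard %d)", m.ActorID, m.FromNode, m.ToNode, m.ShardID)
// }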
}
// handleMigrationRequest processes actor migration requests
func (cm *ClusterManager) handleMigrationRequest(msg ClusterMessage) {
cm.logger.Printf("🚚 Handling migration request from %s", msg.From)
// Implementation would handle the specific migration logic
// This is a simplified version
}
// triggerShardRebalancing initiates shard rebalancing across the cluster
func (cm *ClusterManager) triggerShardRebalancing(reason string) {
if !cm.IsLeader() {
return // Only leader can initiate rebalancing
}
cm.logger.Printf("⚖️ Triggering shard rebalancing: %s", reason)
// Get active nodes
var activeNodes []*NodeInfo
cm.mutex.RLock()
for _, node := range cm.nodes {
if node.Status == NodeStatusActive {
activeNodes = append(activeNodes, node)
}
}
cm.mutex.RUnlock()
if len(activeNodes) == 0 {
cm.logger.Printf("⚠️ No active nodes available for rebalancing")
return
}
// This would implement the actual rebalancing logic
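// (for example: walk shards 0..NumShards-1, pick new owners from activeNodes
// via the hash ring, bump the ShardMap version, and publish a "rebalance"
// ClusterMessage so the affected nodes can apply the new assignment)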
cm.logger.Printf("🎯 Would rebalance across %d active nodes", len(activeNodes))
}
// monitorNodes periodically checks node health and updates
func (cm *ClusterManager) monitorNodes() {
ticker := time.NewTicker(30 * time.Second)
defer ticker.Stop()
for {
select {
case <-ticker.C:
// Health check logic would go here
cm.checkNodeHealth()
case <-cm.ctx.Done():
return
}
}
}
// checkNodeHealth verifies the health of known nodes
func (cm *ClusterManager) checkNodeHealth() {
cm.mutex.Lock()
defer cm.mutex.Unlock()
now := time.Now()
for _, node := range cm.nodes {
if now.Sub(node.LastSeen) > 90*time.Second && node.Status == NodeStatusActive {
node.Status = NodeStatusFailed
cm.logger.Printf("💔 Node failed: %s", node.ID)
}
}
}
// rebalanceLoop runs periodic rebalancing checks (leader only)
func (cm *ClusterManager) rebalanceLoop() {
ticker := time.NewTicker(5 * time.Minute)
defer ticker.Stop()
for {
select {
case <-ticker.C:
if cm.IsLeader() {
cm.triggerShardRebalancing("periodic rebalance check")
}
case <-cm.ctx.Done():
return
}
}
}
// GetNodes returns a copy of the current cluster nodes
func (cm *ClusterManager) GetNodes() map[string]*NodeInfo {
cm.mutex.RLock()
defer cm.mutex.RUnlock()
nodes := make(map[string]*NodeInfo)
for id, node := range cm.nodes {
// Create a copy to prevent external mutation
nodeCopy := *node
nodes[id] = &nodeCopy
}
return nodes
}
// GetShardMap returns the current shard mapping
func (cm *ClusterManager) GetShardMap() *ShardMap {
cm.mutex.RLock()
defer cm.mutex.RUnlock()
// Return a deep copy to prevent external mutation
shardMapCopy := &ShardMap{
Version: cm.shardMap.Version,
Shards: make(map[int][]string),
Nodes: make(map[string]NodeInfo),
UpdateTime: cm.shardMap.UpdateTime,
}
for shardID, nodes := range cm.shardMap.Shards {
shardMapCopy.Shards[shardID] = append([]string(nil), nodes...)
}
for nodeID, nodeInfo := range cm.shardMap.Nodes {
shardMapCopy.Nodes[nodeID] = nodeInfo
}
return shardMapCopy
}

cluster/shard.go Normal file

@@ -0,0 +1,188 @@
package cluster
import (
"crypto/sha256"
"encoding/binary"
"fmt"
"hash"
"hash/fnv"
)
// MigrationStatus tracks actor migration progress
type MigrationStatus string
const (
MigrationPending MigrationStatus = "pending"
MigrationInProgress MigrationStatus = "in_progress"
MigrationCompleted MigrationStatus = "completed"
MigrationFailed MigrationStatus = "failed"
)
// PlacementStrategy determines where to place new actors
type PlacementStrategy interface {
PlaceActor(actorID string, shardMap *ShardMap, nodes map[string]*NodeInfo) (string, error)
RebalanceShards(shardMap *ShardMap, nodes map[string]*NodeInfo) (*ShardMap, error)
}
// ShardManager handles actor placement and distribution
type ShardManager struct {
shardCount int
shardMap *ShardMap
hasher hash.Hash
placement PlacementStrategy
replication int
}
// NewShardManager creates a new shard manager
func NewShardManager(shardCount, replication int) *ShardManager {
return &ShardManager{
shardCount: shardCount,
shardMap: &ShardMap{Shards: make(map[int][]string), Nodes: make(map[string]NodeInfo)},
hasher: fnv.New64a(),
placement: &ConsistentHashPlacement{},
replication: replication,
}
}
// GetShard returns the shard number for a given actor ID
func (sm *ShardManager) GetShard(actorID string) int {
h := sha256.Sum256([]byte(actorID))
shardID := binary.BigEndian.Uint32(h[:4]) % uint32(sm.shardCount)
return int(shardID)
}
// GetShardNodes returns the nodes responsible for a shard
func (sm *ShardManager) GetShardNodes(shardID int) []string {
if nodes, exists := sm.shardMap.Shards[shardID]; exists {
return nodes
}
return []string{}
}
// AssignShard assigns a shard to specific nodes
func (sm *ShardManager) AssignShard(shardID int, nodes []string) {
if sm.shardMap.Shards == nil {
sm.shardMap.Shards = make(map[int][]string)
}
sm.shardMap.Shards[shardID] = nodes
}
// GetPrimaryNode returns the primary node for a shard
func (sm *ShardManager) GetPrimaryNode(shardID int) string {
nodes := sm.GetShardNodes(shardID)
if len(nodes) > 0 {
return nodes[0] // First node is primary
}
return ""
}
// GetReplicaNodes returns the replica nodes for a shard
func (sm *ShardManager) GetReplicaNodes(shardID int) []string {
nodes := sm.GetShardNodes(shardID)
if len(nodes) > 1 {
return nodes[1:] // All nodes except first are replicas
}
return []string{}
}
// UpdateShardMap updates the entire shard map
func (sm *ShardManager) UpdateShardMap(newShardMap *ShardMap) {
sm.shardMap = newShardMap
}
// GetShardMap returns a copy of the current shard map
func (sm *ShardManager) GetShardMap() *ShardMap {
// Return a deep copy to prevent external mutation
shardMapCopy := &ShardMap{
Version: sm.shardMap.Version,
Shards: make(map[int][]string),
Nodes: make(map[string]NodeInfo),
UpdateTime: sm.shardMap.UpdateTime,
}
// Copy the shard assignments
for shardID, nodes := range sm.shardMap.Shards {
shardMapCopy.Shards[shardID] = append([]string(nil), nodes...)
}
// Copy the node info
for nodeID, nodeInfo := range sm.shardMap.Nodes {
shardMapCopy.Nodes[nodeID] = nodeInfo
}
return shardMapCopy
}
// RebalanceShards redistributes shards across available nodes
func (sm *ShardManager) RebalanceShards(nodes map[string]*NodeInfo) (*ShardMap, error) {
if sm.placement == nil {
return nil, fmt.Errorf("no placement strategy configured")
}
return sm.placement.RebalanceShards(sm.shardMap, nodes)
}
// PlaceActor determines which node should handle a new actor
func (sm *ShardManager) PlaceActor(actorID string, nodes map[string]*NodeInfo) (string, error) {
if sm.placement == nil {
return "", fmt.Errorf("no placement strategy configured")
}
return sm.placement.PlaceActor(actorID, sm.shardMap, nodes)
}
// GetActorsInShard returns actors that belong to a specific shard on a specific node
func (sm *ShardManager) GetActorsInShard(shardID int, nodeID string, vmRegistry VMRegistry) []string {
if vmRegistry == nil {
return []string{}
}
activeVMs := vmRegistry.GetActiveVMs()
var actors []string
for actorID := range activeVMs {
if sm.GetShard(actorID) == shardID {
actors = append(actors, actorID)
}
}
return actors
}
// ConsistentHashPlacement implements PlacementStrategy using consistent hashing
type ConsistentHashPlacement struct{}
// PlaceActor places an actor using consistent hashing
func (chp *ConsistentHashPlacement) PlaceActor(actorID string, shardMap *ShardMap, nodes map[string]*NodeInfo) (string, error) {
if len(nodes) == 0 {
return "", fmt.Errorf("no nodes available for placement")
}
// Simplified placement: hash the actor ID and pick a node by index. Go map
// iteration order is randomized, so this is not stable across calls; a real
// implementation would use the ConsistentHashRing instead.
h := sha256.Sum256([]byte(actorID))
nodeIndex := binary.BigEndian.Uint32(h[:4]) % uint32(len(nodes))
i := 0
for nodeID := range nodes {
if i == int(nodeIndex) {
return nodeID, nil
}
i++
}
// Fallback to first node
for nodeID := range nodes {
return nodeID, nil
}
return "", fmt.Errorf("failed to place actor")
}
// RebalanceShards rebalances shards across nodes
func (chp *ConsistentHashPlacement) RebalanceShards(currentMap *ShardMap, nodes map[string]*NodeInfo) (*ShardMap, error) {
// This is a simplified implementation
// In practice, this would implement sophisticated rebalancing logic
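// (for instance: build a ConsistentHashRing from the active nodes, reassign
// each shard ID to the ring's owner plus replicas, and return a new ShardMap
// with an incremented Version; sketched here only, not implemented)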
return currentMap, nil
}

cluster/types.go Normal file

@@ -0,0 +1,110 @@
package cluster
import (
"time"
)
const (
// NumShards defines the total number of shards in the cluster
NumShards = 1024
// VirtualNodes defines the number of virtual nodes per physical node for consistent hashing
VirtualNodes = 150
// Leadership election constants
LeaderLeaseTimeout = 10 * time.Second // How long a leader lease lasts
HeartbeatInterval = 3 * time.Second // How often leader sends heartbeats
ElectionTimeout = 2 * time.Second // How long to wait for election
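// With these defaults a healthy leader renews its lease every 3 seconds,
// other nodes re-check eligibility every 2 seconds, and an unrenewed lease
// expires after 10 seconds, so a crashed leader is typically replaced within
// about 12 seconds.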
)
// NodeStatus represents the health status of a node
type NodeStatus string
const (
NodeStatusActive NodeStatus = "active"
NodeStatusDraining NodeStatus = "draining"
NodeStatusFailed NodeStatus = "failed"
)
// NodeInfo represents information about a cluster node
type NodeInfo struct {
ID string `json:"id"`
Address string `json:"address"`
Port int `json:"port"`
Status NodeStatus `json:"status"`
Capacity float64 `json:"capacity"` // Maximum load capacity
Load float64 `json:"load"` // Current CPU/memory load
LastSeen time.Time `json:"lastSeen"` // Last heartbeat timestamp
Timestamp time.Time `json:"timestamp"`
Metadata map[string]string `json:"metadata"`
IsLeader bool `json:"isLeader"`
VMCount int `json:"vmCount"` // Number of VMs on this node
ShardIDs []int `json:"shardIds"` // Shards assigned to this node
}
// NodeUpdateType represents the type of node update
type NodeUpdateType string
const (
NodeJoined NodeUpdateType = "joined"
NodeLeft NodeUpdateType = "left"
NodeUpdated NodeUpdateType = "updated"
)
// NodeUpdate represents a node status update
type NodeUpdate struct {
Type NodeUpdateType `json:"type"`
Node *NodeInfo `json:"node"`
}
// ShardMap represents the distribution of shards across cluster nodes
type ShardMap struct {
Version uint64 `json:"version"` // Incremented on each change
Shards map[int][]string `json:"shards"` // shard ID -> [primary, replica1, replica2]
Nodes map[string]NodeInfo `json:"nodes"` // node ID -> node info
UpdateTime time.Time `json:"updateTime"`
}
// ClusterMessage represents inter-node communication
type ClusterMessage struct {
Type string `json:"type"`
From string `json:"from"`
To string `json:"to"`
Payload interface{} `json:"payload"`
Timestamp time.Time `json:"timestamp"`
}
// RebalanceRequest represents a request to rebalance shards
type RebalanceRequest struct {
RequestID string `json:"requestId"`
FromNode string `json:"fromNode"`
ToNode string `json:"toNode"`
ShardIDs []int `json:"shardIds"`
Reason string `json:"reason"`
Migrations []ActorMigration `json:"migrations"`
}
// ActorMigration represents the migration of an actor between nodes
type ActorMigration struct {
ActorID string `json:"actorId"`
FromNode string `json:"fromNode"`
ToNode string `json:"toNode"`
ShardID int `json:"shardId"`
State map[string]interface{} `json:"state"`
Version int64 `json:"version"`
Status string `json:"status"` // "pending", "in_progress", "completed", "failed"
}
// LeaderElectionCallbacks defines callbacks for leadership changes
type LeaderElectionCallbacks struct {
OnBecameLeader func()
OnLostLeader func()
OnNewLeader func(leaderID string)
}
// LeadershipLease represents a leadership lease in the cluster
type LeadershipLease struct {
LeaderID string `json:"leaderId"`
Term uint64 `json:"term"`
ExpiresAt time.Time `json:"expiresAt"`
StartedAt time.Time `json:"startedAt"`
}