Initial aether repository structure
All checks were successful
CI / build (push) Successful in 1m13s
Distributed actor system with event sourcing for Go:

- event.go - Event, ActorSnapshot, EventStore interface
- eventbus.go - EventBus, EventBroadcaster for pub/sub
- nats_eventbus.go - NATS-backed cross-node event broadcasting
- store/ - InMemoryEventStore (testing), JetStreamEventStore (production)
- cluster/ - Node discovery, leader election, shard distribution
- model/ - EventStorming model types

Extracted from arcadia as open-source infrastructure component.

Co-Authored-By: Claude <noreply@anthropic.com>
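For orientation, a minimal sketch of how the cluster entry point added in this commit is meant to be used; the import path and the no-op Runtime stub are hypothetical stand-ins, since the real runtime lives in the main aether package:

    package main

    import (
        "log"

        "example.org/aether/cluster" // hypothetical import path
    )

    // noopRuntime is a hypothetical stand-in for the real aether runtime.
    type noopRuntime struct{}

    func (noopRuntime) Start() error                          { return nil }
    func (noopRuntime) LoadModel(model interface{}) error     { return nil }
    func (noopRuntime) SendMessage(message interface{}) error { return nil }

    func main() {
        dvm, err := cluster.NewDistributedVM("node-1",
            []string{"nats://127.0.0.1:4222"}, noopRuntime{})
        if err != nil {
            log.Fatal(err)
        }
        defer dvm.Stop()

        if err := dvm.Start(); err != nil {
            log.Fatal(err)
        }
        // From here, dvm.LoadModel(...) broadcasts a model to every node and
        // dvm.SendMessage(...) delivers through the local runtime.
    }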
cluster/distributed.go (new file, 221 lines)
@@ -0,0 +1,221 @@
package cluster

import (
    "context"
    "encoding/json"
    "fmt"
    "log"
    "strings"

    "github.com/nats-io/nats.go"
)
// DistributedVM manages a cluster of runtime nodes with VM-per-instance architecture
type DistributedVM struct {
    nodeID       string
    cluster      *ClusterManager
    localRuntime Runtime // Interface to avoid import cycles
    sharding     *ShardManager
    discovery    *NodeDiscovery
    natsConn     *nats.Conn
    ctx          context.Context
    cancel       context.CancelFunc
}

// Runtime interface to avoid import cycles with the main aether package
type Runtime interface {
    Start() error
    LoadModel(model interface{}) error
    SendMessage(message interface{}) error
}

// DistributedVMRegistry implements VMRegistry using DistributedVM's local runtime and sharding
type DistributedVMRegistry struct {
    runtime  interface{} // Runtime interface to avoid import cycles
    sharding *ShardManager
}
// NewDistributedVM creates a distributed VM runtime cluster node
func NewDistributedVM(nodeID string, natsURLs []string, localRuntime Runtime) (*DistributedVM, error) {
    if len(natsURLs) == 0 {
        return nil, fmt.Errorf("at least one NATS URL is required")
    }

    ctx, cancel := context.WithCancel(context.Background())

    // Connect to the NATS cluster. nats.Connect accepts a comma-separated
    // list of server URLs, so all provided seeds are used.
    natsConn, err := nats.Connect(strings.Join(natsURLs, ","),
        nats.Name(fmt.Sprintf("aether-runtime-%s", nodeID)))
    if err != nil {
        cancel()
        return nil, fmt.Errorf("failed to connect to NATS: %w", err)
    }

    // Create cluster components
    discovery := NewNodeDiscovery(nodeID, natsConn, ctx)
    sharding := NewShardManager(1024, 3) // 1024 shards, 3 replicas
    cluster, err := NewClusterManager(nodeID, natsConn, ctx)
    if err != nil {
        cancel()
        natsConn.Close()
        return nil, fmt.Errorf("failed to create cluster manager: %w", err)
    }

    dvm := &DistributedVM{
        nodeID:       nodeID,
        cluster:      cluster,
        localRuntime: localRuntime,
        sharding:     sharding,
        discovery:    discovery,
        natsConn:     natsConn,
        ctx:          ctx,
        cancel:       cancel,
    }

    // Create the VM registry and connect it to the cluster manager
    vmRegistry := &DistributedVMRegistry{
        runtime:  localRuntime,
        sharding: sharding,
    }
    cluster.SetVMRegistry(vmRegistry)

    return dvm, nil
}
// Start begins the distributed VM cluster node
func (dvm *DistributedVM) Start() error {
    // Start the local runtime
    if err := dvm.localRuntime.Start(); err != nil {
        return fmt.Errorf("failed to start local runtime: %w", err)
    }

    // Start cluster services
    go dvm.discovery.Start()
    go dvm.cluster.Start()

    // Start message routing
    go dvm.startMessageRouting()

    return nil
}

// Stop gracefully shuts down the distributed VM node
func (dvm *DistributedVM) Stop() {
    dvm.cancel()
    dvm.cluster.Stop()
    dvm.discovery.Stop()
    dvm.natsConn.Close()
}
// LoadModel distributes an EventStorming model across the cluster with VM templates
func (dvm *DistributedVM) LoadModel(model interface{}) error {
    // Load the model locally first
    if err := dvm.localRuntime.LoadModel(model); err != nil {
        return fmt.Errorf("failed to load model locally: %w", err)
    }

    // Broadcast the model to the other cluster nodes
    msg := ClusterMessage{
        Type:    "load_model",
        From:    dvm.nodeID,
        To:      "broadcast",
        Payload: model,
    }

    return dvm.publishClusterMessage(msg)
}
// SendMessage routes messages across the distributed cluster
func (dvm *DistributedVM) SendMessage(message interface{}) error {
    // Simplified implementation: in practice this would determine the
    // target node from the sharding layer and route the message there.
    return dvm.localRuntime.SendMessage(message)
}
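// NOTE: one possible shape for the sharded routing path described above
// (sketch only; it assumes the caller can supply the target actor's ID,
// which the Runtime interface does not yet expose):
//
// func (dvm *DistributedVM) routeToActor(actorID string, message interface{}) error {
//     if dvm.IsLocalActor(actorID) {
//         return dvm.localRuntime.SendMessage(message)
//     }
//     return dvm.publishClusterMessage(ClusterMessage{
//         Type:    "route_message",
//         From:    dvm.nodeID,
//         To:      dvm.GetActorNode(actorID),
//         Payload: message,
//     })
// }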
// GetActorNode determines which node should handle a specific actor
func (dvm *DistributedVM) GetActorNode(actorID string) string {
    // Use consistent hashing to determine the target node
    return dvm.cluster.hashRing.GetNode(actorID)
}

// IsLocalActor checks whether an actor should be handled by this node
func (dvm *DistributedVM) IsLocalActor(actorID string) bool {
    return dvm.GetActorNode(actorID) == dvm.nodeID
}

// GetActorsInShard returns the actors that belong to a specific shard on this node
func (dvm *DistributedVM) GetActorsInShard(shardID int) []string {
    return dvm.cluster.GetActorsInShard(shardID)
}
// startMessageRouting begins routing messages between cluster nodes
func (dvm *DistributedVM) startMessageRouting() {
    // Subscribe to cluster messages
    if _, err := dvm.natsConn.Subscribe("aether.distributed.*", dvm.handleClusterMessage); err != nil {
        log.Printf("failed to subscribe to cluster messages: %v", err)
    }
}

// handleClusterMessage processes incoming cluster coordination messages
func (dvm *DistributedVM) handleClusterMessage(msg *nats.Msg) {
    var clusterMsg ClusterMessage
    if err := json.Unmarshal(msg.Data, &clusterMsg); err != nil {
        log.Printf("dropping malformed cluster message on %s: %v", msg.Subject, err)
        return
    }

    // Ignore our own broadcasts: by default NATS delivers published
    // messages back to subscribers on the same connection.
    if clusterMsg.From == dvm.nodeID {
        return
    }

    switch clusterMsg.Type {
    case "load_model":
        // Handle model loading from other nodes
        if model := clusterMsg.Payload; model != nil {
            if err := dvm.localRuntime.LoadModel(model); err != nil {
                log.Printf("failed to load broadcast model: %v", err)
            }
        }

    case "route_message":
        // Handle message routing from other nodes
        if message := clusterMsg.Payload; message != nil {
            if err := dvm.localRuntime.SendMessage(message); err != nil {
                log.Printf("failed to deliver routed message: %v", err)
            }
        }

    case "rebalance":
        // Handle shard rebalancing requests
        dvm.handleRebalanceRequest(clusterMsg)
    }
}
// handleRebalanceRequest processes shard rebalancing requests
func (dvm *DistributedVM) handleRebalanceRequest(msg ClusterMessage) {
    // Simplified rebalancing logic: in practice this would implement
    // actor migration between nodes.
}
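// NOTE: one possible shape for that migration pass (sketch only; migrateActor
// is a hypothetical helper this file does not define):
//
// for shardID := 0; shardID < 1024; shardID++ {
//     for _, actorID := range dvm.GetActorsInShard(shardID) {
//         if target := dvm.GetActorNode(actorID); target != dvm.nodeID {
//             dvm.migrateActor(actorID, target) // snapshot, transfer, resume
//         }
//     }
// }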
// publishClusterMessage sends a message to the other cluster nodes
func (dvm *DistributedVM) publishClusterMessage(msg ClusterMessage) error {
    data, err := json.Marshal(msg)
    if err != nil {
        return err
    }

    subject := fmt.Sprintf("aether.distributed.%s", msg.Type)
    return dvm.natsConn.Publish(subject, data)
}

// GetClusterInfo returns information about the cluster state
func (dvm *DistributedVM) GetClusterInfo() map[string]interface{} {
    nodes := dvm.cluster.GetNodes()

    return map[string]interface{}{
        "nodeId":    dvm.nodeID,
        "isLeader":  dvm.cluster.IsLeader(),
        "leader":    dvm.cluster.GetLeader(),
        "nodeCount": len(nodes),
        "nodes":     nodes,
    }
}

// GetActiveVMs returns a map of active VMs (implementation depends on the runtime)
func (dvr *DistributedVMRegistry) GetActiveVMs() map[string]interface{} {
    // This would need access to the actual runtime's VM registry;
    // return an empty map for now to avoid import cycles.
    return make(map[string]interface{})
}

// GetShard returns the shard number for the given actor ID
func (dvr *DistributedVMRegistry) GetShard(actorID string) int {
    return dvr.sharding.GetShard(actorID)
}
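The ShardManager and hash ring used above are defined elsewhere in cluster/ and are not part of this diff. As a rough sketch of the kind of mapping GetShard implies (an assumption, not the actual implementation), a hash-mod scheme would look like:

    package cluster

    import "hash/fnv"

    // shardFor is a hypothetical stand-in for ShardManager.GetShard: it
    // hashes the actor ID with FNV-1a and reduces it modulo the shard
    // count (1024 in NewDistributedVM above).
    func shardFor(actorID string, numShards int) int {
        h := fnv.New32a()
        h.Write([]byte(actorID))
        return int(h.Sum32()) % numShards
    }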