Initial aether repository structure
All checks were successful
CI / build (push) Successful in 1m13s
Distributed actor system with event sourcing for Go:

- event.go - Event, ActorSnapshot, EventStore interface
- eventbus.go - EventBus, EventBroadcaster for pub/sub
- nats_eventbus.go - NATS-backed cross-node event broadcasting
- store/ - InMemoryEventStore (testing), JetStreamEventStore (production)
- cluster/ - Node discovery, leader election, shard distribution
- model/ - EventStorming model types

Extracted from arcadia as open-source infrastructure component.

Co-Authored-By: Claude <noreply@anthropic.com>
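For orientation, a minimal sketch of how the cluster entry point added in this commit is meant to be used; the import path and the no-op Runtime stub are hypothetical stand-ins, since the real runtime lives in the main aether package:

    package main

    import (
        "log"

        "example.org/aether/cluster" // hypothetical import path
    )

    // noopRuntime is a hypothetical stand-in for the real aether runtime.
    type noopRuntime struct{}

    func (noopRuntime) Start() error                          { return nil }
    func (noopRuntime) LoadModel(model interface{}) error     { return nil }
    func (noopRuntime) SendMessage(message interface{}) error { return nil }

    func main() {
        dvm, err := cluster.NewDistributedVM("node-1",
            []string{"nats://127.0.0.1:4222"}, noopRuntime{})
        if err != nil {
            log.Fatal(err)
        }
        defer dvm.Stop()

        if err := dvm.Start(); err != nil {
            log.Fatal(err)
        }
        // From here, dvm.LoadModel(...) broadcasts a model to every node and
        // dvm.SendMessage(...) delivers through the local runtime.
    }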
cluster/distributed.go (new file, 221 lines)
@@ -0,0 +1,221 @@
package cluster

import (
    "context"
    "encoding/json"
    "fmt"
    "log"
    "strings"

    "github.com/nats-io/nats.go"
)
// DistributedVM manages a cluster of runtime nodes with VM-per-instance architecture
type DistributedVM struct {
    nodeID       string
    cluster      *ClusterManager
    localRuntime Runtime // Interface to avoid import cycles
    sharding     *ShardManager
    discovery    *NodeDiscovery
    natsConn     *nats.Conn
    ctx          context.Context
    cancel       context.CancelFunc
}

// Runtime interface to avoid import cycles with the main aether package
type Runtime interface {
    Start() error
    LoadModel(model interface{}) error
    SendMessage(message interface{}) error
}

// DistributedVMRegistry implements VMRegistry using DistributedVM's local runtime and sharding
type DistributedVMRegistry struct {
    runtime  interface{} // Runtime interface to avoid import cycles
    sharding *ShardManager
}
// NewDistributedVM creates a distributed VM runtime cluster node
func NewDistributedVM(nodeID string, natsURLs []string, localRuntime Runtime) (*DistributedVM, error) {
    if len(natsURLs) == 0 {
        return nil, fmt.Errorf("at least one NATS URL is required")
    }

    ctx, cancel := context.WithCancel(context.Background())

    // Connect to the NATS cluster. nats.Connect accepts a comma-separated
    // list of server URLs, so all provided seeds are used.
    natsConn, err := nats.Connect(strings.Join(natsURLs, ","),
        nats.Name(fmt.Sprintf("aether-runtime-%s", nodeID)))
    if err != nil {
        cancel()
        return nil, fmt.Errorf("failed to connect to NATS: %w", err)
    }

    // Create cluster components
    discovery := NewNodeDiscovery(nodeID, natsConn, ctx)
    sharding := NewShardManager(1024, 3) // 1024 shards, 3 replicas
    cluster, err := NewClusterManager(nodeID, natsConn, ctx)
    if err != nil {
        cancel()
        natsConn.Close()
        return nil, fmt.Errorf("failed to create cluster manager: %w", err)
    }

    dvm := &DistributedVM{
        nodeID:       nodeID,
        cluster:      cluster,
        localRuntime: localRuntime,
        sharding:     sharding,
        discovery:    discovery,
        natsConn:     natsConn,
        ctx:          ctx,
        cancel:       cancel,
    }

    // Create the VM registry and connect it to the cluster manager
    vmRegistry := &DistributedVMRegistry{
        runtime:  localRuntime,
        sharding: sharding,
    }
    cluster.SetVMRegistry(vmRegistry)

    return dvm, nil
}
// Start begins the distributed VM cluster node
func (dvm *DistributedVM) Start() error {
    // Start the local runtime
    if err := dvm.localRuntime.Start(); err != nil {
        return fmt.Errorf("failed to start local runtime: %w", err)
    }

    // Start cluster services
    go dvm.discovery.Start()
    go dvm.cluster.Start()

    // Start message routing
    go dvm.startMessageRouting()

    return nil
}

// Stop gracefully shuts down the distributed VM node
func (dvm *DistributedVM) Stop() {
    dvm.cancel()
    dvm.cluster.Stop()
    dvm.discovery.Stop()
    dvm.natsConn.Close()
}
// LoadModel distributes an EventStorming model across the cluster with VM templates
func (dvm *DistributedVM) LoadModel(model interface{}) error {
    // Load the model locally first
    if err := dvm.localRuntime.LoadModel(model); err != nil {
        return fmt.Errorf("failed to load model locally: %w", err)
    }

    // Broadcast the model to the other cluster nodes
    msg := ClusterMessage{
        Type:    "load_model",
        From:    dvm.nodeID,
        To:      "broadcast",
        Payload: model,
    }

    return dvm.publishClusterMessage(msg)
}
// SendMessage routes messages across the distributed cluster
func (dvm *DistributedVM) SendMessage(message interface{}) error {
    // Simplified implementation: in practice this would determine the
    // target node from the sharding layer and route the message there.
    return dvm.localRuntime.SendMessage(message)
}
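// NOTE: one possible shape for the sharded routing path described above
// (sketch only; it assumes the caller can supply the target actor's ID,
// which the Runtime interface does not yet expose):
//
// func (dvm *DistributedVM) routeToActor(actorID string, message interface{}) error {
//     if dvm.IsLocalActor(actorID) {
//         return dvm.localRuntime.SendMessage(message)
//     }
//     return dvm.publishClusterMessage(ClusterMessage{
//         Type:    "route_message",
//         From:    dvm.nodeID,
//         To:      dvm.GetActorNode(actorID),
//         Payload: message,
//     })
// }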
// GetActorNode determines which node should handle a specific actor
func (dvm *DistributedVM) GetActorNode(actorID string) string {
    // Use consistent hashing to determine the target node
    return dvm.cluster.hashRing.GetNode(actorID)
}

// IsLocalActor checks whether an actor should be handled by this node
func (dvm *DistributedVM) IsLocalActor(actorID string) bool {
    return dvm.GetActorNode(actorID) == dvm.nodeID
}

// GetActorsInShard returns the actors that belong to a specific shard on this node
func (dvm *DistributedVM) GetActorsInShard(shardID int) []string {
    return dvm.cluster.GetActorsInShard(shardID)
}
// startMessageRouting begins routing messages between cluster nodes
func (dvm *DistributedVM) startMessageRouting() {
    // Subscribe to cluster messages
    if _, err := dvm.natsConn.Subscribe("aether.distributed.*", dvm.handleClusterMessage); err != nil {
        log.Printf("failed to subscribe to cluster messages: %v", err)
    }
}

// handleClusterMessage processes incoming cluster coordination messages
func (dvm *DistributedVM) handleClusterMessage(msg *nats.Msg) {
    var clusterMsg ClusterMessage
    if err := json.Unmarshal(msg.Data, &clusterMsg); err != nil {
        log.Printf("dropping malformed cluster message on %s: %v", msg.Subject, err)
        return
    }

    // Ignore our own broadcasts: by default NATS delivers published
    // messages back to subscribers on the same connection.
    if clusterMsg.From == dvm.nodeID {
        return
    }

    switch clusterMsg.Type {
    case "load_model":
        // Handle model loading from other nodes
        if model := clusterMsg.Payload; model != nil {
            if err := dvm.localRuntime.LoadModel(model); err != nil {
                log.Printf("failed to load broadcast model: %v", err)
            }
        }

    case "route_message":
        // Handle message routing from other nodes
        if message := clusterMsg.Payload; message != nil {
            if err := dvm.localRuntime.SendMessage(message); err != nil {
                log.Printf("failed to deliver routed message: %v", err)
            }
        }

    case "rebalance":
        // Handle shard rebalancing requests
        dvm.handleRebalanceRequest(clusterMsg)
    }
}
// handleRebalanceRequest processes shard rebalancing requests
func (dvm *DistributedVM) handleRebalanceRequest(msg ClusterMessage) {
    // Simplified rebalancing logic: in practice this would implement
    // actor migration between nodes.
}
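// NOTE: one possible shape for that migration pass (sketch only; migrateActor
// is a hypothetical helper this file does not define):
//
// for shardID := 0; shardID < 1024; shardID++ {
//     for _, actorID := range dvm.GetActorsInShard(shardID) {
//         if target := dvm.GetActorNode(actorID); target != dvm.nodeID {
//             dvm.migrateActor(actorID, target) // snapshot, transfer, resume
//         }
//     }
// }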
// publishClusterMessage sends a message to the other cluster nodes
func (dvm *DistributedVM) publishClusterMessage(msg ClusterMessage) error {
    data, err := json.Marshal(msg)
    if err != nil {
        return err
    }

    subject := fmt.Sprintf("aether.distributed.%s", msg.Type)
    return dvm.natsConn.Publish(subject, data)
}

// GetClusterInfo returns information about the cluster state
func (dvm *DistributedVM) GetClusterInfo() map[string]interface{} {
    nodes := dvm.cluster.GetNodes()

    return map[string]interface{}{
        "nodeId":    dvm.nodeID,
        "isLeader":  dvm.cluster.IsLeader(),
        "leader":    dvm.cluster.GetLeader(),
        "nodeCount": len(nodes),
        "nodes":     nodes,
    }
}

// GetActiveVMs returns a map of active VMs (implementation depends on the runtime)
func (dvr *DistributedVMRegistry) GetActiveVMs() map[string]interface{} {
    // This would need access to the actual runtime's VM registry;
    // return an empty map for now to avoid import cycles.
    return make(map[string]interface{})
}

// GetShard returns the shard number for the given actor ID
func (dvr *DistributedVMRegistry) GetShard(actorID string) int {
    return dvr.sharding.GetShard(actorID)
}
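The ShardManager and hash ring used above are defined elsewhere in cluster/ and are not part of this diff. As a rough sketch of the kind of mapping GetShard implies (an assumption, not the actual implementation), a hash-mod scheme would look like:

    package cluster

    import "hash/fnv"

    // shardFor is a hypothetical stand-in for ShardManager.GetShard: it
    // hashes the actor ID with FNV-1a and reduces it modulo the shard
    // count (1024 in NewDistributedVM above).
    func shardFor(actorID string, numShards int) int {
        h := fnv.New32a()
        h.Write([]byte(actorID))
        return int(h.Sum32()) % numShards
    }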