diff --git a/cluster/shard_test.go b/cluster/shard_test.go
new file mode 100644
index 0000000..1128f23
--- /dev/null
+++ b/cluster/shard_test.go
@@ -0,0 +1,713 @@
+package cluster
+
+import (
+	"fmt"
+	"testing"
+)
+
+func TestNewShardManager(t *testing.T) {
+	sm := NewShardManager(16, 3)
+
+	if sm == nil {
+		t.Fatal("NewShardManager returned nil")
+	}
+	if sm.shardCount != 16 {
+		t.Errorf("expected shardCount 16, got %d", sm.shardCount)
+	}
+	if sm.replication != 3 {
+		t.Errorf("expected replication 3, got %d", sm.replication)
+	}
+	if sm.shardMap == nil {
+		t.Error("shardMap is nil")
+	}
+	if sm.placement == nil {
+		t.Error("placement strategy is nil")
+	}
+}
+
+func TestNewShardManager_DefaultsForZeroValues(t *testing.T) {
+	sm := NewShardManagerWithConfig(ShardConfig{})
+
+	if sm.shardCount != DefaultNumShards {
+		t.Errorf("expected default shardCount %d, got %d", DefaultNumShards, sm.shardCount)
+	}
+	if sm.replication != 1 {
+		t.Errorf("expected default replication 1, got %d", sm.replication)
+	}
+}
+
+func TestNewShardManagerWithConfig_CustomValues(t *testing.T) {
+	config := ShardConfig{
+		ShardCount:        256,
+		ReplicationFactor: 2,
+	}
+	sm := NewShardManagerWithConfig(config)
+
+	if sm.shardCount != 256 {
+		t.Errorf("expected shardCount 256, got %d", sm.shardCount)
+	}
+	if sm.replication != 2 {
+		t.Errorf("expected replication 2, got %d", sm.replication)
+	}
+}
+
+func TestGetShard_ReturnsCorrectShardForActor(t *testing.T) {
+	sm := NewShardManager(16, 1)
+
+	// Test that GetShard returns consistent results
+	actorID := "actor-123"
+	shard1 := sm.GetShard(actorID)
+	shard2 := sm.GetShard(actorID)
+
+	if shard1 != shard2 {
+		t.Errorf("GetShard not consistent: got %d and %d for same actor", shard1, shard2)
+	}
+
+	// Verify shard is within valid range
+	if shard1 < 0 || shard1 >= 16 {
+		t.Errorf("shard %d is out of range [0, 16)", shard1)
+	}
+}
+
+func TestGetShard_DifferentActorsCanMapToDifferentShards(t *testing.T) {
+	sm := NewShardManager(16, 1)
+
+	// With enough actors, we should see different shards
+	shardsSeen := make(map[int]bool)
+	for i := 0; i < 100; i++ {
+		actorID := fmt.Sprintf("actor-%d", i)
+		shard := sm.GetShard(actorID)
+		shardsSeen[shard] = true
+	}
+
+	// We should see multiple different shards
+	if len(shardsSeen) < 2 {
+		t.Errorf("expected multiple different shards, got %d unique shards", len(shardsSeen))
+	}
+}
+
+func TestGetShard_DistributesActorsAcrossShards(t *testing.T) {
+	sm := NewShardManager(16, 1)
+
+	distribution := make(map[int]int)
+	numActors := 1000
+
+	for i := 0; i < numActors; i++ {
+		actorID := fmt.Sprintf("actor-%d", i)
+		shard := sm.GetShard(actorID)
+		distribution[shard]++
+	}
+
+	// Verify all shards are within valid range
+	for shard := range distribution {
+		if shard < 0 || shard >= 16 {
+			t.Errorf("shard %d is out of range [0, 16)", shard)
+		}
+	}
+
+	// With good hashing, we should see fairly even distribution
+	expectedPerShard := numActors / 16
+	for shard, count := range distribution {
+		deviation := float64(count-expectedPerShard) / float64(expectedPerShard)
+		if deviation > 0.5 || deviation < -0.5 {
+			t.Logf("shard %d has %d actors (%.1f%% deviation)", shard, count, deviation*100)
+		}
+	}
+}
+
+func TestGetShardNodes_EmptyShard(t *testing.T) {
+	sm := NewShardManager(16, 1)
+
+	nodes := sm.GetShardNodes(0)
+
+	if nodes == nil {
+		t.Error("GetShardNodes returned nil, expected empty slice")
+	}
+	if len(nodes) != 0 {
+		t.Errorf("expected empty slice for unassigned shard, got %v", nodes)
+	}
+}
+
+func TestGetShardNodes_ReturnsAssignedNodes(t *testing.T) {
+	sm := NewShardManager(16, 3)
+
+	// Assign nodes to shard
+	sm.AssignShard(0, []string{"node-1", "node-2", "node-3"})
+
+	nodes := sm.GetShardNodes(0)
+
+	if len(nodes) != 3 {
+		t.Errorf("expected 3 nodes, got %d", len(nodes))
+	}
+	if nodes[0] != "node-1" || nodes[1] != "node-2" || nodes[2] != "node-3" {
+		t.Errorf("unexpected nodes: %v", nodes)
+	}
+}
+
+func TestGetShardNodes_NonExistentShard(t *testing.T) {
+	sm := NewShardManager(16, 1)
+
+	// Query a shard that has no assignments
+	nodes := sm.GetShardNodes(999)
+
+	if len(nodes) != 0 {
+		t.Errorf("expected empty slice for non-existent shard, got %v", nodes)
+	}
+}
+
+func TestAssignShard_CreatesNewAssignment(t *testing.T) {
+	sm := NewShardManager(16, 1)
+
+	sm.AssignShard(5, []string{"node-a"})
+
+	nodes := sm.GetShardNodes(5)
+	if len(nodes) != 1 || nodes[0] != "node-a" {
+		t.Errorf("expected [node-a], got %v", nodes)
+	}
+}
+
+func TestAssignShard_UpdatesExistingAssignment(t *testing.T) {
+	sm := NewShardManager(16, 1)
+
+	sm.AssignShard(5, []string{"node-a"})
+	sm.AssignShard(5, []string{"node-b", "node-c"})
+
+	nodes := sm.GetShardNodes(5)
+	if len(nodes) != 2 {
+		t.Errorf("expected 2 nodes, got %d", len(nodes))
+	}
+	if nodes[0] != "node-b" || nodes[1] != "node-c" {
+		t.Errorf("expected [node-b, node-c], got %v", nodes)
+	}
+}
+
+func TestAssignShard_MultipleShards(t *testing.T) {
+	sm := NewShardManager(16, 1)
+
+	sm.AssignShard(0, []string{"node-1"})
+	sm.AssignShard(1, []string{"node-2"})
+	sm.AssignShard(2, []string{"node-3"})
+
+	if nodes := sm.GetShardNodes(0); len(nodes) != 1 || nodes[0] != "node-1" {
+		t.Errorf("shard 0: expected [node-1], got %v", nodes)
+	}
+	if nodes := sm.GetShardNodes(1); len(nodes) != 1 || nodes[0] != "node-2" {
+		t.Errorf("shard 1: expected [node-2], got %v", nodes)
+	}
+	if nodes := sm.GetShardNodes(2); len(nodes) != 1 || nodes[0] != "node-3" {
+		t.Errorf("shard 2: expected [node-3], got %v", nodes)
+	}
+}
+
+func TestGetPrimaryNode(t *testing.T) {
+	sm := NewShardManager(16, 3)
+
+	sm.AssignShard(0, []string{"primary", "replica1", "replica2"})
+
+	primary := sm.GetPrimaryNode(0)
+	if primary != "primary" {
+		t.Errorf("expected 'primary', got %q", primary)
+	}
+}
+
+func TestGetPrimaryNode_EmptyShard(t *testing.T) {
+	sm := NewShardManager(16, 1)
+
+	primary := sm.GetPrimaryNode(0)
+	if primary != "" {
+		t.Errorf("expected empty string for unassigned shard, got %q", primary)
+	}
+}
+
+func TestGetReplicaNodes(t *testing.T) {
+	sm := NewShardManager(16, 3)
+
+	sm.AssignShard(0, []string{"primary", "replica1", "replica2"})
+
+	replicas := sm.GetReplicaNodes(0)
+	if len(replicas) != 2 {
+		t.Errorf("expected 2 replicas, got %d", len(replicas))
+	}
+	if replicas[0] != "replica1" || replicas[1] != "replica2" {
+		t.Errorf("expected [replica1, replica2], got %v", replicas)
+	}
+}
+
+func TestGetReplicaNodes_SingleNode(t *testing.T) {
+	sm := NewShardManager(16, 1)
+
+	sm.AssignShard(0, []string{"only-node"})
+
+	replicas := sm.GetReplicaNodes(0)
+	if len(replicas) != 0 {
+		t.Errorf("expected no replicas for single-node shard, got %v", replicas)
+	}
+}
+
+func TestGetReplicaNodes_EmptyShard(t *testing.T) {
+	sm := NewShardManager(16, 1)
+
+	replicas := sm.GetReplicaNodes(0)
+	if len(replicas) != 0 {
+		t.Errorf("expected empty slice for unassigned shard, got %v", replicas)
+	}
+}
+
+func TestPlaceActor_NoNodes(t *testing.T) {
+	sm := NewShardManager(16, 1)
+
+	_, err := sm.PlaceActor("actor-1", map[string]*NodeInfo{})
+
+	if err == nil {
+		t.Error("expected error when no nodes available")
+	}
+}
+
+func TestPlaceActor_SingleNode(t *testing.T) {
+	sm := NewShardManager(16, 1)
+
+	nodes := map[string]*NodeInfo{
+		"node-1": {ID: "node-1", Status: NodeStatusActive},
+	}
+
+	nodeID, err := sm.PlaceActor("actor-1", nodes)
+
+	if err != nil {
+		t.Errorf("unexpected error: %v", err)
+	}
+	if nodeID != "node-1" {
+		t.Errorf("expected node-1, got %q", nodeID)
+	}
+}
+
+func TestPlaceActor_ReturnsValidNode(t *testing.T) {
+	sm := NewShardManager(16, 1)
+
+	nodes := map[string]*NodeInfo{
+		"node-1": {ID: "node-1", Status: NodeStatusActive},
+		"node-2": {ID: "node-2", Status: NodeStatusActive},
+		"node-3": {ID: "node-3", Status: NodeStatusActive},
+	}
+
+	// PlaceActor should always return one of the available nodes
+	for i := 0; i < 100; i++ {
+		nodeID, err := sm.PlaceActor(fmt.Sprintf("actor-%d", i), nodes)
+		if err != nil {
+			t.Errorf("unexpected error: %v", err)
+		}
+		if _, exists := nodes[nodeID]; !exists {
+			t.Errorf("PlaceActor returned invalid node: %q", nodeID)
+		}
+	}
+}
+func TestPlaceActor_DistributesAcrossNodes(t *testing.T) {
+	sm := NewShardManager(16, 1)
+
+	nodes := map[string]*NodeInfo{
+		"node-1": {ID: "node-1", Status: NodeStatusActive},
+		"node-2": {ID: "node-2", Status: NodeStatusActive},
+		"node-3": {ID: "node-3", Status: NodeStatusActive},
+	}
+
+	distribution := make(map[string]int)
+	for i := 0; i < 100; i++ {
+		nodeID, _ := sm.PlaceActor(fmt.Sprintf("actor-%d", i), nodes)
+		distribution[nodeID]++
+	}
+
+	// Should use multiple nodes
+	if len(distribution) < 2 {
+		t.Errorf("expected distribution across multiple nodes, got %v", distribution)
+	}
+}
+
+func TestUpdateShardMap(t *testing.T) {
+	sm := NewShardManager(16, 1)
+
+	newMap := &ShardMap{
+		Version: 5,
+		Shards: map[int][]string{
+			0: {"node-a", "node-b"},
+			1: {"node-c"},
+		},
+		Nodes: map[string]NodeInfo{
+			"node-a": {ID: "node-a"},
+			"node-b": {ID: "node-b"},
+			"node-c": {ID: "node-c"},
+		},
+	}
+
+	sm.UpdateShardMap(newMap)
+
+	result := sm.GetShardMap()
+	if result.Version != 5 {
+		t.Errorf("expected version 5, got %d", result.Version)
+	}
+	if len(result.Shards[0]) != 2 {
+		t.Errorf("expected 2 nodes for shard 0, got %d", len(result.Shards[0]))
+	}
+}
+
+func TestGetShardMap_ReturnsDeepCopy(t *testing.T) {
+	sm := NewShardManager(16, 1)
+
+	sm.AssignShard(0, []string{"node-1", "node-2"})
+
+	copy1 := sm.GetShardMap()
+	copy2 := sm.GetShardMap()
+
+	// Modify copy1
+	copy1.Shards[0][0] = "modified"
+	copy1.Version = 999
+
+	// copy2 should be unaffected
+	if copy2.Shards[0][0] == "modified" {
+		t.Error("GetShardMap did not return a deep copy (shard nodes modified)")
+	}
+	if copy2.Version == 999 {
+		t.Error("GetShardMap did not return a deep copy (version modified)")
+	}
+
+	// Original should be unaffected
+	nodes := sm.GetShardNodes(0)
+	if nodes[0] == "modified" {
+		t.Error("original shard map was modified through copy")
+	}
+}
+
+func TestGetShardCount(t *testing.T) {
+	sm := NewShardManager(64, 1)
+
+	if sm.GetShardCount() != 64 {
+		t.Errorf("expected 64, got %d", sm.GetShardCount())
+	}
+}
+
+func TestGetReplicationFactor(t *testing.T) {
+	sm := NewShardManager(16, 3)
+
+	if sm.GetReplicationFactor() != 3 {
+		t.Errorf("expected 3, got %d", sm.GetReplicationFactor())
+	}
+}
+
+func TestRebalanceShards_NoPlacementStrategy(t *testing.T) {
+	sm := NewShardManager(16, 1)
+	sm.placement = nil // Remove placement strategy
+
+	_, err := sm.RebalanceShards(map[string]*NodeInfo{})
+
+	if err == nil {
+		t.Error("expected error when no placement strategy configured")
+	}
+}
+
+func TestRebalanceShards_WithNodes(t *testing.T) {
+	sm := NewShardManager(16, 1)
+
+	nodes := map[string]*NodeInfo{
+		"node-1": {ID: "node-1", Status: NodeStatusActive},
+		"node-2": {ID: "node-2", Status: NodeStatusActive},
+	}
+
+	result, err := sm.RebalanceShards(nodes)
+
+	if err != nil {
+		t.Errorf("unexpected error: %v", err)
+	}
+	if result == nil {
+		t.Error("expected non-nil result")
+	}
+}
+
+// Test shard assignment with node failures
+func TestShardAssignment_NodeFailure(t *testing.T) {
+	sm := NewShardManager(16, 3)
+
+	// Initial assignment with 3 replicas
+	sm.AssignShard(0, []string{"node-1", "node-2", "node-3"})
+
+	// Simulate node failure by reassigning without the failed node
+	sm.AssignShard(0, []string{"node-1", "node-3"})
+
+	nodes := sm.GetShardNodes(0)
+	if len(nodes) != 2 {
+		t.Errorf("expected 2 nodes after failure, got %d", len(nodes))
+	}
+
+	// Verify primary is still correct
+	primary := sm.GetPrimaryNode(0)
+	if primary != "node-1" {
+		t.Errorf("expected node-1 as primary, got %q", primary)
+	}
+
+	// Verify replica count
+	replicas := sm.GetReplicaNodes(0)
+	if len(replicas) != 1 || replicas[0] != "node-3" {
+		t.Errorf("expected [node-3] as replicas, got %v", replicas)
+	}
+}
+
+func TestShardAssignment_AllNodesFailExceptOne(t *testing.T) {
+	sm := NewShardManager(16, 3)
+
+	sm.AssignShard(0, []string{"node-1", "node-2", "node-3"})
+
+	// Simulate all but one node failing
+	sm.AssignShard(0, []string{"node-3"})
+
+	nodes := sm.GetShardNodes(0)
+	if len(nodes) != 1 || nodes[0] != "node-3" {
+		t.Errorf("expected [node-3], got %v", nodes)
+	}
+
+	primary := sm.GetPrimaryNode(0)
+	if primary != "node-3" {
+		t.Errorf("expected node-3 as primary, got %q", primary)
+	}
+
+	replicas := sm.GetReplicaNodes(0)
+	if len(replicas) != 0 {
+		t.Errorf("expected no replicas, got %v", replicas)
+	}
+}
+
+// Test replication factor is respected
+func TestReplicationFactor_Respected(t *testing.T) {
+	sm := NewShardManager(16, 3)
+
+	if sm.GetReplicationFactor() != 3 {
+		t.Errorf("expected replication factor 3, got %d", sm.GetReplicationFactor())
+	}
+
+	// Assign with exactly the replication factor
+	sm.AssignShard(0, []string{"node-1", "node-2", "node-3"})
+
+	nodes := sm.GetShardNodes(0)
+	if len(nodes) != 3 {
+		t.Errorf("expected 3 nodes matching replication factor, got %d", len(nodes))
+	}
+}
+
+func TestReplicationFactor_CanExceed(t *testing.T) {
+	// Note: ShardManager doesn't enforce max replication, it just tracks what's assigned
+	sm := NewShardManager(16, 2)
+
+	// Assign more nodes than replication factor
+	sm.AssignShard(0, []string{"node-1", "node-2", "node-3", "node-4"})
+
+	nodes := sm.GetShardNodes(0)
+	if len(nodes) != 4 {
+		t.Errorf("expected 4 nodes, got %d", len(nodes))
+	}
+}
+
+func TestReplicationFactor_LessThanFactor(t *testing.T) {
+	sm := NewShardManager(16, 3)
+
+	// Assign fewer nodes than replication factor (possible during degraded state)
+	sm.AssignShard(0, []string{"node-1"})
+
+	nodes := sm.GetShardNodes(0)
+	if len(nodes) != 1 {
+		t.Errorf("expected 1 node, got %d", len(nodes))
+	}
+
+	// System should track that we're under-replicated
+	// (in practice, cluster manager would handle this)
+}
+
+// Mock VM registry for testing GetActorsInShard
+type mockVMRegistry struct {
+	activeVMs map[string]VirtualMachine
+}
+
+func (m *mockVMRegistry) GetActiveVMs() map[string]VirtualMachine {
+	return m.activeVMs
+}
+
+func (m *mockVMRegistry) GetShard(actorID string) int {
+	// This would use the same logic as ShardManager
+	return 0
+}
+
+type mockVM struct {
+	id      string
+	actorID string
+	state   VMState
+}
+
+func (m *mockVM) GetID() string      { return m.id }
+func (m *mockVM) GetActorID() string { return m.actorID }
+func (m *mockVM) GetState() VMState  { return m.state }
+
+func TestGetActorsInShard_NilRegistry(t *testing.T) {
+	sm := NewShardManager(16, 1)
+
+	actors := sm.GetActorsInShard(0, "node-1", nil)
+
+	if len(actors) != 0 {
+		t.Errorf("expected empty slice for nil registry, got %v", actors)
+	}
+}
+
+func TestGetActorsInShard_WithActors(t *testing.T) {
+	sm := NewShardManager(16, 1)
+
+	// Create mock VMs - need to find actors that map to the same shard
+	// First, find some actor IDs that map to shard 0
+	var actorsInShard0 []string
+	for i := 0; i < 100; i++ {
+		actorID := fmt.Sprintf("actor-%d", i)
+		if sm.GetShard(actorID) == 0 {
+			actorsInShard0 = append(actorsInShard0, actorID)
+			if len(actorsInShard0) >= 3 {
+				break
+			}
+		}
+	}
+
+	activeVMs := make(map[string]VirtualMachine)
+	for _, actorID := range actorsInShard0 {
+		activeVMs[actorID] = &mockVM{
+			id:      "vm-" + actorID,
+			actorID: actorID,
+			state:   VMStateRunning,
+		}
+	}
+
+	registry := &mockVMRegistry{activeVMs: activeVMs}
+
+	actors := sm.GetActorsInShard(0, "node-1", registry)
+
+	if len(actors) != len(actorsInShard0) {
+		t.Errorf("expected %d actors in shard 0, got %d", len(actorsInShard0), len(actors))
+	}
+}
+
+func TestGetActorsInShard_EmptyRegistry(t *testing.T) {
+	sm := NewShardManager(16, 1)
+
+	registry := &mockVMRegistry{activeVMs: make(map[string]VirtualMachine)}
+
+	actors := sm.GetActorsInShard(0, "node-1", registry)
+
+	if len(actors) != 0 {
+		t.Errorf("expected empty slice for empty registry, got %v", actors)
+	}
+}
+
+// Tests for ConsistentHashPlacement
+func TestConsistentHashPlacement_PlaceActor_NoNodes(t *testing.T) {
+	placement := &ConsistentHashPlacement{}
+	shardMap := &ShardMap{}
+
+	_, err := placement.PlaceActor("actor-1", shardMap, map[string]*NodeInfo{})
+
+	if err == nil {
+		t.Error("expected error when no nodes available")
+	}
+}
+
+func TestConsistentHashPlacement_PlaceActor_SingleNode(t *testing.T) {
+	placement := &ConsistentHashPlacement{}
+	shardMap := &ShardMap{}
+	nodes := map[string]*NodeInfo{
+		"node-1": {ID: "node-1"},
+	}
+
+	nodeID, err := placement.PlaceActor("actor-1", shardMap, nodes)
+
+	if err != nil {
+		t.Errorf("unexpected error: %v", err)
+	}
+	if nodeID != "node-1" {
+		t.Errorf("expected node-1, got %q", nodeID)
+	}
+}
+
+func TestConsistentHashPlacement_PlaceActor_ReturnsValidNode(t *testing.T) {
+	placement := &ConsistentHashPlacement{}
+	shardMap := &ShardMap{}
+	nodes := map[string]*NodeInfo{
+		"node-1": {ID: "node-1"},
+		"node-2": {ID: "node-2"},
+		"node-3": {ID: "node-3"},
+	}
+
+	// PlaceActor should always return one of the available nodes
+	for i := 0; i < 100; i++ {
+		nodeID, err := placement.PlaceActor(fmt.Sprintf("actor-%d", i), shardMap, nodes)
+		if err != nil {
+			t.Errorf("unexpected error: %v", err)
+		}
+		if _, exists := nodes[nodeID]; !exists {
+			t.Errorf("PlaceActor returned invalid node: %q", nodeID)
+		}
+	}
+}
+func TestConsistentHashPlacement_RebalanceShards(t *testing.T) {
+	placement := &ConsistentHashPlacement{}
+	currentMap := &ShardMap{
+		Version: 1,
+		Shards:  map[int][]string{0: {"node-1"}},
+	}
+	nodes := map[string]*NodeInfo{
+		"node-1": {ID: "node-1"},
+		"node-2": {ID: "node-2"},
+	}
+
+	result, err := placement.RebalanceShards(currentMap, nodes)
+
+	if err != nil {
+		t.Errorf("unexpected error: %v", err)
+	}
+	// Current implementation returns unchanged map
+	if result != currentMap {
+		t.Error("expected same map returned (simplified implementation)")
+	}
+}
+
+// Benchmark tests
+func BenchmarkGetShard(b *testing.B) {
+	sm := NewShardManager(1024, 1)
+
+	actorIDs := make([]string, 1000)
+	for i := range actorIDs {
+		actorIDs[i] = fmt.Sprintf("actor-%d", i)
+	}
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		sm.GetShard(actorIDs[i%len(actorIDs)])
+	}
+}
+
+func BenchmarkAssignShard(b *testing.B) {
+	sm := NewShardManager(1024, 1)
+	nodes := []string{"node-1", "node-2", "node-3"}
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		sm.AssignShard(i%1024, nodes)
+	}
+}
+
+func BenchmarkPlaceActor(b *testing.B) {
+	sm := NewShardManager(1024, 1)
+	nodes := map[string]*NodeInfo{
+		"node-1": {ID: "node-1"},
+		"node-2": {ID: "node-2"},
+		"node-3": {ID: "node-3"},
+	}
+
+	actorIDs := make([]string, 1000)
+	for i := range actorIDs {
+		actorIDs[i] = fmt.Sprintf("actor-%d", i)
+	}
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		sm.PlaceActor(actorIDs[i%len(actorIDs)], nodes)
+	}
+}
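
Review note: shard.go itself is not part of this diff, so the tests above are the only spec for GetShard here. They pin down three properties: the same actor ID always maps to the same shard, the result lies in [0, shardCount), and distinct IDs spread roughly evenly. A minimal sketch that satisfies all three, assuming an FNV-1a hash (the real implementation may hash differently; shardFor is a hypothetical stand-in, not the actual method):

package cluster

import "hash/fnv"

// shardFor maps an actor ID to a shard index in [0, shardCount).
// Stable per actorID, so repeated calls agree, as the tests require.
func shardFor(actorID string, shardCount int) int {
	h := fnv.New32a()
	// hash.Hash's Write never returns an error, so it is safe to ignore.
	h.Write([]byte(actorID))
	return int(h.Sum32() % uint32(shardCount))
}

Any stable hash reduced modulo the shard count would pass these tests; the modulo is what keeps the result in range, and hash quality is what the distribution test loosely probes.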