From efb06f37f7e5452984e625740de11de52611ade4 Mon Sep 17 00:00:00 2001 From: "Matthew R. Kasun" Date: Mon, 13 Jun 2022 15:19:25 -0400 Subject: [PATCH 1/4] only check for zombies on same network --- logic/nodes.go | 3 +- logic/zombie.go | 96 +++++++++++++++++++++++++++++++++++++++++++++++++ main.go | 2 ++ 3 files changed, 100 insertions(+), 1 deletion(-) create mode 100644 logic/zombie.go diff --git a/logic/nodes.go b/logic/nodes.go index 23838842..dc2659ce 100644 --- a/logic/nodes.go +++ b/logic/nodes.go @@ -213,7 +213,7 @@ func DeleteNodeByID(node *models.Node, exterminate bool) error { // ignoring for now, could hit a nil pointer if delete called twice logger.Log(2, "attempted to remove node ACL for node", node.Name, node.ID) } - + removeZombie <- node.ID return removeLocalServer(node) } @@ -313,6 +313,7 @@ func CreateNode(node *models.Node) error { if err != nil { return err } + CheckZombies(node) nodebytes, err := json.Marshal(&node) if err != nil { diff --git a/logic/zombie.go b/logic/zombie.go new file mode 100644 index 00000000..badd7d1d --- /dev/null +++ b/logic/zombie.go @@ -0,0 +1,96 @@ +package logic + +import ( + "context" + "time" + + "github.com/gravitl/netmaker/logger" + "github.com/gravitl/netmaker/models" +) + +const ( + ZOMBIE_TIMEOUT = 60 // timeout in seconds + ZOMBIE_DELETE_TIME = 10 // minutes +) + +var ( + zombies []string + removeZombie chan string = make(chan (string)) + newZombie chan string = make(chan (string)) +) + +// CheckZombies - checks if new node has same macaddress as existing node +// if so, existing node is added to zombie node quarantine list +func CheckZombies(newnode *models.Node) { + nodes, err := GetNetworkNodes(newnode.Network) + if err != nil { + logger.Log(1, "Failed to retrieve network nodes", newnode.Network, err.Error()) + return + } + for _, node := range nodes { + if node.MacAddress == newnode.MacAddress { + newZombie <- node.ID + } + } +} + +// ManageZombies - goroutine which adds/removes/deletes nodes from the zombie node quarantine list +func ManageZombies(ctx context.Context) { + for { + select { + case <-ctx.Done(): + return + case id := <-newZombie: + logger.Log(1, "adding", id, "to zombie quaratine list") + zombies = append(zombies, id) + case id := <-removeZombie: + for i, zombie := range zombies { + if zombie == id { + logger.Log(1, "removing zombie from quaratine list", zombie) + zombies = append(zombies[:i], zombies[i+1:]...) + } + } + logger.Log(3, "no zombies found") + case <-time.After(time.Second * ZOMBIE_TIMEOUT): + for i, zombie := range zombies { + node, err := GetNodeByID(zombie) + if err != nil { + logger.Log(1, "error retrieving zombie node", zombie, err.Error()) + continue + } + if time.Since(time.Unix(node.LastCheckIn, 0)) > time.Minute*ZOMBIE_DELETE_TIME { + if err := DeleteNodeByID(&node, true); err != nil { + logger.Log(1, "error deleting zombie node", zombie, err.Error()) + } + logger.Log(1, "deleting zombie node", node.Name) + zombies = append(zombies[:i], zombies[i+1:]...) + } + } + } + } +} + +//InitializeZombies - populates the zombie quarantine list (should be called from initialization) +func InitalizeZombies() { + nodes, err := GetAllNodes() + if err != nil { + logger.Log(1, "failed to retrieve nodes", err.Error()) + return + } + for _, node := range nodes { + othernodes, err := GetNetworkNodes(node.Network) + if err != nil { + logger.Log(1, "failled to retrieve nodes for network", node.Network, err.Error()) + continue + } + for _, othernode := range othernodes { + if node.MacAddress == othernode.MacAddress { + if node.LastCheckIn > othernode.LastCheckIn { + zombies = append(zombies, othernode.ID) + } else { + zombies = append(zombies, node.ID) + } + } + } + } +} diff --git a/main.go b/main.go index 6eb7dc93..bb4a500b 100644 --- a/main.go +++ b/main.go @@ -127,6 +127,7 @@ func initialize() { // Client Mode Prereq Check logger.Log(0, "error occurred when notifying nodes of startup", err.Error()) } } + logic.InitalizeZombies() } func startControllers() { @@ -169,6 +170,7 @@ func runMessageQueue(wg *sync.WaitGroup) { var client = mq.SetupMQTT(false) // Set up the subscription listener ctx, cancel := context.WithCancel(context.Background()) go mq.Keepalive(ctx) + go logic.ManageZombies(ctx) quit := make(chan os.Signal, 1) signal.Notify(quit, syscall.SIGTERM, os.Interrupt) <-quit From d7a470b030cdc2217a0cf1eb60247fd8fc23a672 Mon Sep 17 00:00:00 2001 From: "Matthew R. Kasun" Date: Mon, 13 Jun 2022 15:27:49 -0400 Subject: [PATCH 2/4] skip yourself when checking zombies --- logic/zombie.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/logic/zombie.go b/logic/zombie.go index badd7d1d..0deeaaf3 100644 --- a/logic/zombie.go +++ b/logic/zombie.go @@ -84,6 +84,9 @@ func InitalizeZombies() { continue } for _, othernode := range othernodes { + if node.ID == othernode.ID { + continue + } if node.MacAddress == othernode.MacAddress { if node.LastCheckIn > othernode.LastCheckIn { zombies = append(zombies, othernode.ID) From 43c1cea2b00c59256be62d0e39df9bfd59517be7 Mon Sep 17 00:00:00 2001 From: "Matthew R. Kasun" Date: Tue, 14 Jun 2022 08:27:30 -0400 Subject: [PATCH 3/4] code review changes --- logic/zombie.go | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/logic/zombie.go b/logic/zombie.go index 0deeaaf3..d868ce92 100644 --- a/logic/zombie.go +++ b/logic/zombie.go @@ -9,8 +9,10 @@ import ( ) const ( - ZOMBIE_TIMEOUT = 60 // timeout in seconds - ZOMBIE_DELETE_TIME = 10 // minutes + // ZOMBIE_TIMEOUT - timeout in seconds for checking zombie status + ZOMBIE_TIMEOUT = 60 + // ZOMBIE_DELETE_TIME - timeout in minutes for zombie node deletion + ZOMBIE_DELETE_TIME = 10 ) var ( @@ -44,13 +46,17 @@ func ManageZombies(ctx context.Context) { logger.Log(1, "adding", id, "to zombie quaratine list") zombies = append(zombies, id) case id := <-removeZombie: + found := false for i, zombie := range zombies { if zombie == id { logger.Log(1, "removing zombie from quaratine list", zombie) zombies = append(zombies[:i], zombies[i+1:]...) + found = true } } - logger.Log(3, "no zombies found") + if !found { + logger.Log(3, "no zombies found") + } case <-time.After(time.Second * ZOMBIE_TIMEOUT): for i, zombie := range zombies { node, err := GetNodeByID(zombie) @@ -61,6 +67,7 @@ func ManageZombies(ctx context.Context) { if time.Since(time.Unix(node.LastCheckIn, 0)) > time.Minute*ZOMBIE_DELETE_TIME { if err := DeleteNodeByID(&node, true); err != nil { logger.Log(1, "error deleting zombie node", zombie, err.Error()) + continue } logger.Log(1, "deleting zombie node", node.Name) zombies = append(zombies[:i], zombies[i+1:]...) From b354ff9d8ab2ebacc4ed5a936061655e5df58f75 Mon Sep 17 00:00:00 2001 From: "Matthew R. Kasun" Date: Tue, 14 Jun 2022 08:27:30 -0400 Subject: [PATCH 4/4] code review changes --- logic/zombie.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/logic/zombie.go b/logic/zombie.go index d868ce92..bcfdebc2 100644 --- a/logic/zombie.go +++ b/logic/zombie.go @@ -77,7 +77,7 @@ func ManageZombies(ctx context.Context) { } } -//InitializeZombies - populates the zombie quarantine list (should be called from initialization) +// InitializeZombies - populates the zombie quarantine list (should be called from initialization) func InitalizeZombies() { nodes, err := GetAllNodes() if err != nil {