From cce7e30aebba1a9bc16856d830a19a5f8570d667 Mon Sep 17 00:00:00 2001
From: "Matthew R. Kasun"
Date: Mon, 13 Jun 2022 15:19:25 -0400
Subject: [PATCH] only check for zombies on same network

---
 logic/nodes.go  |   3 +-
 logic/zombie.go | 169 +++++++++++++++++++++++++++++++++++++++++++++++++
 main.go         |   2 ++
 3 files changed, 173 insertions(+), 1 deletion(-)
 create mode 100644 logic/zombie.go

diff --git a/logic/nodes.go b/logic/nodes.go
index 1377951a..218a7f23 100644
--- a/logic/nodes.go
+++ b/logic/nodes.go
@@ -213,7 +213,7 @@ func DeleteNodeByID(node *models.Node, exterminate bool) error {
 		// ignoring for now, could hit a nil pointer if delete called twice
 		logger.Log(2, "attempted to remove node ACL for node", node.Name, node.ID)
 	}
-
+	removeZombie <- node.ID
 	return removeLocalServer(node)
 }
 
@@ -313,6 +313,7 @@ func CreateNode(node *models.Node) error {
 	if err != nil {
 		return err
 	}
+	CheckZombies(node)
 
 	nodebytes, err := json.Marshal(&node)
 	if err != nil {
diff --git a/logic/zombie.go b/logic/zombie.go
new file mode 100644
index 00000000..badd7d1d
--- /dev/null
+++ b/logic/zombie.go
@@ -0,0 +1,169 @@
+package logic
+
+import (
+	"context"
+	"time"
+
+	"github.com/gravitl/netmaker/logger"
+	"github.com/gravitl/netmaker/models"
+)
+
+const (
+	// ZOMBIE_TIMEOUT - seconds between sweeps of the zombie quarantine list
+	ZOMBIE_TIMEOUT = 60
+	// ZOMBIE_DELETE_TIME - minutes without a check-in before a quarantined node is deleted
+	ZOMBIE_DELETE_TIME = 10
+)
+
+var (
+	// zombies - IDs of quarantined nodes; only ManageZombies touches it once that goroutine runs
+	zombies []string
+	// removeZombie - IDs of deleted nodes, sent by DeleteNodeByID
+	removeZombie = make(chan string)
+	// newZombie - IDs of nodes to quarantine, sent by CheckZombies
+	newZombie = make(chan string)
+)
+
+// CheckZombies - checks if new node has same macaddress as existing node
+// if so, existing node is added to zombie node quarantine list
+func CheckZombies(newnode *models.Node) {
+	nodes, err := GetNetworkNodes(newnode.Network)
+	if err != nil {
+		logger.Log(1, "Failed to retrieve network nodes", newnode.Network, err.Error())
+		return
+	}
+	for _, node := range nodes {
+		// never quarantine the node that is being registered
+		if node.ID == newnode.ID {
+			continue
+		}
+		if node.MacAddress == newnode.MacAddress {
+			newZombie <- node.ID
+		}
+	}
+}
+
+// ManageZombies - goroutine which adds/removes/deletes nodes from the zombie node quarantine list
+func ManageZombies(ctx context.Context) {
+	// a ticker keeps the sweep on schedule; time.After inside the loop is
+	// restarted by every add/remove, which can postpone the sweep indefinitely
+	sweep := time.NewTicker(time.Second * ZOMBIE_TIMEOUT)
+	defer sweep.Stop()
+	for {
+		select {
+		case <-ctx.Done():
+			return
+		case id := <-newZombie:
+			logger.Log(1, "adding", id, "to zombie quaratine list")
+			quarantineZombie(id)
+		case id := <-removeZombie:
+			if releaseZombie(id) {
+				logger.Log(1, "removing zombie from quaratine list", id)
+			} else {
+				logger.Log(3, "no zombies found")
+			}
+		case <-sweep.C:
+			reapZombies()
+		}
+	}
+}
+
+// quarantineZombie - adds id to the quarantine list unless it is already listed
+func quarantineZombie(id string) {
+	for _, zombie := range zombies {
+		if zombie == id {
+			return
+		}
+	}
+	zombies = append(zombies, id)
+}
+
+// releaseZombie - drops id from the quarantine list, reports whether it was listed
+func releaseZombie(id string) bool {
+	kept := zombies[:0]
+	for _, zombie := range zombies {
+		if zombie != id {
+			kept = append(kept, zombie)
+		}
+	}
+	found := len(kept) != len(zombies)
+	zombies = kept
+	return found
+}
+
+// reapZombies - deletes quarantined nodes whose last check-in is older than ZOMBIE_DELETE_TIME
+func reapZombies() {
+	// iterate over a copy: the list itself is edited while the sweep runs
+	pending := append([]string(nil), zombies...)
+	for _, id := range pending {
+		node, err := GetNodeByID(id)
+		if err != nil {
+			logger.Log(1, "error retrieving zombie node", id, err.Error())
+			continue
+		}
+		if time.Since(time.Unix(node.LastCheckIn, 0)) <= time.Minute*ZOMBIE_DELETE_TIME {
+			continue
+		}
+		logger.Log(1, "deleting zombie node", node.Name)
+		if err := deleteZombie(&node); err != nil {
+			// keep the entry so that a later sweep can retry
+			logger.Log(1, "error deleting zombie node", id, err.Error())
+			continue
+		}
+		releaseZombie(id)
+	}
+}
+
+// deleteZombie - runs DeleteNodeByID while still receiving on removeZombie;
+// DeleteNodeByID sends the node ID on that channel, so calling it directly
+// from the ManageZombies goroutine would block forever
+func deleteZombie(node *models.Node) error {
+	done := make(chan error, 1)
+	go func() {
+		done <- DeleteNodeByID(node, true)
+	}()
+	for {
+		select {
+		case err := <-done:
+			return err
+		case id := <-removeZombie:
+			releaseZombie(id)
+		}
+	}
+}
+
+// InitalizeZombies - populates the zombie quarantine list (should be called from initialization,
+// before ManageZombies is started); the name keeps its original spelling because main.go calls it
+func InitalizeZombies() {
+	nodes, err := GetAllNodes()
+	if err != nil {
+		logger.Log(1, "failed to retrieve nodes", err.Error())
+		return
+	}
+	scanned := make(map[string]bool)
+	for _, node := range nodes {
+		// every network only has to be examined once
+		if scanned[node.Network] {
+			continue
+		}
+		scanned[node.Network] = true
+		peers, err := GetNetworkNodes(node.Network)
+		if err != nil {
+			logger.Log(1, "failled to retrieve nodes for network", node.Network, err.Error())
+			continue
+		}
+		// visit each pair once so that a node is never compared with itself
+		for i := range peers {
+			for j := i + 1; j < len(peers); j++ {
+				if peers[i].MacAddress != peers[j].MacAddress {
+					continue
+				}
+				if peers[i].LastCheckIn > peers[j].LastCheckIn {
+					quarantineZombie(peers[j].ID)
+				} else {
+					quarantineZombie(peers[i].ID)
+				}
+			}
+		}
+	}
+}
diff --git a/main.go b/main.go
index 6eb7dc93..bb4a500b 100644
--- a/main.go
+++ b/main.go
@@ -127,6 +127,7 @@ func initialize() { // Client Mode Prereq Check
 			logger.Log(0, "error occurred when notifying nodes of startup", err.Error())
 		}
 	}
+	logic.InitalizeZombies()
 }
 
 func startControllers() {
@@ -169,6 +170,7 @@ func runMessageQueue(wg *sync.WaitGroup) {
 	var client = mq.SetupMQTT(false) // Set up the subscription listener
 	ctx, cancel := context.WithCancel(context.Background())
 	go mq.Keepalive(ctx)
+	go logic.ManageZombies(ctx)
 	quit := make(chan os.Signal, 1)
 	signal.Notify(quit, syscall.SIGTERM, os.Interrupt)
 	<-quit