mirror of
https://github.com/gravitl/netmaker.git
synced 2025-09-05 20:54:18 +08:00
NET-1782: Fetch Node Connection Status from metrics (#3237)
* add live status of node * handle static node status * add public IP field to server configuration * get public Ip from config * improve node status logic * improvise status check * use only checkin status on old nodes --------- Co-authored-by: the_aceix <aceixsmartx@gmail.com>
This commit is contained in:
parent
496d541822
commit
31c2311bef
12 changed files with 293 additions and 34 deletions
|
@ -104,6 +104,7 @@ type ServerConfig struct {
|
|||
Stun bool `yaml:"stun"`
|
||||
StunServers string `yaml:"stun_servers"`
|
||||
DefaultDomain string `yaml:"default_domain"`
|
||||
PublicIp string `yaml:"public_ip"`
|
||||
}
|
||||
|
||||
// SQLConfig - Generic SQL Config
|
||||
|
|
|
@ -326,8 +326,9 @@ func getNetworkNodes(w http.ResponseWriter, r *http.Request) {
|
|||
if len(filteredNodes) > 0 {
|
||||
nodes = filteredNodes
|
||||
}
|
||||
nodes = logic.AddStaticNodestoList(nodes)
|
||||
|
||||
nodes = logic.AddStaticNodestoList(nodes)
|
||||
nodes = logic.AddStatusToNodes(nodes)
|
||||
// returns all the nodes in JSON/API format
|
||||
apiNodes := logic.GetAllNodesAPI(nodes[:])
|
||||
logger.Log(2, r.Header.Get("user"), "fetched nodes on network", networkName)
|
||||
|
@ -367,6 +368,7 @@ func getAllNodes(w http.ResponseWriter, r *http.Request) {
|
|||
|
||||
}
|
||||
nodes = logic.AddStaticNodestoList(nodes)
|
||||
nodes = logic.AddStatusToNodes(nodes)
|
||||
// return all the nodes in JSON/API format
|
||||
apiNodes := logic.GetAllNodesAPI(nodes[:])
|
||||
logger.Log(3, r.Header.Get("user"), "fetched all nodes they have access to")
|
||||
|
|
|
@ -445,6 +445,20 @@ func AddStaticNodestoList(nodes []models.Node) []models.Node {
|
|||
return nodes
|
||||
}
|
||||
|
||||
func AddStatusToNodes(nodes []models.Node) (nodesWithStatus []models.Node) {
|
||||
aclDefaultPolicyStatusMap := make(map[string]bool)
|
||||
for _, node := range nodes {
|
||||
if _, ok := aclDefaultPolicyStatusMap[node.Network]; !ok {
|
||||
// check default policy if all allowed return true
|
||||
defaultPolicy, _ := GetDefaultPolicy(models.NetworkID(node.Network), models.DevicePolicy)
|
||||
aclDefaultPolicyStatusMap[node.Network] = defaultPolicy.Enabled
|
||||
}
|
||||
GetNodeStatus(&node, aclDefaultPolicyStatusMap[node.Network])
|
||||
nodesWithStatus = append(nodesWithStatus, node)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// GetNetworkByNode - gets the network model from a node
|
||||
func GetNetworkByNode(node *models.Node) (models.Network, error) {
|
||||
|
||||
|
|
26
logic/status.go
Normal file
26
logic/status.go
Normal file
|
@ -0,0 +1,26 @@
|
|||
package logic
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/gravitl/netmaker/models"
|
||||
)
|
||||
|
||||
var GetNodeStatus = getNodeStatus
|
||||
|
||||
func getNodeStatus(node *models.Node, t bool) {
|
||||
// On CE check only last check-in time
|
||||
if node.IsStatic {
|
||||
if !node.StaticNode.Enabled {
|
||||
node.Status = models.OfflineSt
|
||||
return
|
||||
}
|
||||
node.Status = models.OnlineSt
|
||||
return
|
||||
}
|
||||
if time.Since(node.LastCheckIn) > time.Minute*10 {
|
||||
node.Status = models.OfflineSt
|
||||
return
|
||||
}
|
||||
node.Status = models.OnlineSt
|
||||
}
|
|
@ -6,11 +6,14 @@ import (
|
|||
"encoding/base32"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
"unicode"
|
||||
|
||||
"github.com/blang/semver"
|
||||
"github.com/c-robinson/iplib"
|
||||
"github.com/gravitl/netmaker/database"
|
||||
"github.com/gravitl/netmaker/logger"
|
||||
|
@ -148,4 +151,28 @@ func IsSlicesEqual(a, b []string) bool {
|
|||
return true
|
||||
}
|
||||
|
||||
// == private ==
|
||||
// VersionLessThan checks if v1 < v2 semantically
|
||||
// dev is the latest version
|
||||
func VersionLessThan(v1, v2 string) (bool, error) {
|
||||
if v1 == "dev" {
|
||||
return false, nil
|
||||
}
|
||||
if v2 == "dev" {
|
||||
return true, nil
|
||||
}
|
||||
semVer1 := strings.TrimFunc(v1, func(r rune) bool {
|
||||
return !unicode.IsNumber(r)
|
||||
})
|
||||
semVer2 := strings.TrimFunc(v2, func(r rune) bool {
|
||||
return !unicode.IsNumber(r)
|
||||
})
|
||||
sv1, err := semver.Parse(semVer1)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("failed to parse semver1 (%s): %w", semVer1, err)
|
||||
}
|
||||
sv2, err := semver.Parse(semVer2)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("failed to parse semver2 (%s): %w", semVer2, err)
|
||||
}
|
||||
return sv1.LT(sv2), nil
|
||||
}
|
||||
|
|
|
@ -52,6 +52,7 @@ type ApiNode struct {
|
|||
IsStatic bool `json:"is_static"`
|
||||
IsUserNode bool `json:"is_user_node"`
|
||||
StaticNode ExtClient `json:"static_node"`
|
||||
Status NodeStatus `json:"status"`
|
||||
}
|
||||
|
||||
// ApiNode.ConvertToServerNode - converts an api node to a server node
|
||||
|
@ -192,6 +193,7 @@ func (nm *Node) ConvertToAPINode() *ApiNode {
|
|||
apiNode.IsStatic = nm.IsStatic
|
||||
apiNode.IsUserNode = nm.IsUserNode
|
||||
apiNode.StaticNode = nm.StaticNode
|
||||
apiNode.Status = nm.Status
|
||||
return &apiNode
|
||||
}
|
||||
|
||||
|
|
|
@ -11,6 +11,19 @@ import (
|
|||
"golang.zx2c4.com/wireguard/wgctrl/wgtypes"
|
||||
)
|
||||
|
||||
type NodeStatus string
|
||||
|
||||
const (
|
||||
OnlineSt NodeStatus = "online"
|
||||
OfflineSt NodeStatus = "offline"
|
||||
WarningSt NodeStatus = "warning"
|
||||
ErrorSt NodeStatus = "error"
|
||||
UnKnown NodeStatus = "unknown"
|
||||
)
|
||||
|
||||
// LastCheckInThreshold - if node's checkin more than this threshold,then node is declared as offline
|
||||
const LastCheckInThreshold = time.Minute * 10
|
||||
|
||||
const (
|
||||
// NODE_SERVER_NAME - the default server name
|
||||
NODE_SERVER_NAME = "netmaker"
|
||||
|
@ -103,6 +116,7 @@ type Node struct {
|
|||
IsStatic bool `json:"is_static"`
|
||||
IsUserNode bool `json:"is_user_node"`
|
||||
StaticNode ExtClient `json:"static_node"`
|
||||
Status NodeStatus `json:"node_status"`
|
||||
}
|
||||
|
||||
// LegacyNode - legacy struct for node model
|
||||
|
|
|
@ -93,9 +93,6 @@ func SendPullSYN() error {
|
|||
return err
|
||||
}
|
||||
encrypted, encryptErr := encryptAESGCM(host.TrafficKeyPublic[0:32], zipped)
|
||||
if encryptErr != nil {
|
||||
return encryptErr
|
||||
}
|
||||
|
||||
if encryptErr != nil {
|
||||
continue
|
||||
|
|
30
mq/util.go
30
mq/util.go
|
@ -12,9 +12,7 @@ import (
|
|||
"math"
|
||||
"strings"
|
||||
"time"
|
||||
"unicode"
|
||||
|
||||
"github.com/blang/semver"
|
||||
"github.com/gravitl/netmaker/logic"
|
||||
"github.com/gravitl/netmaker/models"
|
||||
"github.com/gravitl/netmaker/netclient/ncutils"
|
||||
|
@ -139,7 +137,7 @@ func publish(host *models.Host, dest string, msg []byte) error {
|
|||
|
||||
var encrypted []byte
|
||||
var encryptErr error
|
||||
vlt, err := versionLessThan(host.Version, "v0.30.0")
|
||||
vlt, err := logic.VersionLessThan(host.Version, "v0.30.0")
|
||||
if err != nil {
|
||||
slog.Warn("error checking version less than", "error", err)
|
||||
return err
|
||||
|
@ -187,29 +185,3 @@ func GetID(topic string) (string, error) {
|
|||
//the last part of the topic will be the node.ID
|
||||
return parts[count-1], nil
|
||||
}
|
||||
|
||||
// versionLessThan checks if v1 < v2 semantically
|
||||
// dev is the latest version
|
||||
func versionLessThan(v1, v2 string) (bool, error) {
|
||||
if v1 == "dev" {
|
||||
return false, nil
|
||||
}
|
||||
if v2 == "dev" {
|
||||
return true, nil
|
||||
}
|
||||
semVer1 := strings.TrimFunc(v1, func(r rune) bool {
|
||||
return !unicode.IsNumber(r)
|
||||
})
|
||||
semVer2 := strings.TrimFunc(v2, func(r rune) bool {
|
||||
return !unicode.IsNumber(r)
|
||||
})
|
||||
sv1, err := semver.Parse(semVer1)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("failed to parse semver1 (%s): %w", semVer1, err)
|
||||
}
|
||||
sv2, err := semver.Parse(semVer2)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("failed to parse semver2 (%s): %w", semVer2, err)
|
||||
}
|
||||
return sv1.LT(sv2), nil
|
||||
}
|
||||
|
|
|
@ -140,6 +140,7 @@ func InitPro() {
|
|||
logic.IntialiseGroups = proLogic.UserGroupsInit
|
||||
logic.AddGlobalNetRolesToAdmins = proLogic.AddGlobalNetRolesToAdmins
|
||||
logic.GetUserGroupsInNetwork = proLogic.GetUserGroupsInNetwork
|
||||
logic.GetNodeStatus = proLogic.GetNodeStatus
|
||||
}
|
||||
|
||||
func retrieveProLogo() string {
|
||||
|
|
197
pro/logic/status.go
Normal file
197
pro/logic/status.go
Normal file
|
@ -0,0 +1,197 @@
|
|||
package logic
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/gravitl/netmaker/logic"
|
||||
"github.com/gravitl/netmaker/models"
|
||||
)
|
||||
|
||||
func getNodeStatusOld(node *models.Node) {
|
||||
// On CE check only last check-in time
|
||||
if node.IsStatic {
|
||||
if !node.StaticNode.Enabled {
|
||||
node.Status = models.OfflineSt
|
||||
return
|
||||
}
|
||||
node.Status = models.OnlineSt
|
||||
return
|
||||
}
|
||||
if time.Since(node.LastCheckIn) > time.Minute*10 {
|
||||
node.Status = models.OfflineSt
|
||||
return
|
||||
}
|
||||
node.Status = models.OnlineSt
|
||||
}
|
||||
|
||||
func GetNodeStatus(node *models.Node, defaultEnabledPolicy bool) {
|
||||
|
||||
if time.Since(node.LastCheckIn) > models.LastCheckInThreshold {
|
||||
node.Status = models.OfflineSt
|
||||
return
|
||||
}
|
||||
if node.IsStatic {
|
||||
if !node.StaticNode.Enabled {
|
||||
node.Status = models.OfflineSt
|
||||
return
|
||||
}
|
||||
// check extclient connection from metrics
|
||||
ingressMetrics, err := GetMetrics(node.StaticNode.IngressGatewayID)
|
||||
if err != nil || ingressMetrics == nil || ingressMetrics.Connectivity == nil {
|
||||
node.Status = models.UnKnown
|
||||
return
|
||||
}
|
||||
if metric, ok := ingressMetrics.Connectivity[node.StaticNode.ClientID]; ok {
|
||||
if metric.Connected {
|
||||
node.Status = models.OnlineSt
|
||||
return
|
||||
} else {
|
||||
node.Status = models.OfflineSt
|
||||
return
|
||||
}
|
||||
}
|
||||
node.Status = models.UnKnown
|
||||
return
|
||||
}
|
||||
host, err := logic.GetHost(node.HostID.String())
|
||||
if err != nil {
|
||||
node.Status = models.UnKnown
|
||||
return
|
||||
}
|
||||
vlt, err := logic.VersionLessThan(host.Version, "v0.30.0")
|
||||
if err != nil {
|
||||
node.Status = models.UnKnown
|
||||
return
|
||||
}
|
||||
if vlt {
|
||||
getNodeStatusOld(node)
|
||||
return
|
||||
}
|
||||
metrics, err := logic.GetMetrics(node.ID.String())
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
if metrics == nil || metrics.Connectivity == nil {
|
||||
if time.Since(node.LastCheckIn) < models.LastCheckInThreshold {
|
||||
node.Status = models.OnlineSt
|
||||
return
|
||||
}
|
||||
}
|
||||
// if node.IsFailOver {
|
||||
// if time.Since(node.LastCheckIn) < models.LastCheckInThreshold {
|
||||
// node.Status = models.OnlineSt
|
||||
// return
|
||||
// }
|
||||
// }
|
||||
// If all Peers are able to reach me and and the peer is not able to reached by any peer then return online
|
||||
/* 1. FailOver Exists
|
||||
a. check connectivity to failover Node - if no connection return warning
|
||||
b. if getting failedover and still no connection to any of the peers - then show error
|
||||
c. if getting failedOver and has connections to some peers - show warning
|
||||
2. FailOver Doesn't Exist
|
||||
a. check connectivity to pu
|
||||
|
||||
*/
|
||||
|
||||
// failoverNode, exists := FailOverExists(node.Network)
|
||||
// if exists && failoverNode.FailedOverBy != uuid.Nil {
|
||||
// // check connectivity to failover Node
|
||||
// if metric, ok := metrics.Connectivity[failoverNode.ID.String()]; ok {
|
||||
// if time.Since(failoverNode.LastCheckIn) < models.LastCheckInThreshold {
|
||||
// if metric.Connected {
|
||||
// node.Status = models.OnlineSt
|
||||
// return
|
||||
// } else {
|
||||
// checkPeerConnectivity(node, metrics)
|
||||
// return
|
||||
// }
|
||||
// }
|
||||
// } else {
|
||||
// node.Status = models.OnlineSt
|
||||
// return
|
||||
// }
|
||||
|
||||
// }
|
||||
checkPeerConnectivity(node, metrics, defaultEnabledPolicy)
|
||||
|
||||
}
|
||||
|
||||
func checkPeerStatus(node *models.Node, defaultAclPolicy bool) {
|
||||
peerNotConnectedCnt := 0
|
||||
metrics, err := logic.GetMetrics(node.ID.String())
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
if metrics == nil || metrics.Connectivity == nil {
|
||||
if time.Since(node.LastCheckIn) < models.LastCheckInThreshold {
|
||||
node.Status = models.OnlineSt
|
||||
return
|
||||
}
|
||||
}
|
||||
for peerID, metric := range metrics.Connectivity {
|
||||
peer, err := logic.GetNodeByID(peerID)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if !defaultAclPolicy && !logic.IsNodeAllowedToCommunicate(*node, peer, false) {
|
||||
continue
|
||||
}
|
||||
|
||||
if time.Since(peer.LastCheckIn) > models.LastCheckInThreshold {
|
||||
continue
|
||||
}
|
||||
if metric.Connected {
|
||||
continue
|
||||
}
|
||||
if peer.Status == models.ErrorSt {
|
||||
continue
|
||||
}
|
||||
peerNotConnectedCnt++
|
||||
|
||||
}
|
||||
if peerNotConnectedCnt == 0 {
|
||||
node.Status = models.OnlineSt
|
||||
return
|
||||
}
|
||||
if peerNotConnectedCnt == len(metrics.Connectivity) {
|
||||
node.Status = models.ErrorSt
|
||||
return
|
||||
}
|
||||
node.Status = models.WarningSt
|
||||
}
|
||||
|
||||
func checkPeerConnectivity(node *models.Node, metrics *models.Metrics, defaultAclPolicy bool) {
|
||||
peerNotConnectedCnt := 0
|
||||
for peerID, metric := range metrics.Connectivity {
|
||||
peer, err := logic.GetNodeByID(peerID)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if !defaultAclPolicy && !logic.IsNodeAllowedToCommunicate(*node, peer, false) {
|
||||
continue
|
||||
}
|
||||
|
||||
if time.Since(peer.LastCheckIn) > models.LastCheckInThreshold {
|
||||
continue
|
||||
}
|
||||
if metric.Connected {
|
||||
continue
|
||||
}
|
||||
// check if peer is in error state
|
||||
checkPeerStatus(&peer, defaultAclPolicy)
|
||||
if peer.Status == models.ErrorSt {
|
||||
continue
|
||||
}
|
||||
peerNotConnectedCnt++
|
||||
|
||||
}
|
||||
if peerNotConnectedCnt == 0 {
|
||||
node.Status = models.OnlineSt
|
||||
return
|
||||
}
|
||||
if peerNotConnectedCnt == len(metrics.Connectivity) {
|
||||
node.Status = models.ErrorSt
|
||||
return
|
||||
}
|
||||
node.Status = models.WarningSt
|
||||
}
|
|
@ -76,6 +76,7 @@ func GetServerConfig() config.ServerConfig {
|
|||
cfg.Database = GetDB()
|
||||
cfg.Platform = GetPlatform()
|
||||
cfg.Version = GetVersion()
|
||||
cfg.PublicIp = GetServerHostIP()
|
||||
|
||||
// == auth config ==
|
||||
var authInfo = GetAuthProviderInfo()
|
||||
|
@ -180,6 +181,11 @@ func GetVersion() string {
|
|||
return Version
|
||||
}
|
||||
|
||||
// GetServerHostIP - fetches server IP
|
||||
func GetServerHostIP() string {
|
||||
return os.Getenv("SERVER_HOST")
|
||||
}
|
||||
|
||||
// GetDB - gets the database type
|
||||
func GetDB() string {
|
||||
database := "sqlite"
|
||||
|
|
Loading…
Add table
Reference in a new issue