2022-02-17 04:40:51 +08:00
package mq
import (
"encoding/json"
2022-09-14 03:25:56 +08:00
"errors"
2022-02-17 04:40:51 +08:00
"fmt"
2022-09-14 03:25:56 +08:00
"time"
2022-02-17 04:40:51 +08:00
"github.com/gravitl/netmaker/logger"
"github.com/gravitl/netmaker/logic"
2022-09-15 01:26:31 +08:00
"github.com/gravitl/netmaker/logic/metrics"
2022-02-17 04:40:51 +08:00
"github.com/gravitl/netmaker/models"
"github.com/gravitl/netmaker/servercfg"
2022-02-23 05:14:23 +08:00
"github.com/gravitl/netmaker/serverctl"
2022-02-17 04:40:51 +08:00
)
// PublishPeerUpdate --- deterines and publishes a peer update to all the peers of a node
2022-07-07 17:52:43 +08:00
func PublishPeerUpdate ( newNode * models . Node , publishToSelf bool ) error {
2022-02-17 04:40:51 +08:00
if ! servercfg . IsMessageQueueBackend ( ) {
return nil
}
networkNodes , err := logic . GetNetworkNodes ( newNode . Network )
if err != nil {
logger . Log ( 1 , "err getting Network Nodes" , err . Error ( ) )
return err
}
for _ , node := range networkNodes {
2022-07-07 17:52:43 +08:00
if node . IsServer == "yes" {
continue
}
if ! publishToSelf && newNode . ID == node . ID {
//skip self
2022-02-17 04:40:51 +08:00
continue
}
peerUpdate , err := logic . GetPeerUpdate ( & node )
if err != nil {
logger . Log ( 1 , "error getting peer update for node" , node . ID , err . Error ( ) )
continue
}
data , err := json . Marshal ( & peerUpdate )
if err != nil {
logger . Log ( 2 , "error marshaling peer update for node" , node . ID , err . Error ( ) )
continue
}
if err = publish ( & node , fmt . Sprintf ( "peers/%s/%s" , node . Network , node . ID ) , data ) ; err != nil {
logger . Log ( 1 , "failed to publish peer update for node" , node . ID )
} else {
2022-04-22 03:53:44 +08:00
logger . Log ( 1 , "sent peer update for node" , node . Name , "on network:" , node . Network )
2022-02-17 04:40:51 +08:00
}
}
return nil
}
// PublishPeerUpdate --- publishes a peer update to all the peers of a node
func PublishExtPeerUpdate ( node * models . Node ) error {
var err error
if logic . IsLocalServer ( node ) {
if err = logic . ServerUpdate ( node , false ) ; err != nil {
logger . Log ( 1 , "server node:" , node . ID , "failed to update peers with ext clients" )
return err
} else {
return nil
}
}
if ! servercfg . IsMessageQueueBackend ( ) {
return nil
}
peerUpdate , err := logic . GetPeerUpdate ( node )
if err != nil {
return err
}
data , err := json . Marshal ( & peerUpdate )
if err != nil {
return err
}
2022-03-28 21:00:02 +08:00
if err = publish ( node , fmt . Sprintf ( "peers/%s/%s" , node . Network , node . ID ) , data ) ; err != nil {
return err
}
2022-07-07 17:52:43 +08:00
go PublishPeerUpdate ( node , false )
2022-03-28 21:00:02 +08:00
return nil
2022-02-17 04:40:51 +08:00
}
// NodeUpdate -- publishes a node update
func NodeUpdate ( node * models . Node ) error {
2022-02-17 09:42:57 +08:00
if ! servercfg . IsMessageQueueBackend ( ) || node . IsServer == "yes" {
2022-02-17 04:40:51 +08:00
return nil
}
logger . Log ( 3 , "publishing node update to " + node . Name )
2022-09-02 16:52:28 +08:00
if len ( node . NetworkSettings . AccessKeys ) > 0 {
2022-09-02 22:29:29 +08:00
node . NetworkSettings . AccessKeys = [ ] models . AccessKey { } // not to be sent (don't need to spread access keys around the network; we need to know how to reach other nodes, not become them)
2022-09-02 16:52:28 +08:00
}
2022-02-17 04:40:51 +08:00
data , err := json . Marshal ( node )
if err != nil {
logger . Log ( 2 , "error marshalling node update " , err . Error ( ) )
return err
}
if err = publish ( node , fmt . Sprintf ( "update/%s/%s" , node . Network , node . ID ) , data ) ; err != nil {
logger . Log ( 2 , "error publishing node update to peer " , node . ID , err . Error ( ) )
return err
}
return nil
}
// sendPeers - retrieve networks, send peer ports to all peers
func sendPeers ( ) {
2022-02-23 03:53:40 +08:00
2022-09-14 03:25:56 +08:00
networks , err := logic . GetNetworks ( )
if err != nil {
logger . Log ( 1 , "error retrieving networks for keepalive" , err . Error ( ) )
}
2022-02-17 04:40:51 +08:00
var force bool
peer_force_send ++
if peer_force_send == 5 {
2022-03-21 23:52:00 +08:00
// run iptables update to ensure gateways work correctly and mq is forwarded if containerized
if servercfg . ManageIPTables ( ) != "off" {
2022-03-28 22:36:23 +08:00
serverctl . InitIPTables ( false )
2022-03-21 23:52:00 +08:00
}
2022-05-27 22:52:47 +08:00
servercfg . SetHost ( )
2022-03-21 23:52:00 +08:00
2022-02-17 04:40:51 +08:00
force = true
peer_force_send = 0
2022-02-23 03:53:40 +08:00
err := logic . TimerCheckpoint ( ) // run telemetry & log dumps if 24 hours has passed..
if err != nil {
logger . Log ( 3 , "error occurred on timer," , err . Error ( ) )
}
2022-09-14 03:25:56 +08:00
collectServerMetrics ( networks [ : ] )
2022-02-17 04:40:51 +08:00
}
2022-02-23 03:53:40 +08:00
2022-02-17 04:40:51 +08:00
for _ , network := range networks {
2022-07-15 20:55:01 +08:00
serverNode , errN := logic . GetNetworkServerLocal ( network . NetID )
2022-07-06 04:51:11 +08:00
if errN == nil {
2022-02-17 04:40:51 +08:00
serverNode . SetLastCheckIn ( )
2022-07-06 08:18:22 +08:00
if err := logic . UpdateNode ( & serverNode , & serverNode ) ; err != nil {
logger . Log ( 0 , "failed checkin for server node" , serverNode . Name , "on network" , network . NetID , err . Error ( ) )
}
2022-07-06 04:51:11 +08:00
}
2022-07-15 20:55:01 +08:00
isLeader := logic . IsLeader ( & serverNode )
2022-07-06 08:16:12 +08:00
if errN == nil && isLeader {
2022-02-17 04:40:51 +08:00
if network . DefaultUDPHolePunch == "yes" {
if logic . ShouldPublishPeerPorts ( & serverNode ) || force {
if force {
logger . Log ( 2 , "sending scheduled peer update (5 min)" )
}
2022-07-07 17:52:43 +08:00
err = PublishPeerUpdate ( & serverNode , false )
2022-02-17 04:40:51 +08:00
if err != nil {
logger . Log ( 1 , "error publishing udp port updates for network" , network . NetID )
logger . Log ( 1 , errN . Error ( ) )
}
}
}
} else {
2022-07-06 08:16:12 +08:00
if isLeader {
logger . Log ( 1 , "unable to retrieve leader for network " , network . NetID )
}
logger . Log ( 2 , "server checkin complete for server" , serverNode . Name , "on network" , network . NetID )
2022-02-23 05:14:23 +08:00
serverctl . SyncServerNetwork ( network . NetID )
2022-07-01 05:00:12 +08:00
if errN != nil {
logger . Log ( 1 , errN . Error ( ) )
}
2022-02-17 04:40:51 +08:00
}
}
}
2022-04-25 23:12:49 +08:00
// ServerStartNotify - notifies all non server nodes to pull changes after a restart
//
// Each node returned by logic.GetAllNodes has its Action set to
// models.NODE_FORCE_UPDATE and is handed to NodeUpdate. A failed notification
// is logged and the remaining nodes are still processed; the only error
// returned is a failure to load the nodes.
func ServerStartNotify() error {
	nodes, err := logic.GetAllNodes()
	if err != nil {
		return err
	}

	for idx := range nodes {
		current := &nodes[idx]
		current.Action = models.NODE_FORCE_UPDATE
		if notifyErr := NodeUpdate(current); notifyErr != nil {
			logger.Log(1, "error when notifying node", current.Name, " - ", current.ID, "of a server startup")
		}
	}
	return nil
}
2022-09-14 03:25:56 +08:00
// function to collect and store metrics for server nodes
func collectServerMetrics ( networks [ ] models . Network ) {
2022-09-15 01:26:31 +08:00
if ! logic . Is_EE {
2022-09-14 04:35:14 +08:00
return
}
2022-09-14 03:25:56 +08:00
if len ( networks ) > 0 {
for i := range networks {
currentNetworkNodes , err := logic . GetNetworkNodes ( networks [ i ] . NetID )
if err != nil {
continue
}
currentServerNodes := logic . GetServerNodes ( networks [ i ] . NetID )
if len ( currentServerNodes ) > 0 {
for i := range currentServerNodes {
if logic . IsLocalServer ( & currentServerNodes [ i ] ) {
serverMetrics := logic . CollectServerMetrics ( currentServerNodes [ i ] . ID , currentNetworkNodes )
if serverMetrics != nil {
serverMetrics . NodeName = currentServerNodes [ i ] . Name
serverMetrics . NodeID = currentServerNodes [ i ] . ID
serverMetrics . IsServer = "yes"
serverMetrics . Network = currentServerNodes [ i ] . Network
if err = metrics . GetExchangedBytesForNode ( & currentServerNodes [ i ] , serverMetrics ) ; err != nil {
logger . Log ( 1 , fmt . Sprintf ( "failed to update exchanged bytes info for server: %s, err: %v" ,
currentServerNodes [ i ] . Name , err ) )
}
updateNodeMetrics ( & currentServerNodes [ i ] , serverMetrics )
if err = logic . UpdateMetrics ( currentServerNodes [ i ] . ID , serverMetrics ) ; err != nil {
logger . Log ( 1 , "failed to update metrics for server node" , currentServerNodes [ i ] . ID )
}
if servercfg . IsMetricsExporter ( ) {
logger . Log ( 2 , "-------------> SERVER METRICS: " , fmt . Sprintf ( "%+v" , serverMetrics ) )
if err := pushMetricsToExporter ( * serverMetrics ) ; err != nil {
logger . Log ( 2 , "failed to push server metrics to exporter: " , err . Error ( ) )
}
}
}
}
}
}
}
}
}
// pushMetricsToExporter - serializes metrics as JSON and publishes them on the "metrics_exporter" topic
//
// The publish is awaited for up to MQ_TIMEOUT seconds. Returns a marshal
// error, the error reported by the publish token, or a "connection timeout"
// error when the wait expires without the token reporting one.
// NOTE(review): the 0 and true arguments are presumably the QoS level and the
// retained flag - confirm against the type of mqclient.
func pushMetricsToExporter(metrics models.Metrics) error {
	logger.Log(2, "----> Pushing metrics to exporter")

	payload, marshalErr := json.Marshal(metrics)
	if marshalErr != nil {
		return errors.New("failed to marshal metrics: " + marshalErr.Error())
	}

	token := mqclient.Publish("metrics_exporter", 0, true, payload)
	completed := token.WaitTimeout(MQ_TIMEOUT * time.Second)
	if completed && token.Error() == nil {
		return nil
	}

	// either the wait timed out or the token carries an error; prefer the
	// token's own error when it has one
	if pubErr := token.Error(); pubErr != nil {
		return pubErr
	}
	return errors.New("connection timeout")
}