netmaker/pro/controllers/failover.go
Abhishek K 66069fbc34
NET-1082: Scale Testing Fixes (#2894)
* add additional mutex lock on node acls func

* increase verbosity

* disable acls on cloud emqx

* add emqx creds creation to go routine

* add debug log of mq client id

* comment port check

* uncomment port check

* check for connection mq connection open

* use username for client id

* add write mutex on acl is allowed

* add mq connection lost handler on server

* spin off zombie init as go routine

* get whole api path from config

* Revert "get whole api path from config"

This reverts commit 392f5f4c5f.

* update extclient acls async

* add additional mutex lock on node acls func

(cherry picked from commit 5325f0e7d7)

* increase verbosity

(cherry picked from commit 705b3cf0bf)

* add emqx creds creation to go routine

(cherry picked from commit c8e65f4820)

* add debug log of mq client id

(cherry picked from commit 29c5d6ceca)

* comment port check

(cherry picked from commit db8d6d95ea)

* check for connection mq connection open

(cherry picked from commit 13b11033b0)

* use username for client id

(cherry picked from commit e90c7386de)

* add write mutex on acl is allowed

(cherry picked from commit 4cae1b0bb4)

* add mq connection lost handler on server

(cherry picked from commit c82918ad35)

* spin off zombie init as go routine

(cherry picked from commit 6d65c44c43)

* update extclient acls async

(cherry picked from commit 6557ef1ebe)

* additional logs for oauth user flow

(cherry picked from commit 61703038ae)

* add more debug logs

(cherry picked from commit 5980beacd1)

* add more debug logs

(cherry picked from commit 4d001f0d27)

* add set auth secret

(cherry picked from commit f41cef5da5)

* fix fetch pass

(cherry picked from commit 825caf4b60)

* make sure auth secret is set only once

(cherry picked from commit ba33ed02aa)

* make sure auth secret is set only once

(cherry picked from commit 920ac4c507)

* comment usage of emqx acls

* replace  read lock with write lock on acls

* replace  read lock with write lock on acls

(cherry picked from commit 808d2135c8)

* use deadlock pkg for visibility

* add additional mutex locks

* remove race flag

* on mq re-connecting do not exit if failed

* on mq re-connecting do not exit if failed

* revert mutex package change

* set mq clean session

* remove debug log

* go mod tidy

* revert on prem emqx acls del
2024-04-11 21:18:57 +05:30

187 lines
5.8 KiB
Go

package controllers
import (
"encoding/json"
"errors"
"fmt"
"net/http"
"github.com/google/uuid"
"github.com/gorilla/mux"
controller "github.com/gravitl/netmaker/controllers"
"github.com/gravitl/netmaker/logger"
"github.com/gravitl/netmaker/logic"
"github.com/gravitl/netmaker/models"
"github.com/gravitl/netmaker/mq"
proLogic "github.com/gravitl/netmaker/pro/logic"
"golang.org/x/exp/slog"
)
// FailOverHandlers registers the failover HTTP routes on the supplied router.
//
// The create, delete and reset routes are wrapped with logic.SecurityCheck;
// the failover_me route is wrapped with controller.Authorize using the "host"
// scope argument.
func FailOverHandlers(r *mux.Router) {
	// Create and delete share one path and are told apart by HTTP method.
	const nodeFailOverPath = "/api/v1/node/{nodeid}/failover"

	createHandler := logic.SecurityCheck(true, http.HandlerFunc(createfailOver))
	r.HandleFunc(nodeFailOverPath, createHandler).Methods(http.MethodPost)

	deleteHandler := logic.SecurityCheck(true, http.HandlerFunc(deletefailOver))
	r.HandleFunc(nodeFailOverPath, deleteHandler).Methods(http.MethodDelete)

	resetHandler := logic.SecurityCheck(true, http.HandlerFunc(resetFailOver))
	r.HandleFunc("/api/v1/node/{network}/failover/reset", resetHandler).Methods(http.MethodPost)

	failOverMeHandler := controller.Authorize(true, false, "host", http.HandlerFunc(failOverME))
	r.HandleFunc("/api/v1/node/{nodeid}/failover_me", failOverMeHandler).Methods(http.MethodPost)
}
// swagger:route POST /api/v1/node/failover node createfailOver
//
// Create a relay.
//
// Schemes: https
//
// Security:
// oauth
//
// Responses:
// 200: nodeResponse
func createfailOver(w http.ResponseWriter, r *http.Request) {
var params = mux.Vars(r)
nodeid := params["nodeid"]
// confirm host exists
node, err := logic.GetNodeByID(nodeid)
if err != nil {
slog.Error("failed to get node:", "error", err.Error())
logic.ReturnErrorResponse(w, r, logic.FormatError(err, "badrequest"))
return
}
err = proLogic.CreateFailOver(node)
if err != nil {
logic.ReturnErrorResponse(w, r, logic.FormatError(err, "internal"))
return
}
go mq.PublishPeerUpdate(false)
w.Header().Set("Content-Type", "application/json")
logic.ReturnSuccessResponseWithJson(w, r, node, "created failover successfully")
}
// resetFailOver clears failover state on the nodes of the network named by
// the {network} path variable.
//
// Every node whose FailedOverBy is set has that field reset to uuid.Nil and
// its FailOverPeers replaced with an empty map, and is then persisted with
// logic.UpsertNode. A failure to list the network's nodes is answered with an
// "internal" error. A failure to persist an individual node is logged and the
// loop continues, so one bad node does not stop the rest from being reset.
// Afterwards a peer update is published in the background and a success
// message is returned.
func resetFailOver(w http.ResponseWriter, r *http.Request) {
	network := mux.Vars(r)["network"]
	nodes, err := logic.GetNetworkNodes(network)
	if err != nil {
		logic.ReturnErrorResponse(w, r, logic.FormatError(err, "internal"))
		return
	}
	for _, node := range nodes {
		// Nodes that are not failed over need no change.
		if node.FailedOverBy == uuid.Nil {
			continue
		}
		node.FailedOverBy = uuid.Nil
		node.FailOverPeers = make(map[string]struct{})
		if err := logic.UpsertNode(&node); err != nil {
			// Best-effort: record the failure and keep resetting the others.
			slog.Error("failed to upsert node", "node", node.ID.String(), "error", err)
		}
	}
	go mq.PublishPeerUpdate(false)
	w.Header().Set("Content-Type", "application/json")
	logic.ReturnSuccessResponse(w, r, "failover has been reset successfully")
}
// swagger:route DELETE /api/v1/node/{nodeid}/failover node deletefailOver
//
// Remove the failover role from the node identified by {nodeid}.
//
// Schemes: https
//
// Security:
// oauth
//
// Responses:
// 200: nodeResponse
//
// deletefailOver looks up the node named by the {nodeid} path variable,
// clears its IsFailOver flag and persists it. A lookup failure is answered
// with a "badrequest" error and a persistence failure with an "internal"
// error. On success proLogic.ResetFailOver and a peer update run in a
// background goroutine and the updated node is returned as JSON.
func deletefailOver(w http.ResponseWriter, r *http.Request) {
	nodeID := mux.Vars(r)["nodeid"]

	// The target node must already exist.
	node, err := logic.GetNodeByID(nodeID)
	if err != nil {
		slog.Error("failed to get node:", "error", err.Error())
		logic.ReturnErrorResponse(w, r, logic.FormatError(err, "badrequest"))
		return
	}

	// Drop the failover role and store the change before any follow-up work.
	node.IsFailOver = false
	if err = logic.UpsertNode(&node); err != nil {
		slog.Error("failed to upsert node", "node", node.ID.String(), "error", err)
		logic.ReturnErrorResponse(w, r, logic.FormatError(err, "internal"))
		return
	}

	// The failover reset and the peer update run off the request path,
	// in that order.
	go func() {
		proLogic.ResetFailOver(&node)
		mq.PublishPeerUpdate(false)
	}()

	w.Header().Set("Content-Type", "application/json")
	logic.ReturnSuccessResponseWithJson(w, r, node, "deleted failover successfully")
}
// swagger:route POST /api/v1/node/{nodeid}/failover_me node failOver_me
//
// Request failover between the calling node and a peer through the network's failover node.
//
// Schemes: https
//
// Security:
// oauth
//
// Responses:
// 200: nodeResponse
//
// failOverME handles a failover request made on behalf of the node named by
// the {nodeid} path variable. The JSON request body is decoded into a
// models.FailOverMeReq whose NodeID names the peer.
//
// The checks run in this order, each answering with a "badrequest" error:
// the requesting node and its host must exist, the node's network must have
// a failover node, the body must decode, the peer must exist, and neither the
// requesting node nor the peer may be relayed or acting as a failover.
// If proLogic.SetFailOverCtx then fails, an "internal" error is returned.
// On success a peer update is published in the background and a success
// message is written.
func failOverME(w http.ResponseWriter, r *http.Request) {
	var params = mux.Vars(r)
	nodeid := params["nodeid"]
	// confirm the requesting node exists
	node, err := logic.GetNodeByID(nodeid)
	if err != nil {
		logger.Log(0, r.Header.Get("user"), "failed to get node:", err.Error())
		logic.ReturnErrorResponse(w, r, logic.FormatError(err, "badrequest"))
		return
	}
	// the host is looked up so its name can be included in the error below
	host, err := logic.GetHost(node.HostID.String())
	if err != nil {
		logic.ReturnErrorResponse(w, r, logic.FormatError(err, "badrequest"))
		return
	}
	failOverNode, exists := proLogic.FailOverExists(node.Network)
	if !exists {
		logic.ReturnErrorResponse(w, r, logic.FormatError(fmt.Errorf("req-from: %s, failover node doesn't exist in the network", host.Name), "badrequest"))
		return
	}
	var failOverReq models.FailOverMeReq
	err = json.NewDecoder(r.Body).Decode(&failOverReq)
	if err != nil {
		logger.Log(0, r.Header.Get("user"), "error decoding request body: ", err.Error())
		logic.ReturnErrorResponse(w, r, logic.FormatError(err, "badrequest"))
		return
	}
	peerNode, err := logic.GetNodeByID(failOverReq.NodeID)
	if err != nil {
		slog.Error("peer not found: ", "nodeid", failOverReq.NodeID, "error", err)
		logic.ReturnErrorResponse(w, r, logic.FormatError(errors.New("peer not found"), "badrequest"))
		return
	}
	// neither end of the pair may already be relayed or be a failover itself
	if node.IsRelayed || node.IsFailOver {
		logic.ReturnErrorResponse(w, r, logic.FormatError(errors.New("node is relayed or acting as failover"), "badrequest"))
		return
	}
	if peerNode.IsRelayed || peerNode.IsFailOver {
		logic.ReturnErrorResponse(w, r, logic.FormatError(errors.New("peer node is relayed or acting as failover"), "badrequest"))
		return
	}
	err = proLogic.SetFailOverCtx(failOverNode, node, peerNode)
	if err != nil {
		slog.Error("failed to create failover", "id", node.ID.String(),
			"network", node.Network, "error", err)
		logic.ReturnErrorResponse(w, r, logic.FormatError(fmt.Errorf("failed to create failover: %v", err), "internal"))
		return
	}
	slog.Info("[auto-relay] created relay on node", "node", node.ID.String(), "network", node.Network)
	// Every path that reaches this point has changed failover state, so the
	// peer update is always published; it runs in the background.
	go mq.PublishPeerUpdate(false)
	w.Header().Set("Content-Type", "application/json")
	logic.ReturnSuccessResponse(w, r, "relayed successfully")
}