netmaker/pro/controllers/failover.go
Abhishek K 307a3d1e4b
NET-1932: Merge egress and internet gateways (#3436)
* feat: api access tokens

* revoke all user tokens

* redefine access token api routes, add auto egress option to enrollment keys

* add server settings apis, add db table for settigs

* handle server settings updates

* switch to using settings from DB

* fix sever settings migration

* revet force migration for settings

* fix server settings database write

* egress model

* fix revoked tokens to be unauthorized

* update egress model

* remove unused functions

* convert access token to sql schema

* switch access token to sql schema

* fix merge conflicts

* fix server settings types

* bypass basic auth setting for super admin

* add TODO comment

* setup api handlers for egress revamp

* use single DB, fix update nat boolean field

* extend validaiton checks for egress ranges

* add migration to convert to new egress model

* fix panic interface conversion

* publish peer update on settings update

* revoke token generated by an user

* add user token creation restriction by user role

* add forbidden check for access token creation

* revoke user token when group or role is changed

* add default group to admin users on update

* chore(go): import style changes from migration branch;

1. Singular file names for table schema.
2. No table name method.
3. Use .Model instead of .Table.
4. No unnecessary tagging.

* remove nat check on egress gateway request

* Revert "remove nat check on egress gateway request"

This reverts commit 0aff12a189.

* remove nat check on egress gateway request

* feat(go): add db middleware;

* feat(go): restore method;

* feat(go): add user access token schema;

* add inet gw status to egress model

* fetch node ids in the tag, add inet gw info clients

* add inet gw info to node from egress list

* add migration logic internet gws

* create default acl policies

* add egress info

* add egress TODO

* add egress TODO

* fix user auth api:

* add reference id to acl policy

* add egress response from DB

* publish peer update on egress changes

* re initalise oauth and email config

* set verbosity

* normalise cidr on egress req

* add egress id to acl group

* change acls to use egress id

* resolve merge conflicts

* fix egress reference errors

* move egress model to schema

* add api context to DB

* sync auto update settings with hosts

* sync auto update settings with hosts

* check acl for egress node

* check for egress policy in the acl dst groups

* fix acl rules for egress policies with new models

* add status to egress model

* fix inet node func

* mask secret and convert jwt duration to minutes

* enable egress policies on creation

* convert jwt duration to minutes

* add relevant ranges to inet egress

* skip non active egress routes

* resolve merge conflicts

* fix static check

* update gorm tag for primary key on egress model

* create user policies for egress resources

* resolve merge conflicts

* get egress info on failover apis, add egress src validation for inet gws

* add additional validation checks on egress req

* add additional validation checks on egress req

* skip all resources for inet policy

* delete associated egress acl policies

* fix failover of inetclient

* avoid setting inet client asd inet gw

* fix all resource egress policy

* fix inet gw egress rule

* check for node egress on relay req

* fix egress acl rules comms

* add new field for egress info on node

* check acl policy in failover ctx

* avoid default host to be set as inet client

* fix relayed egress node

* add valid error messaging for egress validate func

* return if inet default host

* jump port detection to 51821

* check host ports on pull

* check user access gws via acls

* add validation check for default host and failover for inet clients

* add error messaging for acl policy check

* fix inet gw status

* ignore failover req for peer using inet gw

* check for allowed egress ranges for a peer

* add egress routes to static nodes by access

* avoid setting failvoer as inet client

* fix egress error messaging

* fix extclients egress comms

* fix inet gw acting as inet client

* return formatted error on update acl validation

* add default route for static nodes on inetclient

* check relay node acting as inetclient

* move inet node info to separate field, fix all resouces policy

* remove debug logs

---------

Co-authored-by: Vishal Dalwadi <dalwadivishal26@gmail.com>
2025-05-21 12:50:21 +05:30

454 lines
13 KiB
Go

package controllers
import (
"encoding/json"
"errors"
"fmt"
"net/http"
"github.com/google/uuid"
"github.com/gorilla/mux"
controller "github.com/gravitl/netmaker/controllers"
"github.com/gravitl/netmaker/logger"
"github.com/gravitl/netmaker/logic"
"github.com/gravitl/netmaker/models"
"github.com/gravitl/netmaker/mq"
proLogic "github.com/gravitl/netmaker/pro/logic"
"golang.org/x/exp/slog"
)
// FailOverHandlers - handlers for FailOver
func FailOverHandlers(r *mux.Router) {
r.HandleFunc("/api/v1/node/{nodeid}/failover", controller.Authorize(true, false, "host", http.HandlerFunc(getfailOver))).
Methods(http.MethodGet)
r.HandleFunc("/api/v1/node/{nodeid}/failover", logic.SecurityCheck(true, http.HandlerFunc(createfailOver))).
Methods(http.MethodPost)
r.HandleFunc("/api/v1/node/{nodeid}/failover", logic.SecurityCheck(true, http.HandlerFunc(deletefailOver))).
Methods(http.MethodDelete)
r.HandleFunc("/api/v1/node/{network}/failover/reset", logic.SecurityCheck(true, http.HandlerFunc(resetFailOver))).
Methods(http.MethodPost)
r.HandleFunc("/api/v1/node/{nodeid}/failover_me", controller.Authorize(true, false, "host", http.HandlerFunc(failOverME))).
Methods(http.MethodPost)
r.HandleFunc("/api/v1/node/{nodeid}/failover_check", controller.Authorize(true, false, "host", http.HandlerFunc(checkfailOverCtx))).
Methods(http.MethodGet)
}
// @Summary Get failover node
// @Router /api/v1/node/{nodeid}/failover [get]
// @Tags PRO
// @Param nodeid path string true "Node ID"
// @Success 200 {object} models.Node
// @Failure 400 {object} models.ErrorResponse
// @Failure 404 {object} models.ErrorResponse
func getfailOver(w http.ResponseWriter, r *http.Request) {
var params = mux.Vars(r)
nodeid := params["nodeid"]
// confirm host exists
node, err := logic.GetNodeByID(nodeid)
if err != nil {
logic.ReturnErrorResponse(w, r, logic.FormatError(err, "badrequest"))
return
}
failOverNode, exists := proLogic.FailOverExists(node.Network)
if !exists {
logic.ReturnErrorResponse(
w,
r,
logic.FormatError(errors.New("failover node not found"), "notfound"),
)
return
}
w.Header().Set("Content-Type", "application/json")
logic.ReturnSuccessResponseWithJson(w, r, failOverNode, "get failover node successfully")
}
// @Summary Create failover node
// @Router /api/v1/node/{nodeid}/failover [post]
// @Tags PRO
// @Param nodeid path string true "Node ID"
// @Success 200 {object} models.Node
// @Failure 400 {object} models.ErrorResponse
// @Failure 500 {object} models.ErrorResponse
func createfailOver(w http.ResponseWriter, r *http.Request) {
var params = mux.Vars(r)
nodeid := params["nodeid"]
// confirm host exists
node, err := logic.GetNodeByID(nodeid)
if err != nil {
slog.Error("failed to get node:", "error", err.Error())
logic.ReturnErrorResponse(w, r, logic.FormatError(err, "badrequest"))
return
}
err = proLogic.CreateFailOver(node)
if err != nil {
logic.ReturnErrorResponse(w, r, logic.FormatError(err, "internal"))
return
}
go mq.PublishPeerUpdate(false)
w.Header().Set("Content-Type", "application/json")
logic.ReturnSuccessResponseWithJson(w, r, node, "created failover successfully")
}
// @Summary Reset failover for a network
// @Router /api/v1/node/{network}/failover/reset [post]
// @Tags PRO
// @Param network path string true "Network ID"
// @Success 200 {object} models.SuccessResponse
// @Failure 500 {object} models.ErrorResponse
func resetFailOver(w http.ResponseWriter, r *http.Request) {
var params = mux.Vars(r)
net := params["network"]
nodes, err := logic.GetNetworkNodes(net)
if err != nil {
logic.ReturnErrorResponse(w, r, logic.FormatError(err, "internal"))
return
}
for _, node := range nodes {
if node.FailedOverBy != uuid.Nil {
node.FailedOverBy = uuid.Nil
node.FailOverPeers = make(map[string]struct{})
logic.UpsertNode(&node)
}
}
go mq.PublishPeerUpdate(false)
w.Header().Set("Content-Type", "application/json")
logic.ReturnSuccessResponse(w, r, "failover has been reset successfully")
}
// @Summary Delete failover node
// @Router /api/v1/node/{nodeid}/failover [delete]
// @Tags PRO
// @Param nodeid path string true "Node ID"
// @Success 200 {object} models.Node
// @Failure 400 {object} models.ErrorResponse
// @Failure 500 {object} models.ErrorResponse
func deletefailOver(w http.ResponseWriter, r *http.Request) {
var params = mux.Vars(r)
nodeid := params["nodeid"]
// confirm host exists
node, err := logic.GetNodeByID(nodeid)
if err != nil {
slog.Error("failed to get node:", "error", err.Error())
logic.ReturnErrorResponse(w, r, logic.FormatError(err, "badrequest"))
return
}
node.IsFailOver = false
// Reset FailOvered Peers
err = logic.UpsertNode(&node)
if err != nil {
slog.Error("failed to upsert node", "node", node.ID.String(), "error", err)
logic.ReturnErrorResponse(w, r, logic.FormatError(err, "internal"))
return
}
proLogic.RemoveFailOverFromCache(node.Network)
go func() {
proLogic.ResetFailOver(&node)
mq.PublishPeerUpdate(false)
}()
w.Header().Set("Content-Type", "application/json")
logic.ReturnSuccessResponseWithJson(w, r, node, "deleted failover successfully")
}
// @Summary Failover me
// @Router /api/v1/node/{nodeid}/failover_me [post]
// @Tags PRO
// @Param nodeid path string true "Node ID"
// @Accept json
// @Param body body models.FailOverMeReq true "Failover request"
// @Success 200 {object} models.SuccessResponse
// @Failure 400 {object} models.ErrorResponse
// @Failure 500 {object} models.ErrorResponse
func failOverME(w http.ResponseWriter, r *http.Request) {
var params = mux.Vars(r)
nodeid := params["nodeid"]
// confirm host exists
node, err := logic.GetNodeByID(nodeid)
if err != nil {
logger.Log(0, r.Header.Get("user"), "failed to get node:", err.Error())
logic.ReturnErrorResponse(w, r, logic.FormatError(err, "badrequest"))
return
}
host, err := logic.GetHost(node.HostID.String())
if err != nil {
logic.ReturnErrorResponse(w, r, logic.FormatError(err, "badrequest"))
return
}
failOverNode, exists := proLogic.FailOverExists(node.Network)
if !exists {
logic.ReturnErrorResponse(
w,
r,
logic.FormatError(
fmt.Errorf("req-from: %s, failover node doesn't exist in the network", host.Name),
"badrequest",
),
)
return
}
var failOverReq models.FailOverMeReq
err = json.NewDecoder(r.Body).Decode(&failOverReq)
if err != nil {
logger.Log(0, r.Header.Get("user"), "error decoding request body: ", err.Error())
logic.ReturnErrorResponse(w, r, logic.FormatError(err, "badrequest"))
return
}
var sendPeerUpdate bool
peerNode, err := logic.GetNodeByID(failOverReq.NodeID)
if err != nil {
slog.Error("peer not found: ", "nodeid", failOverReq.NodeID, "error", err)
logic.ReturnErrorResponse(
w,
r,
logic.FormatError(errors.New("peer not found"), "badrequest"),
)
return
}
logic.GetNodeEgressInfo(&node)
logic.GetNodeEgressInfo(&peerNode)
if peerNode.IsFailOver {
logic.ReturnErrorResponse(
w,
r,
logic.FormatError(errors.New("peer is acting as failover"), "badrequest"),
)
return
}
if node.IsFailOver {
logic.ReturnErrorResponse(
w,
r,
logic.FormatError(errors.New("node is acting as failover"), "badrequest"),
)
return
}
if peerNode.IsFailOver {
logic.ReturnErrorResponse(
w,
r,
logic.FormatError(errors.New("peer is acting as failover"), "badrequest"),
)
return
}
if node.IsRelayed && node.RelayedBy == peerNode.ID.String() {
logic.ReturnErrorResponse(
w,
r,
logic.FormatError(errors.New("node is relayed by peer node"), "badrequest"),
)
return
}
if node.IsRelay && peerNode.RelayedBy == node.ID.String() {
logic.ReturnErrorResponse(
w,
r,
logic.FormatError(errors.New("node acting as relay for the peer node"), "badrequest"),
)
return
}
if node.EgressDetails.IsInternetGateway && peerNode.EgressDetails.InternetGwID == node.ID.String() {
logic.ReturnErrorResponse(
w,
r,
logic.FormatError(
errors.New("node acting as internet gw for the peer node"),
"badrequest",
),
)
return
}
if node.EgressDetails.InternetGwID != "" && node.EgressDetails.InternetGwID == peerNode.ID.String() {
logic.ReturnErrorResponse(
w,
r,
logic.FormatError(
errors.New("node using a internet gw by the peer node"),
"badrequest",
),
)
return
}
err = proLogic.SetFailOverCtx(failOverNode, node, peerNode)
if err != nil {
slog.Debug("failed to create failover", "id", node.ID.String(),
"network", node.Network, "error", err)
logic.ReturnErrorResponse(
w,
r,
logic.FormatError(fmt.Errorf("failed to create failover: %v", err), "internal"),
)
return
}
slog.Info(
"[auto-relay] created relay on node",
"node",
node.ID.String(),
"network",
node.Network,
)
sendPeerUpdate = true
if sendPeerUpdate {
go mq.PublishPeerUpdate(false)
}
w.Header().Set("Content-Type", "application/json")
logic.ReturnSuccessResponse(w, r, "relayed successfully")
}
// @Summary checkfailOverCtx
// @Router /api/v1/node/{nodeid}/failover_check [get]
// @Tags PRO
// @Param nodeid path string true "Node ID"
// @Accept json
// @Param body body models.FailOverMeReq true "Failover request"
// @Success 200 {object} models.SuccessResponse
// @Failure 400 {object} models.ErrorResponse
// @Failure 500 {object} models.ErrorResponse
func checkfailOverCtx(w http.ResponseWriter, r *http.Request) {
var params = mux.Vars(r)
nodeid := params["nodeid"]
// confirm host exists
node, err := logic.GetNodeByID(nodeid)
if err != nil {
logger.Log(0, r.Header.Get("user"), "failed to get node:", err.Error())
logic.ReturnErrorResponse(w, r, logic.FormatError(err, "badrequest"))
return
}
host, err := logic.GetHost(node.HostID.String())
if err != nil {
logic.ReturnErrorResponse(w, r, logic.FormatError(err, "badrequest"))
return
}
failOverNode, exists := proLogic.FailOverExists(node.Network)
if !exists {
logic.ReturnErrorResponse(
w,
r,
logic.FormatError(
fmt.Errorf("req-from: %s, failover node doesn't exist in the network", host.Name),
"badrequest",
),
)
return
}
var failOverReq models.FailOverMeReq
err = json.NewDecoder(r.Body).Decode(&failOverReq)
if err != nil {
logger.Log(0, r.Header.Get("user"), "error decoding request body: ", err.Error())
logic.ReturnErrorResponse(w, r, logic.FormatError(err, "badrequest"))
return
}
peerNode, err := logic.GetNodeByID(failOverReq.NodeID)
if err != nil {
slog.Error("peer not found: ", "nodeid", failOverReq.NodeID, "error", err)
logic.ReturnErrorResponse(
w,
r,
logic.FormatError(errors.New("peer not found"), "badrequest"),
)
return
}
logic.GetNodeEgressInfo(&node)
logic.GetNodeEgressInfo(&peerNode)
if peerNode.IsFailOver {
logic.ReturnErrorResponse(
w,
r,
logic.FormatError(errors.New("peer is acting as failover"), "badrequest"),
)
return
}
if node.IsFailOver {
logic.ReturnErrorResponse(
w,
r,
logic.FormatError(errors.New("node is acting as failover"), "badrequest"),
)
return
}
if peerNode.IsFailOver {
logic.ReturnErrorResponse(
w,
r,
logic.FormatError(errors.New("peer is acting as failover"), "badrequest"),
)
return
}
if node.IsRelayed && node.RelayedBy == peerNode.ID.String() {
logic.ReturnErrorResponse(
w,
r,
logic.FormatError(errors.New("node is relayed by peer node"), "badrequest"),
)
return
}
if node.IsRelay && peerNode.RelayedBy == node.ID.String() {
logic.ReturnErrorResponse(
w,
r,
logic.FormatError(errors.New("node acting as relay for the peer node"), "badrequest"),
)
return
}
if node.EgressDetails.InternetGwID != "" || peerNode.EgressDetails.InternetGwID != "" {
logic.ReturnErrorResponse(
w,
r,
logic.FormatError(
errors.New("node using a internet gw by the peer node"),
"badrequest",
),
)
return
}
if node.EgressDetails.IsInternetGateway && peerNode.EgressDetails.InternetGwID == node.ID.String() {
logic.ReturnErrorResponse(
w,
r,
logic.FormatError(
errors.New("node acting as internet gw for the peer node"),
"badrequest",
),
)
return
}
if node.EgressDetails.InternetGwID != "" && node.EgressDetails.InternetGwID == peerNode.ID.String() {
logic.ReturnErrorResponse(
w,
r,
logic.FormatError(
errors.New("node using a internet gw by the peer node"),
"badrequest",
),
)
return
}
if ok := logic.IsPeerAllowed(node, peerNode, true); !ok {
logic.ReturnErrorResponse(
w,
r,
logic.FormatError(
errors.New("peers are not allowed to communicate"),
"badrequest",
),
)
return
}
err = proLogic.CheckFailOverCtx(failOverNode, node, peerNode)
if err != nil {
slog.Error("failover ctx cannot be set ", "error", err)
logic.ReturnErrorResponse(
w,
r,
logic.FormatError(fmt.Errorf("failover ctx cannot be set: %v", err), "internal"),
)
return
}
w.Header().Set("Content-Type", "application/json")
logic.ReturnSuccessResponse(w, r, "failover can be set")
}