mirror of
https://github.com/simple-login/app.git
synced 2024-09-20 06:55:59 +08:00
chore: add upcloud monitoring (#1835)
* chore: add upcloud monitoring * Added db_role to new_relic metrics --------- Co-authored-by: Adrià Casajús <adria.casajus@proton.ch>
This commit is contained in:
parent
9ab3695d36
commit
0e82801512
|
@ -535,3 +535,7 @@ DISABLE_RATE_LIMIT = "DISABLE_RATE_LIMIT" in os.environ
|
|||
|
||||
# Optional webhook setting; None when the variable is not set.
SUBSCRIPTION_CHANGE_WEBHOOK = os.environ.get("SUBSCRIPTION_CHANGE_WEBHOOK")
# Read from the environment as an integer, defaulting to 30.
MAX_API_KEYS = int(os.environ.get("MAX_API_KEYS", 30))

# UpCloud settings consumed by the monitor package: the username/password
# pair is passed to UpcloudClient and the database id to its get_metrics().
# Each one is None when the corresponding variable is not set.
UPCLOUD_USERNAME = os.environ.get("UPCLOUD_USERNAME")
UPCLOUD_PASSWORD = os.environ.get("UPCLOUD_PASSWORD")
UPCLOUD_DB_ID = os.environ.get("UPCLOUD_DB_ID")
|
||||
|
|
0
monitor/__init__.py
Normal file
0
monitor/__init__.py
Normal file
21
monitor/metric.py
Normal file
21
monitor/metric.py
Normal file
|
@ -0,0 +1,21 @@
|
|||
from dataclasses import dataclass
|
||||
from typing import List
|
||||
|
||||
|
||||
@dataclass
class UpcloudRecord:
    """One sampled value of an UpCloud metric for a single database node."""

    # Role of the node; get_metric sets it to "master" or "standby".
    db_role: str
    # Column label from the UpCloud response, e.g. "test-1 (master)".
    label: str
    # Timestamp string copied from a row of the UpCloud response,
    # e.g. "2022-01-21T13:12:00Z".
    time: str
    # Sampled value for this node.
    value: float
|
||||
|
||||
|
||||
@dataclass
class UpcloudMetric:
    """A named UpCloud metric together with its per-node records."""

    # Metric key as used in the UpCloud response, e.g. "cpu_usage".
    metric_name: str
    # One record per node that had a usable value; may be empty.
    records: List[UpcloudRecord]
|
||||
|
||||
|
||||
@dataclass
class UpcloudMetrics:
    """Container for all metrics extracted from one UpCloud response."""

    # Extracted metrics; empty when nothing could be fetched.
    metrics: List[UpcloudMetric]
|
20
monitor/metric_exporter.py
Normal file
20
monitor/metric_exporter.py
Normal file
|
@ -0,0 +1,20 @@
|
|||
from app.config import UPCLOUD_DB_ID, UPCLOUD_PASSWORD, UPCLOUD_USERNAME
|
||||
from app.log import LOG
|
||||
from monitor.newrelic import NewRelicClient
|
||||
from monitor.upcloud import UpcloudClient
|
||||
|
||||
|
||||
class MetricExporter:
    """Fetches UpCloud database metrics and forwards them to New Relic."""

    def __init__(self, newrelic_license: str):
        """Create the UpCloud and New Relic clients.

        Args:
            newrelic_license: License key handed to NewRelicClient.

        Raises:
            Exception: Propagated from UpcloudClient when UPCLOUD_USERNAME
                or UPCLOUD_PASSWORD is not set.
        """
        self.__upcloud = UpcloudClient(
            username=UPCLOUD_USERNAME, password=UPCLOUD_PASSWORD
        )
        self.__newrelic = NewRelicClient(newrelic_license)

    def run(self):
        """Export the current metrics once.

        Failures are logged as warnings instead of being raised, so a
        caller that invokes this in a loop keeps running.
        """
        try:
            metrics = self.__upcloud.get_metrics(UPCLOUD_DB_ID)

            # UpcloudClient.get_metrics returns an empty collection on a
            # non-200 response; do not report that as a successful export.
            if not any(metric.records for metric in metrics.metrics):
                LOG.warn("No Upcloud metrics to export")
                return

            self.__newrelic.send(metrics)
            LOG.info("Upcloud metrics sent to NewRelic")
        except Exception as e:
            LOG.warn(f"Could not export metrics: {e}")
|
26
monitor/newrelic.py
Normal file
26
monitor/newrelic.py
Normal file
|
@ -0,0 +1,26 @@
|
|||
from monitor.metric import UpcloudMetrics
|
||||
|
||||
from newrelic_telemetry_sdk import GaugeMetric, MetricClient
|
||||
|
||||
# Host handed to MetricClient when the New Relic client is constructed.
_NEWRELIC_BASE_HOST = "metric-api.eu.newrelic.com"
|
||||
|
||||
|
||||
class NewRelicClient:
    """Sends UpCloud database metrics to New Relic as gauge metrics."""

    def __init__(self, license_key: str):
        """Create the underlying MetricClient.

        Args:
            license_key: New Relic license key used by the MetricClient.
        """
        self.__client = MetricClient(license_key=license_key, host=_NEWRELIC_BASE_HOST)

    def send(self, metrics: UpcloudMetrics):
        """Send every record of every metric in a single batch.

        Each record becomes a gauge named ``upcloud.db.<metric_name>`` and
        tagged with the node label (``host``) and its ``db_role``. Nothing
        is sent when there are no records.

        Args:
            metrics: Metrics to export.

        Raises:
            Whatever ``raise_for_status()`` raises on the response returned
            by ``send_batch`` when the request was not accepted.
        """
        batch = [
            GaugeMetric(
                name=f"upcloud.db.{metric.metric_name}",
                value=record.value,
                tags={"host": record.label, "db_role": record.db_role},
            )
            for metric in metrics.metrics
            for record in metric.records
        ]

        # Avoid a pointless HTTP request when UpCloud returned no data.
        if not batch:
            return

        response = self.__client.send_batch(batch)
        response.raise_for_status()
|
82
monitor/upcloud.py
Normal file
82
monitor/upcloud.py
Normal file
|
@ -0,0 +1,82 @@
|
|||
from app.log import LOG
|
||||
from monitor.metric import UpcloudMetric, UpcloudMetrics, UpcloudRecord
|
||||
|
||||
import base64
|
||||
import requests
|
||||
from typing import Any
|
||||
|
||||
|
||||
# Root URL of the UpCloud HTTP API; request paths are appended to it.
BASE_URL = "https://api.upcloud.com"
|
||||
|
||||
|
||||
def get_metric(json: Any, metric: str) -> UpcloudMetric:
    """Extract the most recent sample of one metric from an UpCloud response.

    The entry for ``metric`` is read as ``{"data": {"cols": [...],
    "rows": [...]}}`` where the first column holds the timestamp and every
    following column is one database node.

    For each node the newest row is used; when its value is ``None`` the row
    before it is tried instead. A node whose value is missing in both rows
    (or a metric with no rows at all) yields no record and a warning.
    Each record carries the timestamp of the row its value came from.

    Args:
        json: Decoded JSON body of the UpCloud metrics response.
        metric: Name of the metric to extract, e.g. ``"cpu_usage"``.

    Returns:
        An UpcloudMetric named ``metric``; its record list is empty when the
        metric is absent from ``json`` or no usable value was found.
    """
    records = []

    if metric not in json:
        return UpcloudMetric(metric_name=metric, records=records)

    data = json[metric]["data"]
    # Newest first, at most two rows. Slicing never raises, so an empty or
    # single-row table is handled without an IndexError.
    candidate_rows = data["rows"][-2:][::-1]

    # Column 0 is the timestamp; node columns start at position 1.
    for position, column in enumerate(data["cols"][1:], start=1):
        label = column["label"]
        row = next((r for r in candidate_rows if r[position] is not None), None)

        if row is None:
            LOG.warn(f"Could not get value for metric {metric}")
            continue

        db_role = "master" if "(master)" in label else "standby"
        records.append(
            UpcloudRecord(
                time=row[0], db_role=db_role, label=label, value=row[position]
            )
        )

    return UpcloudMetric(metric_name=metric, records=records)
|
||||
|
||||
|
||||
def get_metrics(json: Any) -> UpcloudMetrics:
    """Extract every exported metric from a decoded UpCloud response.

    Args:
        json: Decoded JSON body of the UpCloud metrics response.

    Returns:
        An UpcloudMetrics holding one UpcloudMetric per exported metric
        name, in the fixed order listed below.
    """
    exported_names = (
        "cpu_usage",
        "disk_usage",
        "diskio_reads",
        "diskio_writes",
        "load_average",
        "mem_usage",
        "net_receive",
        "net_send",
    )
    return UpcloudMetrics(metrics=[get_metric(json, name) for name in exported_names])
|
||||
|
||||
|
||||
class UpcloudClient:
    """Small UpCloud API client authenticated with HTTP Basic credentials."""

    # Seconds to wait for UpCloud before giving up. requests applies no
    # timeout by default, which would let one stalled call block the caller.
    REQUEST_TIMEOUT = 10

    def __init__(self, username: str, password: str):
        """Create a session that sends the Basic Authorization header.

        Args:
            username: UpCloud API username.
            password: UpCloud API password.

        Raises:
            Exception: If ``username`` or ``password`` is empty or None.
        """
        if not username:
            raise Exception("UpcloudClient username must be set")
        if not password:
            raise Exception("UpcloudClient password must be set")

        client = requests.Session()
        encoded_auth = base64.b64encode(
            f"{username}:{password}".encode("utf-8")
        ).decode("utf-8")
        # Update instead of replacing, so the session keeps the default
        # headers that requests sets up (User-Agent, Accept-Encoding, ...).
        client.headers.update({"Authorization": f"Basic {encoded_auth}"})
        self.__client = client

    def get_metrics(self, db_uuid: str) -> UpcloudMetrics:
        """Fetch the last hour of metrics for one managed database.

        Args:
            db_uuid: Identifier of the database, inserted into the URL path.

        Returns:
            The parsed metrics, or an empty UpcloudMetrics when the API
            answers with anything other than HTTP 200.

        Raises:
            requests.exceptions.RequestException: On connection errors or
                when no response arrives within REQUEST_TIMEOUT seconds.
        """
        url = f"{BASE_URL}/1.3/database/{db_uuid}/metrics?period=hour"
        LOG.d(f"Performing request to {url}")
        response = self.__client.get(url, timeout=self.REQUEST_TIMEOUT)
        LOG.d(f"Status code: {response.status_code}")
        if response.status_code != 200:
            # Make the failure visible; the status is otherwise debug-only.
            LOG.warn(
                f"Unexpected status code {response.status_code} from Upcloud metrics"
            )
            return UpcloudMetrics(metrics=[])

        return get_metrics(response.json())
|
|
@ -1,3 +1,4 @@
|
|||
import configparser
|
||||
import os
|
||||
import subprocess
|
||||
from time import sleep
|
||||
|
@ -7,6 +8,7 @@ import newrelic.agent
|
|||
|
||||
from app.db import Session
|
||||
from app.log import LOG
|
||||
from monitor.metric_exporter import MetricExporter
|
||||
|
||||
# the number of consecutive fails
|
||||
# if more than _max_nb_fails, alert
|
||||
|
@ -19,6 +21,18 @@ _max_nb_fails = 10
|
|||
# the maximum number of emails in incoming & active queue
|
||||
_max_incoming = 50
|
||||
|
||||
# Name of the environment variable holding the path of the New Relic
# configuration file.
_NR_CONFIG_FILE_LOCATION_VAR = "NEW_RELIC_CONFIG_FILE"


def get_newrelic_license() -> str:
    """Return the New Relic license key from the configuration file.

    The path of the INI file is taken from the environment variable named by
    ``_NR_CONFIG_FILE_LOCATION_VAR``; the key is the ``license_key`` option
    of its ``[newrelic]`` section.

    Returns:
        The configured license key.

    Raises:
        Exception: If the environment variable is not defined.
        KeyError: If the file cannot be read, or it has no ``[newrelic]``
            section or no ``license_key`` option.
    """
    nr_file = os.environ.get(_NR_CONFIG_FILE_LOCATION_VAR)
    if nr_file is None:
        raise Exception(f"{_NR_CONFIG_FILE_LOCATION_VAR} not defined")

    config = configparser.ConfigParser()
    # read() silently skips files it cannot open and returns the list of
    # files it actually parsed; keep it to explain a later lookup failure.
    files_read = config.read(nr_file)

    try:
        return config["newrelic"]["license_key"]
    except KeyError as err:
        reason = (
            "has no [newrelic] license_key entry"
            if files_read
            else "could not be read"
        )
        raise KeyError(f"New Relic config file {nr_file} {reason}") from err
|
||||
|
||||
|
||||
@newrelic.agent.background_task()
|
||||
def log_postfix_metrics():
|
||||
|
@ -80,10 +94,13 @@ def log_nb_db_connection():
|
|||
|
||||
|
||||
if __name__ == "__main__":
    # The UpCloud export is optional: its settings default to None and the
    # exporter raises when they (or the New Relic config file) are missing.
    # Do not let that stop the postfix / DB connection monitoring below.
    try:
        exporter = MetricExporter(get_newrelic_license())
    except Exception as e:
        LOG.warn(f"Upcloud metric export disabled: {e}")
        exporter = None

    while True:
        log_postfix_metrics()
        log_nb_db_connection()
        Session.close()

        if exporter is not None:
            exporter.run()

        # 1 min
        sleep(60)
|
||||
|
|
194
poetry.lock
generated
194
poetry.lock
generated
File diff suppressed because it is too large
Load diff
|
@ -111,6 +111,7 @@ Deprecated = "^1.2.13"
|
|||
cryptography = "37.0.1"
|
||||
SQLAlchemy = "1.3.24"
|
||||
redis = "^4.5.3"
|
||||
newrelic-telemetry-sdk = "^0.5.0"
|
||||
|
||||
[tool.poetry.dev-dependencies]
|
||||
pytest = "^7.0.0"
|
||||
|
|
0
tests/monitor/__init__.py
Normal file
0
tests/monitor/__init__.py
Normal file
350
tests/monitor/test_upcloud_get_metric.py
Normal file
350
tests/monitor/test_upcloud_get_metric.py
Normal file
|
@ -0,0 +1,350 @@
|
|||
from monitor.upcloud import get_metric, get_metrics
|
||||
from monitor.metric import UpcloudMetrics, UpcloudMetric, UpcloudRecord
|
||||
|
||||
import json
|
||||
|
||||
# Sample UpCloud metrics payload used by the tests below. Every metric has a
# "time" column followed by one column per node ("test-1 (master)" and
# "test-2 (standby)"); rows are ordered oldest to newest.
MOCK_RESPONSE = """
{
    "cpu_usage": {
        "data": {
            "cols": [
                { "label": "time", "type": "date" },
                { "label": "test-1 (master)", "type": "number" },
                { "label": "test-2 (standby)", "type": "number" }
            ],
            "rows": [
                ["2022-01-21T13:10:30Z", 2.744682398273781, 3.054323473090861],
                ["2022-01-21T13:11:00Z", 3.0735645433218366, 2.972423595745795],
                ["2022-01-21T13:11:30Z", 2.61619694060839, 3.1358378052207883],
                ["2022-01-21T13:12:00Z", 3.275132296130991, 4.196249043309251]
            ]
        },
        "hints": { "title": "CPU usage %" }
    },
    "disk_usage": {
        "data": {
            "cols": [
                { "label": "time", "type": "date" },
                { "label": "test-1 (master)", "type": "number" },
                { "label": "test-2 (standby)", "type": "number" }
            ],
            "rows": [
                ["2022-01-21T13:10:30Z", 5.654416415900109, 5.58959125727556],
                ["2022-01-21T13:11:00Z", 5.654416415900109, 5.58959125727556],
                ["2022-01-21T13:11:30Z", 5.654416415900109, 5.58959125727556]
            ]
        },
        "hints": { "title": "Disk space usage %" }
    },
    "diskio_reads": {
        "data": {
            "cols": [
                { "label": "time", "type": "date" },
                { "label": "test-1 (master)", "type": "number" },
                { "label": "test-2 (standby)", "type": "number" }
            ],
            "rows": [
                ["2022-01-21T13:10:30Z", 0, 0],
                ["2022-01-21T13:11:00Z", 0, 0],
                ["2022-01-21T13:11:30Z", 0, 0]
            ]
        },
        "hints": { "title": "Disk iops (reads)" }
    },
    "diskio_writes": {
        "data": {
            "cols": [
                { "label": "time", "type": "date" },
                { "label": "test-1 (master)", "type": "number" },
                { "label": "test-2 (standby)", "type": "number" }
            ],
            "rows": [
                ["2022-01-21T13:10:30Z", 3, 2],
                ["2022-01-21T13:11:00Z", 2, 3],
                ["2022-01-21T13:11:30Z", 4, 3]
            ]
        },
        "hints": { "title": "Disk iops (writes)" }
    },
    "load_average": {
        "data": {
            "cols": [
                { "label": "time", "type": "date" },
                { "label": "test-1 (master)", "type": "number" },
                { "label": "test-2 (standby)", "type": "number" }
            ],
            "rows": [
                ["2022-01-21T13:10:30Z", 0.11, 0.11],
                ["2022-01-21T13:11:00Z", 0.14, 0.1],
                ["2022-01-21T13:11:30Z", 0.14, 0.09]
            ]
        },
        "hints": { "title": "Load average (5 min)" }
    },
    "mem_usage": {
        "data": {
            "cols": [
                { "label": "time", "type": "date" },
                { "label": "test-1 (master)", "type": "number" },
                { "label": "test-2 (standby)", "type": "number" }
            ],
            "rows": [
                ["2022-01-21T13:10:30Z", 11.491766148261078, 12.318932883261219],
                ["2022-01-21T13:11:00Z", 11.511967645759277, 12.304403727425075],
                ["2022-01-21T13:11:30Z", 11.488581675749048, 12.272260458006759]
            ]
        },
        "hints": { "title": "Memory usage %" }
    },
    "net_receive": {
        "data": {
            "cols": [
                { "label": "time", "type": "date" },
                { "label": "test-1 (master)", "type": "number" },
                { "label": "test-2 (standby)", "type": "number" }
            ],
            "rows": [
                ["2022-01-21T13:10:30Z", 442, 470],
                ["2022-01-21T13:11:00Z", 439, 384],
                ["2022-01-21T13:11:30Z", 466, 458]
            ]
        },
        "hints": { "title": "Network receive (bytes/s)" }
    },
    "net_send": {
        "data": {
            "cols": [
                { "label": "time", "type": "date" },
                { "label": "test-1 (master)", "type": "number" },
                { "label": "test-2 (standby)", "type": "number" }
            ],
            "rows": [
                ["2022-01-21T13:10:30Z", 672, 581],
                ["2022-01-21T13:11:00Z", 660, 555],
                ["2022-01-21T13:11:30Z", 694, 573]
            ]
        },
        "hints": { "title": "Network transmit (bytes/s)" }
    }
}
"""
|
||||
|
||||
|
||||
def test_get_metrics():
    """get_metrics returns the newest row of every metric in MOCK_RESPONSE."""
    # (metric name, timestamp of the newest row, master value, standby value)
    newest_rows = [
        ("cpu_usage", "2022-01-21T13:12:00Z", 3.275132296130991, 4.196249043309251),
        ("disk_usage", "2022-01-21T13:11:30Z", 5.654416415900109, 5.58959125727556),
        ("diskio_reads", "2022-01-21T13:11:30Z", 0, 0),
        ("diskio_writes", "2022-01-21T13:11:30Z", 4, 3),
        ("load_average", "2022-01-21T13:11:30Z", 0.14, 0.09),
        ("mem_usage", "2022-01-21T13:11:30Z", 11.488581675749048, 12.272260458006759),
        ("net_receive", "2022-01-21T13:11:30Z", 466, 458),
        ("net_send", "2022-01-21T13:11:30Z", 694, 573),
    ]
    expected = UpcloudMetrics(
        metrics=[
            UpcloudMetric(
                metric_name=name,
                records=[
                    UpcloudRecord(
                        db_role="master",
                        label="test-1 (master)",
                        time=timestamp,
                        value=master_value,
                    ),
                    UpcloudRecord(
                        db_role="standby",
                        label="test-2 (standby)",
                        time=timestamp,
                        value=standby_value,
                    ),
                ],
            )
            for name, timestamp, master_value, standby_value in newest_rows
        ]
    )

    assert get_metrics(json.loads(MOCK_RESPONSE)) == expected
|
||||
|
||||
|
||||
def test_get_metric():
    """get_metric yields one record per node, taken from the newest row."""
    metric_name = "cpu_usage"
    metric = get_metric(json.loads(MOCK_RESPONSE), metric_name)

    assert metric.metric_name == metric_name
    assert len(metric.records) == 2

    master, standby = metric.records

    assert master.label == "test-1 (master)"
    assert master.time == "2022-01-21T13:12:00Z"
    assert master.value == 3.275132296130991

    assert standby.label == "test-2 (standby)"
    assert standby.time == "2022-01-21T13:12:00Z"
    assert standby.value == 4.196249043309251
|
||||
|
||||
|
||||
def test_get_metric_with_none_value():
    """A null in the newest row falls back to the value of the row before."""
    response_str = """
{
    "cpu_usage": {
        "data": {
            "cols": [
                { "label": "time", "type": "date" },
                { "label": "test-1 (master)", "type": "number" },
                { "label": "test-2 (standby)", "type": "number" }
            ],
            "rows": [
                ["2022-01-21T13:10:30Z", 2.744682398273781, 3.054323473090861],
                ["2022-01-21T13:11:00Z", 3.0735645433218366, 2.972423595745795],
                ["2022-01-21T13:11:30Z", null, 3.1358378052207883],
                ["2022-01-21T13:12:00Z", 3.275132296130991, null]
            ]
        },
        "hints": { "title": "CPU usage %" }
    }
}
"""
    metric = get_metric(json.loads(response_str), "cpu_usage")

    # The master value is present in the newest row.
    master = metric.records[0]
    assert master.label == "test-1 (master)"
    assert master.value == 3.275132296130991

    # The standby value is null in the newest row, so the previous one is used.
    standby = metric.records[1]
    assert standby.label == "test-2 (standby)"
    assert standby.value == 3.1358378052207883
|
||||
|
||||
|
||||
def test_get_metric_with_none_value_in_last_two_positions():
    """A node that is null in both of the last two rows produces no record."""
    response_str = """
{
    "cpu_usage": {
        "data": {
            "cols": [
                { "label": "time", "type": "date" },
                { "label": "test-1 (master)", "type": "number" },
                { "label": "test-2 (standby)", "type": "number" }
            ],
            "rows": [
                ["2022-01-21T13:10:30Z", 2.744682398273781, 3.054323473090861],
                ["2022-01-21T13:11:00Z", 3.0735645433218366, 2.972423595745795],
                ["2022-01-21T13:11:30Z", null, null],
                ["2022-01-21T13:12:00Z", 3.275132296130991, null]
            ]
        },
        "hints": { "title": "CPU usage %" }
    }
}
"""
    metric = get_metric(json.loads(response_str), "cpu_usage")

    # Only the master node has a usable value; the standby node is dropped.
    assert len(metric.records) == 1

    only_record = metric.records[0]
    assert only_record.label == "test-1 (master)"
    assert only_record.value == 3.275132296130991
|
Loading…
Reference in a new issue