mirror of
https://github.com/1Panel-dev/1Panel.git
synced 2025-12-17 21:08:25 +08:00
parent
3f47a6e701
commit
63f9368e26
14 changed files with 654 additions and 342 deletions
|
|
@ -31,6 +31,27 @@ func (b *BaseApi) LoadMonitor(c *gin.Context) {
|
||||||
helper.SuccessWithData(c, data)
|
helper.SuccessWithData(c, data)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// @Tags Monitor
|
||||||
|
// @Summary Load monitor data
|
||||||
|
// @Param request body dto.MonitorGPUSearch true "request"
|
||||||
|
// @Success 200 {object} dto.dto.MonitorGPUData
|
||||||
|
// @Security ApiKeyAuth
|
||||||
|
// @Security Timestamp
|
||||||
|
// @Router /hosts/monitor/gpu/search [post]
|
||||||
|
func (b *BaseApi) LoadGPUMonitor(c *gin.Context) {
|
||||||
|
var req dto.MonitorGPUSearch
|
||||||
|
if err := helper.CheckBindAndValidate(&req, c); err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
data, err := monitorService.LoadGPUMonitorData(req)
|
||||||
|
if err != nil {
|
||||||
|
helper.InternalServer(c, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
helper.SuccessWithData(c, data)
|
||||||
|
}
|
||||||
|
|
||||||
// @Tags Monitor
|
// @Tags Monitor
|
||||||
// @Summary Clean monitor data
|
// @Summary Clean monitor data
|
||||||
// @Success 200
|
// @Success 200
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,7 @@ type MonitorSearch struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
// MonitorData is one named series of monitoring samples as serialized to the
// client.
type MonitorData struct {
	// Param is the name of the series.
	Param string `json:"param"`
	// Date holds the sample timestamps.
	Date []time.Time `json:"date"`
	// Value holds the sample payloads.
	// NOTE(review): presumably index-aligned with Date — confirm in the producer.
	Value []interface{} `json:"value"`
}
|
||||||
|
|
@ -37,3 +37,36 @@ type MonitorSettingUpdate struct {
|
||||||
Key string `json:"key" validate:"required,oneof=MonitorStatus MonitorStoreDays MonitorInterval DefaultNetwork DefaultIO"`
|
Key string `json:"key" validate:"required,oneof=MonitorStatus MonitorStoreDays MonitorInterval DefaultNetwork DefaultIO"`
|
||||||
Value string `json:"value"`
|
Value string `json:"value"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// MonitorGPUSearch is the request body for querying stored GPU samples.
type MonitorGPUSearch struct {
	// ProductName selects the device whose samples are returned.
	ProductName string `json:"productName"`
	// StartTime and EndTime bound the sample creation time.
	StartTime time.Time `json:"startTime"`
	EndTime   time.Time `json:"endTime"`
}
|
||||||
|
type MonitorGPUData struct {
|
||||||
|
ProductNames []string `json:"productNames"`
|
||||||
|
Date []time.Time `json:"date"`
|
||||||
|
GPUValue []float64 `json:"gpuValue"`
|
||||||
|
TemperatureValue []int `json:"temperatureValue"`
|
||||||
|
PowerValue []GPUPowerUsageHelper `json:"powerValue"`
|
||||||
|
MemoryValue []GPUMemoryUsageHelper `json:"memoryValue"`
|
||||||
|
SpeedValue []int `json:"speedValue"`
|
||||||
|
}
|
||||||
|
// GPUPowerUsageHelper describes the power state of one GPU sample.
type GPUPowerUsageHelper struct {
	// Total is the device power limit.
	Total float64 `json:"total"`
	// Used is the power drawn when the sample was taken.
	Used float64 `json:"used"`
	// Percent is Used relative to Total, scaled to 0-100.
	Percent float64 `json:"percent"`
}
|
||||||
|
type GPUMemoryUsageHelper struct {
|
||||||
|
Total int `json:"total"`
|
||||||
|
Used int `json:"used"`
|
||||||
|
Percent float64 `json:"percent"`
|
||||||
|
|
||||||
|
GPUProcesses []GPUProcess `json:"gpuProcesses"`
|
||||||
|
}
|
||||||
|
// GPUProcess is one process entry attached to a GPU sample. Every field is
// carried as text.
type GPUProcess struct {
	// Pid is the process identifier.
	Pid string `json:"pid"`
	// Type is the process type as reported by the device tooling.
	Type string `json:"type"`
	// ProcessName is the name or command of the process.
	ProcessName string `json:"processName"`
	// UsedMemory is the device memory attributed to the process.
	UsedMemory string `json:"usedMemory"`
}
|
||||||
|
|
|
||||||
|
|
@ -31,3 +31,16 @@ type MonitorNetwork struct {
|
||||||
Up float64 `json:"up"`
|
Up float64 `json:"up"`
|
||||||
Down float64 `json:"down"`
|
Down float64 `json:"down"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type MonitorGPU struct {
|
||||||
|
BaseModel
|
||||||
|
ProductName string `json:"productName"`
|
||||||
|
GPUUtil float64 `json:"gpuUtil"`
|
||||||
|
Temperature int `json:"temperature"`
|
||||||
|
PowerDraw float64 `json:"powerDraw"`
|
||||||
|
MaxPowerLimit float64 `json:"maxPowerLimit"`
|
||||||
|
MemUsed int `json:"memUsed"`
|
||||||
|
MemTotal int `json:"memTotal"`
|
||||||
|
FanSpeed int `json:"fanSpeed"`
|
||||||
|
Processes string `json:"processes"`
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -5,21 +5,27 @@ import (
|
||||||
|
|
||||||
"github.com/1Panel-dev/1Panel/agent/app/model"
|
"github.com/1Panel-dev/1Panel/agent/app/model"
|
||||||
"github.com/1Panel-dev/1Panel/agent/global"
|
"github.com/1Panel-dev/1Panel/agent/global"
|
||||||
|
"gorm.io/gorm"
|
||||||
)
|
)
|
||||||
|
|
||||||
// MonitorRepo is the stateless implementation of IMonitorRepo.
type MonitorRepo struct{}
|
||||||
|
|
||||||
type IMonitorRepo interface {
|
type IMonitorRepo interface {
|
||||||
GetBase(opts ...DBOption) ([]model.MonitorBase, error)
|
GetBase(opts ...DBOption) ([]model.MonitorBase, error)
|
||||||
|
GetGPU(opts ...DBOption) ([]model.MonitorGPU, error)
|
||||||
GetIO(opts ...DBOption) ([]model.MonitorIO, error)
|
GetIO(opts ...DBOption) ([]model.MonitorIO, error)
|
||||||
GetNetwork(opts ...DBOption) ([]model.MonitorNetwork, error)
|
GetNetwork(opts ...DBOption) ([]model.MonitorNetwork, error)
|
||||||
|
|
||||||
CreateMonitorBase(model model.MonitorBase) error
|
CreateMonitorBase(model model.MonitorBase) error
|
||||||
|
BatchCreateMonitorGPU(list []model.MonitorGPU) error
|
||||||
BatchCreateMonitorIO(ioList []model.MonitorIO) error
|
BatchCreateMonitorIO(ioList []model.MonitorIO) error
|
||||||
BatchCreateMonitorNet(ioList []model.MonitorNetwork) error
|
BatchCreateMonitorNet(ioList []model.MonitorNetwork) error
|
||||||
DelMonitorBase(timeForDelete time.Time) error
|
DelMonitorBase(timeForDelete time.Time) error
|
||||||
|
DelMonitorGPU(timeForDelete time.Time) error
|
||||||
DelMonitorIO(timeForDelete time.Time) error
|
DelMonitorIO(timeForDelete time.Time) error
|
||||||
DelMonitorNet(timeForDelete time.Time) error
|
DelMonitorNet(timeForDelete time.Time) error
|
||||||
|
|
||||||
|
WithByProductName(name string) DBOption
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewIMonitorRepo() IMonitorRepo {
|
func NewIMonitorRepo() IMonitorRepo {
|
||||||
|
|
@ -53,10 +59,22 @@ func (u *MonitorRepo) GetNetwork(opts ...DBOption) ([]model.MonitorNetwork, erro
|
||||||
err := db.Find(&data).Error
|
err := db.Find(&data).Error
|
||||||
return data, err
|
return data, err
|
||||||
}
|
}
|
||||||
|
func (u *MonitorRepo) GetGPU(opts ...DBOption) ([]model.MonitorGPU, error) {
|
||||||
|
var data []model.MonitorGPU
|
||||||
|
db := global.GPUMonitorDB
|
||||||
|
for _, opt := range opts {
|
||||||
|
db = opt(db)
|
||||||
|
}
|
||||||
|
err := db.Find(&data).Error
|
||||||
|
return data, err
|
||||||
|
}
|
||||||
|
|
||||||
func (u *MonitorRepo) CreateMonitorBase(model model.MonitorBase) error {
|
func (u *MonitorRepo) CreateMonitorBase(model model.MonitorBase) error {
|
||||||
return global.MonitorDB.Create(&model).Error
|
return global.MonitorDB.Create(&model).Error
|
||||||
}
|
}
|
||||||
|
func (s *MonitorRepo) BatchCreateMonitorGPU(list []model.MonitorGPU) error {
|
||||||
|
return global.GPUMonitorDB.CreateInBatches(&list, len(list)).Error
|
||||||
|
}
|
||||||
func (u *MonitorRepo) BatchCreateMonitorIO(ioList []model.MonitorIO) error {
|
func (u *MonitorRepo) BatchCreateMonitorIO(ioList []model.MonitorIO) error {
|
||||||
return global.MonitorDB.CreateInBatches(ioList, len(ioList)).Error
|
return global.MonitorDB.CreateInBatches(ioList, len(ioList)).Error
|
||||||
}
|
}
|
||||||
|
|
@ -72,3 +90,12 @@ func (u *MonitorRepo) DelMonitorIO(timeForDelete time.Time) error {
|
||||||
func (u *MonitorRepo) DelMonitorNet(timeForDelete time.Time) error {
|
func (u *MonitorRepo) DelMonitorNet(timeForDelete time.Time) error {
|
||||||
return global.MonitorDB.Where("created_at < ?", timeForDelete).Delete(&model.MonitorNetwork{}).Error
|
return global.MonitorDB.Where("created_at < ?", timeForDelete).Delete(&model.MonitorNetwork{}).Error
|
||||||
}
|
}
|
||||||
|
func (s *MonitorRepo) DelMonitorGPU(timeForDelete time.Time) error {
|
||||||
|
return global.GPUMonitorDB.Where("created_at < ?", timeForDelete).Delete(&model.MonitorGPU{}).Error
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *MonitorRepo) WithByProductName(name string) DBOption {
|
||||||
|
return func(g *gorm.DB) *gorm.DB {
|
||||||
|
return g.Where("product_name = ?", name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,6 @@ package repo
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"errors"
|
"errors"
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/1Panel-dev/1Panel/agent/app/model"
|
"github.com/1Panel-dev/1Panel/agent/app/model"
|
||||||
"github.com/1Panel-dev/1Panel/agent/global"
|
"github.com/1Panel-dev/1Panel/agent/global"
|
||||||
|
|
@ -19,12 +18,6 @@ type ISettingRepo interface {
|
||||||
Update(key, value string) error
|
Update(key, value string) error
|
||||||
WithByKey(key string) DBOption
|
WithByKey(key string) DBOption
|
||||||
|
|
||||||
CreateMonitorBase(model model.MonitorBase) error
|
|
||||||
BatchCreateMonitorIO(ioList []model.MonitorIO) error
|
|
||||||
BatchCreateMonitorNet(ioList []model.MonitorNetwork) error
|
|
||||||
DelMonitorBase(timeForDelete time.Time) error
|
|
||||||
DelMonitorIO(timeForDelete time.Time) error
|
|
||||||
DelMonitorNet(timeForDelete time.Time) error
|
|
||||||
UpdateOrCreate(key, value string) error
|
UpdateOrCreate(key, value string) error
|
||||||
|
|
||||||
GetDescription(opts ...DBOption) (model.CommonDescription, error)
|
GetDescription(opts ...DBOption) (model.CommonDescription, error)
|
||||||
|
|
@ -85,25 +78,6 @@ func (s *SettingRepo) Update(key, value string) error {
|
||||||
return global.DB.Model(&model.Setting{}).Where("key = ?", key).Updates(map[string]interface{}{"value": value}).Error
|
return global.DB.Model(&model.Setting{}).Where("key = ?", key).Updates(map[string]interface{}{"value": value}).Error
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *SettingRepo) CreateMonitorBase(model model.MonitorBase) error {
|
|
||||||
return global.MonitorDB.Create(&model).Error
|
|
||||||
}
|
|
||||||
func (s *SettingRepo) BatchCreateMonitorIO(ioList []model.MonitorIO) error {
|
|
||||||
return global.MonitorDB.CreateInBatches(ioList, len(ioList)).Error
|
|
||||||
}
|
|
||||||
func (s *SettingRepo) BatchCreateMonitorNet(ioList []model.MonitorNetwork) error {
|
|
||||||
return global.MonitorDB.CreateInBatches(ioList, len(ioList)).Error
|
|
||||||
}
|
|
||||||
func (s *SettingRepo) DelMonitorBase(timeForDelete time.Time) error {
|
|
||||||
return global.MonitorDB.Where("created_at < ?", timeForDelete).Delete(&model.MonitorBase{}).Error
|
|
||||||
}
|
|
||||||
func (s *SettingRepo) DelMonitorIO(timeForDelete time.Time) error {
|
|
||||||
return global.MonitorDB.Where("created_at < ?", timeForDelete).Delete(&model.MonitorIO{}).Error
|
|
||||||
}
|
|
||||||
func (s *SettingRepo) DelMonitorNet(timeForDelete time.Time) error {
|
|
||||||
return global.MonitorDB.Where("created_at < ?", timeForDelete).Delete(&model.MonitorNetwork{}).Error
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *SettingRepo) UpdateOrCreate(key, value string) error {
|
func (s *SettingRepo) UpdateOrCreate(key, value string) error {
|
||||||
var setting model.Setting
|
var setting model.Setting
|
||||||
result := global.DB.Where("key = ?", key).First(&setting)
|
result := global.DB.Where("key = ?", key).First(&setting)
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@ import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"sort"
|
"sort"
|
||||||
"strconv"
|
"strconv"
|
||||||
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/1Panel-dev/1Panel/agent/app/repo"
|
"github.com/1Panel-dev/1Panel/agent/app/repo"
|
||||||
|
|
@ -15,6 +16,8 @@ import (
|
||||||
"github.com/1Panel-dev/1Panel/agent/app/dto"
|
"github.com/1Panel-dev/1Panel/agent/app/dto"
|
||||||
"github.com/1Panel-dev/1Panel/agent/app/model"
|
"github.com/1Panel-dev/1Panel/agent/app/model"
|
||||||
"github.com/1Panel-dev/1Panel/agent/global"
|
"github.com/1Panel-dev/1Panel/agent/global"
|
||||||
|
"github.com/1Panel-dev/1Panel/agent/utils/ai_tools/gpu"
|
||||||
|
"github.com/1Panel-dev/1Panel/agent/utils/ai_tools/xpu"
|
||||||
"github.com/1Panel-dev/1Panel/agent/utils/common"
|
"github.com/1Panel-dev/1Panel/agent/utils/common"
|
||||||
"github.com/robfig/cron/v3"
|
"github.com/robfig/cron/v3"
|
||||||
"github.com/shirou/gopsutil/v4/cpu"
|
"github.com/shirou/gopsutil/v4/cpu"
|
||||||
|
|
@ -35,6 +38,7 @@ var monitorCancel context.CancelFunc
|
||||||
type IMonitorService interface {
|
type IMonitorService interface {
|
||||||
Run()
|
Run()
|
||||||
LoadMonitorData(req dto.MonitorSearch) ([]dto.MonitorData, error)
|
LoadMonitorData(req dto.MonitorSearch) ([]dto.MonitorData, error)
|
||||||
|
LoadGPUMonitorData(req dto.MonitorGPUSearch) (dto.MonitorGPUData, error)
|
||||||
LoadSetting() (*dto.MonitorSetting, error)
|
LoadSetting() (*dto.MonitorSetting, error)
|
||||||
UpdateSetting(key, value string) error
|
UpdateSetting(key, value string) error
|
||||||
CleanData() error
|
CleanData() error
|
||||||
|
|
@ -113,6 +117,67 @@ func (m *MonitorService) LoadMonitorData(req dto.MonitorSearch) ([]dto.MonitorDa
|
||||||
return data, nil
|
return data, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (m *MonitorService) LoadGPUMonitorData(req dto.MonitorGPUSearch) (dto.MonitorGPUData, error) {
|
||||||
|
loc, _ := time.LoadLocation(common.LoadTimeZoneByCmd())
|
||||||
|
req.StartTime = req.StartTime.In(loc)
|
||||||
|
req.EndTime = req.EndTime.In(loc)
|
||||||
|
|
||||||
|
var data dto.MonitorGPUData
|
||||||
|
gpuExist, gpuclient := gpu.New()
|
||||||
|
xpuExist, xpuClient := xpu.New()
|
||||||
|
if !gpuExist && !xpuExist {
|
||||||
|
return data, nil
|
||||||
|
}
|
||||||
|
if len(req.ProductName) == 0 {
|
||||||
|
if gpuExist {
|
||||||
|
gpuInfo, err := gpuclient.LoadGpuInfo()
|
||||||
|
if err != nil || len(gpuInfo.GPUs) == 0 {
|
||||||
|
return data, buserr.New("ErrRecordNotFound")
|
||||||
|
}
|
||||||
|
req.ProductName = gpuInfo.GPUs[0].ProductName
|
||||||
|
for _, item := range gpuInfo.GPUs {
|
||||||
|
data.ProductNames = append(data.ProductNames, item.ProductName)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
xpuInfo, err := xpuClient.LoadGpuInfo()
|
||||||
|
if err != nil || len(xpuInfo.Xpu) == 0 {
|
||||||
|
return data, buserr.New("ErrRecordNotFound")
|
||||||
|
}
|
||||||
|
req.ProductName = xpuInfo.Xpu[0].Basic.DeviceName
|
||||||
|
for _, item := range xpuInfo.Xpu {
|
||||||
|
data.ProductNames = append(data.ProductNames, item.Basic.DeviceName)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
gpuList, err := monitorRepo.GetGPU(repo.WithByCreatedAt(req.StartTime, req.EndTime), monitorRepo.WithByProductName(req.ProductName))
|
||||||
|
if err != nil {
|
||||||
|
return data, err
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, gpu := range gpuList {
|
||||||
|
data.Date = append(data.Date, gpu.CreatedAt)
|
||||||
|
data.GPUValue = append(data.GPUValue, gpu.GPUUtil)
|
||||||
|
data.TemperatureValue = append(data.TemperatureValue, gpu.Temperature)
|
||||||
|
data.PowerValue = append(data.PowerValue, dto.GPUPowerUsageHelper{
|
||||||
|
Total: gpu.MaxPowerLimit,
|
||||||
|
Used: gpu.PowerDraw,
|
||||||
|
Percent: gpu.PowerDraw / gpu.MaxPowerLimit * 100,
|
||||||
|
})
|
||||||
|
memItem := dto.GPUMemoryUsageHelper{
|
||||||
|
Total: gpu.MemTotal,
|
||||||
|
Used: gpu.MemUsed,
|
||||||
|
Percent: float64(gpu.MemUsed) / float64(gpu.MemTotal) * 100,
|
||||||
|
}
|
||||||
|
var process []dto.GPUProcess
|
||||||
|
if err := json.Unmarshal([]byte(gpu.Processes), &process); err == nil {
|
||||||
|
memItem.GPUProcesses = process
|
||||||
|
}
|
||||||
|
data.MemoryValue = append(data.MemoryValue, memItem)
|
||||||
|
data.SpeedValue = append(data.SpeedValue, gpu.FanSpeed)
|
||||||
|
}
|
||||||
|
return data, nil
|
||||||
|
}
|
||||||
|
|
||||||
func (m *MonitorService) LoadSetting() (*dto.MonitorSetting, error) {
|
func (m *MonitorService) LoadSetting() (*dto.MonitorSetting, error) {
|
||||||
setting, err := settingRepo.GetList()
|
setting, err := settingRepo.GetList()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
@ -174,10 +239,13 @@ func (m *MonitorService) CleanData() error {
|
||||||
if err := global.MonitorDB.Exec("DELETE FROM monitor_networks").Error; err != nil {
|
if err := global.MonitorDB.Exec("DELETE FROM monitor_networks").Error; err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
_ = global.GPUMonitorDB.Exec("DELETE FROM monitor_gpus").Error
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *MonitorService) Run() {
|
func (m *MonitorService) Run() {
|
||||||
|
saveGPUDataToDB()
|
||||||
|
saveXPUDataToDB()
|
||||||
var itemModel model.MonitorBase
|
var itemModel model.MonitorBase
|
||||||
totalPercent, _ := cpu.Percent(3*time.Second, false)
|
totalPercent, _ := cpu.Percent(3*time.Second, false)
|
||||||
if len(totalPercent) == 1 {
|
if len(totalPercent) == 1 {
|
||||||
|
|
@ -207,7 +275,7 @@ func (m *MonitorService) Run() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := settingRepo.CreateMonitorBase(itemModel); err != nil {
|
if err := monitorRepo.CreateMonitorBase(itemModel); err != nil {
|
||||||
global.LOG.Errorf("Insert basic monitoring data failed, err: %v", err)
|
global.LOG.Errorf("Insert basic monitoring data failed, err: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -220,9 +288,9 @@ func (m *MonitorService) Run() {
|
||||||
}
|
}
|
||||||
storeDays, _ := strconv.Atoi(MonitorStoreDays.Value)
|
storeDays, _ := strconv.Atoi(MonitorStoreDays.Value)
|
||||||
timeForDelete := time.Now().AddDate(0, 0, -storeDays)
|
timeForDelete := time.Now().AddDate(0, 0, -storeDays)
|
||||||
_ = settingRepo.DelMonitorBase(timeForDelete)
|
_ = monitorRepo.DelMonitorBase(timeForDelete)
|
||||||
_ = settingRepo.DelMonitorIO(timeForDelete)
|
_ = monitorRepo.DelMonitorIO(timeForDelete)
|
||||||
_ = settingRepo.DelMonitorNet(timeForDelete)
|
_ = monitorRepo.DelMonitorNet(timeForDelete)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *MonitorService) loadDiskIO() {
|
func (m *MonitorService) loadDiskIO() {
|
||||||
|
|
@ -302,7 +370,7 @@ func (m *MonitorService) saveIODataToDB(ctx context.Context, interval float64) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if err := settingRepo.BatchCreateMonitorIO(ioList); err != nil {
|
if err := monitorRepo.BatchCreateMonitorIO(ioList); err != nil {
|
||||||
global.LOG.Errorf("Insert io monitoring data failed, err: %v", err)
|
global.LOG.Errorf("Insert io monitoring data failed, err: %v", err)
|
||||||
}
|
}
|
||||||
m.DiskIO <- ioStat2
|
m.DiskIO <- ioStat2
|
||||||
|
|
@ -341,7 +409,7 @@ func (m *MonitorService) saveNetDataToDB(ctx context.Context, interval float64)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := settingRepo.BatchCreateMonitorNet(netList); err != nil {
|
if err := monitorRepo.BatchCreateMonitorNet(netList); err != nil {
|
||||||
global.LOG.Errorf("Insert network monitoring data failed, err: %v", err)
|
global.LOG.Errorf("Insert network monitoring data failed, err: %v", err)
|
||||||
}
|
}
|
||||||
m.NetIO <- netStat2
|
m.NetIO <- netStat2
|
||||||
|
|
@ -482,3 +550,90 @@ func StartMonitor(removeBefore bool, interval string) error {
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func saveGPUDataToDB() {
|
||||||
|
exist, client := gpu.New()
|
||||||
|
if !exist {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
gpuInfo, err := client.LoadGpuInfo()
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
var list []model.MonitorGPU
|
||||||
|
for _, gpuItem := range gpuInfo.GPUs {
|
||||||
|
item := model.MonitorGPU{
|
||||||
|
ProductName: gpuItem.ProductName,
|
||||||
|
GPUUtil: loadGPUInfoFloat(gpuItem.GPUUtil),
|
||||||
|
Temperature: loadGPUInfoInt(gpuItem.Temperature),
|
||||||
|
PowerDraw: loadGPUInfoFloat(gpuItem.PowerDraw),
|
||||||
|
MaxPowerLimit: loadGPUInfoFloat(gpuItem.MaxPowerLimit),
|
||||||
|
MemUsed: loadGPUInfoInt(gpuItem.MemUsed),
|
||||||
|
MemTotal: loadGPUInfoInt(gpuItem.MemTotal),
|
||||||
|
FanSpeed: loadGPUInfoInt(gpuItem.FanSpeed),
|
||||||
|
}
|
||||||
|
process, _ := json.Marshal(gpuItem.Processes)
|
||||||
|
if len(process) != 0 {
|
||||||
|
item.Processes = string(process)
|
||||||
|
}
|
||||||
|
list = append(list, item)
|
||||||
|
}
|
||||||
|
if err := repo.NewIMonitorRepo().BatchCreateMonitorGPU(list); err != nil {
|
||||||
|
global.LOG.Errorf("batch create gpu monitor data failed, err: %v", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
func saveXPUDataToDB() {
|
||||||
|
exist, client := xpu.New()
|
||||||
|
if !exist {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
xpuInfo, err := client.LoadGpuInfo()
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
var list []model.MonitorGPU
|
||||||
|
for _, xpuItem := range xpuInfo.Xpu {
|
||||||
|
item := model.MonitorGPU{
|
||||||
|
ProductName: xpuItem.Basic.DeviceName,
|
||||||
|
GPUUtil: loadGPUInfoFloat(xpuItem.Stats.MemoryUtil),
|
||||||
|
Temperature: loadGPUInfoInt(xpuItem.Stats.Temperature),
|
||||||
|
PowerDraw: loadGPUInfoFloat(xpuItem.Stats.Power),
|
||||||
|
MemUsed: loadGPUInfoInt(xpuItem.Stats.MemoryUsed),
|
||||||
|
MemTotal: loadGPUInfoInt(xpuItem.Basic.Memory),
|
||||||
|
}
|
||||||
|
var processItem []dto.GPUProcess
|
||||||
|
for _, ps := range xpuItem.Processes {
|
||||||
|
processItem = append(processItem, dto.GPUProcess{
|
||||||
|
Pid: fmt.Sprintf("%v", ps.PID),
|
||||||
|
Type: ps.SHR,
|
||||||
|
ProcessName: ps.Command,
|
||||||
|
UsedMemory: ps.Memory,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
process, _ := json.Marshal(processItem)
|
||||||
|
if len(process) != 0 {
|
||||||
|
item.Processes = string(process)
|
||||||
|
}
|
||||||
|
list = append(list, item)
|
||||||
|
}
|
||||||
|
if err := repo.NewIMonitorRepo().BatchCreateMonitorGPU(list); err != nil {
|
||||||
|
global.LOG.Errorf("batch create gpu monitor data failed, err: %v", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// loadGPUInfoInt converts a textual reading such as "1024 MiB", "45 C" or
// "30 %" to an int. Every occurrence of "MiB", "C" and "%" is removed (in that
// order), surrounding whitespace is trimmed and the rest is parsed as a
// base-10 integer.
func loadGPUInfoInt(val string) int {
	cleaned := val
	for _, unit := range []string{"MiB", "C", "%"} {
		cleaned = strings.ReplaceAll(cleaned, unit, "")
	}
	// The parse error is intentionally dropped: unparsable readings fall back
	// to whatever value strconv.Atoi returns alongside the error.
	number, _ := strconv.Atoi(strings.TrimSpace(cleaned))
	return number
}
|
||||||
|
// loadGPUInfoFloat converts a textual reading such as "120.5 W" or "35 %" to a
// float64. Every occurrence of "W" and "%" is removed (in that order),
// surrounding whitespace is trimmed and the rest is parsed as a 64-bit float.
func loadGPUInfoFloat(val string) float64 {
	cleaned := val
	for _, unit := range []string{"W", "%"} {
		cleaned = strings.ReplaceAll(cleaned, unit, "")
	}
	// The parse error is intentionally dropped: unparsable readings fall back
	// to whatever value strconv.ParseFloat returns alongside the error.
	number, _ := strconv.ParseFloat(strings.TrimSpace(cleaned), 64)
	return number
}
|
||||||
|
|
|
||||||
|
|
@ -13,11 +13,12 @@ import (
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
DB *gorm.DB
|
DB *gorm.DB
|
||||||
MonitorDB *gorm.DB
|
MonitorDB *gorm.DB
|
||||||
TaskDB *gorm.DB
|
GPUMonitorDB *gorm.DB
|
||||||
CoreDB *gorm.DB
|
TaskDB *gorm.DB
|
||||||
AlertDB *gorm.DB
|
CoreDB *gorm.DB
|
||||||
|
AlertDB *gorm.DB
|
||||||
|
|
||||||
LOG *logrus.Logger
|
LOG *logrus.Logger
|
||||||
CONF ServerConfig
|
CONF ServerConfig
|
||||||
|
|
|
||||||
|
|
@ -11,6 +11,7 @@ func Init() {
|
||||||
global.DB = common.LoadDBConnByPath(path.Join(global.Dir.DbDir, "agent.db"), "agent")
|
global.DB = common.LoadDBConnByPath(path.Join(global.Dir.DbDir, "agent.db"), "agent")
|
||||||
global.TaskDB = common.LoadDBConnByPath(path.Join(global.Dir.DbDir, "task.db"), "task")
|
global.TaskDB = common.LoadDBConnByPath(path.Join(global.Dir.DbDir, "task.db"), "task")
|
||||||
global.MonitorDB = common.LoadDBConnByPath(path.Join(global.Dir.DbDir, "monitor.db"), "monitor")
|
global.MonitorDB = common.LoadDBConnByPath(path.Join(global.Dir.DbDir, "monitor.db"), "monitor")
|
||||||
|
global.GPUMonitorDB = common.LoadDBConnByPath(path.Join(global.Dir.DbDir, "gpu_monitor.db"), "gpu_monitor")
|
||||||
global.AlertDB = common.LoadDBConnByPath(path.Join(global.Dir.DbDir, "alert.db"), "alert")
|
global.AlertDB = common.LoadDBConnByPath(path.Join(global.Dir.DbDir, "alert.db"), "alert")
|
||||||
|
|
||||||
if global.IsMaster {
|
if global.IsMaster {
|
||||||
|
|
|
||||||
|
|
@ -54,6 +54,7 @@ func InitAgentDB() {
|
||||||
migrations.AddIptablesFilterRuleTable,
|
migrations.AddIptablesFilterRuleTable,
|
||||||
migrations.AddCommonDescription,
|
migrations.AddCommonDescription,
|
||||||
migrations.UpdateDatabase,
|
migrations.UpdateDatabase,
|
||||||
|
migrations.AddGPUMonitor,
|
||||||
})
|
})
|
||||||
if err := m.Migrate(); err != nil {
|
if err := m.Migrate(); err != nil {
|
||||||
global.LOG.Error(err)
|
global.LOG.Error(err)
|
||||||
|
|
|
||||||
|
|
@ -719,3 +719,10 @@ var UpdateDatabase = &gormigrate.Migration{
|
||||||
return tx.AutoMigrate(&model.Database{})
|
return tx.AutoMigrate(&model.Database{})
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var AddGPUMonitor = &gormigrate.Migration{
|
||||||
|
ID: "20251119-add-gpu-monitor",
|
||||||
|
Migrate: func(tx *gorm.DB) error {
|
||||||
|
return global.GPUMonitorDB.AutoMigrate(&model.MonitorGPU{})
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -29,6 +29,7 @@ func (s *HostRouter) InitRouter(Router *gin.RouterGroup) {
|
||||||
hostRouter.POST("/firewall/filter/chain/status", baseApi.LoadChainStatus)
|
hostRouter.POST("/firewall/filter/chain/status", baseApi.LoadChainStatus)
|
||||||
|
|
||||||
hostRouter.POST("/monitor/search", baseApi.LoadMonitor)
|
hostRouter.POST("/monitor/search", baseApi.LoadMonitor)
|
||||||
|
hostRouter.POST("/monitor/gpu/search", baseApi.LoadGPUMonitor)
|
||||||
hostRouter.POST("/monitor/clean", baseApi.CleanMonitor)
|
hostRouter.POST("/monitor/clean", baseApi.CleanMonitor)
|
||||||
hostRouter.GET("/monitor/netoptions", baseApi.GetNetworkOptions)
|
hostRouter.GET("/monitor/netoptions", baseApi.GetNetworkOptions)
|
||||||
hostRouter.GET("/monitor/iooptions", baseApi.GetIOOptions)
|
hostRouter.GET("/monitor/iooptions", baseApi.GetIOOptions)
|
||||||
|
|
|
||||||
|
|
@ -161,6 +161,38 @@ export namespace Host {
|
||||||
endTime: Date;
|
endTime: Date;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Request body for the GPU monitor search endpoint. */
export interface MonitorGPUSearch {
    /** Device whose samples are requested. */
    productName: string;
    /** Bounds of the requested time range. */
    startTime: Date;
    endTime: Date;
}
|
||||||
|
/** Response of the GPU monitor search endpoint: one array per metric. */
export interface MonitorGPUData {
    /** Names of the detected devices. */
    productNames: string[];
    date: Date[];
    gpuValue: number[];
    temperatureValue: number[];
    powerValue: GPUPowerUsageHelper[];
    memoryValue: GPUMemoryUsageHelper[];
    speedValue: number[];
}
|
||||||
|
/** Power reading of one GPU sample. */
export interface GPUPowerUsageHelper {
    /** Power limit of the device. */
    total: number;
    /** Power drawn at sample time. */
    used: number;
    /** `used` relative to `total`, as a percentage. */
    percent: number;
}
|
||||||
|
/** Memory reading of one GPU sample plus the processes recorded with it. */
export interface GPUMemoryUsageHelper {
    total: number;
    used: number;
    /** `used` relative to `total`, as a percentage. */
    percent: number;
    gpuProcesses: GPUProcess[];
}
|
||||||
|
/** One process entry attached to a GPU sample; all fields are text. */
export interface GPUProcess {
    pid: string;
    type: string;
    processName: string;
    usedMemory: string;
}
|
||||||
|
|
||||||
export interface SSHInfo {
|
export interface SSHInfo {
|
||||||
autoStart: boolean;
|
autoStart: boolean;
|
||||||
isActive: boolean;
|
isActive: boolean;
|
||||||
|
|
|
||||||
|
|
@ -65,6 +65,9 @@ export const operateFilterChain = (name: string, op: string) => {
|
||||||
/** POSTs the search parameters to the monitor search endpoint. */
export const loadMonitor = (param: Host.MonitorSearch) =>
    http.post<Array<Host.MonitorData>>(`/hosts/monitor/search`, param);
|
||||||
|
/** POSTs the search parameters to the GPU monitor search endpoint. */
export const loadGPUMonitor = (param: Host.MonitorGPUSearch) =>
    http.post<Host.MonitorGPUData>(`/hosts/monitor/gpu/search`, param);
|
||||||
/** GETs the list of network options from the monitor API. */
export const getNetworkOptions = () => http.get<Array<string>>(`/hosts/monitor/netoptions`);
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
<template>
|
<template>
|
||||||
<div>
|
<div v-loading="loading">
|
||||||
<RouterButton
|
<RouterButton
|
||||||
:buttons="[
|
:buttons="[
|
||||||
{
|
{
|
||||||
|
|
@ -9,242 +9,129 @@
|
||||||
]"
|
]"
|
||||||
/>
|
/>
|
||||||
|
|
||||||
<div v-if="gpuType == 'nvidia'">
|
<div class="content-container__search" v-if="options.length !== 0">
|
||||||
<LayoutContent
|
<el-card>
|
||||||
v-loading="loading"
|
<div>
|
||||||
:title="$t('aiTools.gpu.gpu')"
|
<el-date-picker
|
||||||
:divider="true"
|
@change="search()"
|
||||||
v-if="gpuInfo.driverVersion.length !== 0 && !loading"
|
v-model="timeRangeGlobal"
|
||||||
>
|
type="datetimerange"
|
||||||
<template #toolbar>
|
range-separator="-"
|
||||||
<el-row>
|
:start-placeholder="$t('commons.search.timeStart')"
|
||||||
<el-col :xs="24" :sm="16" :md="16" :lg="16" :xl="16" />
|
:end-placeholder="$t('commons.search.timeEnd')"
|
||||||
<el-col :xs="24" :sm="8" :md="8" :lg="8" :xl="8">
|
:shortcuts="shortcuts"
|
||||||
<TableSetting title="gpu-refresh" @search="refresh()" />
|
style="max-width: 360px; width: 100%"
|
||||||
</el-col>
|
:size="mobile ? 'small' : 'default'"
|
||||||
</el-row>
|
></el-date-picker>
|
||||||
</template>
|
<el-select class="p-w-300 ml-2" v-model="searchInfo.productName" @change="search()">
|
||||||
<template #main>
|
<el-option v-for="item in options" :key="item" :label="item" :value="item" />
|
||||||
<el-descriptions direction="vertical" :column="14" border>
|
</el-select>
|
||||||
<el-descriptions-item :label="$t('aiTools.gpu.driverVersion')" width="50%" :span="7">
|
<TableRefresh class="float-right" @search="search()" />
|
||||||
{{ gpuInfo.driverVersion }}
|
</div>
|
||||||
</el-descriptions-item>
|
</el-card>
|
||||||
<el-descriptions-item :label="$t('aiTools.gpu.cudaVersion')" :span="7">
|
</div>
|
||||||
{{ gpuInfo.cudaVersion }}
|
<el-row :gutter="7" class="card-interval" v-if="options.length !== 0">
|
||||||
</el-descriptions-item>
|
<el-col :span="24">
|
||||||
</el-descriptions>
|
<el-card style="overflow: inherit">
|
||||||
<el-collapse v-model="activeNames" class="card-interval">
|
<template #header>
|
||||||
<el-collapse-item v-for="item in gpuInfo.gpu" :key="item.index" :name="item.index">
|
<div :class="mobile ? 'flx-wrap' : 'flex justify-between'">
|
||||||
<template #title>
|
<span class="title">{{ $t('monitor.gpuUtil') }}</span>
|
||||||
<span class="name-class">{{ item.index + '. ' + item.productName }}</span>
|
</div>
|
||||||
</template>
|
</template>
|
||||||
<span class="title-class">{{ $t('aiTools.gpu.base') }}</span>
|
<div class="chart">
|
||||||
<el-descriptions direction="vertical" :column="6" border size="small" class="mt-2">
|
<v-charts
|
||||||
<el-descriptions-item :label="$t('monitor.gpuUtil')">
|
height="400px"
|
||||||
{{ item.gpuUtil }}
|
id="loadGPUChart"
|
||||||
</el-descriptions-item>
|
type="line"
|
||||||
<el-descriptions-item>
|
:option="chartsOption['loadGPUChart']"
|
||||||
<template #label>
|
v-if="chartsOption['loadGPUChart']"
|
||||||
<div class="cell-item">
|
:dataZoom="true"
|
||||||
{{ $t('monitor.temperature') }}
|
/>
|
||||||
<el-tooltip placement="top" :content="$t('aiTools.gpu.temperatureHelper')">
|
</div>
|
||||||
<el-icon class="icon-item"><InfoFilled /></el-icon>
|
</el-card>
|
||||||
</el-tooltip>
|
</el-col>
|
||||||
</div>
|
<el-col :xs="24" :sm="24" :md="12" :lg="12" :xl="12">
|
||||||
</template>
|
<el-card style="overflow: inherit">
|
||||||
{{ item.temperature.replaceAll('C', '°C') }}
|
<template #header>
|
||||||
</el-descriptions-item>
|
<div :class="mobile ? 'flx-wrap' : 'flex justify-between'">
|
||||||
<el-descriptions-item>
|
<span class="title">{{ $t('monitor.memoryUsage') }}</span>
|
||||||
<template #label>
|
</div>
|
||||||
<div class="cell-item">
|
</template>
|
||||||
{{ $t('monitor.performanceState') }}
|
<div class="chart">
|
||||||
<el-tooltip
|
<v-charts
|
||||||
placement="top"
|
height="400px"
|
||||||
:content="$t('aiTools.gpu.performanceStateHelper')"
|
id="loadMemoryChart"
|
||||||
>
|
type="line"
|
||||||
<el-icon class="icon-item"><InfoFilled /></el-icon>
|
:option="chartsOption['loadMemoryChart']"
|
||||||
</el-tooltip>
|
v-if="chartsOption['loadMemoryChart']"
|
||||||
</div>
|
:dataZoom="true"
|
||||||
</template>
|
/>
|
||||||
{{ item.performanceState }}
|
</div>
|
||||||
</el-descriptions-item>
|
</el-card>
|
||||||
<el-descriptions-item :label="$t('monitor.powerUsage')">
|
</el-col>
|
||||||
{{ item.powerDraw }} / {{ item.maxPowerLimit }}
|
<el-col :xs="24" :sm="24" :md="12" :lg="12" :xl="12">
|
||||||
</el-descriptions-item>
|
<el-card style="overflow: inherit">
|
||||||
<el-descriptions-item :label="$t('monitor.memoryUsage')">
|
<template #header>
|
||||||
{{ item.memUsed }} / {{ item.memTotal }}
|
<div :class="mobile ? 'flx-wrap' : 'flex justify-between'">
|
||||||
</el-descriptions-item>
|
<span class="title">{{ $t('monitor.powerUsage') }}</span>
|
||||||
<el-descriptions-item :label="$t('monitor.fanSpeed')">
|
</div>
|
||||||
{{ item.fanSpeed }}
|
</template>
|
||||||
</el-descriptions-item>
|
<div class="chart">
|
||||||
|
<v-charts
|
||||||
|
height="400px"
|
||||||
|
id="loadPowerChart"
|
||||||
|
type="line"
|
||||||
|
:option="chartsOption['loadPowerChart']"
|
||||||
|
v-if="chartsOption['loadPowerChart']"
|
||||||
|
:dataZoom="true"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
</el-card>
|
||||||
|
</el-col>
|
||||||
|
<el-col :xs="24" :sm="24" :md="12" :lg="12" :xl="12">
|
||||||
|
<el-card style="overflow: inherit">
|
||||||
|
<template #header>
|
||||||
|
<div>
|
||||||
|
{{ $t('monitor.temperature') }}
|
||||||
|
<el-tooltip placement="top" :content="$t('aiTools.gpu.temperatureHelper')">
|
||||||
|
<el-icon size="15"><InfoFilled /></el-icon>
|
||||||
|
</el-tooltip>
|
||||||
|
</div>
|
||||||
|
</template>
|
||||||
|
<div class="chart">
|
||||||
|
<v-charts
|
||||||
|
height="400px"
|
||||||
|
id="loadTemperatureChart"
|
||||||
|
type="line"
|
||||||
|
:option="chartsOption['loadTemperatureChart']"
|
||||||
|
v-if="chartsOption['loadTemperatureChart']"
|
||||||
|
:dataZoom="true"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
</el-card>
|
||||||
|
</el-col>
|
||||||
|
<el-col :xs="24" :sm="24" :md="12" :lg="12" :xl="12">
|
||||||
|
<el-card style="overflow: inherit">
|
||||||
|
<template #header>
|
||||||
|
<div :class="mobile ? 'flx-wrap' : 'flex justify-between'">
|
||||||
|
<span class="title">{{ $t('monitor.fanSpeed') }}</span>
|
||||||
|
</div>
|
||||||
|
</template>
|
||||||
|
<div class="chart">
|
||||||
|
<v-charts
|
||||||
|
height="400px"
|
||||||
|
id="loadSpeedChart"
|
||||||
|
type="line"
|
||||||
|
:option="chartsOption['loadSpeedChart']"
|
||||||
|
v-if="chartsOption['loadSpeedChart']"
|
||||||
|
:dataZoom="true"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
</el-card>
|
||||||
|
</el-col>
|
||||||
|
</el-row>
|
||||||
|
|
||||||
<el-descriptions-item :label="$t('aiTools.gpu.busID')">
|
<LayoutContent :title="$t('aiTools.gpu.gpu')" :divider="true" v-else>
|
||||||
{{ item.busID }}
|
|
||||||
</el-descriptions-item>
|
|
||||||
<el-descriptions-item>
|
|
||||||
<template #label>
|
|
||||||
<div class="cell-item">
|
|
||||||
{{ $t('aiTools.gpu.persistenceMode') }}
|
|
||||||
<el-tooltip
|
|
||||||
placement="top"
|
|
||||||
:content="$t('aiTools.gpu.persistenceModeHelper')"
|
|
||||||
>
|
|
||||||
<el-icon class="icon-item"><InfoFilled /></el-icon>
|
|
||||||
</el-tooltip>
|
|
||||||
</div>
|
|
||||||
</template>
|
|
||||||
{{ $t('aiTools.gpu.' + item.persistenceMode.toLowerCase()) }}
|
|
||||||
</el-descriptions-item>
|
|
||||||
<el-descriptions-item :label="$t('aiTools.gpu.displayActive')">
|
|
||||||
{{
|
|
||||||
lowerCase(item.displayActive) === 'disabled'
|
|
||||||
? $t('aiTools.gpu.displayActiveF')
|
|
||||||
: $t('aiTools.gpu.displayActiveT')
|
|
||||||
}}
|
|
||||||
</el-descriptions-item>
|
|
||||||
<el-descriptions-item>
|
|
||||||
<template #label>
|
|
||||||
<div class="cell-item">
|
|
||||||
Uncorr. ECC
|
|
||||||
<el-tooltip placement="top" :content="$t('aiTools.gpu.ecc')">
|
|
||||||
<el-icon class="icon-item"><InfoFilled /></el-icon>
|
|
||||||
</el-tooltip>
|
|
||||||
</div>
|
|
||||||
</template>
|
|
||||||
{{ loadEcc(item.ecc) }}
|
|
||||||
</el-descriptions-item>
|
|
||||||
<el-descriptions-item :label="$t('aiTools.gpu.computeMode')">
|
|
||||||
<template #label>
|
|
||||||
<div class="cell-item">
|
|
||||||
{{ $t('aiTools.gpu.computeMode') }}
|
|
||||||
<el-tooltip placement="top">
|
|
||||||
<template #content>
|
|
||||||
{{ $t('aiTools.gpu.defaultHelper') }}
|
|
||||||
<br />
|
|
||||||
{{ $t('aiTools.gpu.exclusiveProcessHelper') }}
|
|
||||||
<br />
|
|
||||||
{{ $t('aiTools.gpu.exclusiveThreadHelper') }}
|
|
||||||
<br />
|
|
||||||
{{ $t('aiTools.gpu.prohibitedHelper') }}
|
|
||||||
</template>
|
|
||||||
<el-icon class="icon-item"><InfoFilled /></el-icon>
|
|
||||||
</el-tooltip>
|
|
||||||
</div>
|
|
||||||
</template>
|
|
||||||
{{ loadComputeMode(item.computeMode) }}
|
|
||||||
</el-descriptions-item>
|
|
||||||
<el-descriptions-item label="MIG.M">
|
|
||||||
<template #label>
|
|
||||||
<div class="cell-item">
|
|
||||||
MIG M.
|
|
||||||
<el-tooltip placement="top">
|
|
||||||
<template #content>
|
|
||||||
{{ $t('aiTools.gpu.migModeHelper') }}
|
|
||||||
</template>
|
|
||||||
<el-icon class="icon-item"><InfoFilled /></el-icon>
|
|
||||||
</el-tooltip>
|
|
||||||
</div>
|
|
||||||
</template>
|
|
||||||
{{
|
|
||||||
item.migMode === 'N/A'
|
|
||||||
? $t('aiTools.gpu.migModeNA')
|
|
||||||
: $t('aiTools.gpu.' + lowerCase(item.migMode))
|
|
||||||
}}
|
|
||||||
</el-descriptions-item>
|
|
||||||
</el-descriptions>
|
|
||||||
<div class="card-interval">
|
|
||||||
<span class="title-class">{{ $t('aiTools.gpu.process') }}</span>
|
|
||||||
</div>
|
|
||||||
<el-table :data="item.processes" v-if="item.processes?.length !== 0">
|
|
||||||
<el-table-column label="PID" prop="pid" />
|
|
||||||
<el-table-column :label="$t('aiTools.gpu.type')" prop="type">
|
|
||||||
<template #default="{ row }">
|
|
||||||
{{ loadProcessType(row.type) }}
|
|
||||||
</template>
|
|
||||||
</el-table-column>
|
|
||||||
<el-table-column :label="$t('aiTools.gpu.processName')" prop="processName" />
|
|
||||||
<el-table-column :label="$t('aiTools.gpu.processMemoryUsage')" prop="usedMemory" />
|
|
||||||
</el-table>
|
|
||||||
</el-collapse-item>
|
|
||||||
</el-collapse>
|
|
||||||
</template>
|
|
||||||
</LayoutContent>
|
|
||||||
</div>
|
|
||||||
<div v-else>
|
|
||||||
<LayoutContent
|
|
||||||
v-loading="loading"
|
|
||||||
:title="$t('aiTools.gpu.gpu')"
|
|
||||||
:divider="true"
|
|
||||||
v-if="xpuInfo.driverVersion.length !== 0 && !loading"
|
|
||||||
>
|
|
||||||
<template #toolbar>
|
|
||||||
<el-row>
|
|
||||||
<el-col :xs="24" :sm="16" :md="16" :lg="16" :xl="16" />
|
|
||||||
<el-col :xs="24" :sm="8" :md="8" :lg="8" :xl="8">
|
|
||||||
<TableSetting title="xpu-refresh" @search="refresh()" />
|
|
||||||
</el-col>
|
|
||||||
</el-row>
|
|
||||||
</template>
|
|
||||||
<template #main>
|
|
||||||
<el-descriptions direction="vertical" :column="14" border>
|
|
||||||
<el-descriptions-item :label="$t('aiTools.gpu.driverVersion')" width="50%" :span="7">
|
|
||||||
{{ xpuInfo.driverVersion }}
|
|
||||||
</el-descriptions-item>
|
|
||||||
</el-descriptions>
|
|
||||||
<el-collapse v-model="activeNames" class="card-interval">
|
|
||||||
<el-collapse-item
|
|
||||||
v-for="item in xpuInfo.xpu"
|
|
||||||
:key="item.basic.deviceID"
|
|
||||||
:name="item.basic.deviceID"
|
|
||||||
>
|
|
||||||
<template #title>
|
|
||||||
<span class="name-class">{{ item.basic.deviceID + '. ' + item.basic.deviceName }}</span>
|
|
||||||
</template>
|
|
||||||
<span class="title-class">{{ $t('aiTools.gpu.base') }}</span>
|
|
||||||
<el-descriptions direction="vertical" :column="6" border size="small" class="mt-2">
|
|
||||||
<el-descriptions-item :label="$t('monitor.gpuUtil')">
|
|
||||||
{{ item.stats.memoryUtil }}
|
|
||||||
</el-descriptions-item>
|
|
||||||
<el-descriptions-item>
|
|
||||||
<template #label>
|
|
||||||
<div class="cell-item">
|
|
||||||
{{ $t('monitor.temperature') }}
|
|
||||||
<el-tooltip placement="top" :content="$t('aiTools.gpu.temperatureHelper')">
|
|
||||||
<el-icon class="icon-item"><InfoFilled /></el-icon>
|
|
||||||
</el-tooltip>
|
|
||||||
</div>
|
|
||||||
</template>
|
|
||||||
{{ item.stats.temperature }}
|
|
||||||
</el-descriptions-item>
|
|
||||||
<el-descriptions-item :label="$t('monitor.powerUsage')">
|
|
||||||
{{ item.stats.power }}
|
|
||||||
</el-descriptions-item>
|
|
||||||
<el-descriptions-item :label="$t('monitor.memoryUsage')">
|
|
||||||
{{ item.stats.memoryUsed }} / {{ item.basic.memory }}
|
|
||||||
</el-descriptions-item>
|
|
||||||
<el-descriptions-item :label="$t('aiTools.gpu.busID')">
|
|
||||||
{{ item.basic.pciBdfAddress }}
|
|
||||||
</el-descriptions-item>
|
|
||||||
</el-descriptions>
|
|
||||||
<div class="card-interval">
|
|
||||||
<span class="title-class">{{ $t('aiTools.gpu.process') }}</span>
|
|
||||||
</div>
|
|
||||||
<el-table :data="item.processes" v-if="item.processes?.length !== 0">
|
|
||||||
<el-table-column label="PID" prop="pid" />
|
|
||||||
<el-table-column :label="$t('aiTools.gpu.processName')" prop="command" />
|
|
||||||
<el-table-column :label="$t('aiTools.gpu.shr')" prop="shr" />
|
|
||||||
<el-table-column :label="$t('aiTools.gpu.processMemoryUsage')" prop="memory" />
|
|
||||||
</el-table>
|
|
||||||
</el-collapse-item>
|
|
||||||
</el-collapse>
|
|
||||||
</template>
|
|
||||||
</LayoutContent>
|
|
||||||
</div>
|
|
||||||
<LayoutContent
|
|
||||||
:title="$t('aiTools.gpu.gpu')"
|
|
||||||
:divider="true"
|
|
||||||
v-if="gpuInfo.driverVersion.length === 0 && xpuInfo.driverVersion.length == 0 && !loading"
|
|
||||||
>
|
|
||||||
<template #main>
|
<template #main>
|
||||||
<div class="app-warn">
|
<div class="app-warn">
|
||||||
<div class="flx-center">
|
<div class="flx-center">
|
||||||
|
|
@ -259,79 +146,237 @@
|
||||||
</div>
|
</div>
|
||||||
</template>
|
</template>
|
||||||
|
|
||||||
<script lang="ts" setup>
|
<script setup lang="ts">
|
||||||
import { onMounted, ref } from 'vue';
|
import { ref, reactive, onMounted, computed } from 'vue';
|
||||||
import { loadGPUInfo } from '@/api/modules/ai';
|
import { loadGPUMonitor } from '@/api/modules/host';
|
||||||
import { AI } from '@/api/interface/ai';
|
import { dateFormatWithoutYear } from '@/utils/util';
|
||||||
|
import { GlobalStore } from '@/store';
|
||||||
|
import { shortcuts } from '@/utils/shortcuts';
|
||||||
|
import { Host } from '@/api/interface/host';
|
||||||
import i18n from '@/lang';
|
import i18n from '@/lang';
|
||||||
|
|
||||||
const loading = ref();
|
const globalStore = GlobalStore();
|
||||||
const activeNames = ref(0);
|
|
||||||
const gpuInfo = ref<AI.Info>({
|
const mobile = computed(() => {
|
||||||
cudaVersion: '',
|
return globalStore.isMobile();
|
||||||
driverVersion: '',
|
|
||||||
type: 'nvidia',
|
|
||||||
gpu: [],
|
|
||||||
});
|
});
|
||||||
const xpuInfo = ref<AI.XpuInfo>({
|
|
||||||
driverVersion: '',
|
const loading = ref(false);
|
||||||
type: 'xpu',
|
const options = ref([]);
|
||||||
xpu: [],
|
const timeRangeGlobal = ref<[Date, Date]>([new Date(new Date().setHours(0, 0, 0, 0)), new Date()]);
|
||||||
|
const chartsOption = ref({
|
||||||
|
loadPowerChart: null,
|
||||||
|
loadGPUChart: null,
|
||||||
|
loadMemoryChart: null,
|
||||||
|
loadTemperatureChart: null,
|
||||||
|
loadSpeedChart: null,
|
||||||
|
});
|
||||||
|
|
||||||
|
const searchTime = ref();
|
||||||
|
const searchInfo = reactive<Host.MonitorGPUSearch>({
|
||||||
|
productName: '',
|
||||||
|
startTime: new Date(new Date().setHours(0, 0, 0, 0)),
|
||||||
|
endTime: new Date(),
|
||||||
});
|
});
|
||||||
const gpuType = ref('nvidia');
|
|
||||||
|
|
||||||
const search = async () => {
|
const search = async () => {
|
||||||
|
if (searchTime.value && searchTime.value.length === 2) {
|
||||||
|
searchInfo.startTime = searchTime.value[0];
|
||||||
|
searchInfo.endTime = searchTime.value[1];
|
||||||
|
}
|
||||||
loading.value = true;
|
loading.value = true;
|
||||||
await loadGPUInfo()
|
await loadGPUMonitor(searchInfo)
|
||||||
.then((res) => {
|
.then((res) => {
|
||||||
loading.value = false;
|
loading.value = false;
|
||||||
gpuType.value = res.data.type;
|
options.value = res.data.productNames || [];
|
||||||
if (res.data.type == 'nvidia') {
|
searchInfo.productName = searchInfo.productName || (options.value.length > 0 ? options.value[0] : '');
|
||||||
gpuInfo.value = res.data;
|
let baseDate = res.data.date.length === 0 ? loadEmptyDate(timeRangeGlobal.value) : res.data.date;
|
||||||
} else {
|
let date = baseDate.map(function (item: any) {
|
||||||
xpuInfo.value = res.data;
|
return dateFormatWithoutYear(item);
|
||||||
}
|
});
|
||||||
|
initCPUCharts(date, res.data.gpuValue);
|
||||||
|
initMemoryCharts(date, res.data.memoryValue);
|
||||||
|
initPowerCharts(date, res.data.powerValue);
|
||||||
|
initSpeedCharts(date, res.data.speedValue);
|
||||||
|
initTemperatureCharts(date, res.data.temperatureValue);
|
||||||
})
|
})
|
||||||
.catch(() => {
|
.catch(() => {
|
||||||
loading.value = false;
|
loading.value = false;
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
|
|
||||||
const refresh = async () => {
|
function initCPUCharts(baseDate: any, items: any) {
|
||||||
const res = await loadGPUInfo();
|
let percents = items.map(function (item: any) {
|
||||||
gpuInfo.value = res.data;
|
return Number(item.toFixed(2));
|
||||||
};
|
});
|
||||||
|
let data = percents.length === 0 ? loadEmptyData() : percents;
|
||||||
|
chartsOption.value['loadGPUChart'] = {
|
||||||
|
xData: baseDate,
|
||||||
|
yData: [
|
||||||
|
{
|
||||||
|
name: i18n.global.t('monitor.gpuUtil'),
|
||||||
|
data: data,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
formatStr: '%',
|
||||||
|
};
|
||||||
|
}
|
||||||
|
/**
 * Builds the option object for the memory usage chart and stores it under
 * `chartsOption.value['loadMemoryChart']`.
 *
 * @param baseDate values used as the x-axis data
 * @param items memory samples; each one must expose a numeric `percent`
 */
function initMemoryCharts(baseDate: any, items: any) {
    // Keep the full sample next to the rounded percentage so the tooltip can read it back.
    const points = items.map((sample: any) => ({
        value: Number(sample.percent.toFixed(2)),
        data: sample,
    }));
    // An empty result still gets two placeholder points.
    const seriesData = points.length === 0 ? loadEmptyData2() : points;

    const memorySeries = {
        name: i18n.global.t('monitor.memoryUsage'),
        data: seriesData,
    };
    chartsOption.value['loadMemoryChart'] = {
        xData: baseDate,
        yData: [memorySeries],
        tooltip: {
            trigger: 'axis',
            formatter: (list: any) => withMemoryProcess(list),
        },
        formatStr: '%',
    };
}
|
||||||
|
/**
 * Builds the option object for the power usage chart and stores it under
 * `chartsOption.value['loadPowerChart']`.
 *
 * @param baseDate values used as the x-axis data
 * @param items power samples; each one must expose a numeric `percent`, and
 *              `used` / `total` are read back by the tooltip
 */
function initPowerCharts(baseDate: any, items: any) {
    let list = items.map(function (item: any) {
        return { value: Number(item.percent.toFixed(2)), data: item };
    });
    // An empty result still gets two placeholder points.
    list = list.length === 0 ? loadEmptyData2() : list;
    chartsOption.value['loadPowerChart'] = {
        xData: baseDate,
        yData: [
            {
                name: i18n.global.t('monitor.powerUsage'),
                data: list,
            },
        ],
        tooltip: {
            trigger: 'axis',
            formatter: function (params: any) {
                let res = loadDate(params[0].name);
                for (const item of params) {
                    // Points are `{ value, data }` wrappers. Detect the wrapper by type, not by
                    // truthiness of `value`, so a 0 % reading is printed as 0 instead of the object.
                    const point = item.data;
                    const wrapped = point !== null && typeof point === 'object';
                    const percent = wrapped ? point.value : point;
                    const sample = wrapped ? point.data : undefined;
                    res += loadSeries(item, percent, '%');
                    // Placeholder points carry no sample details; skip the watt line for them
                    // rather than printing "undefined W".
                    if (sample && sample.used !== undefined && sample.total !== undefined) {
                        res += `( ${sample.used} W / ${sample.total} W)<br/>`;
                    }
                }
                return res;
            },
        },
        formatStr: '%',
    };
}
|
||||||
|
/**
 * Builds the option object for the temperature chart and stores it under
 * `chartsOption.value['loadTemperatureChart']`.
 *
 * @param baseDate values used as the x-axis data
 * @param items temperature readings; each one is converted with `Number`
 */
function initTemperatureCharts(baseDate: any, items: any) {
    const readings = items.map((raw: any) => Number(raw));
    // An empty result still gets two placeholder points.
    const seriesData = readings.length === 0 ? loadEmptyData() : readings;

    const temperatureSeries = {
        name: i18n.global.t('monitor.temperature'),
        data: seriesData,
    };
    chartsOption.value['loadTemperatureChart'] = {
        xData: baseDate,
        yData: [temperatureSeries],
        formatStr: '°C',
    };
}
|
||||||
|
/**
 * Builds the option object for the fan speed chart and stores it under
 * `chartsOption.value['loadSpeedChart']`.
 *
 * @param baseDate values used as the x-axis data
 * @param items fan speed readings; each one is converted with `Number`
 */
function initSpeedCharts(baseDate: any, items: any) {
    const converted = items.map((entry: any) => {
        return Number(entry);
    });
    // An empty result still gets two placeholder points.
    const fanData = converted.length !== 0 ? converted : loadEmptyData();
    const fanSeries = { name: i18n.global.t('monitor.fanSpeed'), data: fanData };
    chartsOption.value['loadSpeedChart'] = {
        xData: baseDate,
        yData: [fanSeries],
        formatStr: '%',
    };
}
|
||||||
|
|
||||||
const lowerCase = (val: string) => {
|
function loadEmptyDate(timeRange: any) {
|
||||||
return val.toLowerCase();
|
if (timeRange.length != 2) {
|
||||||
};
|
return;
|
||||||
|
}
|
||||||
|
let date1 = new Date(timeRange[0]);
|
||||||
|
let date2 = new Date(timeRange[1]);
|
||||||
|
return [date1, date2];
|
||||||
|
}
|
||||||
|
/** Returns a fresh two-point series of zeros, used when a numeric chart has no samples. */
function loadEmptyData() {
    return new Array<number>(2).fill(0);
}
|
||||||
|
/**
 * Returns two fresh placeholder points of the `{ value, data }` shape, used
 * when a chart that carries per-point details has no samples.
 */
function loadEmptyData2() {
    return Array.from({ length: 2 }, () => ({ value: 0, data: {} }));
}
|
||||||
|
|
||||||
const loadComputeMode = (val: string) => {
|
function withMemoryProcess(list: any) {
|
||||||
switch (val) {
|
let process;
|
||||||
case 'Default':
|
let res = loadDate(list[0].name);
|
||||||
return i18n.global.t('aiTools.gpu.default');
|
for (const item of list) {
|
||||||
case 'Exclusive Process':
|
if (item.data?.data) {
|
||||||
return i18n.global.t('aiTools.gpu.exclusiveProcess');
|
process = item.data?.data.gpuProcesses || [];
|
||||||
case 'Exclusive Thread':
|
}
|
||||||
return i18n.global.t('aiTools.gpu.exclusiveThread');
|
res += loadSeries(item, item.data.value ? item.data.value : item.data, '%');
|
||||||
case 'Prohibited':
|
res += `( ${item.data?.data.used} MiB / ${item.data?.data.total} MiB)<br/>`;
|
||||||
return i18n.global.t('aiTools.gpu.prohibited');
|
|
||||||
}
|
}
|
||||||
};
|
if (!process) {
|
||||||
|
return res;
|
||||||
const loadEcc = (val: string) => {
|
|
||||||
if (val === 'N/A') {
|
|
||||||
return i18n.global.t('aiTools.gpu.migModeNA');
|
|
||||||
}
|
}
|
||||||
if (val === 'Disabled') {
|
res += `
|
||||||
return i18n.global.t('aiTools.gpu.disabled');
|
<div style="margin-top: 10px; border-bottom: 1px dashed black;"></div>
|
||||||
|
<table style="border-collapse: collapse; margin-top: 20px; font-size: 12px;">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th style="padding: 6px 8px;">PID</th>
|
||||||
|
<th style="padding: 6px 8px;">${i18n.global.t('aiTools.gpu.type')}</th>
|
||||||
|
<th style="padding: 6px 8px;">${i18n.global.t('aiTools.gpu.processName')}</th>
|
||||||
|
<th style="padding: 6px 8px;">${i18n.global.t('aiTools.gpu.processMemoryUsage')}</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
`;
|
||||||
|
for (const row of process) {
|
||||||
|
res += `
|
||||||
|
<tr>
|
||||||
|
<td style="padding: 6px 8px; text-align: center;">
|
||||||
|
${row.pid}
|
||||||
|
</td>
|
||||||
|
<td style="padding: 6px 8px; text-align: center;">
|
||||||
|
${loadProcessType(row.type)}
|
||||||
|
</td>
|
||||||
|
<td style="padding: 6px 8px; text-align: center;">
|
||||||
|
${row.processName}
|
||||||
|
</td>
|
||||||
|
<td style="padding: 6px 8px; text-align: center;">
|
||||||
|
${row.usedMemory}
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
`;
|
||||||
}
|
}
|
||||||
if (val === 'Enabled') {
|
return res;
|
||||||
return i18n.global.t('aiTools.gpu.enabled');
|
}
|
||||||
}
|
function loadDate(name: any) {
|
||||||
return val || 0;
|
return ` <div style="display: inline-block; width: 100%; padding-bottom: 10px;">
|
||||||
};
|
${i18n.global.t('commons.search.date')}: ${name.replaceAll('\n', ' ')}
|
||||||
|
</div>`;
|
||||||
|
}
|
||||||
|
/**
 * Renders one tooltip row for a chart series as an HTML string.
 *
 * @param item tooltip entry; its `marker` and `seriesName` are printed
 * @param data value printed after the series name
 * @param unit suffix printed after the value
 * @returns a full-width `<div>` containing "marker name: data unit"
 */
function loadSeries(item: any, data: any, unit: any) {
    const row = `${item.marker} ${item.seriesName}: ${data} ${unit}`;
    return '<div style="width: 100%;">' + '\n' + row + '\n' + '</div>';
}
|
||||||
const loadProcessType = (val: string) => {
|
const loadProcessType = (val: string) => {
|
||||||
if (val === 'C' || val === 'G') {
|
if (val === 'C' || val === 'G') {
|
||||||
return i18n.global.t('aiTools.gpu.type' + val);
|
return i18n.global.t('aiTools.gpu.type' + val);
|
||||||
|
|
@ -347,21 +392,19 @@ onMounted(() => {
|
||||||
});
|
});
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<style lang="scss" scoped>
|
<style scoped lang="scss">
|
||||||
.name-class {
|
.content-container__search {
|
||||||
font-size: 18px;
|
margin-top: 7px;
|
||||||
font-weight: 500;
|
.el-card {
|
||||||
}
|
--el-card-padding: 12px;
|
||||||
.title-class {
|
|
||||||
font-size: 14px;
|
|
||||||
font-weight: 500;
|
|
||||||
}
|
|
||||||
.cell-item {
|
|
||||||
display: flex;
|
|
||||||
align-items: center;
|
|
||||||
.icon-item {
|
|
||||||
margin-left: 4px;
|
|
||||||
margin-top: -1px;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
.title {
|
||||||
|
font-size: 16px;
|
||||||
|
font-weight: 500;
|
||||||
|
}
|
||||||
|
.chart {
|
||||||
|
width: 100%;
|
||||||
|
height: 400px;
|
||||||
|
}
|
||||||
</style>
|
</style>
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue