mirror of
https://github.com/1Panel-dev/1Panel.git
synced 2025-12-19 22:09:03 +08:00
feat: Compatible with XPU monitoring (#11088)
This commit is contained in:
parent
18c65c3096
commit
8e03b24b0c
9 changed files with 108 additions and 52 deletions
|
|
@ -44,6 +44,7 @@ type MonitorGPUSearch struct {
|
|||
EndTime time.Time `json:"endTime"`
|
||||
}
|
||||
type MonitorGPUData struct {
|
||||
GPUType string `json:"gpuType"`
|
||||
ProductNames []string `json:"productNames"`
|
||||
Date []time.Time `json:"date"`
|
||||
GPUValue []float64 `json:"gpuValue"`
|
||||
|
|
@ -58,8 +59,8 @@ type GPUPowerUsageHelper struct {
|
|||
Percent float64 `json:"percent"`
|
||||
}
|
||||
type GPUMemoryUsageHelper struct {
|
||||
Total int `json:"total"`
|
||||
Used int `json:"used"`
|
||||
Total float64 `json:"total"`
|
||||
Used float64 `json:"used"`
|
||||
Percent float64 `json:"percent"`
|
||||
|
||||
GPUProcesses []GPUProcess `json:"gpuProcesses"`
|
||||
|
|
|
|||
|
|
@ -39,8 +39,8 @@ type MonitorGPU struct {
|
|||
Temperature int `json:"temperature"`
|
||||
PowerDraw float64 `json:"powerDraw"`
|
||||
MaxPowerLimit float64 `json:"maxPowerLimit"`
|
||||
MemUsed int `json:"memUsed"`
|
||||
MemTotal int `json:"memTotal"`
|
||||
MemUsed float64 `json:"memUsed"`
|
||||
MemTotal float64 `json:"memTotal"`
|
||||
FanSpeed int `json:"fanSpeed"`
|
||||
Processes string `json:"processes"`
|
||||
}
|
||||
|
|
|
|||
|
|
@ -131,8 +131,10 @@ func (m *MonitorService) LoadGPUMonitorData(req dto.MonitorGPUSearch) (dto.Monit
|
|||
}
|
||||
if len(req.ProductName) == 0 {
|
||||
if gpuExist {
|
||||
data.GPUType = "gpu"
|
||||
gpuInfo, err := gpuclient.LoadGpuInfo()
|
||||
if err != nil || len(gpuInfo.GPUs) == 0 {
|
||||
global.LOG.Error("Load GPU info failed or no GPU found, err: ", err)
|
||||
return data, buserr.New("ErrRecordNotFound")
|
||||
}
|
||||
req.ProductName = gpuInfo.GPUs[0].ProductName
|
||||
|
|
@ -140,8 +142,10 @@ func (m *MonitorService) LoadGPUMonitorData(req dto.MonitorGPUSearch) (dto.Monit
|
|||
data.ProductNames = append(data.ProductNames, item.ProductName)
|
||||
}
|
||||
} else {
|
||||
data.GPUType = "xpu"
|
||||
xpuInfo, err := xpuClient.LoadGpuInfo()
|
||||
if err != nil || len(xpuInfo.Xpu) == 0 {
|
||||
global.LOG.Error("Load XPU info failed or no XPU found, err: ", err)
|
||||
return data, buserr.New("ErrRecordNotFound")
|
||||
}
|
||||
req.ProductName = xpuInfo.Xpu[0].Basic.DeviceName
|
||||
|
|
@ -159,15 +163,18 @@ func (m *MonitorService) LoadGPUMonitorData(req dto.MonitorGPUSearch) (dto.Monit
|
|||
data.Date = append(data.Date, gpu.CreatedAt)
|
||||
data.GPUValue = append(data.GPUValue, gpu.GPUUtil)
|
||||
data.TemperatureValue = append(data.TemperatureValue, gpu.Temperature)
|
||||
data.PowerValue = append(data.PowerValue, dto.GPUPowerUsageHelper{
|
||||
powerItem := dto.GPUPowerUsageHelper{
|
||||
Total: gpu.MaxPowerLimit,
|
||||
Used: gpu.PowerDraw,
|
||||
Percent: gpu.PowerDraw / gpu.MaxPowerLimit * 100,
|
||||
})
|
||||
}
|
||||
if powerItem.Total != 0 {
|
||||
powerItem.Percent = powerItem.Used / powerItem.Total
|
||||
}
|
||||
data.PowerValue = append(data.PowerValue, powerItem)
|
||||
memItem := dto.GPUMemoryUsageHelper{
|
||||
Total: gpu.MemTotal,
|
||||
Used: gpu.MemUsed,
|
||||
Percent: float64(gpu.MemUsed) / float64(gpu.MemTotal) * 100,
|
||||
Percent: gpu.MemUsed / gpu.MemTotal * 100,
|
||||
}
|
||||
var process []dto.GPUProcess
|
||||
if err := json.Unmarshal([]byte(gpu.Processes), &process); err == nil {
|
||||
|
|
@ -568,9 +575,8 @@ func saveGPUDataToDB() {
|
|||
GPUUtil: loadGPUInfoFloat(gpuItem.GPUUtil),
|
||||
Temperature: loadGPUInfoInt(gpuItem.Temperature),
|
||||
PowerDraw: loadGPUInfoFloat(gpuItem.PowerDraw),
|
||||
MaxPowerLimit: loadGPUInfoFloat(gpuItem.MaxPowerLimit),
|
||||
MemUsed: loadGPUInfoInt(gpuItem.MemUsed),
|
||||
MemTotal: loadGPUInfoInt(gpuItem.MemTotal),
|
||||
MemUsed: loadGPUInfoFloat(gpuItem.MemUsed),
|
||||
MemTotal: loadGPUInfoFloat(gpuItem.MemTotal),
|
||||
FanSpeed: loadGPUInfoInt(gpuItem.FanSpeed),
|
||||
}
|
||||
process, _ := json.Marshal(gpuItem.Processes)
|
||||
|
|
@ -600,9 +606,11 @@ func saveXPUDataToDB() {
|
|||
GPUUtil: loadGPUInfoFloat(xpuItem.Stats.MemoryUtil),
|
||||
Temperature: loadGPUInfoInt(xpuItem.Stats.Temperature),
|
||||
PowerDraw: loadGPUInfoFloat(xpuItem.Stats.Power),
|
||||
MemUsed: loadGPUInfoInt(xpuItem.Stats.MemoryUsed),
|
||||
MemTotal: loadGPUInfoInt(xpuItem.Basic.Memory),
|
||||
MaxPowerLimit: float64(xpuItem.Config.PowerLimit),
|
||||
MemUsed: loadGPUInfoFloat(xpuItem.Stats.MemoryUsed),
|
||||
MemTotal: loadGPUInfoFloat(xpuItem.Basic.Memory),
|
||||
}
|
||||
if len(xpuItem.Processes) != 0 {
|
||||
var processItem []dto.GPUProcess
|
||||
for _, ps := range xpuItem.Processes {
|
||||
processItem = append(processItem, dto.GPUProcess{
|
||||
|
|
@ -616,6 +624,7 @@ func saveXPUDataToDB() {
|
|||
if len(process) != 0 {
|
||||
item.Processes = string(process)
|
||||
}
|
||||
}
|
||||
list = append(list, item)
|
||||
}
|
||||
if err := repo.NewIMonitorRepo().BatchCreateMonitorGPU(list); err != nil {
|
||||
|
|
@ -633,6 +642,7 @@ func loadGPUInfoInt(val string) int {
|
|||
}
|
||||
func loadGPUInfoFloat(val string) float64 {
|
||||
valItem := strings.ReplaceAll(val, "W", "")
|
||||
valItem = strings.ReplaceAll(valItem, "MB", "")
|
||||
valItem = strings.ReplaceAll(valItem, "%", "")
|
||||
valItem = strings.TrimSpace(valItem)
|
||||
data, _ := strconv.ParseFloat(valItem, 64)
|
||||
|
|
|
|||
|
|
@ -721,7 +721,7 @@ var UpdateDatabase = &gormigrate.Migration{
|
|||
}
|
||||
|
||||
var AddGPUMonitor = &gormigrate.Migration{
|
||||
ID: "20251119-add-gpu-monitor",
|
||||
ID: "20251127-add-gpu-monitor",
|
||||
Migrate: func(tx *gorm.DB) error {
|
||||
return global.GPUMonitorDB.AutoMigrate(&model.MonitorGPU{})
|
||||
},
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ type XpuInfo struct {
|
|||
// Xpu bundles the data reported for a single XPU device.
// NOTE(review): presumably one element of the Xpu slice returned by the XPU
// client's LoadGpuInfo — confirm against the XpuInfo definition.
type Xpu struct {
|
||||
// Basic holds static device attributes; callers in this change read
// DeviceName and Memory from it.
Basic Basic `json:"basic"`
|
||||
// Stats holds runtime readings; callers in this change read MemoryUtil,
// Temperature, Power and MemoryUsed from it.
Stats Stats `json:"stats"`
|
||||
// Config holds device configuration; its PowerLimit is what the monitor
// stores as MaxPowerLimit for XPU samples.
Config Config `json:"config"`
|
||||
// Processes lists the processes reported for the device.
Processes []Process `json:"processes"`
|
||||
}
|
||||
|
||||
|
|
@ -23,6 +24,11 @@ type Basic struct {
|
|||
PciBdfAddress string `json:"pciBdfAddress"`
|
||||
}
|
||||
|
||||
// Config describes the power configuration reported for an XPU device.
type Config struct {
|
||||
// PowerLimit is the configured power limit. The monitor converts it to
// float64 and stores it as MaxPowerLimit.
// NOTE(review): unit is presumably watts, matching the "W" suffix stripped
// from other power readings — confirm.
PowerLimit int `json:"power_limit"`
|
||||
// PowerValidRange is the allowed power range, kept as the raw string.
// NOTE(review): the tag is spelled "power_vaild_range"; this presumably
// mirrors the key emitted by the upstream tool — verify before "fixing" it,
// since changing the tag changes what is decoded.
PowerValidRange string `json:"power_vaild_range"`
|
||||
}
|
||||
|
||||
type Stats struct {
|
||||
Power string `json:"power"`
|
||||
Frequency string `json:"frequency"`
|
||||
|
|
|
|||
|
|
@ -167,6 +167,7 @@ export namespace Host {
|
|||
endTime: Date;
|
||||
}
|
||||
export interface MonitorGPUData {
|
||||
gpuType: string;
|
||||
productNames: Array<string>;
|
||||
date: Array<Date>;
|
||||
gpuValue: Array<number>;
|
||||
|
|
|
|||
|
|
@ -30,9 +30,9 @@
|
|||
</div>
|
||||
</el-card>
|
||||
</div>
|
||||
<el-row :gutter="7" class="card-interval" v-if="options.length !== 0">
|
||||
<el-col :span="24">
|
||||
<el-card style="overflow: inherit">
|
||||
<el-row :gutter="7" v-if="options.length !== 0">
|
||||
<el-col v-bind="gpuType === 'gpu' ? fullWidthProps : halfWidthProps">
|
||||
<el-card class="card-interval" style="overflow: inherit">
|
||||
<template #header>
|
||||
<div :class="mobile ? 'flx-wrap' : 'flex justify-between'">
|
||||
<span class="title">{{ $t('monitor.gpuUtil') }}</span>
|
||||
|
|
@ -51,7 +51,7 @@
|
|||
</el-card>
|
||||
</el-col>
|
||||
<el-col :xs="24" :sm="24" :md="12" :lg="12" :xl="12">
|
||||
<el-card style="overflow: inherit">
|
||||
<el-card class="card-interval" style="overflow: inherit">
|
||||
<template #header>
|
||||
<div :class="mobile ? 'flx-wrap' : 'flex justify-between'">
|
||||
<span class="title">{{ $t('monitor.memoryUsage') }}</span>
|
||||
|
|
@ -70,7 +70,7 @@
|
|||
</el-card>
|
||||
</el-col>
|
||||
<el-col :xs="24" :sm="24" :md="12" :lg="12" :xl="12">
|
||||
<el-card style="overflow: inherit">
|
||||
<el-card class="card-interval" style="overflow: inherit">
|
||||
<template #header>
|
||||
<div :class="mobile ? 'flx-wrap' : 'flex justify-between'">
|
||||
<span class="title">{{ $t('monitor.powerUsage') }}</span>
|
||||
|
|
@ -89,7 +89,7 @@
|
|||
</el-card>
|
||||
</el-col>
|
||||
<el-col :xs="24" :sm="24" :md="12" :lg="12" :xl="12">
|
||||
<el-card style="overflow: inherit">
|
||||
<el-card class="card-interval" style="overflow: inherit">
|
||||
<template #header>
|
||||
<div>
|
||||
{{ $t('monitor.temperature') }}
|
||||
|
|
@ -110,8 +110,8 @@
|
|||
</div>
|
||||
</el-card>
|
||||
</el-col>
|
||||
<el-col :xs="24" :sm="24" :md="12" :lg="12" :xl="12">
|
||||
<el-card style="overflow: inherit">
|
||||
<el-col :xs="24" :sm="24" :md="12" :lg="12" :xl="12" v-if="gpuType === 'gpu'">
|
||||
<el-card class="card-interval" style="overflow: inherit">
|
||||
<template #header>
|
||||
<div :class="mobile ? 'flx-wrap' : 'flex justify-between'">
|
||||
<span class="title">{{ $t('monitor.fanSpeed') }}</span>
|
||||
|
|
@ -161,8 +161,12 @@ const mobile = computed(() => {
|
|||
return globalStore.isMobile();
|
||||
});
|
||||
|
||||
// Column sizing presets spread onto <el-col> via v-bind. The utilization card
// uses the full-width preset when gpuType is 'gpu' and the half-width preset
// (half a row from the md breakpoint up) otherwise.
const fullWidthProps = { span: 24 };
|
||||
const halfWidthProps = { xs: 24, sm: 24, md: 12, lg: 12, xl: 12 };
|
||||
|
||||
const loading = ref(false);
|
||||
const options = ref([]);
|
||||
const gpuType = ref('gpu');
|
||||
const timeRangeGlobal = ref<[Date, Date]>([new Date(new Date().setHours(0, 0, 0, 0)), new Date()]);
|
||||
const chartsOption = ref({
|
||||
loadPowerChart: null,
|
||||
|
|
@ -189,6 +193,7 @@ const search = async () => {
|
|||
.then((res) => {
|
||||
loading.value = false;
|
||||
options.value = res.data.productNames || [];
|
||||
gpuType.value = res.data.gpuType || 'gpu';
|
||||
searchInfo.productName = searchInfo.productName || (options.value.length > 0 ? options.value[0] : '');
|
||||
let baseDate = res.data.date.length === 0 ? loadEmptyDate(timeRangeGlobal.value) : res.data.date;
|
||||
let date = baseDate.map(function (item: any) {
|
||||
|
|
@ -196,7 +201,11 @@ const search = async () => {
|
|||
});
|
||||
initCPUCharts(date, res.data.gpuValue);
|
||||
initMemoryCharts(date, res.data.memoryValue);
|
||||
if (gpuType.value === 'gpu') {
|
||||
initPowerCharts(date, res.data.powerValue);
|
||||
} else {
|
||||
initXpuPowerCharts(date, res.data.powerValue);
|
||||
}
|
||||
initSpeedCharts(date, res.data.speedValue);
|
||||
initTemperatureCharts(date, res.data.temperatureValue);
|
||||
})
|
||||
|
|
@ -270,6 +279,33 @@ function initPowerCharts(baseDate: any, items: any) {
|
|||
formatStr: '%',
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Build the power chart option for XPU devices and store it in
 * `chartsOption.value.loadPowerChart`.
 *
 * The series plots the absolute power draw (`item.used`, rounded to two
 * decimals) and both the axis and the tooltip are formatted with a `W` suffix.
 *
 * @param baseDate x-axis labels for the chart.
 * @param items    power samples; each is expected to expose a numeric `used`
 *                 field. A missing list is treated as empty.
 */
function initXpuPowerCharts(baseDate: any, items: any) {
    // Fall back to an empty list so a missing payload renders the placeholder
    // series instead of throwing on `.map`.
    let points = (items || []).map(function (item: any) {
        // Number(...) first so a non-number `used` yields NaN rather than a TypeError.
        return { value: Number(Number(item.used).toFixed(2)), data: item };
    });
    points = points.length === 0 ? loadEmptyData2() : points;
    chartsOption.value['loadPowerChart'] = {
        xData: baseDate,
        yData: [
            {
                name: i18n.global.t('monitor.powerUsage'),
                data: points,
            },
        ],
        tooltip: {
            trigger: 'axis',
            formatter: function (params: any) {
                let res = loadDate(params[0].name);
                for (const item of params) {
                    const point = item.data;
                    // Decide by shape, not truthiness: a truthiness test treats a
                    // legitimate 0 W reading as "no value" and would pass the whole
                    // data object to loadSeries. Placeholder entries are used as-is.
                    const value = point !== null && typeof point === 'object' ? point.value : point;
                    res += loadSeries(item, value, 'W');
                }
                return res;
            },
        },
        formatStr: 'W',
    };
}
|
||||
function initTemperatureCharts(baseDate: any, items: any) {
|
||||
let temperatures = items.map(function (item: any) {
|
||||
return Number(item);
|
||||
|
|
@ -334,14 +370,15 @@ function withMemoryProcess(list: any) {
|
|||
if (!process) {
|
||||
return res;
|
||||
}
|
||||
let title = gpuType.value === 'gpu' ? i18n.global.t('aiTools.gpu.type') : i18n.global.t('aiTools.gpu.shr');
|
||||
res += `
|
||||
<div style="margin-top: 10px; border-bottom: 1px dashed black;"></div>
|
||||
<table style="border-collapse: collapse; margin-top: 20px; font-size: 12px;">
|
||||
<thead>
|
||||
<tr>
|
||||
<th style="padding: 6px 8px;">PID</th>
|
||||
<th style="padding: 6px 8px;">${i18n.global.t('aiTools.gpu.type')}</th>
|
||||
<th style="padding: 6px 8px;">${i18n.global.t('aiTools.gpu.processName')}</th>
|
||||
<th style="padding: 6px 8px;">${title}</th>
|
||||
<th style="padding: 6px 8px;">${i18n.global.t('aiTools.gpu.processMemoryUsage')}</th>
|
||||
</tr>
|
||||
</thead>
|
||||
|
|
@ -354,10 +391,10 @@ function withMemoryProcess(list: any) {
|
|||
${row.pid}
|
||||
</td>
|
||||
<td style="padding: 6px 8px; text-align: center;">
|
||||
${loadProcessType(row.type)}
|
||||
${row.processName}
|
||||
</td>
|
||||
<td style="padding: 6px 8px; text-align: center;">
|
||||
${row.processName}
|
||||
${loadProcessType(row.type)}
|
||||
</td>
|
||||
<td style="padding: 6px 8px; text-align: center;">
|
||||
${row.usedMemory}
|
||||
|
|
|
|||
|
|
@ -69,8 +69,9 @@
|
|||
<el-button v-if="!cpuShowAll" @click="cpuShowAll = true" icon="More" link size="small" />
|
||||
<el-button v-if="cpuShowAll" @click="cpuShowAll = false" icon="ArrowUp" link size="small" />
|
||||
</div>
|
||||
<br />
|
||||
|
||||
<el-button link size="small" type="primary" class="mt-2 mb-2" @click="showTop = !showTop">
|
||||
<el-button link size="small" type="primary" class="mt-1 mb-2" @click="showTop = !showTop">
|
||||
{{ $t('home.cpuTop') }}
|
||||
<el-icon v-if="!showTop"><ArrowRight /></el-icon>
|
||||
<el-icon v-if="showTop"><ArrowDown /></el-icon>
|
||||
|
|
@ -315,7 +316,7 @@
|
|||
<span class="input-help" v-else>{{ item.deviceName }}</span>
|
||||
</el-col>
|
||||
</template>
|
||||
<el-col :xs="6" :sm="6" :md="6" :lg="6" :xl="6" align="center" v-if="totalCount > 5">
|
||||
<el-col :xs="6" :sm="6" :md="3" :lg="3" :xl="3" align="center" v-if="totalCount > 5">
|
||||
<el-button v-if="!showMore" link type="primary" @click="changeShowMore(true)" class="buttonClass">
|
||||
{{ $t('tabs.more') }}
|
||||
<el-icon><Bottom /></el-icon>
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue