fix: Add GPU monitoring support for some machines (#10133)

Refs #9504 #8397
This commit is contained in:
ssongliu 2025-08-25 18:53:57 +08:00 committed by GitHub
parent b2b578a5f0
commit c752ea4959
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 7 additions and 9 deletions

View file

@ -59,11 +59,5 @@ func (n NvidiaSMI) LoadGpuInfo() (*common.GpuInfo, error) {
break
}
if version == "v12" || version == "v11" {
return schema.Parse(data, version)
} else {
global.LOG.Errorf("don't support such schema version %s", version)
}
return &common.GpuInfo{}, nil
}

View file

@ -33,9 +33,12 @@ func Parse(buf []byte, version string) (*common.GpuInfo, error) {
gpuItem.Temperature = s.Gpu[i].Temperature.GpuTemp
gpuItem.PerformanceState = s.Gpu[i].PerformanceState
if version == "v12" {
if version == "v12" || version == "v13" {
gpuItem.PowerDraw = s.Gpu[i].GpuPowerReadings.PowerDraw
gpuItem.MaxPowerLimit = s.Gpu[i].GpuPowerReadings.MaxPowerLimit
if len(gpuItem.PowerDraw) == 0 {
gpuItem.PowerDraw = s.Gpu[i].GpuPowerReadings.InstantPowerDraw
}
gpuItem.MaxPowerLimit = s.Gpu[i].GpuPowerReadings.CurrentPowerLimit
} else {
gpuItem.PowerDraw = s.Gpu[i].PowerReadings.PowerDraw
gpuItem.MaxPowerLimit = s.Gpu[i].PowerReadings.MaxPowerLimit

View file

@ -108,6 +108,7 @@ type smi struct {
} `xml:"gpu_operation_mode"`
GpuPartNumber string `xml:"gpu_part_number"`
GpuPowerReadings struct {
InstantPowerDraw string `xml:"instant_power_draw"`
CurrentPowerLimit string `xml:"current_power_limit"`
DefaultPowerLimit string `xml:"default_power_limit"`
MaxPowerLimit string `xml:"max_power_limit"`