diff --git a/agent/app/service/alert_helper.go b/agent/app/service/alert_helper.go index 07b230642..ddd64e821 100644 --- a/agent/app/service/alert_helper.go +++ b/agent/app/service/alert_helper.go @@ -9,6 +9,7 @@ import ( "github.com/1Panel-dev/1Panel/agent/global" alertUtil "github.com/1Panel-dev/1Panel/agent/utils/alert" "github.com/1Panel-dev/1Panel/agent/utils/common" + "github.com/1Panel-dev/1Panel/agent/utils/psutil" versionUtil "github.com/1Panel-dev/1Panel/agent/utils/version" "github.com/1Panel-dev/1Panel/agent/utils/xpack" "github.com/shirou/gopsutil/v4/cpu" @@ -352,7 +353,7 @@ func loadLoadInfo(alert dto.AlertDTO) { return } var loadValue float64 - CPUTotal, _ := cpu.Counts(true) + CPUTotal, _ := psutil.CPUInfo.GetLogicalCores(false) switch alert.Cycle { case 1: loadValue = avgStat.Load1 / (float64(CPUTotal*2) * 0.75) * 100 @@ -839,7 +840,7 @@ func processSingleDisk(alert dto.AlertDTO) error { } func checkAndCreateDiskAlert(alert dto.AlertDTO, path string) (bool, error) { - usageStat, err := disk.Usage(path) + usageStat, err := psutil.DISK.GetUsage(path, false) if err != nil { global.LOG.Errorf("error getting disk usage for %s, err: %v", path, err) return false, err diff --git a/agent/app/service/dashboard.go b/agent/app/service/dashboard.go index bf6825558..4fd3687ab 100644 --- a/agent/app/service/dashboard.go +++ b/agent/app/service/dashboard.go @@ -1,6 +1,7 @@ package service import ( + "cmp" "context" "encoding/json" "fmt" @@ -23,10 +24,9 @@ import ( "github.com/1Panel-dev/1Panel/agent/utils/common" "github.com/1Panel-dev/1Panel/agent/utils/controller" "github.com/1Panel-dev/1Panel/agent/utils/copier" + "github.com/1Panel-dev/1Panel/agent/utils/psutil" "github.com/gin-gonic/gin" - "github.com/shirou/gopsutil/v4/cpu" "github.com/shirou/gopsutil/v4/disk" - "github.com/shirou/gopsutil/v4/host" "github.com/shirou/gopsutil/v4/load" "github.com/shirou/gopsutil/v4/mem" "github.com/shirou/gopsutil/v4/net" @@ -77,7 +77,7 @@ func (u *DashboardService) 
Restart(operation string) error { func (u *DashboardService) LoadOsInfo() (*dto.OsInfo, error) { var baseInfo dto.OsInfo - hostInfo, err := host.Info() + hostInfo, err := psutil.HOST.GetHostInfo(false) if err != nil { return nil, err } @@ -87,7 +87,7 @@ func (u *DashboardService) LoadOsInfo() (*dto.OsInfo, error) { baseInfo.KernelArch = hostInfo.KernelArch baseInfo.KernelVersion = hostInfo.KernelVersion - diskInfo, err := disk.Usage(global.Dir.BaseDir) + diskInfo, err := psutil.DISK.GetUsage(global.Dir.BaseDir, false) if err == nil { baseInfo.DiskSize = int64(diskInfo.Free) } @@ -104,12 +104,16 @@ func (u *DashboardService) LoadOsInfo() (*dto.OsInfo, error) { func (u *DashboardService) LoadCurrentInfoForNode() *dto.NodeCurrent { var currentInfo dto.NodeCurrent - currentInfo.CPUTotal, _ = cpu.Counts(true) - totalPercent, _ := cpu.Percent(100*time.Millisecond, false) - if len(totalPercent) == 1 { - currentInfo.CPUUsedPercent = totalPercent[0] - currentInfo.CPUUsed = currentInfo.CPUUsedPercent * 0.01 * float64(currentInfo.CPUTotal) + currentInfo.CPUTotal, _ = psutil.CPUInfo.GetLogicalCores(false) + + cpuUsedPercent, perCore := psutil.CPU.GetCPUUsage() + if len(perCore) == 0 { + currentInfo.CPUTotal = psutil.CPU.NumCPU() + } else { + currentInfo.CPUTotal = len(perCore) } + currentInfo.CPUUsedPercent = cpuUsedPercent + currentInfo.CPUUsed = cpuUsedPercent * 0.01 * float64(currentInfo.CPUTotal) loadInfo, _ := load.Avg() currentInfo.Load1 = loadInfo.Load1 @@ -134,38 +138,37 @@ func (u *DashboardService) LoadCurrentInfoForNode() *dto.NodeCurrent { func (u *DashboardService) LoadBaseInfo(ioOption string, netOption string) (*dto.DashboardBase, error) { var baseInfo dto.DashboardBase - hostInfo, err := host.Info() + hostInfo, err := psutil.HOST.GetHostInfo(false) if err != nil { return nil, err } - baseInfo.Hostname = hostInfo.Hostname - baseInfo.OS = hostInfo.OS - baseInfo.Platform = hostInfo.Platform - baseInfo.PlatformFamily = hostInfo.PlatformFamily - 
baseInfo.PlatformVersion = hostInfo.PlatformVersion - baseInfo.KernelArch = hostInfo.KernelArch - baseInfo.KernelVersion = hostInfo.KernelVersion ss, _ := json.Marshal(hostInfo) - baseInfo.VirtualizationSystem = string(ss) - baseInfo.IpV4Addr = loadOutboundIP() - httpProxy := os.Getenv("http_proxy") - if httpProxy == "" { - httpProxy = os.Getenv("HTTP_PROXY") + baseInfo = dto.DashboardBase{ + Hostname: hostInfo.Hostname, + OS: hostInfo.OS, + Platform: hostInfo.Platform, + PlatformFamily: hostInfo.PlatformFamily, + PlatformVersion: hostInfo.PlatformVersion, + KernelArch: hostInfo.KernelArch, + KernelVersion: hostInfo.KernelVersion, + VirtualizationSystem: string(ss), + IpV4Addr: loadOutboundIP(), + SystemProxy: "noProxy", } - if httpProxy != "" { - baseInfo.SystemProxy = httpProxy + + if proxy := cmp.Or(os.Getenv("http_proxy"), os.Getenv("HTTP_PROXY")); proxy != "" { + baseInfo.SystemProxy = proxy } - baseInfo.SystemProxy = "noProxy" loadQuickJump(&baseInfo) - cpuInfo, err := cpu.Info() - if err == nil { + cpuInfo, err := psutil.CPUInfo.GetCPUInfo(false) + if err == nil && len(cpuInfo) > 0 { baseInfo.CPUModelName = cpuInfo[0].ModelName } - baseInfo.CPUCores, _ = cpu.Counts(false) - baseInfo.CPULogicalCores, _ = cpu.Counts(true) + baseInfo.CPUCores, _ = psutil.CPUInfo.GetPhysicalCores(false) + baseInfo.CPULogicalCores, _ = psutil.CPUInfo.GetLogicalCores(false) baseInfo.CurrentInfo = *u.LoadCurrentInfo(ioOption, netOption) return &baseInfo, nil @@ -173,18 +176,21 @@ func (u *DashboardService) LoadBaseInfo(ioOption string, netOption string) (*dto func (u *DashboardService) LoadCurrentInfo(ioOption string, netOption string) *dto.DashboardCurrent { var currentInfo dto.DashboardCurrent - hostInfo, _ := host.Info() + hostInfo, _ := psutil.HOST.GetHostInfo(false) currentInfo.Uptime = hostInfo.Uptime currentInfo.TimeSinceUptime = time.Now().Add(-time.Duration(hostInfo.Uptime) * time.Second).Format(constant.DateTimeLayout) currentInfo.Procs = hostInfo.Procs + 
currentInfo.CPUTotal, _ = psutil.CPUInfo.GetLogicalCores(false) - currentInfo.CPUTotal, _ = cpu.Counts(true) - totalPercent, _ := cpu.Percent(100*time.Millisecond, false) - if len(totalPercent) == 1 { - currentInfo.CPUUsedPercent = totalPercent[0] - currentInfo.CPUUsed = currentInfo.CPUUsedPercent * 0.01 * float64(currentInfo.CPUTotal) + cpuUsedPercent, perCore := psutil.CPU.GetCPUUsage() + if len(perCore) == 0 { + currentInfo.CPUTotal = psutil.CPU.NumCPU() + } else { + currentInfo.CPUTotal = len(perCore) } - currentInfo.CPUPercent, _ = cpu.Percent(100*time.Millisecond, true) + currentInfo.CPUPercent = perCore + currentInfo.CPUUsedPercent = cpuUsedPercent + currentInfo.CPUUsed = cpuUsedPercent * 0.01 * float64(currentInfo.CPUTotal) loadInfo, _ := load.Avg() currentInfo.Load1 = loadInfo.Load1 @@ -246,6 +252,7 @@ func (u *DashboardService) LoadCurrentInfo(ioOption string, netOption string) *d if state.Name == netOption { currentInfo.NetBytesSent = state.BytesSent currentInfo.NetBytesRecv = state.BytesRecv + break } } } @@ -455,41 +462,52 @@ func loadDiskInfo() []dto.DiskInfo { ) wg.Add(len(mounts)) for i := 0; i < len(mounts); i++ { - go func(timeoutCh <-chan time.Time, mount diskInfo) { + go func(mount diskInfo) { defer wg.Done() var itemData dto.DiskInfo itemData.Path = mount.Mount itemData.Type = mount.Type itemData.Device = mount.Device + + type diskResult struct { + state *disk.UsageStat + err error + } + resultCh := make(chan diskResult, 1) + + go func() { + state, err := psutil.DISK.GetUsage(mount.Mount, false) + resultCh <- diskResult{state: state, err: err} + }() + select { - case <-timeoutCh: + case <-time.After(5 * time.Second): mu.Lock() datas = append(datas, itemData) mu.Unlock() global.LOG.Errorf("load disk info from %s failed, err: timeout", mount.Mount) - default: - state, err := disk.Usage(mount.Mount) - if err != nil { + case result := <-resultCh: + if result.err != nil { mu.Lock() datas = append(datas, itemData) mu.Unlock() - 
const (
	// resetInterval is the maximum age of the stored sample that may still be
	// used as the baseline for a delta; older baselines are re-measured.
	resetInterval = 1 * time.Minute
	// fastInterval is how long a computed result is served from the cache
	// without touching /proc/stat again.
	fastInterval = 3 * time.Second
	// resetSampleWindow is the pause between the two reads taken when no
	// usable baseline exists.
	resetSampleWindow = 100 * time.Millisecond
	// procStatPath is the kernel file the CPU counters are read from.
	procStatPath = "/proc/stat"
	// cpuTimeFields is the number of leading counters that make up the total:
	// user, nice, system, idle, iowait, irq, softirq, steal. The trailing
	// guest and guest_nice counters are skipped because the kernel already
	// accounts them inside user and nice; adding them would count that time
	// twice.
	cpuTimeFields = 8
)

// CPUStat is one sample of cumulative CPU time counters, in kernel ticks.
type CPUStat struct {
	// Idle is idle + iowait time.
	Idle uint64
	// Total is the sum of all counted time fields, Idle included.
	Total uint64
}

// CPUUsageState computes CPU utilisation from successive /proc/stat samples
// and caches the most recent result. The zero value is ready to use; all
// fields are guarded by mu.
type CPUUsageState struct {
	mu             sync.Mutex
	lastTotalStat  *CPUStat
	lastPerCPUStat []CPUStat
	lastSampleTime time.Time

	cachedTotalUsage float64
	cachedPerCore    []float64
}

// parseCPULine parses a single "cpu…" line of /proc/stat. It reports false
// when the line has fewer than four counters (user, nice, system, idle) or
// when a counter is not an unsigned integer. A missing iowait column is
// treated as zero.
func parseCPULine(line string) (CPUStat, bool) {
	fields := strings.Fields(line)
	if len(fields) < 5 { // label + user, nice, system, idle
		return CPUStat{}, false
	}

	var st CPUStat
	for i, f := range fields[1:] {
		if i >= cpuTimeFields {
			break
		}
		v, err := strconv.ParseUint(f, 10, 64)
		if err != nil {
			return CPUStat{}, false
		}
		st.Total += v
		if i == 3 || i == 4 { // idle, iowait
			st.Idle += v
		}
	}
	return st, true
}

// parseProcStat extracts the aggregate "cpu" sample and the per-core
// "cpuN" samples (in file order) from the contents of /proc/stat. ok is
// false when no valid aggregate line is present. Malformed lines are skipped.
func parseProcStat(data string) (total CPUStat, perCPU []CPUStat, ok bool) {
	for _, line := range strings.Split(data, "\n") {
		label, _, _ := strings.Cut(line, " ")
		if !strings.HasPrefix(label, "cpu") {
			continue
		}
		st, valid := parseCPULine(line)
		if !valid {
			continue
		}
		switch {
		case label == "cpu":
			if !ok {
				total, ok = st, true
			}
		case label[3] >= '0' && label[3] <= '9':
			perCPU = append(perCPU, st)
		}
	}
	return total, perCPU, ok
}

// readProcStat reads /proc/stat once and returns the aggregate and per-core
// samples taken from that single read, so both describe the same instant.
func readProcStat() (CPUStat, []CPUStat, error) {
	data, err := os.ReadFile(procStatPath)
	if err != nil {
		return CPUStat{}, nil, err
	}
	total, perCPU, ok := parseProcStat(string(data))
	if !ok {
		return CPUStat{}, nil, &os.PathError{Op: "parse", Path: procStatPath, Err: os.ErrInvalid}
	}
	return total, perCPU, nil
}

// readCPUStat returns the aggregate CPU sample. It returns an error when
// /proc/stat cannot be read or has no valid aggregate line.
func readCPUStat() (CPUStat, error) {
	total, _, err := readProcStat()
	return total, err
}

// readPerCPUStat returns a freshly allocated slice of per-core samples. It
// never reuses the storage of a previous sample, so earlier results stay
// intact while a new one is being built.
func (c *CPUUsageState) readPerCPUStat() ([]CPUStat, error) {
	data, err := os.ReadFile(procStatPath)
	if err != nil {
		return nil, err
	}
	_, perCPU, _ := parseProcStat(string(data))
	return perCPU, nil
}

// readPerCPUStatCopy returns the per-core samples, or nil when /proc/stat
// cannot be read.
func readPerCPUStatCopy() []CPUStat {
	data, err := os.ReadFile(procStatPath)
	if err != nil {
		return nil
	}
	_, perCPU, _ := parseProcStat(string(data))
	return perCPU
}

// calcCPUPercent returns the busy percentage between two samples, always in
// the range [0, 100]. It returns 0 when the total counter did not advance
// (including when it went backwards), and treats a decreasing idle counter
// as "no idle time" instead of letting the unsigned subtraction wrap.
func calcCPUPercent(prev, cur CPUStat) float64 {
	if cur.Total <= prev.Total {
		return 0
	}
	deltaTotal := float64(cur.Total - prev.Total)

	var deltaIdle float64
	if cur.Idle > prev.Idle {
		deltaIdle = float64(cur.Idle - prev.Idle)
	}

	pct := (1 - deltaIdle/deltaTotal) * 100
	switch {
	case pct < 0:
		return 0
	case pct > 100:
		return 100
	}
	return pct
}

// perCorePercents returns one busy percentage per entry of cur, matched to
// prev by position. Cores without a baseline entry report 0, so a change in
// the number of per-core lines between samples cannot index out of range.
func perCorePercents(prev, cur []CPUStat) []float64 {
	out := make([]float64, len(cur))
	for i := range cur {
		if i < len(prev) {
			out[i] = calcCPUPercent(prev[i], cur[i])
		}
	}
	return out
}

// GetCPUUsage returns the overall CPU usage percentage and the per-core
// percentages.
//
// A result younger than fastInterval is served from the cache. Otherwise the
// current counters are diffed against the stored baseline; when there is no
// baseline, or it is older than resetInterval, a fresh baseline is taken and
// the call sleeps resetSampleWindow before the second read. If /proc/stat
// cannot be read or parsed, the previously cached values are returned
// unchanged.
//
// The mutex is held for the whole call, so concurrent callers never sample
// in parallel or diff against a baseline newer than their own read; callers
// arriving during a measurement wait and are then served from the cache. The
// returned slice is replaced, never modified in place, on later calls;
// callers must treat it as read-only.
func (c *CPUUsageState) GetCPUUsage() (float64, []float64) {
	c.mu.Lock()
	defer c.mu.Unlock()

	hasBaseline := c.lastTotalStat != nil && !c.lastSampleTime.IsZero()
	age := time.Since(c.lastSampleTime)
	if hasBaseline && age < fastInterval {
		return c.cachedTotalUsage, c.cachedPerCore
	}

	var (
		prevTotal CPUStat
		prevPer   []CPUStat
	)
	if hasBaseline && age < resetInterval {
		prevTotal, prevPer = *c.lastTotalStat, c.lastPerCPUStat
	} else {
		var err error
		if prevTotal, prevPer, err = readProcStat(); err != nil {
			return c.cachedTotalUsage, c.cachedPerCore
		}
		time.Sleep(resetSampleWindow)
	}

	curTotal, curPer, err := readProcStat()
	if err != nil {
		return c.cachedTotalUsage, c.cachedPerCore
	}

	c.cachedTotalUsage = calcCPUPercent(prevTotal, curTotal)
	c.cachedPerCore = perCorePercents(prevPer, curPer)
	c.lastTotalStat = &curTotal
	c.lastPerCPUStat = curPer
	c.lastSampleTime = time.Now()

	return c.cachedTotalUsage, c.cachedPerCore
}

// NumCPU returns the number of cores in the most recent per-core result. It
// is 0 until GetCPUUsage has produced a per-core sample.
func (c *CPUUsageState) NumCPU() int {
	c.mu.Lock()
	defer c.mu.Unlock()

	return len(c.cachedPerCore)
}
totalUsage, c.cachedPerCore +} + +func (c *CPUUsageState) NumCPU() int { + c.mu.Lock() + defer c.mu.Unlock() + + return len(c.cachedPerCore) +} + +type CPUInfoState struct { + mu sync.RWMutex + initialized bool + cachedInfo []cpu.InfoStat + cachedPhysCores int + cachedLogicCores int +} + +func (c *CPUInfoState) GetCPUInfo(forceRefresh bool) ([]cpu.InfoStat, error) { + c.mu.RLock() + if c.initialized && c.cachedInfo != nil && !forceRefresh { + defer c.mu.RUnlock() + return c.cachedInfo, nil + } + c.mu.RUnlock() + + info, err := cpu.Info() + if err != nil { + return nil, err + } + + c.mu.Lock() + c.cachedInfo = info + c.initialized = true + c.mu.Unlock() + + return info, nil +} + +func (c *CPUInfoState) GetPhysicalCores(forceRefresh bool) (int, error) { + c.mu.RLock() + if c.initialized && c.cachedPhysCores > 0 && !forceRefresh { + defer c.mu.RUnlock() + return c.cachedPhysCores, nil + } + c.mu.RUnlock() + + cores, err := cpu.Counts(false) + if err != nil { + return 0, err + } + + c.mu.Lock() + c.cachedPhysCores = cores + c.initialized = true + c.mu.Unlock() + + return cores, nil +} + +func (c *CPUInfoState) GetLogicalCores(forceRefresh bool) (int, error) { + c.mu.RLock() + if c.initialized && c.cachedLogicCores > 0 && !forceRefresh { + defer c.mu.RUnlock() + return c.cachedLogicCores, nil + } + c.mu.RUnlock() + + cores, err := cpu.Counts(true) + if err != nil { + return 0, err + } + + c.mu.Lock() + c.cachedLogicCores = cores + c.initialized = true + c.mu.Unlock() + + return cores, nil +} diff --git a/agent/utils/psutil/disk.go b/agent/utils/psutil/disk.go new file mode 100644 index 000000000..ddca1cabf --- /dev/null +++ b/agent/utils/psutil/disk.go @@ -0,0 +1,92 @@ +package psutil + +import ( + "sync" + "time" + + "github.com/shirou/gopsutil/v4/disk" +) + +const ( + diskUsageCacheInterval = 30 * time.Second + diskPartitionCacheInterval = 10 * time.Minute +) + +type DiskUsageEntry struct { + lastSampleTime time.Time + cachedUsage *disk.UsageStat +} + +type DiskState 
struct { + usageMu sync.RWMutex + usageCache map[string]*DiskUsageEntry + + partitionMu sync.RWMutex + lastPartitionTime time.Time + cachedPartitions []disk.PartitionStat +} + +func (d *DiskState) GetUsage(path string, forceRefresh bool) (*disk.UsageStat, error) { + d.usageMu.RLock() + if entry, ok := d.usageCache[path]; ok { + if time.Since(entry.lastSampleTime) < diskUsageCacheInterval && !forceRefresh { + defer d.usageMu.RUnlock() + return entry.cachedUsage, nil + } + } + d.usageMu.RUnlock() + + usage, err := disk.Usage(path) + if err != nil { + return nil, err + } + + d.usageMu.Lock() + if d.usageCache == nil { + d.usageCache = make(map[string]*DiskUsageEntry) + } + d.usageCache[path] = &DiskUsageEntry{ + lastSampleTime: time.Now(), + cachedUsage: usage, + } + d.usageMu.Unlock() + + return usage, nil +} + +func (d *DiskState) GetPartitions(all bool, forceRefresh bool) ([]disk.PartitionStat, error) { + d.partitionMu.RLock() + if d.cachedPartitions != nil && time.Since(d.lastPartitionTime) < diskPartitionCacheInterval && !forceRefresh { + defer d.partitionMu.RUnlock() + return d.cachedPartitions, nil + } + d.partitionMu.RUnlock() + + partitions, err := disk.Partitions(all) + if err != nil { + return nil, err + } + + d.partitionMu.Lock() + d.cachedPartitions = partitions + d.lastPartitionTime = time.Now() + d.partitionMu.Unlock() + + return partitions, nil +} + +func (d *DiskState) ClearUsageCache(path string) { + d.usageMu.Lock() + delete(d.usageCache, path) + d.usageMu.Unlock() +} + +func (d *DiskState) ClearAllCache() { + d.usageMu.Lock() + d.usageCache = make(map[string]*DiskUsageEntry) + d.usageMu.Unlock() + + d.partitionMu.Lock() + d.cachedPartitions = nil + d.partitionMu.Unlock() +} diff --git a/agent/utils/psutil/global.go b/agent/utils/psutil/global.go new file mode 100644 index 000000000..1b60498cb --- /dev/null +++ b/agent/utils/psutil/global.go @@ -0,0 +1,6 @@ +package psutil + +var CPU = &CPUUsageState{} +var CPUInfo = &CPUInfoState{} +var HOST = 
&HostInfoState{} +var DISK = &DiskState{} diff --git a/agent/utils/psutil/host.go b/agent/utils/psutil/host.go new file mode 100644 index 000000000..98e99f326 --- /dev/null +++ b/agent/utils/psutil/host.go @@ -0,0 +1,38 @@ +package psutil + +import ( + "sync" + "time" + + "github.com/shirou/gopsutil/v4/host" +) + +const hostRefreshInterval = 4 * time.Hour + +type HostInfoState struct { + mu sync.RWMutex + lastSampleTime time.Time + + cachedInfo *host.InfoStat +} + +func (h *HostInfoState) GetHostInfo(forceRefresh bool) (*host.InfoStat, error) { + h.mu.RLock() + if h.cachedInfo != nil && time.Since(h.lastSampleTime) < hostRefreshInterval && !forceRefresh { + defer h.mu.RUnlock() + return h.cachedInfo, nil + } + h.mu.RUnlock() + + hostInfo, err := host.Info() + if err != nil { + return nil, err + } + + h.mu.Lock() + h.cachedInfo = hostInfo + h.lastSampleTime = time.Now() + h.mu.Unlock() + + return hostInfo, nil +}