package psutil

import (
	"context"
	"errors"
	"fmt"
	"io"
	"io/fs"
	"math"
	"os"
	"strconv"
	"strings"
	"sync"
	"time"

	psutilgpu "github.com/datarhei/core/v16/resources/psutil/gpu"
	"github.com/shirou/gopsutil/v3/cpu"
	"github.com/shirou/gopsutil/v3/disk"
	"github.com/shirou/gopsutil/v3/mem"
	"github.com/shirou/gopsutil/v3/net"
)

// cgroup1Files lists files, relative to the cgroup root, whose presence
// indicates a cgroup v1 hierarchy.
var cgroup1Files = []string{
	"cpu/cpu.cfs_quota_us",
	"cpu/cpu.cfs_period_us",
	"cpuacct/cpuacct.usage",
	"memory/memory.limit_in_bytes",
	"memory/memory.usage_in_bytes",
}

// cgroup2Files lists files, relative to the cgroup root, whose presence
// indicates a cgroup v2 hierarchy.
var cgroup2Files = []string{
	"cpu.max",
	"cpu.stat",
	"memory.max",
	"memory.current",
}

// https://github.com/netdata/netdata/blob/master/collectors/cgroups.plugin/sys_fs_cgroup.c
// https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/6/html/resource_management_guide/sec-cpuacct
// https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/6/html/resource_management_guide/sect-cpu-example_usage
// https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html

// DiskInfo describes the usage of the partition a path lives on.
type DiskInfo struct {
	Path        string
	Fstype      string
	Total       uint64
	Used        uint64
	InodesTotal uint64
	InodesUsed  uint64
}

// MemoryInfo describes the memory usage of the system or of the cgroup.
type MemoryInfo struct {
	Total     uint64 // bytes
	Available uint64 // bytes
	Used      uint64 // bytes
}

// NetworkInfo holds the traffic counters of a single network interface.
type NetworkInfo struct {
	Name      string // interface name
	BytesSent uint64 // number of bytes sent
	BytesRecv uint64 // number of bytes received
}

// CPUInfo holds the CPU load split by category.
type CPUInfo struct {
	System float64 // percent 0-100
	User   float64 // percent 0-100
	Idle   float64 // percent 0-100
	Other  float64 // percent 0-100
}

// GPUInfo holds the usage figures of a single GPU.
type GPUInfo struct {
	Index int    // Index of the GPU
	ID    string // Physical ID of the GPU (not populated for a specific process)
	Name  string // Name of the GPU (not populated for a specific process)

	MemoryTotal uint64 // bytes (not populated for a specific process)
	MemoryUsed  uint64 // bytes

	Usage   float64 // percent 0-100
	Encoder float64 // percent 0-100
	Decoder float64 // percent 0-100
}

// cpuTimesStat is one sample of accumulated CPU times.
//
// Samples taken from the host (cpu.Times) are in seconds. Samples taken from
// a cgroup (cgroupCPUTimes) only fill in system, and there the value is in
// nanoseconds.
type cpuTimesStat struct {
	total  float64 // seconds
	system float64 // seconds
	user   float64 // seconds
	idle   float64 // seconds
	other  float64 // seconds
}

// Util provides access to resource usage figures of the system.
type Util interface {
	// Cancel stops the background collection of CPU and memory figures.
	Cancel()

	// CPUCounts returns the number of cores, either logical or physical.
	CPUCounts() (float64, error)

	// CPU returns the current CPU load in percent. The values range
	// from 0 to 100, independently of the number of logical cores.
	CPU() (*CPUInfo, error)

	// Disk returns the current usage of the partition specified by the path.
	Disk(path string) (*DiskInfo, error)

	// Memory returns the current memory usage.
	Memory() (*MemoryInfo, error)

	// Network returns the current network interface statistics per network adapter.
	Network() ([]NetworkInfo, error)

	// GPU returns the current usage for each GPU.
	GPU() ([]GPUInfo, error)

	// Process returns a process observer for a process with the given pid.
	Process(pid int32, children bool) (Process, error)
}

type util struct {
	root fs.FS

	cpuLimit   uint64  // Max. allowed CPU time in nanoseconds per second
	ncpu       float64 // Actual available CPUs
	hasCgroup  bool
	cgroupType int

	stopTicker context.CancelFunc

	// cancelled is closed once Cancel has been called. It is set by New and
	// is nil on a zero-value util, in which case it never becomes ready.
	cancelled <-chan struct{}

	startOnce sync.Once
	stopOnce  sync.Once

	lock             sync.RWMutex
	statCurrent      cpuTimesStat
	statCurrentTime  time.Time
	statPrevious     cpuTimesStat
	statPreviousTime time.Time
	nTicks           uint64
	mem              MemoryInfo

	gpu psutilgpu.GPU

	procfs Procfs
}

// New returns a new util, it will be started automatically.
//
// root is the directory of the cgroup filesystem; an empty string selects
// "/sys/fs/cgroup". gpu may be nil, in which case psutilgpu.NewNilGPU() is
// used. An error is returned if neither the number of CPUs nor the system
// memory can be determined.
func New(root string, gpu psutilgpu.GPU) (Util, error) {
	if len(root) == 0 {
		root = "/sys/fs/cgroup"
	}

	u := &util{
		root: os.DirFS(root),
	}

	u.cgroupType = u.detectCgroupVersion()
	if u.cgroupType != 0 {
		u.hasCgroup = true
	}

	if u.hasCgroup {
		u.cpuLimit, u.ncpu = u.cgroupCPULimit(u.cgroupType)
	}

	// Without a cgroup CPU limit fall back to the number of host CPUs.
	if u.ncpu == 0 {
		var err error
		u.ncpu, err = u.CPUCounts()
		if err != nil {
			return nil, err
		}
	}

	mem, err := u.virtualMemory()
	if err != nil {
		return nil, fmt.Errorf("unable to determine system memory: %w", err)
	}

	u.mem = *mem

	u.gpu = gpu
	if u.gpu == nil {
		u.gpu = psutilgpu.NewNilGPU()
	}

	ctx, cancel := context.WithCancel(context.Background())
	u.stopTicker = cancel
	u.cancelled = ctx.Done()

	go u.tickCPU(ctx, time.Second)
	go u.tickMemory(ctx, time.Second)

	// NOTE(review): the error of NewProcfs is dropped here, so u.procfs may
	// be left at whatever NewProcfs returns on failure. Confirm that this
	// best-effort behavior is intended.
	procfs, _ := NewProcfs(ctx, 5*time.Second)
	u.procfs = procfs

	return u, nil
}

// Cancel stops the background samplers started by New. Only the first call
// has an effect.
func (u *util) Cancel() {
	u.stopOnce.Do(func() {
		u.stopTicker()

		u.startOnce = sync.Once{}
	})
}

// detectCgroupVersion returns 1 or 2 if any of the files typical for that
// cgroup version can be opened below the root, and 0 if no cgroup is found.
// The cgroup v1 files are probed first.
func (u *util) detectCgroupVersion() int {
	exists := func(name string) bool {
		f, err := u.root.Open(name)
		if err != nil {
			return false
		}
		f.Close()
		return true
	}

	if !exists(".") {
		// no cgroup available
		return 0
	}

	for _, file := range cgroup1Files {
		if exists(file) {
			return 1
		}
	}

	for _, file := range cgroup2Files {
		if exists(file) {
			return 2
		}
	}

	return 0
}

// cgroupCPULimit reads the CPU quota of the cgroup. It returns the allowed
// CPU time in nanoseconds per second and the equivalent number of CPUs.
// Both values are 0 if there is no limit or if it can't be determined.
func (u *util) cgroupCPULimit(version int) (uint64, float64) {
	switch version {
	case 1:
		lines, err := u.readFile("cpu/cpu.cfs_quota_us")
		if err != nil {
			return 0, 0
		}

		quota, err := strconv.ParseFloat(lines[0], 64) // microseconds
		if err != nil {
			return 0, 0
		}

		// A non-positive quota means that no limit is set, so there is no
		// need to look at the period.
		if quota <= 0 {
			return 0, 0
		}

		lines, err = u.readFile("cpu/cpu.cfs_period_us")
		if err != nil {
			return 0, 0
		}

		period, err := strconv.ParseFloat(lines[0], 64) // microseconds
		if err != nil {
			return 0, 0
		}

		return cgroupLimitFromQuota(quota, period)
	case 2:
		lines, err := u.readFile("cpu.max")
		if err != nil {
			return 0, 0
		}

		// "max" in place of the quota means that no limit is set.
		if strings.HasPrefix(lines[0], "max") {
			return 0, 0
		}

		fields := strings.Fields(lines[0])
		if len(fields) != 2 {
			return 0, 0
		}

		quota, err := strconv.ParseFloat(fields[0], 64) // microseconds
		if err != nil {
			return 0, 0
		}

		period, err := strconv.ParseFloat(fields[1], 64) // microseconds
		if err != nil {
			return 0, 0
		}

		return cgroupLimitFromQuota(quota, period)
	}

	return 0, 0
}

// cgroupLimitFromQuota converts a quota and a period, both in microseconds,
// into the allowed CPU time in nanoseconds per second and the number of
// CPUs. Non-positive inputs yield 0, 0; this also avoids a division by zero
// and the conversion of a negative or infinite float to uint64.
func cgroupLimitFromQuota(quota, period float64) (uint64, float64) {
	if quota <= 0 || period <= 0 {
		return 0, 0
	}

	return uint64(1e6/period*quota) * 1e3, quota / period // nanoseconds
}

// tickCPU takes a CPU sample on every tick of the interval and keeps the two
// most recent ones until ctx is done.
func (u *util) tickCPU(ctx context.Context, interval time.Duration) {
	ticker := time.NewTicker(interval)
	defer ticker.Stop()

	for {
		select {
		case <-ctx.Done():
			return
		case t := <-ticker.C:
			stat := u.collectCPU()

			u.lock.Lock()
			u.statPrevious, u.statCurrent = u.statCurrent, stat
			u.statPreviousTime, u.statCurrentTime = u.statCurrentTime, t
			u.nTicks++
			u.lock.Unlock()
		}
	}
}

// collectCPU returns the current CPU times. If they can't be read, a sample
// is returned whose total and idle are both set to the current Unix time.
func (u *util) collectCPU() cpuTimesStat {
	stat, err := u.cpuTimes()
	if err != nil {
		// Read the clock only once so that total and idle are guaranteed to
		// be equal.
		now := float64(time.Now().Unix())

		return cpuTimesStat{
			total: now,
			idle:  now,
		}
	}

	return *stat
}

// tickMemory refreshes the cached memory figures on every tick of the
// interval until ctx is done. A failed reading leaves the cache untouched.
func (u *util) tickMemory(ctx context.Context, interval time.Duration) {
	ticker := time.NewTicker(interval)
	defer ticker.Stop()

	for {
		select {
		case <-ctx.Done():
			return
		case <-ticker.C:
			stat := u.collectMemory()
			if stat != nil {
				u.lock.Lock()
				u.mem = *stat
				u.lock.Unlock()
			}
		}
	}
}

// collectMemory returns the current memory figures or nil if they can't be
// read.
func (u *util) collectMemory() *MemoryInfo {
	stat, err := u.virtualMemory()
	if err != nil {
		return nil
	}

	return stat
}

// CPUCounts returns the number of CPUs allowed by the cgroup if a limit was
// detected, otherwise the number of logical CPUs of the host.
func (u *util) CPUCounts() (float64, error) {
	if u.hasCgroup && u.ncpu > 0 {
		return u.ncpu, nil
	}

	ncpu, err := cpu.Counts(true)
	if err != nil {
		return 0, err
	}

	return float64(ncpu), nil
}

// cpuTimes returns the current cpu usage times in seconds.
//
// If a cgroup CPU limit is in effect, the usage of the cgroup is returned
// instead (see cgroupCPUTimes for the unit). If reading the cgroup fails,
// the host times are used as fallback.
func (u *util) cpuTimes() (*cpuTimesStat, error) {
	if u.hasCgroup && u.cpuLimit > 0 {
		if stat, err := u.cgroupCPUTimes(u.cgroupType); err == nil {
			return stat, nil
		}
	}

	times, err := cpu.Times(true)
	if err != nil {
		return nil, err
	}

	if len(times) == 0 {
		return nil, errors.New("cpu.Times() returned an empty slice")
	}

	s := &cpuTimesStat{}

	for i := range times {
		t := &times[i]

		s.total += cpuTotal(t)
		s.system += t.System
		s.user += t.User
		s.idle += t.Idle
	}

	// Everything that is neither system, user, nor idle time.
	s.other = s.total - s.system - s.user - s.idle
	if s.other < 0.0001 {
		s.other = 0
	}

	return s, nil
}

// CPU returns the CPU load between the two most recent samples in percent.
//
// It blocks until two samples are available. If the util is cancelled before
// that happens, no further samples will arrive, and a fully idle reading is
// returned instead of waiting forever. The same fully idle reading is
// returned if the total between the two samples is zero.
func (u *util) CPU() (*CPUInfo, error) {
	s := &CPUInfo{
		System: 0,
		User:   0,
		Idle:   100,
		Other:  0,
	}

	for {
		u.lock.RLock()
		nTicks := u.nTicks
		u.lock.RUnlock()

		if nTicks >= 2 {
			break
		}

		select {
		case <-u.cancelled:
			return s, nil
		case <-time.After(100 * time.Millisecond):
		}
	}

	u.lock.RLock()
	defer u.lock.RUnlock()

	limited := u.hasCgroup && u.cpuLimit > 0

	var total float64
	if limited {
		// Allowed CPU time (nanoseconds) during the time between the samples.
		total = float64(u.cpuLimit) * u.statCurrentTime.Sub(u.statPreviousTime).Seconds()
	} else {
		total = u.statCurrent.total - u.statPrevious.total
	}

	if total == 0 {
		return s, nil
	}

	s.System = 100 * (u.statCurrent.system - u.statPrevious.system) / total
	s.User = 100 * (u.statCurrent.user - u.statPrevious.user) / total
	s.Idle = 100 * (u.statCurrent.idle - u.statPrevious.idle) / total
	s.Other = 100 * (u.statCurrent.other - u.statPrevious.other) / total

	// The cgroup samples carry no idle time, derive it from the rest.
	if limited {
		s.Idle = 100 - s.User - s.System
	}

	return s, nil
}

// cgroupCPUTimes returns the accumulated CPU usage of the cgroup. Only the
// system field is filled in, and its unit is nanoseconds.
func (u *util) cgroupCPUTimes(version int) (*cpuTimesStat, error) {
	info := &cpuTimesStat{}

	switch version {
	case 1:
		lines, err := u.readFile("cpuacct/cpuacct.usage")
		if err != nil {
			return nil, err
		}

		usage, err := strconv.ParseFloat(lines[0], 64) // nanoseconds
		if err != nil {
			return nil, err
		}

		info.system = usage
	case 2:
		lines, err := u.readFile("cpu.stat")
		if err != nil {
			return nil, err
		}

		var usage float64
		if _, err := fmt.Sscanf(lines[0], "usage_usec %f", &usage); err != nil {
			return nil, err
		}

		info.system = usage * 1e3 // convert to nanoseconds
	}

	return info, nil
}

// Disk returns the current usage of the partition specified by the path.
func (u *util) Disk(path string) (*DiskInfo, error) {
	usage, err := disk.Usage(path)
	if err != nil {
		return nil, err
	}

	info := &DiskInfo{
		Path:        usage.Path,
		Fstype:      usage.Fstype,
		Total:       usage.Total,
		Used:        usage.Used,
		InodesTotal: usage.InodesTotal,
		InodesUsed:  usage.InodesUsed,
	}

	return info, nil
}

// virtualMemory reads the current memory figures. The figures of the cgroup
// are preferred as long as its limit doesn't exceed the memory of the host.
func (u *util) virtualMemory() (*MemoryInfo, error) {
	info, err := mem.VirtualMemory()
	if err != nil {
		return nil, err
	}

	if u.hasCgroup {
		if cginfo, err := u.cgroupVirtualMemory(u.cgroupType); err == nil {
			// if total is a huge garbage number, then there are no limits set
			if cginfo.Total <= info.Total {
				return cginfo, nil
			}
		}
	}

	return &MemoryInfo{
		Total:     info.Total,
		Available: info.Available,
		Used:      info.Used,
	}, nil
}

// Memory returns a copy of the most recently collected memory figures.
func (u *util) Memory() (*MemoryInfo, error) {
	u.lock.RLock()
	defer u.lock.RUnlock()

	stat := &MemoryInfo{
		Total:     u.mem.Total,
		Available: u.mem.Available,
		Used:      u.mem.Used,
	}

	return stat, nil
}

// cgroupVirtualMemory returns the memory limit and usage of the cgroup. For
// cgroup v2 a limit that isn't a number (e.g. "max") is reported as
// math.MaxUint64. For an unknown version all figures are 0.
func (u *util) cgroupVirtualMemory(version int) (*MemoryInfo, error) {
	var total, used uint64

	switch version {
	case 1:
		lines, err := u.readFile("memory/memory.limit_in_bytes")
		if err != nil {
			return nil, err
		}

		total, err = strconv.ParseUint(lines[0], 10, 64)
		if err != nil {
			return nil, err
		}

		lines, err = u.readFile("memory/memory.usage_in_bytes")
		if err != nil {
			return nil, err
		}

		used, err = strconv.ParseUint(lines[0], 10, 64)
		if err != nil {
			return nil, err
		}
	case 2:
		lines, err := u.readFile("memory.max")
		if err != nil {
			return nil, err
		}

		total, err = strconv.ParseUint(lines[0], 10, 64)
		if err != nil {
			total = uint64(math.MaxUint64)
		}

		lines, err = u.readFile("memory.current")
		if err != nil {
			return nil, err
		}

		used, err = strconv.ParseUint(lines[0], 10, 64)
		if err != nil {
			return nil, err
		}
	}

	return &MemoryInfo{
		Total:     total,
		Available: cgroupAvailableMemory(total, used),
		Used:      used,
	}, nil
}

// cgroupAvailableMemory returns total - used, or 0 if used exceeds total.
// The plain subtraction would wrap around to a huge unsigned number.
func cgroupAvailableMemory(total, used uint64) uint64 {
	if used > total {
		return 0
	}

	return total - used
}

// Network returns the current network interface statistics per network adapter.
func (u *util) Network() ([]NetworkInfo, error) {
	counters, err := net.IOCounters(true)
	if err != nil {
		return nil, err
	}

	info := make([]NetworkInfo, 0, len(counters))

	for _, c := range counters {
		info = append(info, NetworkInfo{
			Name:      c.Name,
			BytesSent: c.BytesSent,
			BytesRecv: c.BytesRecv,
		})
	}

	return info, nil
}

// readFile reads the file at path, relative to the cgroup root, and returns
// its lines with surrounding whitespace removed. The result always has at
// least one element, which is empty for an empty file.
func (u *util) readFile(path string) ([]string, error) {
	file, err := u.root.Open(path)
	if err != nil {
		return nil, err
	}
	// This runs periodically from the samplers, the file must not be leaked.
	defer file.Close()

	data, err := io.ReadAll(file)
	if err != nil {
		return nil, err
	}

	lines := strings.Split(string(data), "\n")
	for i, line := range lines {
		lines[i] = strings.TrimSpace(line)
	}

	return lines, nil
}

// cpuTotal returns the sum of all CPU time categories of c.
func cpuTotal(c *cpu.TimesStat) float64 {
	return c.User + c.System + c.Idle + c.Nice + c.Iowait + c.Irq +
		c.Softirq + c.Steal + c.Guest + c.GuestNice
}

// GPU returns the current usage for each GPU as reported by the GPU
// collector handed to New.
func (u *util) GPU() ([]GPUInfo, error) {
	nvstats, err := u.gpu.Stats()
	if err != nil {
		return nil, err
	}

	stats := make([]GPUInfo, 0, len(nvstats))

	for _, nv := range nvstats {
		stats = append(stats, GPUInfo{
			Index:       nv.Index,
			ID:          nv.ID,
			Name:        nv.Name,
			MemoryTotal: nv.MemoryTotal,
			MemoryUsed:  nv.MemoryUsed,
			Usage:       nv.Usage,
			Encoder:     nv.Encoder,
			Decoder:     nv.Decoder,
		})
	}

	return stats, nil
}