Merge pull request #4751 from kolyshkin/cgroups-002

deps: bump opencontainers/cgroups to v0.0.2, fix tests
This commit is contained in:
Akihiro Suda
2025-06-03 00:39:47 +09:00
committed by GitHub
23 changed files with 222 additions and 124 deletions

2
go.mod
View File

@@ -14,7 +14,7 @@ require (
github.com/moby/sys/user v0.4.0
github.com/moby/sys/userns v0.1.0
github.com/mrunalp/fileutils v0.5.1
github.com/opencontainers/cgroups v0.0.1
github.com/opencontainers/cgroups v0.0.2
github.com/opencontainers/runtime-spec v1.2.1
github.com/opencontainers/selinux v1.12.0
github.com/seccomp/libseccomp-golang v0.11.0

4
go.sum
View File

@@ -45,8 +45,8 @@ github.com/moby/sys/userns v0.1.0 h1:tVLXkFOxVu9A64/yh59slHVv9ahO9UIev4JZusOLG/g
github.com/moby/sys/userns v0.1.0/go.mod h1:IHUYgu/kao6N8YZlp9Cf444ySSvCmDlmzUcYfDHOl28=
github.com/mrunalp/fileutils v0.5.1 h1:F+S7ZlNKnrwHfSwdlgNSkKo67ReVf8o9fel6C3dkm/Q=
github.com/mrunalp/fileutils v0.5.1/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ=
github.com/opencontainers/cgroups v0.0.1 h1:MXjMkkFpKv6kpuirUa4USFBas573sSAY082B4CiHEVA=
github.com/opencontainers/cgroups v0.0.1/go.mod h1:s8lktyhlGUqM7OSRL5P7eAW6Wb+kWPNvt4qvVfzA5vs=
github.com/opencontainers/cgroups v0.0.2 h1:A+mAPPMfgKNCEZUUtibESFx06uvhAmvo8sSz3Abwk7o=
github.com/opencontainers/cgroups v0.0.2/go.mod h1:s8lktyhlGUqM7OSRL5P7eAW6Wb+kWPNvt4qvVfzA5vs=
github.com/opencontainers/runtime-spec v1.2.1 h1:S4k4ryNgEpxW1dzyqffOmhI1BHYcjzU8lpJfSlR0xww=
github.com/opencontainers/runtime-spec v1.2.1/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/selinux v1.12.0 h1:6n5JV4Cf+4y0KNXW48TLj5DwfXpvWlxXplUkdTrmPb8=

View File

@@ -328,7 +328,7 @@ convert_hugetlb_size() {
check_systemd_value "MemoryHigh" 5242880
check_systemd_value "MemoryMax" 10485760
check_systemd_value "TasksMax" 99
check_cpu_quota 10000 100000 "100ms"
check_cpu_quota 10000 100000
check_cpu_weight 42
}
@@ -390,7 +390,7 @@ convert_hugetlb_size() {
[ "$output" = '42' ]
check_systemd_value "TasksMax" 42
check_cpu_quota 5000 50000 "100ms"
check_cpu_quota 5000 50000
check_cpu_weight 42
}

View File

@@ -286,7 +286,33 @@ function check_systemd_value() {
function check_cpu_quota() {
local quota=$1
local period=$2
local sd_quota=$3
local sd_quota
if [ -v RUNC_USE_SYSTEMD ]; then
if [ "$quota" = "-1" ]; then
sd_quota="infinity"
else
# In systemd world, quota (CPUQuotaPerSec) is measured in ms
# (per second), and systemd rounds it up to 10ms. For example,
# given quota=4000 and period=10000, systemd value is 400ms.
#
# Calculate milliseconds (quota/period * 1000).
# First multiply by 1000 to get milliseconds,
# then add half of period for proper rounding.
local ms=$(((quota * 1000 + period / 2) / period))
# Round to the nearest multiple of 10ms (halves round up).
ms=$(((ms + 5) / 10 * 10))
sd_quota="${ms}ms"
# Recalculate quota based on systemd value.
# Convert ms back to quota units.
quota=$((ms * period / 1000))
fi
# Systemd values are the same for v1 and v2.
check_systemd_value "CPUQuotaPerSecUSec" "$sd_quota"
fi
if [ -v CGROUP_V2 ]; then
if [ "$quota" = "-1" ]; then
@@ -297,8 +323,6 @@ function check_cpu_quota() {
check_cgroup_value "cpu.cfs_quota_us" "$quota"
check_cgroup_value "cpu.cfs_period_us" "$period"
fi
# systemd values are the same for v1 and v2
check_systemd_value "CPUQuotaPerSecUSec" "$sd_quota"
# CPUQuotaPeriodUSec requires systemd >= v242
[ "$(systemd_version)" -lt 242 ] && return

View File

@@ -263,26 +263,26 @@ EOF
runc run -d --console-socket "$CONSOLE_SOCKET" test_update
[ "$status" -eq 0 ]
# check that initial values were properly set
check_cpu_quota 500000 1000000 "500ms"
# Check that initial values were properly set.
check_cpu_quota 500000 1000000
check_cpu_shares 100
# update cpu period
# Update cpu period.
runc update test_update --cpu-period 900000
[ "$status" -eq 0 ]
check_cpu_quota 500000 900000 "560ms"
check_cpu_quota 500000 900000
# update cpu quota
# Update cpu quota.
runc update test_update --cpu-quota 600000
[ "$status" -eq 0 ]
check_cpu_quota 600000 900000 "670ms"
check_cpu_quota 600000 900000
# remove cpu quota
# Remove cpu quota.
runc update test_update --cpu-quota -1
[ "$status" -eq 0 ]
check_cpu_quota -1 900000 "infinity"
check_cpu_quota -1 900000
# update cpu-shares
# Update cpu-shares.
runc update test_update --cpu-share 200
[ "$status" -eq 0 ]
check_cpu_shares 200
@@ -298,21 +298,21 @@ EOF
}
EOF
[ "$status" -eq 0 ]
check_cpu_quota 500000 1000000 "500ms"
check_cpu_quota 500000 1000000
# redo all the changes at once
# Redo all the changes at once.
runc update test_update \
--cpu-period 900000 --cpu-quota 600000 --cpu-share 200
[ "$status" -eq 0 ]
check_cpu_quota 600000 900000 "670ms"
check_cpu_quota 600000 900000
check_cpu_shares 200
# remove cpu quota and reset the period
# Remove cpu quota and reset the period.
runc update test_update --cpu-quota -1 --cpu-period 100000
[ "$status" -eq 0 ]
check_cpu_quota -1 100000 "infinity"
check_cpu_quota -1 100000
# reset to initial test value via json file
# Reset to initial test values via json file.
cat <<EOF >"$BATS_RUN_TMPDIR"/runc-cgroups-integration-test.json
{
"cpu": {
@@ -326,7 +326,7 @@ EOF
runc update -r "$BATS_RUN_TMPDIR"/runc-cgroups-integration-test.json test_update
[ "$status" -eq 0 ]
check_cpu_quota 500000 1000000 "500ms"
check_cpu_quota 500000 1000000
check_cpu_shares 100
}
@@ -363,7 +363,7 @@ EOF
runc run -d --console-socket "$CONSOLE_SOCKET" test_update
[ "$status" -eq 0 ]
check_cpu_quota -1 1000000 "infinity"
check_cpu_quota -1 1000000
}
@test "set cpu period with no quota (invalid period)" {
@@ -382,7 +382,7 @@ EOF
runc run -d --console-socket "$CONSOLE_SOCKET" test_update
[ "$status" -eq 0 ]
check_cpu_quota 5000 100000 "50ms"
check_cpu_quota 5000 100000
}
@test "update cpu period with no previous period/quota set" {
@@ -393,10 +393,10 @@ EOF
runc run -d --console-socket "$CONSOLE_SOCKET" test_update
[ "$status" -eq 0 ]
# update the period alone, no old values were set
# Update the period alone, no old values were set.
runc update --cpu-period 50000 test_update
[ "$status" -eq 0 ]
check_cpu_quota -1 50000 "infinity"
check_cpu_quota -1 50000
}
@test "update cpu quota with no previous period/quota set" {
@@ -407,10 +407,10 @@ EOF
runc run -d --console-socket "$CONSOLE_SOCKET" test_update
[ "$status" -eq 0 ]
# update the quota alone, no old values were set
# Update the quota alone, no old values were set.
runc update --cpu-quota 30000 test_update
[ "$status" -eq 0 ]
check_cpu_quota 30000 100000 "300ms"
check_cpu_quota 30000 100000
}
@test "update cpu period in a pod cgroup with pod limit set" {
@@ -445,7 +445,7 @@ EOF
# Finally, the test itself: set 30% limit but with lower period.
runc update --cpu-period 10000 --cpu-quota 3000 test_update
[ "$status" -eq 0 ]
check_cpu_quota 3000 10000 "300ms"
check_cpu_quota 3000 10000
}
@test "update cgroup cpu.idle" {
@@ -545,9 +545,9 @@ EOF
runc run -d --console-socket "$CONSOLE_SOCKET" test_update
[ "$status" -eq 0 ]
# check that initial values were properly set
check_cpu_quota 500000 1000000 "500ms"
# initial cpu shares of 100 corresponds to weight of 4
# Check that initial values were properly set.
check_cpu_quota 500000 1000000
# Initial cpu shares of 100 corresponds to weight of 4.
check_cpu_weight 4
check_systemd_value "TasksMax" 20
@@ -561,8 +561,8 @@ EOF
}
EOF
# check the updated systemd unit properties
check_cpu_quota -1 100000 "infinity"
# Check the updated systemd unit properties.
check_cpu_quota -1 100000
check_cpu_weight 16
check_systemd_value "TasksMax" 10
}

View File

@@ -0,0 +1,21 @@
# This is a golangci-lint config file which is used to check NEW code in
# GitHub PRs only (see lint-extra in .github/workflows/validate.yml).
#
# For the default linter config, see .golangci.yml. This config should
# only enable additional linters and/or linter settings not enabled
# in the default config.
version: "2"
linters:
default: none
enable:
- godot
- revive
- staticcheck
settings:
staticcheck:
checks:
- all
- -QF1008 # https://staticcheck.dev/docs/checks/#QF1008 Omit embedded fields from selector expression.
exclusions:
generated: strict

31
vendor/github.com/opencontainers/cgroups/.golangci.yml generated vendored Normal file
View File

@@ -0,0 +1,31 @@
# For documentation, see https://golangci-lint.run/usage/configuration/
version: "2"
formatters:
enable:
- gofumpt
exclusions:
generated: strict
linters:
enable:
- errorlint
- nolintlint
- unconvert
- unparam
settings:
govet:
enable:
- nilness
staticcheck:
checks:
- all
- -ST1000 # https://staticcheck.dev/docs/checks/#ST1000 Incorrect or missing package comment.
- -ST1003 # https://staticcheck.dev/docs/checks/#ST1003 Poorly chosen identifier.
- -ST1005 # https://staticcheck.dev/docs/checks/#ST1005 Incorrectly formatted error string.
- -QF1008 # https://staticcheck.dev/docs/checks/#QF1008 Omit embedded fields from selector expression.
exclusions:
generated: strict
presets:
- comments
- std-error-handling

View File

@@ -23,7 +23,7 @@ However, specification releases have special restrictions in the [OCI charter][c
* They are the target of backwards compatibility (§7.g), and
* They are subject to the OFWa patent grant (§8.d and e).
To avoid unfortunate side effects (onerous backwards compatibity requirements or Member resignations), the following additional procedures apply to specification releases:
To avoid unfortunate side effects (onerous backwards compatibility requirements or Member resignations), the following additional procedures apply to specification releases:
### Planning a release

View File

@@ -23,16 +23,16 @@ type Cgroup struct {
// Path specifies the path to cgroups that are created and/or joined by the container.
// The path is assumed to be relative to the host system cgroup mountpoint.
Path string `json:"path"`
Path string `json:"path,omitempty"`
// ScopePrefix describes prefix for the scope name
ScopePrefix string `json:"scope_prefix"`
// ScopePrefix describes prefix for the scope name.
ScopePrefix string `json:"scope_prefix,omitempty"`
// Resources contains various cgroups settings to apply
*Resources
// Resources contains various cgroups settings to apply.
*Resources `json:"Resources,omitempty"`
// Systemd tells if systemd should be used to manage cgroups.
Systemd bool
Systemd bool `json:"Systemd,omitempty"`
// SystemdProps are any additional properties for systemd,
// derived from org.systemd.property.xxx annotations.
@@ -40,7 +40,7 @@ type Cgroup struct {
SystemdProps []systemdDbus.Property `json:"-"`
// Rootless tells if rootless cgroups should be used.
Rootless bool
Rootless bool `json:"Rootless,omitempty"`
// The host UID that should own the cgroup, or nil to accept
// the default ownership. This should only be set when the
@@ -52,96 +52,96 @@ type Cgroup struct {
type Resources struct {
// Devices is the set of access rules for devices in the container.
Devices []*devices.Rule `json:"devices"`
Devices []*devices.Rule `json:"devices,omitempty"`
// Memory limit (in bytes)
Memory int64 `json:"memory"`
// Memory limit (in bytes).
Memory int64 `json:"memory,omitempty"`
// Memory reservation or soft_limit (in bytes)
MemoryReservation int64 `json:"memory_reservation"`
// Memory reservation or soft_limit (in bytes).
MemoryReservation int64 `json:"memory_reservation,omitempty"`
// Total memory usage (memory + swap); set `-1` to enable unlimited swap
MemorySwap int64 `json:"memory_swap"`
// Total memory usage (memory+swap); use -1 for unlimited swap.
MemorySwap int64 `json:"memory_swap,omitempty"`
// CPU shares (relative weight vs. other containers)
CpuShares uint64 `json:"cpu_shares"`
// CPU shares (relative weight vs. other containers).
CpuShares uint64 `json:"cpu_shares,omitempty"` //nolint:revive // Suppress "var-naming: struct field CpuShares should be CPUShares".
// CPU hardcap limit (in usecs). Allowed cpu time in a given period.
CpuQuota int64 `json:"cpu_quota"`
CpuQuota int64 `json:"cpu_quota,omitempty"` //nolint:revive // Suppress "var-naming: struct field CpuQuota should be CPUQuota".
// CPU hardcap burst limit (in usecs). Allowed accumulated cpu time additionally for burst in a given period.
CpuBurst *uint64 `json:"cpu_burst"` //nolint:revive
CpuBurst *uint64 `json:"cpu_burst,omitempty"` //nolint:revive // Suppress "var-naming: struct field CpuBurst should be CPUBurst".
// CPU period to be used for hardcapping (in usecs). 0 to use system default.
CpuPeriod uint64 `json:"cpu_period"`
CpuPeriod uint64 `json:"cpu_period,omitempty"` //nolint:revive // Suppress "var-naming: struct field CpuPeriod should be CPUPeriod".
// How much time the CPU will use in realtime scheduling (in usecs).
CpuRtRuntime int64 `json:"cpu_rt_quota"`
CpuRtRuntime int64 `json:"cpu_rt_quota,omitempty"` //nolint:revive // Suppress "var-naming: struct field CpuRtRuntime should be CPURtRuntime".
// CPU period to be used for realtime scheduling (in usecs).
CpuRtPeriod uint64 `json:"cpu_rt_period"`
CpuRtPeriod uint64 `json:"cpu_rt_period,omitempty"` //nolint:revive // Suppress "var-naming: struct field CpuRtPeriod should be CPURtPeriod".
// CPU to use
CpusetCpus string `json:"cpuset_cpus"`
// Cpuset CPUs to use.
CpusetCpus string `json:"cpuset_cpus,omitempty"`
// MEM to use
CpusetMems string `json:"cpuset_mems"`
// Cpuset memory nodes to use.
CpusetMems string `json:"cpuset_mems,omitempty"`
// cgroup SCHED_IDLE
// Cgroup's SCHED_IDLE value.
CPUIdle *int64 `json:"cpu_idle,omitempty"`
// Process limit; set <= `0' to disable limit.
PidsLimit int64 `json:"pids_limit"`
PidsLimit int64 `json:"pids_limit,omitempty"`
// Specifies per cgroup weight, range is from 10 to 1000.
BlkioWeight uint16 `json:"blkio_weight"`
BlkioWeight uint16 `json:"blkio_weight,omitempty"`
// Specifies tasks' weight in the given cgroup while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only
BlkioLeafWeight uint16 `json:"blkio_leaf_weight"`
// Tasks' weight in the given cgroup while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only.
BlkioLeafWeight uint16 `json:"blkio_leaf_weight,omitempty"`
// Weight per cgroup per device, can override BlkioWeight.
BlkioWeightDevice []*WeightDevice `json:"blkio_weight_device"`
BlkioWeightDevice []*WeightDevice `json:"blkio_weight_device,omitempty"`
// IO read rate limit per cgroup per device, bytes per second.
BlkioThrottleReadBpsDevice []*ThrottleDevice `json:"blkio_throttle_read_bps_device"`
BlkioThrottleReadBpsDevice []*ThrottleDevice `json:"blkio_throttle_read_bps_device,omitempty"`
// IO write rate limit per cgroup per device, bytes per second.
BlkioThrottleWriteBpsDevice []*ThrottleDevice `json:"blkio_throttle_write_bps_device"`
BlkioThrottleWriteBpsDevice []*ThrottleDevice `json:"blkio_throttle_write_bps_device,omitempty"`
// IO read rate limit per cgroup per device, IO per second.
BlkioThrottleReadIOPSDevice []*ThrottleDevice `json:"blkio_throttle_read_iops_device"`
BlkioThrottleReadIOPSDevice []*ThrottleDevice `json:"blkio_throttle_read_iops_device,omitempty"`
// IO write rate limit per cgroup per device, IO per second.
BlkioThrottleWriteIOPSDevice []*ThrottleDevice `json:"blkio_throttle_write_iops_device"`
BlkioThrottleWriteIOPSDevice []*ThrottleDevice `json:"blkio_throttle_write_iops_device,omitempty"`
// set the freeze value for the process
Freezer FreezerState `json:"freezer"`
// Freeze value for the process.
Freezer FreezerState `json:"freezer,omitempty"`
// Hugetlb limit (in bytes)
HugetlbLimit []*HugepageLimit `json:"hugetlb_limit"`
// Hugetlb limit (in bytes).
HugetlbLimit []*HugepageLimit `json:"hugetlb_limit,omitempty"`
// Whether to disable OOM Killer
OomKillDisable bool `json:"oom_kill_disable"`
// Whether to disable OOM killer.
OomKillDisable bool `json:"oom_kill_disable,omitempty"`
// Tuning swappiness behaviour per cgroup
MemorySwappiness *uint64 `json:"memory_swappiness"`
// Tuning swappiness behaviour per cgroup.
MemorySwappiness *uint64 `json:"memory_swappiness,omitempty"`
// Set priority of network traffic for container
NetPrioIfpriomap []*IfPrioMap `json:"net_prio_ifpriomap"`
// Set priority of network traffic for container.
NetPrioIfpriomap []*IfPrioMap `json:"net_prio_ifpriomap,omitempty"`
// Set class identifier for container's network packets
NetClsClassid uint32 `json:"net_cls_classid_u"`
// Set class identifier for container's network packets.
NetClsClassid uint32 `json:"net_cls_classid_u,omitempty"`
// Rdma resource restriction configuration
Rdma map[string]LinuxRdma `json:"rdma"`
// Rdma resource restriction configuration.
Rdma map[string]LinuxRdma `json:"rdma,omitempty"`
// Used on cgroups v2:
// CpuWeight sets a proportional bandwidth limit.
CpuWeight uint64 `json:"cpu_weight"`
CpuWeight uint64 `json:"cpu_weight,omitempty"` //nolint:revive // Suppress "var-naming: struct field CpuWeight should be CPUWeight".
// Unified is cgroupv2-only key-value map.
Unified map[string]string `json:"unified"`
Unified map[string]string `json:"unified,omitempty"`
// SkipDevices allows to skip configuring device permissions.
// Used by e.g. kubelet while creating a parent cgroup (kubepods)
@@ -165,5 +165,5 @@ type Resources struct {
// MemoryCheckBeforeUpdate is a flag for cgroup v2 managers to check
// if the new memory limits (Memory and MemorySwap) being set are lower
// than the current memory usage, and reject if so.
MemoryCheckBeforeUpdate bool `json:"memory_check_before_update"`
MemoryCheckBeforeUpdate bool `json:"memory_check_before_update,omitempty"`
}

View File

@@ -20,10 +20,10 @@ type Device struct {
FileMode os.FileMode `json:"file_mode"`
// Uid of the device.
Uid uint32 `json:"uid"`
Uid uint32 `json:"uid,omitempty"` //nolint:revive // Suppress "var-naming: struct field Uid should be UID".
// Gid of the device.
Gid uint32 `json:"gid"`
Gid uint32 `json:"gid,omitempty"` //nolint:revive // Suppress "var-naming: struct field Gid should be GID".
}
// Permissions is a cgroupv1-style string to represent device access. It

View File

@@ -261,7 +261,7 @@ func (e *emulator) Apply(rule devices.Rule) error {
}
// emulatorFromList takes a reader to a "devices.list"-like source, and returns
// a new Emulator that represents the state of the devices cgroup. Note that
// a new emulator that represents the state of the devices cgroup. Note that
// black-list devices cgroups cannot be fully reconstructed, due to limitations
// in the devices cgroup API. Instead, such cgroups are always treated as
// "allow all" cgroups.
@@ -301,11 +301,12 @@ func emulatorFromList(list io.Reader) (*emulator, error) {
// disruptive rules (like denying all device access) will only be applied if
// necessary.
//
// This function is the sole reason for all of Emulator -- to allow us
// This function is the sole reason for all of emulator -- to allow us
// to figure out how to update a container's cgroups without causing spurious
// device errors (if possible).
func (source *emulator) Transition(target *emulator) ([]*devices.Rule, error) { //nolint:revive // Ignore receiver-naming warning.
func (e *emulator) Transition(target *emulator) ([]*devices.Rule, error) {
var transitionRules []*devices.Rule
source := e
oldRules := source.rules
// If the default policy doesn't match, we need to include a "disruptive"

View File

@@ -166,7 +166,7 @@ func systemdProperties(r *cgroups.Resources, sdVer int) ([]systemdDbus.Property,
return properties, nil
}
func newProp(name string, units interface{}) systemdDbus.Property {
func newProp(name string, units any) systemdDbus.Property {
return systemdDbus.Property{
Name: name,
Value: dbus.MakeVariant(units),

View File

@@ -57,7 +57,7 @@ func (s *FreezerGroup) Set(path string, r *cgroups.Resources) (Err error) {
// Alas, this is still a game of chance, since the real fix
// belongs in the kernel (cgroup v2 does not have this bug).
for i := 0; i < 1000; i++ {
for i := range 1000 {
if i%50 == 49 {
// Occasional thaw and sleep improves
// the chances to succeed in freezing

View File

@@ -108,6 +108,12 @@ func statCpu(dirPath string, stats *cgroups.Stats) error {
case "throttled_usec":
stats.CpuStats.ThrottlingData.ThrottledTime = v * 1000
case "nr_bursts":
stats.CpuStats.BurstData.BurstsPeriods = v
case "burst_usec":
stats.CpuStats.BurstData.BurstTime = v * 1000
}
}
if err := sc.Err(); err != nil {

View File

@@ -18,17 +18,14 @@ import (
// cgroupv2 files with .min, .max, .low, or .high suffix.
// The value of -1 is converted to "max" for cgroupv1 compatibility
// (which used to write -1 to remove the limit).
func numToStr(value int64) (ret string) {
switch {
case value == 0:
ret = ""
case value == -1:
ret = "max"
default:
ret = strconv.FormatInt(value, 10)
func numToStr(value int64) string {
switch value {
case 0:
return ""
case -1:
return "max"
}
return ret
return strconv.FormatInt(value, 10)
}
func isMemorySet(r *cgroups.Resources) bool {
@@ -57,7 +54,7 @@ func setMemory(dirPath string, r *cgroups.Resources) error {
if swapStr != "" {
if err := cgroups.WriteFile(dirPath, "memory.swap.max", swapStr); err != nil {
// If swap is not enabled, silently ignore setting to max or disabling it.
if !(errors.Is(err, os.ErrNotExist) && (swapStr == "max" || swapStr == "0")) {
if !(errors.Is(err, os.ErrNotExist) && (swapStr == "max" || swapStr == "0")) { //nolint:staticcheck // Ignore "QF1001: could apply De Morgan's law".
return err
}
}

View File

@@ -50,7 +50,7 @@ func readRdmaEntries(dir, file string) ([]cgroups.RdmaEntry, error) {
if err != nil {
return nil, err
}
defer fd.Close() //nolint:errorlint
defer fd.Close()
scanner := bufio.NewScanner(fd)
for scanner.Scan() {
parts := strings.SplitN(scanner.Text(), " ", 4)

View File

@@ -9,6 +9,14 @@ type ThrottlingData struct {
ThrottledTime uint64 `json:"throttled_time,omitempty"`
}
// BurstData holds CPU bandwidth burst statistics.
type BurstData struct {
// Number of periods in which a bandwidth burst occurred.
BurstsPeriods uint64 `json:"bursts_periods,omitempty"`
// Cumulative wall-time that any CPUs have used above quota in the respective periods.
// Units: nanoseconds.
BurstTime uint64 `json:"burst_time,omitempty"`
}
// CpuUsage denotes the usage of a CPU.
// All CPU stats are aggregate since container inception.
type CpuUsage struct {
@@ -48,6 +56,7 @@ type CpuStats struct {
CpuUsage CpuUsage `json:"cpu_usage,omitempty"`
ThrottlingData ThrottlingData `json:"throttling_data,omitempty"`
PSI *PSIStats `json:"psi,omitempty"`
BurstData BurstData `json:"burst_data,omitempty"`
}
type CPUSetStats struct {

View File

@@ -89,7 +89,7 @@ func ExpandSlice(slice string) (string, error) {
return path, nil
}
func newProp(name string, units interface{}) systemdDbus.Property {
func newProp(name string, units any) systemdDbus.Property {
return systemdDbus.Property{
Name: name,
Value: dbus.MakeVariant(units),
@@ -266,7 +266,7 @@ func systemdVersionAtoi(str string) (int, error) {
// Unconditionally remove the leading prefix ("v).
str = strings.TrimLeft(str, `"v`)
// Match on the first integer we can grab.
for i := 0; i < len(str); i++ {
for i := range len(str) {
if str[i] < '0' || str[i] > '9' {
// First non-digit: cut the tail.
str = str[:i]
@@ -280,7 +280,9 @@ func systemdVersionAtoi(str string) (int, error) {
return ver, nil
}
func addCpuQuota(cm *dbusConnManager, properties *[]systemdDbus.Property, quota int64, period uint64) {
// addCPUQuota adds CPUQuotaPeriodUSec and CPUQuotaPerSecUSec to the properties. The passed quota may be modified
// along with round-up during calculation in order to write the same value to cgroupfs later.
func addCPUQuota(cm *dbusConnManager, properties *[]systemdDbus.Property, quota *int64, period uint64) {
if period != 0 {
// systemd only supports CPUQuotaPeriodUSec since v242
sdVer := systemdVersion(cm)
@@ -292,10 +294,10 @@ func addCpuQuota(cm *dbusConnManager, properties *[]systemdDbus.Property, quota
" (setting will still be applied to cgroupfs)", sdVer)
}
}
if quota != 0 || period != 0 {
if *quota != 0 || period != 0 {
// corresponds to USEC_INFINITY in systemd
cpuQuotaPerSecUSec := uint64(math.MaxUint64)
if quota > 0 {
if *quota > 0 {
if period == 0 {
// assume the default
period = defCPUQuotaPeriod
@@ -304,9 +306,11 @@ func addCpuQuota(cm *dbusConnManager, properties *[]systemdDbus.Property, quota
// (integer percentage of CPU) internally. This means that if a fractional percent of
// CPU is indicated by Resources.CpuQuota, we need to round up to the nearest
// 10ms (1% of a second) such that child cgroups can set the cpu.cfs_quota_us they expect.
cpuQuotaPerSecUSec = uint64(quota*1000000) / period
cpuQuotaPerSecUSec = uint64(*quota*1000000) / period
if cpuQuotaPerSecUSec%10000 != 0 {
cpuQuotaPerSecUSec = ((cpuQuotaPerSecUSec / 10000) + 1) * 10000
// Update the requested quota along with the round-up in order to write the same value to cgroupfs.
*quota = int64(cpuQuotaPerSecUSec) * int64(period) / 1000000
}
}
*properties = append(*properties,

View File

@@ -90,7 +90,7 @@ func genV1ResourcesProperties(r *cgroups.Resources, cm *dbusConnManager) ([]syst
newProp("CPUShares", r.CpuShares))
}
addCpuQuota(cm, &properties, r.CpuQuota, r.CpuPeriod)
addCPUQuota(cm, &properties, &r.CpuQuota, r.CpuPeriod)
if r.BlkioWeight != 0 {
properties = append(properties,
@@ -334,6 +334,9 @@ func (m *LegacyManager) Set(r *cgroups.Resources) error {
if r.Unified != nil {
return cgroups.ErrV1NoUnified
}
// Use a copy since CpuQuota in r may be modified.
rCopy := *r
r = &rCopy
properties, err := genV1ResourcesProperties(r, m.dbus)
if err != nil {
return err

View File

@@ -113,7 +113,7 @@ func unifiedResToSystemdProps(cm *dbusConnManager, res map[string]string) (props
return nil, fmt.Errorf("unified resource %q quota value conversion error: %w", k, err)
}
}
addCpuQuota(cm, &props, quota, period)
addCPUQuota(cm, &props, &quota, period)
case "cpu.weight":
if shouldSetCPUIdle(cm, strings.TrimSpace(res["cpu.idle"])) {
@@ -254,7 +254,7 @@ func genV2ResourcesProperties(dirPath string, r *cgroups.Resources, cm *dbusConn
}
}
addCpuQuota(cm, &properties, r.CpuQuota, r.CpuPeriod)
addCPUQuota(cm, &properties, &r.CpuQuota, r.CpuPeriod)
if r.PidsLimit > 0 || r.PidsLimit == -1 {
properties = append(properties,
@@ -480,6 +480,9 @@ func (m *UnifiedManager) Set(r *cgroups.Resources) error {
if r == nil {
return nil
}
// Use a copy since CpuQuota in r may be modified.
rCopy := *r
r = &rCopy
properties, err := genV2ResourcesProperties(m.fsMgr.Path(""), r, m.dbus)
if err != nil {
return err

View File

@@ -231,7 +231,7 @@ func rmdir(path string, retry bool) error {
again:
err := unix.Rmdir(path)
switch err { // nolint:errorlint // unix errors are bare
switch err {
case nil, unix.ENOENT:
return nil
case unix.EINTR:
@@ -395,7 +395,7 @@ func WriteCgroupProc(dir string, pid int) error {
}
defer file.Close()
for i := 0; i < 5; i++ {
for range 5 {
_, err = file.WriteString(strconv.Itoa(pid))
if err == nil {
return nil

View File

@@ -5,6 +5,7 @@ import (
"fmt"
"os"
"path/filepath"
"slices"
"strings"
"sync"
"syscall"
@@ -144,10 +145,8 @@ func FindCgroupMountpointAndRoot(cgroupPath, subsystem string) (string, string,
func findCgroupMountpointAndRootFromMI(mounts []*mountinfo.Info, cgroupPath, subsystem string) (string, string, error) {
for _, mi := range mounts {
if strings.HasPrefix(mi.Mountpoint, cgroupPath) {
for _, opt := range strings.Split(mi.VFSOptions, ",") {
if opt == subsystem {
return mi.Mountpoint, mi.Root, nil
}
if slices.Contains(strings.Split(mi.VFSOptions, ","), subsystem) {
return mi.Mountpoint, mi.Root, nil
}
}
}

2
vendor/modules.txt vendored
View File

@@ -51,7 +51,7 @@ github.com/moby/sys/userns
# github.com/mrunalp/fileutils v0.5.1
## explicit; go 1.13
github.com/mrunalp/fileutils
# github.com/opencontainers/cgroups v0.0.1
# github.com/opencontainers/cgroups v0.0.2
## explicit; go 1.23.0
github.com/opencontainers/cgroups
github.com/opencontainers/cgroups/devices