Merge pull request #4830 from marquiz/devel/rdt-schemata-field

libcontainer/intelrdt: add support for Schemata field
This commit is contained in:
Kir Kolyshkin
2025-09-16 13:23:43 -07:00
committed by GitHub
14 changed files with 205 additions and 136 deletions

View File

@@ -173,6 +173,8 @@ func convertLibcontainerStats(ls *libcontainer.Stats) *types.Stats {
if intelrdt.IsCMTEnabled() {
s.IntelRdt.CMTStats = is.CMTStats
}
s.IntelRdt.Schemata = is.Schemata
}
s.NetworkInterfaces = ls.Interfaces

View File

@@ -56,7 +56,8 @@ var featuresCommand = cli.Command{
Enabled: &t,
},
IntelRdt: &features.IntelRdt{
Enabled: &t,
Enabled: &t,
Schemata: &t,
},
MountExtensions: &features.MountExtensions{
IDMap: &features.IDMap{

2
go.mod
View File

@@ -15,7 +15,7 @@ require (
github.com/moby/sys/userns v0.1.0
github.com/mrunalp/fileutils v0.5.1
github.com/opencontainers/cgroups v0.0.4
github.com/opencontainers/runtime-spec v1.2.2-0.20250401095657-e935f995dd67
github.com/opencontainers/runtime-spec v1.2.2-0.20250818071321-383cadbf08c0
github.com/opencontainers/selinux v1.12.0
github.com/seccomp/libseccomp-golang v0.11.1
github.com/sirupsen/logrus v1.9.3

4
go.sum
View File

@@ -46,8 +46,8 @@ github.com/mrunalp/fileutils v0.5.1 h1:F+S7ZlNKnrwHfSwdlgNSkKo67ReVf8o9fel6C3dkm
github.com/mrunalp/fileutils v0.5.1/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ=
github.com/opencontainers/cgroups v0.0.4 h1:XVj8P/IHVms/j+7eh8ggdkTLAxjz84ZzuFyGoE28DR4=
github.com/opencontainers/cgroups v0.0.4/go.mod h1:s8lktyhlGUqM7OSRL5P7eAW6Wb+kWPNvt4qvVfzA5vs=
github.com/opencontainers/runtime-spec v1.2.2-0.20250401095657-e935f995dd67 h1:Q+KewUGTMamIe6Q39xCD/T1NC1POmaTlWnhjikCrZHA=
github.com/opencontainers/runtime-spec v1.2.2-0.20250401095657-e935f995dd67/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/runtime-spec v1.2.2-0.20250818071321-383cadbf08c0 h1:RLn0YfUWkiqPGtgUANvJrcjIkCHGRl3jcz/c557M28M=
github.com/opencontainers/runtime-spec v1.2.2-0.20250818071321-383cadbf08c0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/selinux v1.12.0 h1:6n5JV4Cf+4y0KNXW48TLj5DwfXpvWlxXplUkdTrmPb8=
github.com/opencontainers/selinux v1.12.0/go.mod h1:BTPX+bjVbWGXw7ZZWUbdENt8w0htPSrlgOOysQaU62U=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=

View File

@@ -4,6 +4,10 @@ type IntelRdt struct {
// The identity for RDT Class of Service
ClosID string `json:"closID,omitempty"`
// Schemata is a generic field that specifies the contents of the schemata
// file in the resctrl filesystem. Each element represents one line written to
// the schemata file.
Schemata []string `json:"schemata,omitempty"`
// The schema for L3 cache id and capacity bitmask (CBM)
// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
L3CacheSchema string `json:"l3_cache_schema,omitempty"`

View File

@@ -326,16 +326,6 @@ func getIntelRdtParamString(path, file string) (string, error) {
return string(bytes.TrimSpace(contents)), nil
}
// writeFile writes data, followed by a trailing newline, to the entry named
// file inside dir, using permission bits 0o600.
//
// An empty dir is rejected before any I/O is attempted. A failed write is
// wrapped with the data that was being written and handed to newLastCmdError.
func writeFile(dir, file, data string) error {
	if dir == "" {
		return fmt.Errorf("no such directory for %s", file)
	}

	target := filepath.Join(dir, file)
	payload := []byte(data + "\n")

	werr := os.WriteFile(target, payload, 0o600)
	if werr == nil {
		return nil
	}
	return newLastCmdError(fmt.Errorf("intelrdt: unable to write %v: %w", data, werr))
}
// Get the read-only L3 cache information
func getL3CacheInfo() (*L3CacheInfo, error) {
l3CacheInfo := &L3CacheInfo{}
@@ -462,11 +452,11 @@ func (m *Manager) Apply(pid int) (err error) {
m.mu.Lock()
defer m.mu.Unlock()
if m.config.IntelRdt.ClosID != "" && m.config.IntelRdt.L3CacheSchema == "" && m.config.IntelRdt.MemBwSchema == "" {
if m.config.IntelRdt.ClosID != "" && m.config.IntelRdt.L3CacheSchema == "" && m.config.IntelRdt.MemBwSchema == "" && len(m.config.IntelRdt.Schemata) == 0 {
// Check that the CLOS exists, i.e. it has been pre-configured to
// conform with the runtime spec
if _, err := os.Stat(path); err != nil {
return fmt.Errorf("clos dir not accessible (must be pre-created when l3CacheSchema and memBwSchema are empty): %w", err)
return fmt.Errorf("clos dir not accessible (must be pre-created when schemata, l3CacheSchema and memBwSchema are empty): %w", err)
}
}
@@ -534,6 +524,8 @@ func (m *Manager) GetStats() (*Stats, error) {
}
schemaStrings := strings.Split(tmpStrings, "\n")
stats.Schemata = schemaStrings
if IsCATEnabled() {
// The read-only L3 cache information
l3CacheInfo, err := getL3CacheInfo()
@@ -637,35 +629,24 @@ func (m *Manager) Set(container *configs.Config) error {
// For example, on a two-socket machine, the schema line could be
// "MB:0=5000;1=7000" which means 5000 MBps memory bandwidth limit on
// socket 0 and 7000 MBps memory bandwidth limit on socket 1.
if container.IntelRdt != nil {
path := m.GetPath()
l3CacheSchema := container.IntelRdt.L3CacheSchema
memBwSchema := container.IntelRdt.MemBwSchema
if r := container.IntelRdt; r != nil {
// TODO: verify that l3CacheSchema and/or memBwSchema match the
// existing schemata if ClosID has been specified. This is more
// involved than reading the file and doing a plain string comparison, as
// the value written in does not necessarily match what gets read out
// (leading zeros, cache id ordering, etc.).
// Write a single joint schema string to schemata file
if l3CacheSchema != "" && memBwSchema != "" {
if err := writeFile(path, "schemata", l3CacheSchema+"\n"+memBwSchema); err != nil {
return err
var schemata strings.Builder
for _, s := range append([]string{r.L3CacheSchema, r.MemBwSchema}, r.Schemata...) {
if s != "" {
schemata.WriteString(s)
schemata.WriteString("\n")
}
}
// Write only L3 cache schema string to schemata file
if l3CacheSchema != "" && memBwSchema == "" {
if err := writeFile(path, "schemata", l3CacheSchema); err != nil {
return err
}
}
// Write only memory bandwidth schema string to schemata file
if l3CacheSchema == "" && memBwSchema != "" {
if err := writeFile(path, "schemata", memBwSchema); err != nil {
return err
if schemata.Len() > 0 {
path := filepath.Join(m.GetPath(), "schemata")
if err := os.WriteFile(path, []byte(schemata.String()), 0o600); err != nil {
return newLastCmdError(fmt.Errorf("intelrdt: unable to write %q: %w", schemata.String(), err))
}
}
}

View File

@@ -3,97 +3,125 @@ package intelrdt
import (
"os"
"path/filepath"
"slices"
"strings"
"testing"
"github.com/opencontainers/runc/libcontainer/configs"
)
func TestIntelRdtSetL3CacheSchema(t *testing.T) {
helper := NewIntelRdtTestUtil(t)
const (
l3CacheSchemaBefore = "L3:0=f;1=f0"
l3CacheSchemeAfter = "L3:0=f0;1=f"
)
helper.writeFileContents(map[string]string{
"schemata": l3CacheSchemaBefore + "\n",
})
helper.config.IntelRdt.L3CacheSchema = l3CacheSchemeAfter
intelrdt := newManager(helper.config, "", helper.IntelRdtPath)
if err := intelrdt.Set(helper.config); err != nil {
t.Fatal(err)
func TestIntelRdtSet(t *testing.T) {
tcs := []struct {
name string
config *configs.IntelRdt
schemataAfter []string
}{
{
name: "L3",
config: &configs.IntelRdt{
L3CacheSchema: "L3:0=f0;1=f",
},
schemataAfter: []string{"L3:0=f0;1=f"},
},
{
name: "MemBw",
config: &configs.IntelRdt{
MemBwSchema: "MB:0=70;1=20",
},
schemataAfter: []string{"MB:0=70;1=20"},
},
{
name: "MemBwSc",
config: &configs.IntelRdt{
MemBwSchema: "MB:0=9000;1=4000",
},
schemataAfter: []string{"MB:0=9000;1=4000"},
},
{
name: "L3 and MemBw",
config: &configs.IntelRdt{
L3CacheSchema: "L3:0=f0;1=f",
MemBwSchema: "MB:0=9000;1=4000",
},
schemataAfter: []string{
"L3:0=f0;1=f",
"MB:0=9000;1=4000",
},
},
{
name: "Schemata",
config: &configs.IntelRdt{
Schemata: []string{
"L3CODE:0=ff;1=ff",
"L3DATA:0=f;1=f0",
},
},
schemataAfter: []string{
"L3CODE:0=ff;1=ff",
"L3DATA:0=f;1=f0",
},
},
{
name: "Schemata and L3",
config: &configs.IntelRdt{
L3CacheSchema: "L3:0=f0;1=f",
Schemata: []string{"L2:0=ff00;1=ff"},
},
schemataAfter: []string{
"L3:0=f0;1=f",
"L2:0=ff00;1=ff",
},
},
{
name: "Schemata and MemBw",
config: &configs.IntelRdt{
MemBwSchema: "MB:0=2000;1=4000",
Schemata: []string{"L3:0=ff;1=ff"},
},
schemataAfter: []string{
"MB:0=2000;1=4000",
"L3:0=ff;1=ff",
},
},
{
name: "Schemata, L3 and MemBw",
config: &configs.IntelRdt{
L3CacheSchema: "L3:0=80;1=7f",
MemBwSchema: "MB:0=2000;1=4000",
Schemata: []string{
"L2:0=ff00;1=ff",
"L3:0=c0;1=3f",
},
},
schemataAfter: []string{
"L3:0=80;1=7f",
"MB:0=2000;1=4000",
"L2:0=ff00;1=ff",
"L3:0=c0;1=3f",
},
},
}
tmpStrings, err := getIntelRdtParamString(helper.IntelRdtPath, "schemata")
if err != nil {
t.Fatalf("Failed to parse file 'schemata' - %s", err)
}
values := strings.Split(tmpStrings, "\n")
value := values[0]
for _, tc := range tcs {
t.Run(tc.name, func(t *testing.T) {
helper := NewIntelRdtTestUtil(t)
helper.config.IntelRdt = tc.config
if value != l3CacheSchemeAfter {
t.Fatal("Got the wrong value, set 'schemata' failed.")
}
}
intelrdt := newManager(helper.config, "", helper.IntelRdtPath)
if err := intelrdt.Set(helper.config); err != nil {
t.Fatal(err)
}
func TestIntelRdtSetMemBwSchema(t *testing.T) {
helper := NewIntelRdtTestUtil(t)
tmpStrings, err := getIntelRdtParamString(helper.IntelRdtPath, "schemata")
if err != nil {
t.Fatalf("Failed to parse file 'schemata' - %s", err)
}
values := strings.Split(tmpStrings, "\n")
const (
memBwSchemaBefore = "MB:0=20;1=70"
memBwSchemeAfter = "MB:0=70;1=20"
)
helper.writeFileContents(map[string]string{
"schemata": memBwSchemaBefore + "\n",
})
helper.config.IntelRdt.MemBwSchema = memBwSchemeAfter
intelrdt := newManager(helper.config, "", helper.IntelRdtPath)
if err := intelrdt.Set(helper.config); err != nil {
t.Fatal(err)
}
tmpStrings, err := getIntelRdtParamString(helper.IntelRdtPath, "schemata")
if err != nil {
t.Fatalf("Failed to parse file 'schemata' - %s", err)
}
values := strings.Split(tmpStrings, "\n")
value := values[0]
if value != memBwSchemeAfter {
t.Fatal("Got the wrong value, set 'schemata' failed.")
}
}
func TestIntelRdtSetMemBwScSchema(t *testing.T) {
helper := NewIntelRdtTestUtil(t)
const (
memBwScSchemaBefore = "MB:0=5000;1=7000"
memBwScSchemeAfter = "MB:0=9000;1=4000"
)
helper.writeFileContents(map[string]string{
"schemata": memBwScSchemaBefore + "\n",
})
helper.config.IntelRdt.MemBwSchema = memBwScSchemeAfter
intelrdt := newManager(helper.config, "", helper.IntelRdtPath)
if err := intelrdt.Set(helper.config); err != nil {
t.Fatal(err)
}
tmpStrings, err := getIntelRdtParamString(helper.IntelRdtPath, "schemata")
if err != nil {
t.Fatalf("Failed to parse file 'schemata' - %s", err)
}
values := strings.Split(tmpStrings, "\n")
value := values[0]
if value != memBwScSchemeAfter {
t.Fatal("Got the wrong value, set 'schemata' failed.")
if slices.Compare(values, tc.schemataAfter) != 0 {
t.Fatalf("Got the wrong value, expected %v, got %v", tc.schemataAfter, values)
}
})
}
}

View File

@@ -45,6 +45,9 @@ type Stats struct {
// The memory bandwidth schema in 'container_id' group
MemBwSchema string `json:"mem_bw_schema,omitempty"`
// Schemata contains the full schemata of the ClosID (resctrl group) that the container is assigned to.
Schemata []string `json:"schemata,omitempty"`
// The memory bandwidth monitoring statistics from NUMA nodes in 'container_id' group
MBMStats *[]MBMNumaNodeStats `json:"mbm_stats,omitempty"`

View File

@@ -40,13 +40,3 @@ func NewIntelRdtTestUtil(t *testing.T) *intelRdtTestUtil {
}
return &intelRdtTestUtil{config: config, IntelRdtPath: testIntelRdtPath, t: t}
}
// writeFileContents fills the mocked Intel RDT "resource control" directory:
// every key of fileContents names a file under c.IntelRdtPath and the mapped
// value is the content handed to writeFile for it. The first write error
// aborts the running test via c.t.Fatal.
func (c *intelRdtTestUtil) writeFileContents(fileContents map[string]string) {
	for name := range fileContents {
		if werr := writeFile(c.IntelRdtPath, name, fileContents[name]); werr != nil {
			c.t.Fatal(werr)
		}
	}
}

View File

@@ -463,6 +463,7 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) {
if spec.Linux.IntelRdt != nil {
config.IntelRdt = &configs.IntelRdt{
ClosID: spec.Linux.IntelRdt.ClosID,
Schemata: spec.Linux.IntelRdt.Schemata,
L3CacheSchema: spec.Linux.IntelRdt.L3CacheSchema,
MemBwSchema: spec.Linux.IntelRdt.MemBwSchema,
}

View File

@@ -143,6 +143,9 @@ type IntelRdt struct {
// The memory bandwidth schema in 'container_id' group
MemBwSchema string `json:"mem_bw_schema,omitempty"`
// Schemata contains the full schemata of the ClosID (resctrl group) that the container is assigned to.
Schemata []string `json:"schemata,omitempty"`
// The memory bandwidth monitoring statistics from NUMA nodes in 'container_id' group
MBMStats *[]intelrdt.MBMNumaNodeStats `json:"mbm_stats,omitempty"`

View File

@@ -251,6 +251,8 @@ type Linux struct {
// IntelRdt contains Intel Resource Director Technology (RDT) information for
// handling resource constraints and monitoring metrics (e.g., L3 cache, memory bandwidth) for the container
IntelRdt *LinuxIntelRdt `json:"intelRdt,omitempty"`
// MemoryPolicy contains NUMA memory policy for the container.
MemoryPolicy *LinuxMemoryPolicy `json:"memoryPolicy,omitempty"`
// Personality contains configuration for the Linux personality syscall
Personality *LinuxPersonality `json:"personality,omitempty"`
// TimeOffsets specifies the offset for supporting time namespaces.
@@ -836,23 +838,41 @@ type LinuxSyscall struct {
type LinuxIntelRdt struct {
// The identity for RDT Class of Service
ClosID string `json:"closID,omitempty"`
// Schemata specifies the complete schemata to be written as is to the
// schemata file in resctrl fs. Each element represents a single line in the schemata file.
// NOTE: This will overwrite schemas specified in the L3CacheSchema and/or
// MemBwSchema fields.
Schemata []string `json:"schemata,omitempty"`
// The schema for L3 cache id and capacity bitmask (CBM)
// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
// NOTE: Should not be specified if Schemata is non-empty.
L3CacheSchema string `json:"l3CacheSchema,omitempty"`
// The schema of memory bandwidth per L3 cache id
// Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..."
// The unit of memory bandwidth is specified in "percentages" by
// default, and in "MBps" if MBA Software Controller is enabled.
// NOTE: Should not be specified if Schemata is non-empty.
MemBwSchema string `json:"memBwSchema,omitempty"`
// EnableCMT is the flag to indicate if the Intel RDT CMT is enabled. CMT (Cache Monitoring Technology) supports monitoring of
// the last-level cache (LLC) occupancy for the container.
EnableCMT bool `json:"enableCMT,omitempty"`
// EnableMonitoring enables resctrl monitoring for the container. This will
// create a dedicated resctrl monitoring group for the container.
EnableMonitoring bool `json:"enableMonitoring,omitempty"`
}
// EnableMBM is the flag to indicate if the Intel RDT MBM is enabled. MBM (Memory Bandwidth Monitoring) supports monitoring of
// total and local memory bandwidth for the container.
EnableMBM bool `json:"enableMBM,omitempty"`
// LinuxMemoryPolicy represents input for the set_mempolicy syscall.
// It is referenced from the Linux struct through the "memoryPolicy" JSON field.
type LinuxMemoryPolicy struct {
// Mode is the mode for the set_mempolicy syscall, expressed as a
// MemoryPolicyModeType string (for example MpolBind).
// The "mode" key carries no omitempty, so it is always present in encoded JSON.
Mode MemoryPolicyModeType `json:"mode"`
// Nodes is the nodemask for the set_mempolicy syscall, written as
// comma-separated node ranges.
// Format: "<node0>-<node1>,<node2>,<node3>-<node4>,..."
// The "nodes" key carries no omitempty, so it is always present in encoded JSON.
Nodes string `json:"nodes"`
// Flags holds the flags for the set_mempolicy syscall as
// MemoryPolicyFlagType strings; the key is omitted from JSON when the slice is empty.
Flags []MemoryPolicyFlagType `json:"flags,omitempty"`
}
// ZOS contains platform-specific configuration for z/OS based containers.
@@ -884,6 +904,26 @@ const (
ZOSUTSNamespace ZOSNamespaceType = "uts"
)
// MemoryPolicyModeType is the string type of LinuxMemoryPolicy.Mode, i.e. the
// mode given to the set_mempolicy syscall.
type MemoryPolicyModeType string
// Memory policy mode values. The string value of each constant is the
// MPOL_-prefixed name that appears in the JSON configuration.
const (
MpolDefault MemoryPolicyModeType = "MPOL_DEFAULT"
MpolBind MemoryPolicyModeType = "MPOL_BIND"
MpolInterleave MemoryPolicyModeType = "MPOL_INTERLEAVE"
MpolWeightedInterleave MemoryPolicyModeType = "MPOL_WEIGHTED_INTERLEAVE"
MpolPreferred MemoryPolicyModeType = "MPOL_PREFERRED"
MpolPreferredMany MemoryPolicyModeType = "MPOL_PREFERRED_MANY"
MpolLocal MemoryPolicyModeType = "MPOL_LOCAL"
)
// MemoryPolicyFlagType is the string type of the elements of
// LinuxMemoryPolicy.Flags, i.e. the flags given to the set_mempolicy syscall.
type MemoryPolicyFlagType string
// Memory policy flag values. The string value of each constant is the
// MPOL_F_-prefixed name that appears in the JSON configuration.
const (
MpolFNumaBalancing MemoryPolicyFlagType = "MPOL_F_NUMA_BALANCING"
MpolFRelativeNodes MemoryPolicyFlagType = "MPOL_F_RELATIVE_NODES"
MpolFStaticNodes MemoryPolicyFlagType = "MPOL_F_STATIC_NODES"
)
// LinuxSchedulerPolicy represents different scheduling policies used with the Linux Scheduler
type LinuxSchedulerPolicy string

View File

@@ -47,6 +47,7 @@ type Linux struct {
Apparmor *Apparmor `json:"apparmor,omitempty"`
Selinux *Selinux `json:"selinux,omitempty"`
IntelRdt *IntelRdt `json:"intelRdt,omitempty"`
MemoryPolicy *MemoryPolicy `json:"memoryPolicy,omitempty"`
MountExtensions *MountExtensions `json:"mountExtensions,omitempty"`
NetDevices *NetDevices `json:"netDevices,omitempty"`
}
@@ -130,6 +131,21 @@ type IntelRdt struct {
// Unrelated to whether the host supports Intel RDT or not.
// Nil value means "unknown", not "false".
Enabled *bool `json:"enabled,omitempty"`
// Schemata is true if the "linux.intelRdt.schemata" field of the
// spec is implemented.
Schemata *bool `json:"schemata,omitempty"`
// Monitoring is true if the "linux.intelRdt.enableMonitoring" field of the
// spec is implemented.
// Nil value means "unknown", not "false".
Monitoring *bool `json:"monitoring,omitempty"`
}
// MemoryPolicy represents the "memoryPolicy" field.
type MemoryPolicy struct {
// Modes is the list of known memory policy modes, e.g., "MPOL_INTERLEAVE".
// The key is omitted from JSON when the list is empty.
Modes []string `json:"modes,omitempty"`
// Flags is the list of known memory policy mode flags, e.g., "MPOL_F_STATIC_NODES".
// The key is omitted from JSON when the list is empty.
Flags []string `json:"flags,omitempty"`
}
// MountExtensions represents the "mountExtensions" field.

2
vendor/modules.txt vendored
View File

@@ -62,7 +62,7 @@ github.com/opencontainers/cgroups/fscommon
github.com/opencontainers/cgroups/internal/path
github.com/opencontainers/cgroups/manager
github.com/opencontainers/cgroups/systemd
# github.com/opencontainers/runtime-spec v1.2.2-0.20250401095657-e935f995dd67
# github.com/opencontainers/runtime-spec v1.2.2-0.20250818071321-383cadbf08c0
## explicit
github.com/opencontainers/runtime-spec/specs-go
github.com/opencontainers/runtime-spec/specs-go/features