diff --git a/go.mod b/go.mod index a2c363ce1d4..0c84823978d 100644 --- a/go.mod +++ b/go.mod @@ -12,9 +12,9 @@ require ( github.com/godbus/dbus/v5 v5.1.0 github.com/moby/sys/mountinfo v0.6.1 github.com/mrunalp/fileutils v0.5.0 - github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417 - github.com/opencontainers/selinux v1.10.1 - github.com/seccomp/libseccomp-golang v0.9.2-0.20220502022130-f33da4d89646 + github.com/opencontainers/runtime-spec v1.0.3-0.20210910115017-0d6cc581aeea + github.com/opencontainers/selinux v1.9.1 + github.com/seccomp/libseccomp-golang v0.9.2-0.20210429002308-3879420cc921 github.com/sirupsen/logrus v1.8.1 github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 github.com/urfave/cli v1.22.6 diff --git a/go.sum b/go.sum index 30d6ee33207..8f61a17c402 100644 --- a/go.sum +++ b/go.sum @@ -36,8 +36,10 @@ github.com/moby/sys/mountinfo v0.6.1 h1:+H/KnGEAGRpTrEAqNVQ2AM3SiwMgJUt/TXj+Z8cm github.com/moby/sys/mountinfo v0.6.1/go.mod h1:3bMD3Rg+zkqx8MRYPi7Pyb0Ie97QEBmdxbhnCLlSvSU= github.com/mrunalp/fileutils v0.5.0 h1:NKzVxiH7eSk+OQ4M+ZYW1K6h27RUV3MI6NUTsHhU6Z4= github.com/mrunalp/fileutils v0.5.0/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ= -github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417 h1:3snG66yBm59tKhhSPQrQ/0bCrv1LQbKt40LnUPiUxdc= -github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= +github.com/opencontainers/runtime-spec v1.0.3-0.20210910115017-0d6cc581aeea h1:WmF5mV2OwWlHap/Ol8Z+iLZVlvLJrG7PzO/j8vwSLz8= +github.com/opencontainers/runtime-spec v1.0.3-0.20210910115017-0d6cc581aeea/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= +github.com/opencontainers/selinux v1.9.1 h1:b4VPEF3O5JLZgdTDBmGepaaIbAo0GqoF6EBRq5f/g3Y= +github.com/opencontainers/selinux v1.9.1/go.mod h1:2i0OySw99QjzBBQByd1Gr9gSjvuho1lHsJxIJ3gGbJI= github.com/opencontainers/selinux v1.10.1 
h1:09LIPVRP3uuZGQvgR+SgMSNBd1Eb3vlRbGqQpoHsF8w= github.com/opencontainers/selinux v1.10.1/go.mod h1:2i0OySw99QjzBBQByd1Gr9gSjvuho1lHsJxIJ3gGbJI= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= @@ -46,6 +48,8 @@ github.com/rogpeppe/go-internal v1.6.1 h1:/FiVV8dS/e+YqF2JvO3yXRFbBLTIuSDkuC7aBO github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc= github.com/russross/blackfriday/v2 v2.0.1 h1:lPqVAte+HuHNfhJ/0LC98ESWRz8afy9tM/0RK8m9o+Q= github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/seccomp/libseccomp-golang v0.9.2-0.20210429002308-3879420cc921 h1:58EBmR2dMNL2n/FnbQewK3D14nXr0V9CObDSvMJLq+Y= +github.com/seccomp/libseccomp-golang v0.9.2-0.20210429002308-3879420cc921/go.mod h1:JA8cRccbGaA1s33RQf7Y1+q9gHmZX1yB/z9WDN1C6fg= github.com/seccomp/libseccomp-golang v0.9.2-0.20220502022130-f33da4d89646 h1:RpforrEYXWkmGwJHIGnLZ3tTWStkjVVstwzNGqxX2Ds= github.com/seccomp/libseccomp-golang v0.9.2-0.20220502022130-f33da4d89646/go.mod h1:JA8cRccbGaA1s33RQf7Y1+q9gHmZX1yB/z9WDN1C6fg= github.com/shurcooL/sanitized_anchor_name v1.0.0 h1:PdmoCO6wvbs+7yrJyMORt4/BmY5IYyJwS/kOiWx8mHo= diff --git a/libcontainer/SPEC.md b/libcontainer/SPEC.md index 07ebdc12153..ac31bec65ab 100644 --- a/libcontainer/SPEC.md +++ b/libcontainer/SPEC.md @@ -158,32 +158,38 @@ init process will block waiting for the parent to finish setup. ### IntelRdt Intel platforms with new Xeon CPU support Resource Director Technology (RDT). -Cache Allocation Technology (CAT) and Memory Bandwidth Allocation (MBA) are -two sub-features of RDT. +Cache Allocation Technology (CAT), Cache Monitoring Technology (CMT), +Memory Bandwidth Allocation (MBA) and Memory Bandwidth Monitoring (MBM) are +four sub-features of RDT. Cache Allocation Technology (CAT) provides a way for the software to restrict cache allocation to a defined 'subset' of L3 cache which may be overlapping with other 'subsets'. 
The different subsets are identified by class of service (CLOS) and each CLOS has a capacity bitmask (CBM). +Cache Monitoring Technology (CMT) supports monitoring of the last-level cache (LLC) occupancy +for each running thread simultaneously. + Memory Bandwidth Allocation (MBA) provides indirect and approximate throttle over memory bandwidth for the software. A user controls the resource by -indicating the percentage of maximum memory bandwidth or memory bandwidth limit -in MBps unit if MBA Software Controller is enabled. +indicating the percentage of maximum memory bandwidth or memory bandwidth +limit in MBps unit if MBA Software Controller is enabled. + +Memory Bandwidth Monitoring (MBM) supports monitoring of total and local memory bandwidth +for each running thread simultaneously. -It can be used to handle L3 cache and memory bandwidth resources allocation -for containers if hardware and kernel support Intel RDT CAT and MBA features. +More details about Intel RDT CAT, CMT, MBA and MBM can be found in sections 17.18 and 17.19, Volume 3 +of the Intel Software Developer's Manual: +https://software.intel.com/en-us/articles/intel-sdm -In Linux 4.10 kernel or newer, the interface is defined and exposed via +About Intel RDT kernel interface: +In Linux 4.14 kernel or newer, the interface is defined and exposed via "resource control" filesystem, which is a "cgroup-like" interface. Comparing with cgroups, it has similar process management lifecycle and interfaces in a container. But unlike cgroups' hierarchy, it has single level filesystem layout. -CAT and MBA features are introduced in Linux 4.10 and 4.12 kernel via -"resource control" filesystem. 
- Intel RDT "resource control" filesystem hierarchy: ``` mount -t resctrl resctrl /sys/fs/resctrl @@ -194,25 +200,46 @@ tree /sys/fs/resctrl | | |-- cbm_mask | | |-- min_cbm_bits | | |-- num_closids +| |-- L3_MON +| | |-- max_threshold_occupancy +| | |-- mon_features +| | |-- num_rmids | |-- MB | |-- bandwidth_gran | |-- delay_linear | |-- min_bandwidth | |-- num_closids -|-- ... +|-- mon_groups + |-- + |-- ... + |-- mon_data + |-- mon_L3_00 + |-- llc_occupancy + |-- mbm_local_bytes + |-- mbm_total_bytes + |-- ... + |-- tasks |-- schemata |-- tasks -|-- +|-- |-- ... - |-- schemata + |-- mon_data + |-- mon_L3_00 + |-- llc_occupancy + |-- mbm_local_bytes + |-- mbm_total_bytes + |-- ... |-- tasks + |-- schemata +|-- ... ``` For runc, we can make use of `tasks` and `schemata` configuration for L3 -cache and memory bandwidth resources constraints. +cache and memory bandwidth resources constraints, `mon_data` directory for +CMT and MBM statistics. The file `tasks` has a list of tasks that belongs to this group (e.g., -" group). Tasks can be added to a group by writing the task ID +"" group). Tasks can be added to a group by writing the task ID to the "tasks" file (which will automatically remove them from the previous group to which they belonged). New tasks created by fork(2) and clone(2) are added to the same group as their parent. @@ -224,7 +251,7 @@ L3 cache schema: It has allocation bitmasks/values for L3 cache on each socket, which contains L3 cache id and capacity bitmask (CBM). ``` - Format: "L3:=;=;..." +Format: "L3:=;=;..." ``` For example, on a two-socket machine, the schema line could be "L3:0=ff;1=c0" which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0. @@ -240,7 +267,7 @@ Memory bandwidth schema: It has allocation values for memory bandwidth on each socket, which contains L3 cache id and memory bandwidth. ``` - Format: "MB:=bandwidth0;=bandwidth1;..." +Format: "MB:=bandwidth0;=bandwidth1;..." 
``` For example, on a two-socket machine, the schema line could be "MB:0=20;1=70" @@ -251,8 +278,10 @@ that is allocated is also dependent on the CPU model and can be looked up at min_bw + N * bw_gran. Intermediate values are rounded to the next control step available on the hardware. -If MBA Software Controller is enabled through mount option "-o mba_MBps" +If MBA Software Controller is enabled through mount option "-o mba_MBps": +``` mount -t resctrl resctrl -o mba_MBps /sys/fs/resctrl +``` We could specify memory bandwidth in "MBps" (Mega Bytes per second) unit instead of "percentages". The kernel underneath would use a software feedback mechanism or a "Software Controller" which reads the actual bandwidth using @@ -263,11 +292,12 @@ For example, on a two-socket machine, the schema line could be "MB:0=5000;1=7000" which means 5000 MBps memory bandwidth limit on socket 0 and 7000 MBps memory bandwidth limit on socket 1. -For more information about Intel RDT kernel interface: +For more information about Intel RDT kernel interface: https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt -``` + An example for runc: +``` Consider a two-socket machine with two L3 caches where the default CBM is 0x7ff and the max CBM length is 11 bits, and minimum memory bandwidth of 10% with a memory bandwidth granularity of 10%. @@ -281,7 +311,17 @@ maximum memory bandwidth of 20% on socket 0 and 70% on socket 1. "closID": "guaranteed_group", "l3CacheSchema": "L3:0=7f0;1=1f", "memBwSchema": "MB:0=20;1=70" - } + } +} +``` +Another example: +``` +We only want to monitor memory bandwidth and llc occupancy. 
+"linux": { + "intelRdt": { + "enableMBM": true, + "enableCMT": true + } } ``` diff --git a/libcontainer/configs/config.go b/libcontainer/configs/config.go index 7cf2fb65751..7c324bb109f 100644 --- a/libcontainer/configs/config.go +++ b/libcontainer/configs/config.go @@ -196,7 +196,7 @@ type Config struct { NoNewKeyring bool `json:"no_new_keyring"` // IntelRdt specifies settings for Intel RDT group that the container is placed into - // to limit the resources (e.g., L3 cache, memory bandwidth) the container has available + // to limit the resources (e.g., L3 cache, memory bandwidth) the container has available. IntelRdt *IntelRdt `json:"intel_rdt,omitempty"` // RootlessEUID is set when the runc was launched with non-zero EUID. diff --git a/libcontainer/configs/intelrdt.go b/libcontainer/configs/intelrdt.go index f8d951ab8b9..c778b43a948 100644 --- a/libcontainer/configs/intelrdt.go +++ b/libcontainer/configs/intelrdt.go @@ -13,4 +13,12 @@ type IntelRdt struct { // The unit of memory bandwidth is specified in "percentages" by // default, and in "MBps" if MBA Software Controller is enabled. MemBwSchema string `json:"memBwSchema,omitempty"` + + // The flag to indicate if Intel RDT CMT is enabled. CMT (Cache Monitoring Technology) supports monitoring of + // the last-level cache (LLC) occupancy for the container. + EnableCMT bool `json:"enableCMT,omitempty"` + + // The flag to indicate if Intel RDT MBM is enabled. MBM (Memory Bandwidth Monitoring) supports monitoring of + // total and local memory bandwidth for the container. 
+ EnableMBM bool `json:"enableMBM,omitempty"` } diff --git a/libcontainer/configs/validate/validator.go b/libcontainer/configs/validate/validator.go index f69c07c04a3..b722a9073c5 100644 --- a/libcontainer/configs/validate/validator.go +++ b/libcontainer/configs/validate/validator.go @@ -226,6 +226,12 @@ func intelrdtCheck(config *configs.Config) error { if !intelrdt.IsMBAEnabled() && config.IntelRdt.MemBwSchema != "" { return errors.New("intelRdt.memBwSchema is specified in config, but Intel RDT/MBA is not enabled") } + if !intelrdt.IsCMTEnabled() && config.IntelRdt.EnableCMT { + return errors.New("intelRdt.enableCMT is specified in config, but Intel RDT/CMT is not enabled") + } + if !intelrdt.IsMBMEnabled() && config.IntelRdt.EnableMBM { + return errors.New("intelRdt.enableMBM is specified in config, but Intel RDT/MBM is not enabled") + } } return nil diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index a2ceb7857db..269a2aa634b 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -2012,6 +2012,7 @@ func (c *Container) currentState() (*State, error) { if c.intelRdtManager != nil { intelRdtPath = c.intelRdtManager.GetPath() } + state := &State{ BaseState: BaseState{ ID: c.ID(), diff --git a/libcontainer/intelrdt/intelrdt.go b/libcontainer/intelrdt/intelrdt.go index 3953f930d25..64bedb73a7a 100644 --- a/libcontainer/intelrdt/intelrdt.go +++ b/libcontainer/intelrdt/intelrdt.go @@ -21,34 +21,38 @@ import ( /* * About Intel RDT features: * Intel platforms with new Xeon CPU support Resource Director Technology (RDT). - * Cache Allocation Technology (CAT) and Memory Bandwidth Allocation (MBA) are - * two sub-features of RDT. + * Cache Allocation Technology (CAT), Cache Monitoring Technology (CMT), + * Memory Bandwidth Allocation (MBA) and Memory Bandwidth Monitoring (MBM) are + * four sub-features of RDT. 
* * Cache Allocation Technology (CAT) provides a way for the software to restrict * cache allocation to a defined 'subset' of L3 cache which may be overlapping * with other 'subsets'. The different subsets are identified by class of * service (CLOS) and each CLOS has a capacity bitmask (CBM). * + * Cache Monitoring Technology (CMT) supports monitoring of the last-level cache (LLC) occupancy + * for each running thread simultaneously. + * * Memory Bandwidth Allocation (MBA) provides indirect and approximate throttle * over memory bandwidth for the software. A user controls the resource by * indicating the percentage of maximum memory bandwidth or memory bandwidth * limit in MBps unit if MBA Software Controller is enabled. * - * More details about Intel RDT CAT and MBA can be found in the section 17.18 + * Memory Bandwidth Monitoring (MBM) supports monitoring of total and local memory bandwidth + * for each running thread simultaneously. + * + * More details about Intel RDT CAT, CMT, MBA and MBM can be found in sections 17.18 and 17.19, Volume 3 * of Intel Software Developer Manual: * https://software.intel.com/en-us/articles/intel-sdm * * About Intel RDT kernel interface: - * In Linux 4.10 kernel or newer, the interface is defined and exposed via + * In Linux 4.14 kernel or newer, the interface is defined and exposed via * "resource control" filesystem, which is a "cgroup-like" interface. * * Comparing with cgroups, it has similar process management lifecycle and * interfaces in a container. But unlike cgroups' hierarchy, it has single level * filesystem layout. * - * CAT and MBA features are introduced in Linux 4.10 and 4.12 kernel via - * "resource control" filesystem. - * * Intel RDT "resource control" filesystem hierarchy: * mount -t resctrl resctrl /sys/fs/resctrl * tree /sys/fs/resctrl @@ -67,19 +71,37 @@ import ( * | |-- delay_linear * | |-- min_bandwidth * | |-- num_closids - * |-- ... + * |-- mon_groups + * |-- + * |-- ... 
+ * |-- mon_data + * |-- mon_L3_00 + * |-- llc_occupancy + * |-- mbm_local_bytes + * |-- mbm_total_bytes + * |-- ... + * |-- tasks * |-- schemata * |-- tasks * |-- * |-- ... - * |-- schemata + * |-- mon_data + * |-- mon_L3_00 + * |-- llc_occupancy + * |-- mbm_local_bytes + * |-- mbm_total_bytes + * |-- ... * |-- tasks + * |-- schemata + * |-- ... + * * * For runc, we can make use of `tasks` and `schemata` configuration for L3 - * cache and memory bandwidth resources constraints. + * cache and memory bandwidth resources constraints, `mon_data` directory for + * CMT and MBM statistics. * * The file `tasks` has a list of tasks that belongs to this group (e.g., - * " group). Tasks can be added to a group by writing the task ID + * "" group). Tasks can be added to a group by writing the task ID * to the "tasks" file (which will automatically remove them from the previous * group to which they belonged). New tasks created by fork(2) and clone(2) are * added to the same group as their parent. @@ -90,7 +112,9 @@ import ( * L3 cache schema: * It has allocation bitmasks/values for L3 cache on each socket, which * contains L3 cache id and capacity bitmask (CBM). - * Format: "L3:=;=;..." + * + * Format: "L3:=;=;..." + * * For example, on a two-socket machine, the schema line could be "L3:0=ff;1=c0" * which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0. * @@ -104,7 +128,9 @@ import ( * Memory bandwidth schema: * It has allocation values for memory bandwidth on each socket, which contains * L3 cache id and memory bandwidth. - * Format: "MB:=bandwidth0;=bandwidth1;..." + * + * Format: "MB:=bandwidth0;=bandwidth1;..." 
+ * * For example, on a two-socket machine, the schema line could be "MB:0=20;1=70" * * The minimum bandwidth percentage value for each CPU model is predefined and @@ -129,7 +155,9 @@ import ( * For more information about Intel RDT kernel interface: * https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt * + * * An example for runc: + * * Consider a two-socket machine with two L3 caches where the default CBM is * 0x7ff and the max CBM length is 11 bits, and minimum memory bandwidth of 10% * with a memory bandwidth granularity of 10%. @@ -142,15 +170,27 @@ import ( * "intelRdt": { * "l3CacheSchema": "L3:0=7f0;1=1f", * "memBwSchema": "MB:0=20;1=70" - * } + * } + * } + * + * Another example: + * + * We only want to monitor memory bandwidth and llc occupancy. + * "linux": { + * "intelRdt": { + * "enableMBM": true, + * "enableCMT": true + * } * } + * */ type Manager struct { - mu sync.Mutex - config *configs.Config - id string - path string + mu sync.Mutex + config *configs.Config + id string + path string + monitoringGroup bool } // NewManager returns a new instance of Manager, or nil, if the Intel RDT @@ -166,15 +206,26 @@ func NewManager(config *configs.Config, id string, path string) *Manager { // newManager is the same as NewManager, except it does not check if the feature // is actually available. Used by unit tests that mock intelrdt paths. 
func newManager(config *configs.Config, id string, path string) *Manager { + var monitoringGroup bool + if config.IntelRdt.L3CacheSchema != "" || config.IntelRdt.MemBwSchema != "" || config.IntelRdt.ClosID != "" { + monitoringGroup = false + } else if config.IntelRdt.EnableCMT || config.IntelRdt.EnableMBM { + monitoringGroup = true + } else { + return nil + } + return &Manager{ - config: config, - id: id, - path: path, + config: config, + id: id, + path: path, + monitoringGroup: monitoringGroup, } } const ( - intelRdtTasks = "tasks" + intelRdtTasks = "tasks" + monitoringGroupRoot = "mon_groups" ) var ( @@ -506,12 +557,16 @@ func (m *Manager) getIntelRdtPath() (string, error) { return "", err } - clos := m.id + groupName := m.id if m.config.IntelRdt != nil && m.config.IntelRdt.ClosID != "" { - clos = m.config.IntelRdt.ClosID + groupName = m.config.IntelRdt.ClosID + } + + if m.monitoringGroup { + return filepath.Join(rootPath, monitoringGroupRoot, groupName), nil } - return filepath.Join(rootPath, clos), nil + return filepath.Join(rootPath, groupName), nil } // Applies Intel RDT configuration to the process with the specified pid @@ -589,6 +644,9 @@ func (m *Manager) GetStats() (*Stats, error) { if err != nil { return nil, err } + + containerPath := m.GetPath() + // The read-only L3 cache and memory bandwidth schemata in root tmpRootStrings, err := getIntelRdtParamString(rootPath, "schemata") if err != nil { @@ -596,9 +654,13 @@ func (m *Manager) GetStats() (*Stats, error) { } schemaRootStrings := strings.Split(tmpRootStrings, "\n") + closPath := containerPath + if m.monitoringGroup { + closPath = filepath.Join(containerPath, "../..") + } + // The L3 cache and memory bandwidth schemata in container's clos group - containerPath := m.GetPath() - tmpStrings, err := getIntelRdtParamString(containerPath, "schemata") + tmpStrings, err := getIntelRdtParamString(closPath, "schemata") if err != nil { return nil, err } @@ -650,8 +712,8 @@ func (m *Manager) GetStats() (*Stats, 
error) { } } - if IsMBMEnabled() || IsCMTEnabled() { - err = getMonitoringStats(containerPath, stats) + if (IsCMTEnabled() && m.config.IntelRdt.EnableCMT) || (IsMBMEnabled() && m.config.IntelRdt.EnableMBM) { + err = getMonitoringStats(containerPath, stats, m.config.IntelRdt.EnableCMT, m.config.IntelRdt.EnableMBM) if err != nil { return nil, err } @@ -708,17 +770,26 @@ func (m *Manager) Set(container *configs.Config) error { // "MB:0=5000;1=7000" which means 5000 MBps memory bandwidth limit on // socket 0 and 7000 MBps memory bandwidth limit on socket 1. if container.IntelRdt != nil { - path := m.GetPath() + l3CacheSchema := container.IntelRdt.L3CacheSchema memBwSchema := container.IntelRdt.MemBwSchema + // Write a single joint schema string to schemata file + if l3CacheSchema != "" || memBwSchema != "" { + // Schema can be set only in Control Group. + if m.monitoringGroup { + return fmt.Errorf("couldn't set IntelRdt l3CacheSchema or memBwSchema for the monitoring group") + } + } + // TODO: verify that l3CacheSchema and/or memBwSchema match the // existing schemata if ClosID has been specified. This is a more // involved than reading the file and doing plain string comparison as // the value written in does not necessarily match what gets read out // (leading zeros, cache id ordering etc). 
- // Write a single joint schema string to schemata file + // Write a single joint schemata string to schemata file + path := m.GetPath() if l3CacheSchema != "" && memBwSchema != "" { if err := writeFile(path, "schemata", l3CacheSchema+"\n"+memBwSchema); err != nil { return err diff --git a/libcontainer/intelrdt/intelrdt_test.go b/libcontainer/intelrdt/intelrdt_test.go index c127cd8f7c6..baa845483bd 100644 --- a/libcontainer/intelrdt/intelrdt_test.go +++ b/libcontainer/intelrdt/intelrdt_test.go @@ -1,10 +1,14 @@ package intelrdt import ( + "errors" "os" "path/filepath" "strings" + "sync" "testing" + + "github.com/opencontainers/runc/libcontainer/configs" ) func TestIntelRdtSetL3CacheSchema(t *testing.T) { @@ -125,3 +129,50 @@ func TestApply(t *testing.T) { t.Fatalf("unexpected tasks file, expected '1235', got %q", pids) } } + +func TestIntelRdtManagerSetSchemataInMonGroup(t *testing.T) { + helper := NewIntelRdtTestUtil(t) + + intelrdt := Manager{ + mu: sync.Mutex{}, + config: helper.config, + id: "", + path: helper.IntelRdtPath, + monitoringGroup: true, + } + + test := []struct { + l3Schema string + memBwSchema string + }{ + { + "L3:0=f0;1=f", + "", + }, + { + "L3:0=f0;1=f", + "MB:0=20;1=70", + }, + { + "", + "MB:0=20;1=70", + }, + } + + expectedError := errors.New("couldn't set IntelRdt l3CacheSchema or memBwSchema for the monitoring group") + + for _, tc := range test { + err := intelrdt.Set(&configs.Config{IntelRdt: &configs.IntelRdt{ + L3CacheSchema: tc.l3Schema, + MemBwSchema: tc.memBwSchema, + }}) + + if err == nil { + t.Fatalf("Expected error: %v, got nil.", expectedError) + } + + if err.Error() != expectedError.Error() { + t.Fatalf("Expected error: %v but got: %v.", expectedError, err) + } + } +} diff --git a/libcontainer/intelrdt/monitoring.go b/libcontainer/intelrdt/monitoring.go index 82e0002efad..945612cff9d 100644 --- a/libcontainer/intelrdt/monitoring.go +++ b/libcontainer/intelrdt/monitoring.go @@ -47,7 +47,7 @@ func parseMonFeatures(reader 
io.Reader) (monFeatures, error) { return monFeatures, scanner.Err() } -func getMonitoringStats(containerPath string, stats *Stats) error { +func getMonitoringStats(containerPath string, stats *Stats, enableCMT bool, enableMBM bool) error { numaFiles, err := os.ReadDir(filepath.Join(containerPath, "mon_data")) if err != nil { return err @@ -59,14 +59,14 @@ func getMonitoringStats(containerPath string, stats *Stats) error { for _, file := range numaFiles { if file.IsDir() { numaPath := filepath.Join(containerPath, "mon_data", file.Name()) - if IsMBMEnabled() { + if IsMBMEnabled() && enableMBM { numaMBMStats, err := getMBMNumaNodeStats(numaPath) if err != nil { return err } mbmStats = append(mbmStats, *numaMBMStats) } - if IsCMTEnabled() { + if IsCMTEnabled() && enableCMT { numaCMTStats, err := getCMTNumaNodeStats(numaPath) if err != nil { return err diff --git a/libcontainer/intelrdt/monitoring_test.go b/libcontainer/intelrdt/monitoring_test.go index 0a89ef2f7b8..8ce8489a208 100644 --- a/libcontainer/intelrdt/monitoring_test.go +++ b/libcontainer/intelrdt/monitoring_test.go @@ -80,7 +80,7 @@ func TestGetMonitoringStats(t *testing.T) { t.Run("Gather monitoring stats", func(t *testing.T) { var stats Stats - err := getMonitoringStats(mockedL3_MON, &stats) + err := getMonitoringStats(mockedL3_MON, &stats, true, true) if err != nil { t.Fatal(err) } diff --git a/libcontainer/specconv/spec_linux.go b/libcontainer/specconv/spec_linux.go index 7ff9f098d6a..d3f80d3d5a4 100644 --- a/libcontainer/specconv/spec_linux.go +++ b/libcontainer/specconv/spec_linux.go @@ -431,6 +431,12 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) { L3CacheSchema: spec.Linux.IntelRdt.L3CacheSchema, MemBwSchema: spec.Linux.IntelRdt.MemBwSchema, } + if spec.Linux.IntelRdt.EnableCMT { + config.IntelRdt.EnableCMT = spec.Linux.IntelRdt.EnableCMT + } + if spec.Linux.IntelRdt.EnableMBM { + config.IntelRdt.EnableMBM = spec.Linux.IntelRdt.EnableMBM + } } } diff --git a/update.go 
b/update.go index 9ce5a2e835b..d366388fa3b 100644 --- a/update.go +++ b/update.go @@ -21,6 +21,13 @@ func i64Ptr(i int64) *int64 { return &i } func u64Ptr(i uint64) *uint64 { return &i } func u16Ptr(i uint16) *uint16 { return &i } +const ( + l3CacheSchemaFlag = "l3-cache-schema" + memBwSchemaFlag = "mem-bw-schema" + enableCMTFlag = "enable-intelrdt-cmt" + enableMBMFlag = "enable-intelrdt-mbm" +) + var updateCommand = cli.Command{ Name: "update", Usage: "update container resource constraints", @@ -117,13 +124,21 @@ other options are ignored. Usage: "Maximum number of pids allowed in the container", }, cli.StringFlag{ - Name: "l3-cache-schema", + Name: l3CacheSchemaFlag, Usage: "The string of Intel RDT/CAT L3 cache schema", }, cli.StringFlag{ - Name: "mem-bw-schema", + Name: memBwSchemaFlag, Usage: "The string of Intel RDT/MBA memory bandwidth schema", }, + cli.BoolFlag{ + Name: enableCMTFlag, + Usage: "Enable Intel RDT/CMT metrics for the container", + }, + cli.BoolFlag{ + Name: enableMBMFlag, + Usage: "Enable Intel RDT/MBM metrics for the container", + }, }, Action: func(context *cli.Context) error { if err := checkArgs(context, 1, exactArgs); err != nil { @@ -297,17 +312,32 @@ other options are ignored. 
config.Cgroups.Resources.Unified = r.Unified // Update Intel RDT - l3CacheSchema := context.String("l3-cache-schema") - memBwSchema := context.String("mem-bw-schema") - if l3CacheSchema != "" && !intelrdt.IsCATEnabled() { - return errors.New("Intel RDT/CAT: l3 cache schema is not enabled") - } + isL3CacheSchemaFlagSet := context.IsSet(l3CacheSchemaFlag) + l3CacheSchema := context.String(l3CacheSchemaFlag) - if memBwSchema != "" && !intelrdt.IsMBAEnabled() { - return errors.New("Intel RDT/MBA: memory bandwidth schema is not enabled") - } + isMemBwSchemaFlagSet := context.IsSet(memBwSchemaFlag) + memBwSchema := context.String(memBwSchemaFlag) + + isEnableCMTFlagSet := context.IsSet(enableCMTFlag) + enableCMT := context.Bool(enableCMTFlag) + + isEnableMBMFlagSet := context.IsSet(enableMBMFlag) + enableMBM := context.Bool(enableMBMFlag) + + if isL3CacheSchemaFlagSet || isMemBwSchemaFlagSet || isEnableCMTFlagSet || isEnableMBMFlagSet { + if isL3CacheSchemaFlagSet && l3CacheSchema != "" && !intelrdt.IsCATEnabled() { + return errors.New("Intel RDT/CAT: l3 cache schema is not enabled") + } + if isMemBwSchemaFlagSet && memBwSchema != "" && !intelrdt.IsMBAEnabled() { + return errors.New("Intel RDT/MBA: memory bandwidth schema is not enabled") + } + if isEnableCMTFlagSet && enableCMT && !intelrdt.IsCMTEnabled() { + return errors.New("Intel RDT/CMT: CMT is not enabled") + } + if isEnableMBMFlagSet && enableMBM && !intelrdt.IsMBMEnabled() { + return errors.New("Intel RDT/MBM: MBM is not enabled") + } - if l3CacheSchema != "" || memBwSchema != "" { // If intelRdt is not specified in original configuration, we just don't // Apply() to create intelRdt group or attach tasks for this container. // In update command, we could re-enable through IntelRdtManager.Apply() @@ -318,13 +348,28 @@ other options are ignored. 
return err } config.IntelRdt = &configs.IntelRdt{} + config.IntelRdt.L3CacheSchema = l3CacheSchema + config.IntelRdt.MemBwSchema = memBwSchema + config.IntelRdt.EnableCMT = enableCMT + config.IntelRdt.EnableMBM = enableMBM intelRdtManager := intelrdt.NewManager(&config, container.ID(), state.IntelRdtPath) if err := intelRdtManager.Apply(state.InitProcessPid); err != nil { return err } + } else { + if isL3CacheSchemaFlagSet { + config.IntelRdt.L3CacheSchema = l3CacheSchema + } + if isMemBwSchemaFlagSet { + config.IntelRdt.MemBwSchema = memBwSchema + } + if isEnableCMTFlagSet { + config.IntelRdt.EnableCMT = enableCMT + } + if isEnableMBMFlagSet { + config.IntelRdt.EnableMBM = enableMBM + } } - config.IntelRdt.L3CacheSchema = l3CacheSchema - config.IntelRdt.MemBwSchema = memBwSchema } // XXX(kolyshkin@): currently "runc update" is unable to change diff --git a/vendor/github.com/opencontainers/runtime-spec/specs-go/config.go b/vendor/github.com/opencontainers/runtime-spec/specs-go/config.go index 6a7a91e5596..c0e8794482c 100644 --- a/vendor/github.com/opencontainers/runtime-spec/specs-go/config.go +++ b/vendor/github.com/opencontainers/runtime-spec/specs-go/config.go @@ -15,7 +15,7 @@ type Spec struct { // Mounts configures additional mounts (on top of Root). Mounts []Mount `json:"mounts,omitempty"` // Hooks configures callbacks for container lifecycle events. - Hooks *Hooks `json:"hooks,omitempty" platform:"linux,solaris"` + Hooks *Hooks `json:"hooks,omitempty" platform:"linux,solaris,zos"` // Annotations contains arbitrary metadata for the container. Annotations map[string]string `json:"annotations,omitempty"` @@ -27,6 +27,8 @@ type Spec struct { Windows *Windows `json:"windows,omitempty" platform:"windows"` // VM specifies configuration for virtual-machine-based containers. VM *VM `json:"vm,omitempty" platform:"vm"` + // ZOS is platform-specific configuration for z/OS based containers. 
+ ZOS *ZOS `json:"zos,omitempty" platform:"zos"` } // Process contains information to start a specific application inside the container. @@ -49,7 +51,7 @@ type Process struct { // Capabilities are Linux capabilities that are kept for the process. Capabilities *LinuxCapabilities `json:"capabilities,omitempty" platform:"linux"` // Rlimits specifies rlimit options to apply to the process. - Rlimits []POSIXRlimit `json:"rlimits,omitempty" platform:"linux,solaris"` + Rlimits []POSIXRlimit `json:"rlimits,omitempty" platform:"linux,solaris,zos"` // NoNewPrivileges controls whether additional privileges could be gained by processes in the container. NoNewPrivileges bool `json:"noNewPrivileges,omitempty" platform:"linux"` // ApparmorProfile specifies the apparmor profile for the container. @@ -86,11 +88,11 @@ type Box struct { // User specifies specific user (and group) information for the container process. type User struct { // UID is the user id. - UID uint32 `json:"uid" platform:"linux,solaris"` + UID uint32 `json:"uid" platform:"linux,solaris,zos"` // GID is the group id. - GID uint32 `json:"gid" platform:"linux,solaris"` + GID uint32 `json:"gid" platform:"linux,solaris,zos"` // Umask is the umask for the init process. - Umask *uint32 `json:"umask,omitempty" platform:"linux,solaris"` + Umask *uint32 `json:"umask,omitempty" platform:"linux,solaris,zos"` // AdditionalGids are additional group ids set for the container's process. AdditionalGids []uint32 `json:"additionalGids,omitempty" platform:"linux,solaris"` // Username is the user name. @@ -110,7 +112,7 @@ type Mount struct { // Destination is the absolute path where the mount will be placed in the container. Destination string `json:"destination"` // Type specifies the mount kind. - Type string `json:"type,omitempty" platform:"linux,solaris"` + Type string `json:"type,omitempty" platform:"linux,solaris,zos"` // Source specifies the source path of the mount. 
Source string `json:"source,omitempty"` // Options are fstab style mount options. @@ -178,7 +180,7 @@ type Linux struct { // MountLabel specifies the selinux context for the mounts in the container. MountLabel string `json:"mountLabel,omitempty"` // IntelRdt contains Intel Resource Director Technology (RDT) information for - // handling resource constraints (e.g., L3 cache, memory bandwidth) for the container + // handling resource constraints and monitoring metrics (e.g., L3 cache, memory bandwidth) for the container IntelRdt *LinuxIntelRdt `json:"intelRdt,omitempty"` // Personality contains configuration for the Linux personality syscall Personality *LinuxPersonality `json:"personality,omitempty"` @@ -683,8 +685,9 @@ type LinuxSyscall struct { Args []LinuxSeccompArg `json:"args,omitempty"` } -// LinuxIntelRdt has container runtime resource constraints for Intel RDT -// CAT and MBA features which introduced in Linux 4.10 and 4.12 kernel +// LinuxIntelRdt has container runtime resource constraints for Intel RDT CAT and MBA +// features and flags enabling Intel RDT CMT and MBM features. +// Intel RDT features are available in Linux 4.14 and newer kernel versions. type LinuxIntelRdt struct { // The identity for RDT Class of Service ClosID string `json:"closID,omitempty"` @@ -697,4 +700,36 @@ type LinuxIntelRdt struct { // The unit of memory bandwidth is specified in "percentages" by // default, and in "MBps" if MBA Software Controller is enabled. MemBwSchema string `json:"memBwSchema,omitempty"` + + // EnableCMT is the flag to indicate if the Intel RDT CMT is enabled. CMT (Cache Monitoring Technology) supports monitoring of + // the last-level cache (LLC) occupancy for the container. + EnableCMT bool `json:"enableCMT,omitempty"` + + // EnableMBM is the flag to indicate if the Intel RDT MBM is enabled. MBM (Memory Bandwidth Monitoring) supports monitoring of + // total and local memory bandwidth for the container. 
+ EnableMBM bool `json:"enableMBM,omitempty"` +} + +// ZOS contains platform-specific configuration for z/OS based containers. +type ZOS struct { + // Devices are a list of device nodes that are created for the container + Devices []ZOSDevice `json:"devices,omitempty"` +} + +// ZOSDevice represents the mknod information for a z/OS special device file +type ZOSDevice struct { + // Path to the device. + Path string `json:"path"` + // Device type, block, char, etc. + Type string `json:"type"` + // Major is the device's major number. + Major int64 `json:"major"` + // Minor is the device's minor number. + Minor int64 `json:"minor"` + // FileMode permission bits for the device. + FileMode *os.FileMode `json:"fileMode,omitempty"` + // UID of the device. + UID *uint32 `json:"uid,omitempty"` + // Gid of the device. + GID *uint32 `json:"gid,omitempty"` } diff --git a/vendor/github.com/opencontainers/selinux/go-selinux/rchcon.go b/vendor/github.com/opencontainers/selinux/go-selinux/rchcon.go index feb739d3261..897ecbac41c 100644 --- a/vendor/github.com/opencontainers/selinux/go-selinux/rchcon.go +++ b/vendor/github.com/opencontainers/selinux/go-selinux/rchcon.go @@ -12,7 +12,7 @@ import ( func rchcon(fpath, label string) error { return pwalkdir.Walk(fpath, func(p string, _ fs.DirEntry, _ error) error { - e := lSetFileLabel(p, label) + e := setFileLabel(p, label) // Walk a file tree can race with removal, so ignore ENOENT. 
if errors.Is(e, os.ErrNotExist) { return nil diff --git a/vendor/github.com/opencontainers/selinux/go-selinux/rchcon_go115.go b/vendor/github.com/opencontainers/selinux/go-selinux/rchcon_go115.go index ecc7abfac5e..2c8b033ce05 100644 --- a/vendor/github.com/opencontainers/selinux/go-selinux/rchcon_go115.go +++ b/vendor/github.com/opencontainers/selinux/go-selinux/rchcon_go115.go @@ -11,7 +11,7 @@ import ( func rchcon(fpath, label string) error { return pwalk.Walk(fpath, func(p string, _ os.FileInfo, _ error) error { - e := lSetFileLabel(p, label) + e := setFileLabel(p, label) // Walk a file tree can race with removal, so ignore ENOENT. if errors.Is(e, os.ErrNotExist) { return nil diff --git a/vendor/github.com/opencontainers/selinux/go-selinux/selinux.go b/vendor/github.com/opencontainers/selinux/go-selinux/selinux.go index 5a59d151f67..cad467507a5 100644 --- a/vendor/github.com/opencontainers/selinux/go-selinux/selinux.go +++ b/vendor/github.com/opencontainers/selinux/go-selinux/selinux.go @@ -61,30 +61,16 @@ func ClassIndex(class string) (int, error) { return classIndex(class) } -// SetFileLabel sets the SELinux label for this path, following symlinks, -// or returns an error. +// SetFileLabel sets the SELinux label for this path or returns an error. func SetFileLabel(fpath string, label string) error { return setFileLabel(fpath, label) } -// LsetFileLabel sets the SELinux label for this path, not following symlinks, -// or returns an error. -func LsetFileLabel(fpath string, label string) error { - return lSetFileLabel(fpath, label) -} - -// FileLabel returns the SELinux label for this path, following symlinks, -// or returns an error. +// FileLabel returns the SELinux label for this path or returns an error. func FileLabel(fpath string) (string, error) { return fileLabel(fpath) } -// LfileLabel returns the SELinux label for this path, not following symlinks, -// or returns an error. 
-func LfileLabel(fpath string) (string, error) { - return lFileLabel(fpath) -} - // SetFSCreateLabel tells the kernel what label to use for all file system objects // created by this task. // Set the label to an empty string to return to the default label. Calls to SetFSCreateLabel diff --git a/vendor/github.com/opencontainers/selinux/go-selinux/selinux_linux.go b/vendor/github.com/opencontainers/selinux/go-selinux/selinux_linux.go index ee602ab96dd..b045843ad6e 100644 --- a/vendor/github.com/opencontainers/selinux/go-selinux/selinux_linux.go +++ b/vendor/github.com/opencontainers/selinux/go-selinux/selinux_linux.go @@ -316,9 +316,8 @@ func classIndex(class string) (int, error) { return index, nil } -// lSetFileLabel sets the SELinux label for this path, not following symlinks, -// or returns an error. -func lSetFileLabel(fpath string, label string) error { +// setFileLabel sets the SELinux label for this path or returns an error. +func setFileLabel(fpath string, label string) error { if fpath == "" { return ErrEmptyPath } @@ -335,50 +334,12 @@ func lSetFileLabel(fpath string, label string) error { return nil } -// setFileLabel sets the SELinux label for this path, following symlinks, -// or returns an error. -func setFileLabel(fpath string, label string) error { - if fpath == "" { - return ErrEmptyPath - } - for { - err := unix.Setxattr(fpath, xattrNameSelinux, []byte(label), 0) - if err == nil { - break - } - if err != unix.EINTR { //nolint:errorlint // unix errors are bare - return &os.PathError{Op: "setxattr", Path: fpath, Err: err} - } - } - - return nil -} - -// fileLabel returns the SELinux label for this path, following symlinks, -// or returns an error. +// fileLabel returns the SELinux label for this path or returns an error. 
func fileLabel(fpath string) (string, error) { if fpath == "" { return "", ErrEmptyPath } - label, err := getxattr(fpath, xattrNameSelinux) - if err != nil { - return "", &os.PathError{Op: "getxattr", Path: fpath, Err: err} - } - // Trim the NUL byte at the end of the byte buffer, if present. - if len(label) > 0 && label[len(label)-1] == '\x00' { - label = label[:len(label)-1] - } - return string(label), nil -} - -// lFileLabel returns the SELinux label for this path, not following symlinks, -// or returns an error. -func lFileLabel(fpath string) (string, error) { - if fpath == "" { - return "", ErrEmptyPath - } - label, err := lgetxattr(fpath, xattrNameSelinux) if err != nil { return "", &os.PathError{Op: "lgetxattr", Path: fpath, Err: err} diff --git a/vendor/github.com/opencontainers/selinux/go-selinux/selinux_stub.go b/vendor/github.com/opencontainers/selinux/go-selinux/selinux_stub.go index 78743b020c9..42657759c38 100644 --- a/vendor/github.com/opencontainers/selinux/go-selinux/selinux_stub.go +++ b/vendor/github.com/opencontainers/selinux/go-selinux/selinux_stub.go @@ -17,18 +17,10 @@ func setFileLabel(fpath string, label string) error { return nil } -func lSetFileLabel(fpath string, label string) error { - return nil -} - func fileLabel(fpath string) (string, error) { return "", nil } -func lFileLabel(fpath string) (string, error) { - return "", nil -} - func setFSCreateLabel(label string) error { return nil } diff --git a/vendor/github.com/opencontainers/selinux/go-selinux/xattrs_linux.go b/vendor/github.com/opencontainers/selinux/go-selinux/xattrs_linux.go index 9e473ca168f..c6b0a7f2655 100644 --- a/vendor/github.com/opencontainers/selinux/go-selinux/xattrs_linux.go +++ b/vendor/github.com/opencontainers/selinux/go-selinux/xattrs_linux.go @@ -36,36 +36,3 @@ func doLgetxattr(path, attr string, dest []byte) (int, error) { } } } - -// getxattr returns a []byte slice containing the value of -// an extended attribute attr set for path. 
-func getxattr(path, attr string) ([]byte, error) { - // Start with a 128 length byte array - dest := make([]byte, 128) - sz, errno := dogetxattr(path, attr, dest) - for errno == unix.ERANGE { //nolint:errorlint // unix errors are bare - // Buffer too small, use zero-sized buffer to get the actual size - sz, errno = dogetxattr(path, attr, []byte{}) - if errno != nil { - return nil, errno - } - - dest = make([]byte, sz) - sz, errno = dogetxattr(path, attr, dest) - } - if errno != nil { - return nil, errno - } - - return dest[:sz], nil -} - -// dogetxattr is a wrapper that retries on EINTR -func dogetxattr(path, attr string, dest []byte) (int, error) { - for { - sz, err := unix.Getxattr(path, attr, dest) - if err != unix.EINTR { //nolint:errorlint // unix errors are bare - return sz, err - } - } -} diff --git a/vendor/github.com/seccomp/libseccomp-golang/.golangci.yml b/vendor/github.com/seccomp/libseccomp-golang/.golangci.yml deleted file mode 100644 index 7df8aa19838..00000000000 --- a/vendor/github.com/seccomp/libseccomp-golang/.golangci.yml +++ /dev/null @@ -1,4 +0,0 @@ -# For documentation, see https://golangci-lint.run/usage/configuration/ -linters: - enable: - - gofumpt diff --git a/vendor/github.com/seccomp/libseccomp-golang/.travis.yml b/vendor/github.com/seccomp/libseccomp-golang/.travis.yml new file mode 100644 index 00000000000..5240d462280 --- /dev/null +++ b/vendor/github.com/seccomp/libseccomp-golang/.travis.yml @@ -0,0 +1,57 @@ +# Travis CI configuration for libseccomp-golang + +# https://docs.travis-ci.com/user/reference/bionic +# https://wiki.ubuntu.com/Releases + +dist: bionic +sudo: false + +notifications: + email: + on_success: always + on_failure: always + +arch: + - amd64 + +os: + - linux + +language: go + +jobs: + include: + - name: "last libseccomp 2.5.0" + env: + - SECCOMP_VER=2.5.0 + - SECCOMP_SHA256SUM=1ffa7038d2720ad191919816db3479295a4bcca1ec14e02f672539f4983014f3 + - name: "compat libseccomp 2.4.4" + env: + - SECCOMP_VER=2.4.4 + - 
SECCOMP_SHA256SUM=4e79738d1ef3c9b7ca9769f1f8b8d84fc17143c2c1c432e53b9c64787e0ff3eb + - name: "compat libseccomp 2.2.1" + env: + - SECCOMP_VER=2.2.1 + - SECCOMP_SHA256SUM=0ba1789f54786c644af54cdffc9fd0dd0a8bb2b2ee153933f658855d2851a740 + +addons: + apt: + packages: + - build-essential + - astyle + - golint + - gperf + +install: + - go get -u golang.org/x/lint/golint + +# run all of the tests independently, fail if any of the tests error +script: + - wget https://github.com/seccomp/libseccomp/releases/download/v$SECCOMP_VER/libseccomp-$SECCOMP_VER.tar.gz + - echo $SECCOMP_SHA256SUM libseccomp-$SECCOMP_VER.tar.gz | sha256sum -c + - tar xf libseccomp-$SECCOMP_VER.tar.gz + - pushd libseccomp-$SECCOMP_VER && ./configure --prefix=/opt/libseccomp-$SECCOMP_VER && make && sudo make install && popd + - make check-syntax + - make lint + - PKG_CONFIG_PATH=/opt/libseccomp-$SECCOMP_VER/lib/pkgconfig LD_LIBRARY_PATH=/opt/libseccomp-$SECCOMP_VER/lib make vet + - PKG_CONFIG_PATH=/opt/libseccomp-$SECCOMP_VER/lib/pkgconfig LD_LIBRARY_PATH=/opt/libseccomp-$SECCOMP_VER/lib make test diff --git a/vendor/github.com/seccomp/libseccomp-golang/CONTRIBUTING.md b/vendor/github.com/seccomp/libseccomp-golang/CONTRIBUTING.md index c2fc80d5af6..d6862cbd5f9 100644 --- a/vendor/github.com/seccomp/libseccomp-golang/CONTRIBUTING.md +++ b/vendor/github.com/seccomp/libseccomp-golang/CONTRIBUTING.md @@ -1,23 +1,31 @@ -How to Submit Patches to the libseccomp-golang Project +How to Submit Patches to the libseccomp Project =============================================================================== https://github.com/seccomp/libseccomp-golang This document is intended to act as a guide to help you contribute to the -libseccomp-golang project. It is not perfect, and there will always be -exceptions to the rules described here, but by following the instructions below -you should have a much easier time getting your work merged with the upstream +libseccomp project. 
It is not perfect, and there will always be exceptions +to the rules described here, but by following the instructions below you +should have a much easier time getting your work merged with the upstream project. ## Test Your Code Using Existing Tests -A number of tests and lint related recipes are provided in the Makefile, if -you want to run the standard regression tests, you can execute the following: +There are two possible tests you can run to verify your code. The first +test is used to check the formatting and coding style of your changes, you +can run the test with the following command: - # make check + # make check-syntax + +... if there are any problems with your changes a diff/patch will be shown +which indicates the problems and how to fix them. -In order to use it, the 'golangci-lint' tool is needed, which can be found at: +The second possible test is used to ensure the sanity of your code changes +and to test these changes against the included tests. You can run the test +with the following command: + + # make check -* https://github.com/golangci/golangci-lint +... if there are any faults or errors they will be displayed. ## Add New Tests for New Functionality diff --git a/vendor/github.com/seccomp/libseccomp-golang/Makefile b/vendor/github.com/seccomp/libseccomp-golang/Makefile index 530f5b4adbc..38cfa852cd6 100644 --- a/vendor/github.com/seccomp/libseccomp-golang/Makefile +++ b/vendor/github.com/seccomp/libseccomp-golang/Makefile @@ -4,7 +4,7 @@ all: check-build -check: lint test +check: vet test check-build: go build @@ -16,7 +16,7 @@ fix-syntax: gofmt -w . vet: - go vet -v ./... + go vet -v # Previous bugs have made the tests freeze until the timeout. Golang default # timeout for tests is 10 minutes, which is too long, considering current tests @@ -28,4 +28,5 @@ test: go test -v -timeout $(TEST_TIMEOUT) lint: - golangci-lint run . 
+ @$(if $(shell which golint),true,$(error "install golint and include it in your PATH")) + golint -set_exit_status diff --git a/vendor/github.com/seccomp/libseccomp-golang/README.md b/vendor/github.com/seccomp/libseccomp-golang/README.md index 6430f1c9e25..806a5ddf29b 100644 --- a/vendor/github.com/seccomp/libseccomp-golang/README.md +++ b/vendor/github.com/seccomp/libseccomp-golang/README.md @@ -2,9 +2,7 @@ =============================================================================== https://github.com/seccomp/libseccomp-golang -[![Go Reference](https://pkg.go.dev/badge/github.com/seccomp/libseccomp-golang.svg)](https://pkg.go.dev/github.com/seccomp/libseccomp-golang) -[![validate](https://github.com/seccomp/libseccomp-golang/actions/workflows/validate.yml/badge.svg)](https://github.com/seccomp/libseccomp-golang/actions/workflows/validate.yml) -[![test](https://github.com/seccomp/libseccomp-golang/actions/workflows/test.yml/badge.svg)](https://github.com/seccomp/libseccomp-golang/actions/workflows/test.yml) +[![Build Status](https://img.shields.io/travis/seccomp/libseccomp-golang/main.svg)](https://travis-ci.org/seccomp/libseccomp-golang) The libseccomp library provides an easy to use, platform independent, interface to the Linux Kernel's syscall filtering mechanism. The libseccomp API is @@ -28,14 +26,26 @@ list. * https://groups.google.com/d/forum/libseccomp -Documentation for this package is also available at: +Documentation is also available at: -* https://pkg.go.dev/github.com/seccomp/libseccomp-golang +* https://godoc.org/github.com/seccomp/libseccomp-golang ## Installing the package +The libseccomp-golang bindings require at least Go v1.2.1 and GCC v4.8.4; +earlier versions may yield unpredictable results. 
If you meet these +requirements you can install this package using the command below: + # go get github.com/seccomp/libseccomp-golang -## Contributing +## Testing the Library + +A number of tests and lint related recipes are provided in the Makefile, if +you want to run the standard regression tests, you can excute the following: + + # make check + +In order to execute the 'make lint' recipe the 'golint' tool is needed, it +can be found at: -See [CONTRIBUTING.md](CONTRIBUTING.md). +* https://github.com/golang/lint diff --git a/vendor/github.com/seccomp/libseccomp-golang/SECURITY.md b/vendor/github.com/seccomp/libseccomp-golang/SECURITY.md deleted file mode 100644 index c448faa8e80..00000000000 --- a/vendor/github.com/seccomp/libseccomp-golang/SECURITY.md +++ /dev/null @@ -1,47 +0,0 @@ -The libseccomp-golang Security Vulnerability Handling Process -=============================================================================== -https://github.com/seccomp/libseccomp-golang - -This document document attempts to describe the processes through which -sensitive security relevant bugs can be responsibly disclosed to the -libseccomp-golang project and how the project maintainers should handle these -reports. Just like the other libseccomp-golang process documents, this -document should be treated as a guiding document and not a hard, unyielding set -of regulations; the bug reporters and project maintainers are encouraged to -work together to address the issues as best they can, in a manner which works -best for all parties involved. - -### Reporting Problems - -Problems with the libseccomp-golang library that are not suitable for immediate -public disclosure should be emailed to the current libseccomp-golang -maintainers, the list is below. We typically request at most a 90 day time -period to address the issue before it is made public, but we will make every -effort to address the issue as quickly as possible and shorten the disclosure -window. 
- -* Paul Moore, paul@paul-moore.com -* Tom Hromatka, tom.hromatka@oracle.com - -### Resolving Sensitive Security Issues - -Upon disclosure of a bug, the maintainers should work together to investigate -the problem and decide on a solution. In order to prevent an early disclosure -of the problem, those working on the solution should do so privately and -outside of the traditional libseccomp-golang development practices. One -possible solution to this is to leverage the GitHub "Security" functionality to -create a private development fork that can be shared among the maintainers, and -optionally the reporter. A placeholder GitHub issue may be created, but -details should remain extremely limited until such time as the problem has been -fixed and responsibly disclosed. If a CVE, or other tag, has been assigned to -the problem, the GitHub issue title should include the vulnerability tag once -the problem has been disclosed. - -### Public Disclosure - -Whenever possible, responsible reporting and patching practices should be -followed, including notification to the linux-distros and oss-security mailing -lists. 
- -* https://oss-security.openwall.org/wiki/mailing-lists/distros -* https://oss-security.openwall.org/wiki/mailing-lists/oss-security diff --git a/vendor/github.com/seccomp/libseccomp-golang/seccomp.go b/vendor/github.com/seccomp/libseccomp-golang/seccomp.go index 8dad12fdbb9..e9b92e2219a 100644 --- a/vendor/github.com/seccomp/libseccomp-golang/seccomp.go +++ b/vendor/github.com/seccomp/libseccomp-golang/seccomp.go @@ -1,3 +1,5 @@ +// +build linux + // Public API specification for libseccomp Go bindings // Contains public API for the bindings @@ -16,36 +18,48 @@ import ( "unsafe" ) +// C wrapping code + +// To compile libseccomp-golang against a specific version of libseccomp: +// cd ../libseccomp && mkdir -p prefix +// ./configure --prefix=$PWD/prefix && make && make install +// cd ../libseccomp-golang +// PKG_CONFIG_PATH=$PWD/../libseccomp/prefix/lib/pkgconfig/ make +// LD_PRELOAD=$PWD/../libseccomp/prefix/lib/libseccomp.so.2.5.0 PKG_CONFIG_PATH=$PWD/../libseccomp/prefix/lib/pkgconfig/ make test + +// #cgo pkg-config: libseccomp // #include // #include import "C" // Exported types -// VersionError represents an error when either the system libseccomp version -// or the kernel version is too old to perform the operation requested. +// VersionError denotes that the system libseccomp version is incompatible +// with this package. type VersionError struct { - op string // operation that failed or would fail - major, minor, micro uint // minimally required libseccomp version - curAPI, minAPI uint // current and minimally required API versions + message string + minimum string } func init() { // This forces the cgo libseccomp to initialize its internal API support state, // which is necessary on older versions of libseccomp in order to work // correctly. 
- _, _ = getAPI() + GetAPI() } func (e VersionError) Error() string { - if e.minAPI != 0 { - return fmt.Sprintf("%s requires libseccomp >= %d.%d.%d and API level >= %d "+ - "(current version: %d.%d.%d, API level: %d)", - e.op, e.major, e.minor, e.micro, e.minAPI, - verMajor, verMinor, verMicro, e.curAPI) + messageStr := "" + if e.message != "" { + messageStr = e.message + ": " } - return fmt.Sprintf("%s requires libseccomp >= %d.%d.%d (current version: %d.%d.%d)", - e.op, e.major, e.minor, e.micro, verMajor, verMinor, verMicro) + minimumStr := "" + if e.minimum != "" { + minimumStr = e.minimum + } else { + minimumStr = "2.2.0" + } + return fmt.Sprintf("Libseccomp version too low: %sminimum supported is %s: detected %d.%d.%d", messageStr, minimumStr, verMajor, verMinor, verMicro) } // ScmpArch represents a CPU architecture. Seccomp can restrict syscalls on a @@ -134,46 +148,44 @@ const ( // variables are invalid ArchInvalid ScmpArch = iota // ArchNative is the native architecture of the kernel - ArchNative + ArchNative ScmpArch = iota // ArchX86 represents 32-bit x86 syscalls - ArchX86 + ArchX86 ScmpArch = iota // ArchAMD64 represents 64-bit x86-64 syscalls - ArchAMD64 + ArchAMD64 ScmpArch = iota // ArchX32 represents 64-bit x86-64 syscalls (32-bit pointers) - ArchX32 + ArchX32 ScmpArch = iota // ArchARM represents 32-bit ARM syscalls - ArchARM + ArchARM ScmpArch = iota // ArchARM64 represents 64-bit ARM syscalls - ArchARM64 + ArchARM64 ScmpArch = iota // ArchMIPS represents 32-bit MIPS syscalls - ArchMIPS + ArchMIPS ScmpArch = iota // ArchMIPS64 represents 64-bit MIPS syscalls - ArchMIPS64 + ArchMIPS64 ScmpArch = iota // ArchMIPS64N32 represents 64-bit MIPS syscalls (32-bit pointers) - ArchMIPS64N32 + ArchMIPS64N32 ScmpArch = iota // ArchMIPSEL represents 32-bit MIPS syscalls (little endian) - ArchMIPSEL + ArchMIPSEL ScmpArch = iota // ArchMIPSEL64 represents 64-bit MIPS syscalls (little endian) - ArchMIPSEL64 + ArchMIPSEL64 ScmpArch = iota // ArchMIPSEL64N32 
represents 64-bit MIPS syscalls (little endian, // 32-bit pointers) - ArchMIPSEL64N32 + ArchMIPSEL64N32 ScmpArch = iota // ArchPPC represents 32-bit POWERPC syscalls - ArchPPC + ArchPPC ScmpArch = iota // ArchPPC64 represents 64-bit POWER syscalls (big endian) - ArchPPC64 + ArchPPC64 ScmpArch = iota // ArchPPC64LE represents 64-bit POWER syscalls (little endian) - ArchPPC64LE + ArchPPC64LE ScmpArch = iota // ArchS390 represents 31-bit System z/390 syscalls - ArchS390 + ArchS390 ScmpArch = iota // ArchS390X represents 64-bit System z/390 syscalls - ArchS390X + ArchS390X ScmpArch = iota // ArchPARISC represents 32-bit PA-RISC - ArchPARISC + ArchPARISC ScmpArch = iota // ArchPARISC64 represents 64-bit PA-RISC - ArchPARISC64 - // ArchRISCV64 represents RISCV64 - ArchRISCV64 + ArchPARISC64 ScmpArch = iota ) const ( @@ -182,36 +194,34 @@ const ( // ActInvalid is a placeholder to ensure uninitialized ScmpAction // variables are invalid ActInvalid ScmpAction = iota - // ActKillThread kills the thread that violated the rule. + // ActKill kills the thread that violated the rule. It is the same as ActKillThread. // All other threads from the same thread group will continue to execute. - ActKillThread + ActKill ScmpAction = iota // ActTrap throws SIGSYS - ActTrap + ActTrap ScmpAction = iota // ActNotify triggers a userspace notification. This action is only usable when // libseccomp API level 6 or higher is supported. - ActNotify + ActNotify ScmpAction = iota // ActErrno causes the syscall to return a negative error code. This // code can be set with the SetReturnCode method - ActErrno + ActErrno ScmpAction = iota // ActTrace causes the syscall to notify tracing processes with the // given error code. This code can be set with the SetReturnCode method - ActTrace + ActTrace ScmpAction = iota // ActAllow permits the syscall to continue execution - ActAllow + ActAllow ScmpAction = iota // ActLog permits the syscall to continue execution after logging it. 
// This action is only usable when libseccomp API level 3 or higher is // supported. - ActLog + ActLog ScmpAction = iota + // ActKillThread kills the thread that violated the rule. It is the same as ActKill. + // All other threads from the same thread group will continue to execute. + ActKillThread ScmpAction = iota // ActKillProcess kills the process that violated the rule. // All threads in the thread group are also terminated. // This action is only usable when libseccomp API level 3 or higher is // supported. - ActKillProcess - // ActKill kills the thread that violated the rule. - // All other threads from the same thread group will continue to execute. - // - // Deprecated: use ActKillThread - ActKill = ActKillThread + ActKillProcess ScmpAction = iota ) const ( @@ -224,35 +234,36 @@ const ( CompareInvalid ScmpCompareOp = iota // CompareNotEqual returns true if the argument is not equal to the // given value - CompareNotEqual + CompareNotEqual ScmpCompareOp = iota // CompareLess returns true if the argument is less than the given value - CompareLess + CompareLess ScmpCompareOp = iota // CompareLessOrEqual returns true if the argument is less than or equal // to the given value - CompareLessOrEqual + CompareLessOrEqual ScmpCompareOp = iota // CompareEqual returns true if the argument is equal to the given value - CompareEqual + CompareEqual ScmpCompareOp = iota // CompareGreaterEqual returns true if the argument is greater than or // equal to the given value - CompareGreaterEqual + CompareGreaterEqual ScmpCompareOp = iota // CompareGreater returns true if the argument is greater than the given // value - CompareGreater - // CompareMaskedEqual returns true if the masked argument value is - // equal to the masked datum value. Mask is the first argument, and - // datum is the second one. 
- CompareMaskedEqual + CompareGreater ScmpCompareOp = iota + // CompareMaskedEqual returns true if the argument is equal to the given + // value, when masked (bitwise &) against the second given value + CompareMaskedEqual ScmpCompareOp = iota ) -// ErrSyscallDoesNotExist represents an error condition where -// libseccomp is unable to resolve the syscall -var ErrSyscallDoesNotExist = fmt.Errorf("could not resolve syscall name") +var ( + // ErrSyscallDoesNotExist represents an error condition where + // libseccomp is unable to resolve the syscall + ErrSyscallDoesNotExist = fmt.Errorf("could not resolve syscall name") +) const ( // Userspace notification response flags // NotifRespFlagContinue tells the kernel to continue executing the system - // call that triggered the notification. Must only be used when the notification + // call that triggered the notification. Must only be used when the notication // response's error is 0. NotifRespFlagContinue uint32 = 1 ) @@ -303,8 +314,6 @@ func GetArchFromString(arch string) (ScmpArch, error) { return ArchPARISC, nil case "parisc64": return ArchPARISC64, nil - case "riscv64": - return ArchRISCV64, nil default: return ArchInvalid, fmt.Errorf("cannot convert unrecognized string %q", arch) } @@ -349,8 +358,6 @@ func (a ScmpArch) String() string { return "parisc" case ArchPARISC64: return "parisc64" - case ArchRISCV64: - return "riscv64" case ArchNative: return "native" case ArchInvalid: @@ -387,7 +394,7 @@ func (a ScmpCompareOp) String() string { // String returns a string representation of a seccomp match action func (a ScmpAction) String() string { switch a & 0xFFFF { - case ActKillThread: + case ActKill, ActKillThread: return "Action: Kill thread" case ActKillProcess: return "Action: Kill process" @@ -549,8 +556,8 @@ func MakeCondition(arg uint, comparison ScmpCompareOp, values ...uint64) (ScmpCo return condStruct, err } - if err := sanitizeCompareOp(comparison); err != nil { - return condStruct, err + if comparison == 
CompareInvalid { + return condStruct, fmt.Errorf("invalid comparison operator") } else if arg > 5 { return condStruct, fmt.Errorf("syscalls only have up to 6 arguments (%d given)", arg) } else if len(values) > 2 { @@ -867,8 +874,10 @@ func (f *ScmpFilter) GetNoNewPrivsBit() (bool, error) { func (f *ScmpFilter) GetLogBit() (bool, error) { log, err := f.getFilterAttr(filterAttrLog) if err != nil { - if e := checkAPI("GetLogBit", 3, 2, 4, 0); e != nil { - err = e + // Ignore error, if not supported returns apiLevel == 0 + apiLevel, _ := GetAPI() + if apiLevel < 3 { + return false, fmt.Errorf("getting the log bit is only supported in libseccomp 2.4.0 and newer with API level 3 or higher") } return false, err @@ -890,8 +899,9 @@ func (f *ScmpFilter) GetLogBit() (bool, error) { func (f *ScmpFilter) GetSSB() (bool, error) { ssb, err := f.getFilterAttr(filterAttrSSB) if err != nil { - if e := checkAPI("GetSSB", 4, 2, 5, 0); e != nil { - err = e + api, apiErr := getAPI() + if (apiErr != nil && api == 0) || (apiErr == nil && api < 4) { + return false, fmt.Errorf("getting the SSB flag is only supported in libseccomp 2.5.0 and newer with API level 4 or higher") } return false, err @@ -904,42 +914,6 @@ func (f *ScmpFilter) GetSSB() (bool, error) { return true, nil } -// GetOptimize returns the current optimization level of the filter, -// or an error if an issue was encountered retrieving the value. -// See SetOptimize for more details. -func (f *ScmpFilter) GetOptimize() (int, error) { - level, err := f.getFilterAttr(filterAttrOptimize) - if err != nil { - if e := checkAPI("GetOptimize", 4, 2, 5, 0); e != nil { - err = e - } - - return 0, err - } - - return int(level), nil -} - -// GetRawRC returns the current state of RawRC flag, or an error -// if an issue was encountered retrieving the value. -// See SetRawRC for more details. 
-func (f *ScmpFilter) GetRawRC() (bool, error) { - rawrc, err := f.getFilterAttr(filterAttrRawRC) - if err != nil { - if e := checkAPI("GetRawRC", 4, 2, 5, 0); e != nil { - err = e - } - - return false, err - } - - if rawrc == 0 { - return false, nil - } - - return true, nil -} - // SetBadArchAction sets the default action taken on a syscall for an // architecture not in the filter, or an error if an issue was encountered // setting the value. @@ -979,8 +953,10 @@ func (f *ScmpFilter) SetLogBit(state bool) error { err := f.setFilterAttr(filterAttrLog, toSet) if err != nil { - if e := checkAPI("SetLogBit", 3, 2, 4, 0); e != nil { - err = e + // Ignore error, if not supported returns apiLevel == 0 + apiLevel, _ := GetAPI() + if apiLevel < 3 { + return fmt.Errorf("setting the log bit is only supported in libseccomp 2.4.0 and newer with API level 3 or higher") } } @@ -1000,52 +976,9 @@ func (f *ScmpFilter) SetSSB(state bool) error { err := f.setFilterAttr(filterAttrSSB, toSet) if err != nil { - if e := checkAPI("SetSSB", 4, 2, 5, 0); e != nil { - err = e - } - } - - return err -} - -// SetOptimize sets optimization level of the seccomp filter. By default -// libseccomp generates a set of sequential "if" statements for each rule in -// the filter. SetSyscallPriority can be used to prioritize the order for the -// default cause. The binary tree optimization sorts by syscall numbers and -// generates consistent O(log n) filter traversal for every rule in the filter. -// The binary tree may be advantageous for large filters. Note that -// SetSyscallPriority is ignored when level == 2. -// -// The different optimization levels are: -// 0: Reserved value, not currently used. -// 1: Rules sorted by priority and complexity (DEFAULT). -// 2: Binary tree sorted by syscall number. 
-func (f *ScmpFilter) SetOptimize(level int) error { - cLevel := C.uint32_t(level) - - err := f.setFilterAttr(filterAttrOptimize, cLevel) - if err != nil { - if e := checkAPI("SetOptimize", 4, 2, 5, 0); e != nil { - err = e - } - } - - return err -} - -// SetRawRC sets whether libseccomp should pass system error codes back to the -// caller, instead of the default ECANCELED. Defaults to false. -func (f *ScmpFilter) SetRawRC(state bool) error { - var toSet C.uint32_t = 0x0 - - if state { - toSet = 0x1 - } - - err := f.setFilterAttr(filterAttrRawRC, toSet) - if err != nil { - if e := checkAPI("SetRawRC", 4, 2, 5, 0); e != nil { - err = e + api, apiErr := getAPI() + if (apiErr != nil && api == 0) || (apiErr == nil && api < 4) { + return fmt.Errorf("setting the SSB flag is only supported in libseccomp 2.5.0 and newer with API level 4 or higher") } } @@ -1096,6 +1029,9 @@ func (f *ScmpFilter) AddRuleExact(call ScmpSyscall, action ScmpAction) error { // AddRuleConditional adds a single rule for a conditional action on a syscall. // Returns an error if an issue was encountered adding the rule. // All conditions must match for the rule to match. +// There is a bug in library versions below v2.2.1 which can, in some cases, +// cause conditions to be lost when more than one are used. Consequently, +// AddRuleConditional is disabled on library versions lower than v2.2.1 func (f *ScmpFilter) AddRuleConditional(call ScmpSyscall, action ScmpAction, conds []ScmpCondition) error { return f.addRuleGeneric(call, action, false, conds) } @@ -1107,6 +1043,9 @@ func (f *ScmpFilter) AddRuleConditional(call ScmpSyscall, action ScmpAction, con // The rule will function exactly as described, but it may not function identically // (or be able to be applied to) all architectures. // Returns an error if an issue was encountered adding the rule. +// There is a bug in library versions below v2.2.1 which can, in some cases, +// cause conditions to be lost when more than one are used. 
Consequently, +// AddRuleConditionalExact is disabled on library versions lower than v2.2.1 func (f *ScmpFilter) AddRuleConditionalExact(call ScmpSyscall, action ScmpAction, conds []ScmpCondition) error { return f.addRuleGeneric(call, action, true, conds) } diff --git a/vendor/github.com/seccomp/libseccomp-golang/seccomp_internal.go b/vendor/github.com/seccomp/libseccomp-golang/seccomp_internal.go index df4dfb7eba8..8dc7b296f3b 100644 --- a/vendor/github.com/seccomp/libseccomp-golang/seccomp_internal.go +++ b/vendor/github.com/seccomp/libseccomp-golang/seccomp_internal.go @@ -1,10 +1,11 @@ +// +build linux + // Internal functions for libseccomp Go bindings // No exported functions package seccomp import ( - "errors" "fmt" "syscall" ) @@ -26,10 +27,10 @@ import ( #include #include -#if (SCMP_VER_MAJOR < 2) || \ - (SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR < 3) || \ - (SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR == 3 && SCMP_VER_MICRO < 1) -#error This package requires libseccomp >= v2.3.1 +#if SCMP_VER_MAJOR < 2 +#error Minimum supported version of Libseccomp is v2.2.0 +#elif SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR < 2 +#error Minimum supported version of Libseccomp is v2.2.0 #endif #define ARCH_BAD ~0 @@ -64,10 +65,6 @@ const uint32_t C_ARCH_BAD = ARCH_BAD; #define SCMP_ARCH_PARISC64 ARCH_BAD #endif -#ifndef SCMP_ARCH_RISCV64 -#define SCMP_ARCH_RISCV64 ARCH_BAD -#endif - const uint32_t C_ARCH_NATIVE = SCMP_ARCH_NATIVE; const uint32_t C_ARCH_X86 = SCMP_ARCH_X86; const uint32_t C_ARCH_X86_64 = SCMP_ARCH_X86_64; @@ -87,7 +84,6 @@ const uint32_t C_ARCH_S390 = SCMP_ARCH_S390; const uint32_t C_ARCH_S390X = SCMP_ARCH_S390X; const uint32_t C_ARCH_PARISC = SCMP_ARCH_PARISC; const uint32_t C_ARCH_PARISC64 = SCMP_ARCH_PARISC64; -const uint32_t C_ARCH_RISCV64 = SCMP_ARCH_RISCV64; #ifndef SCMP_ACT_LOG #define SCMP_ACT_LOG 0x7ffc0000U @@ -117,25 +113,20 @@ const uint32_t C_ACT_NOTIFY = SCMP_ACT_NOTIFY; // The libseccomp SCMP_FLTATR_CTL_LOG member of the scmp_filter_attr enum was // added in 
v2.4.0 -#if SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR < 4 +#if (SCMP_VER_MAJOR < 2) || \ + (SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR < 4) #define SCMP_FLTATR_CTL_LOG _SCMP_FLTATR_MIN #endif - -// The following SCMP_FLTATR_* were added in libseccomp v2.5.0. #if SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR < 5 -#define SCMP_FLTATR_CTL_SSB _SCMP_FLTATR_MIN -#define SCMP_FLTATR_CTL_OPTIMIZE _SCMP_FLTATR_MIN -#define SCMP_FLTATR_API_SYSRAWRC _SCMP_FLTATR_MIN +#define SCMP_FLTATR_CTL_SSB _SCMP_FLTATR_MIN #endif -const uint32_t C_ATTRIBUTE_DEFAULT = (uint32_t)SCMP_FLTATR_ACT_DEFAULT; -const uint32_t C_ATTRIBUTE_BADARCH = (uint32_t)SCMP_FLTATR_ACT_BADARCH; -const uint32_t C_ATTRIBUTE_NNP = (uint32_t)SCMP_FLTATR_CTL_NNP; -const uint32_t C_ATTRIBUTE_TSYNC = (uint32_t)SCMP_FLTATR_CTL_TSYNC; -const uint32_t C_ATTRIBUTE_LOG = (uint32_t)SCMP_FLTATR_CTL_LOG; -const uint32_t C_ATTRIBUTE_SSB = (uint32_t)SCMP_FLTATR_CTL_SSB; -const uint32_t C_ATTRIBUTE_OPTIMIZE = (uint32_t)SCMP_FLTATR_CTL_OPTIMIZE; -const uint32_t C_ATTRIBUTE_SYSRAWRC = (uint32_t)SCMP_FLTATR_API_SYSRAWRC; +const uint32_t C_ATTRIBUTE_DEFAULT = (uint32_t)SCMP_FLTATR_ACT_DEFAULT; +const uint32_t C_ATTRIBUTE_BADARCH = (uint32_t)SCMP_FLTATR_ACT_BADARCH; +const uint32_t C_ATTRIBUTE_NNP = (uint32_t)SCMP_FLTATR_CTL_NNP; +const uint32_t C_ATTRIBUTE_TSYNC = (uint32_t)SCMP_FLTATR_CTL_TSYNC; +const uint32_t C_ATTRIBUTE_LOG = (uint32_t)SCMP_FLTATR_CTL_LOG; +const uint32_t C_ATTRIBUTE_SSB = (uint32_t)SCMP_FLTATR_CTL_SSB; const int C_CMP_NE = (int)SCMP_CMP_NE; const int C_CMP_LT = (int)SCMP_CMP_LT; @@ -182,7 +173,8 @@ unsigned int get_micro_version() #endif // The libseccomp API level functions were added in v2.4.0 -#if SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR < 4 +#if (SCMP_VER_MAJOR < 2) || \ + (SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR < 4) const unsigned int seccomp_api_get(void) { // libseccomp-golang requires libseccomp v2.2.0, at a minimum, which @@ -225,7 +217,8 @@ void add_struct_arg_cmp( } // The seccomp notify API functions were added in 
v2.5.0 -#if SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR < 5 +#if (SCMP_VER_MAJOR < 2) || \ + (SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR < 5) struct seccomp_data { int nr; @@ -277,13 +270,11 @@ type scmpFilterAttr uint32 const ( filterAttrActDefault scmpFilterAttr = iota - filterAttrActBadArch - filterAttrNNP - filterAttrTsync - filterAttrLog - filterAttrSSB - filterAttrOptimize - filterAttrRawRC + filterAttrActBadArch scmpFilterAttr = iota + filterAttrNNP scmpFilterAttr = iota + filterAttrTsync scmpFilterAttr = iota + filterAttrLog scmpFilterAttr = iota + filterAttrSSB scmpFilterAttr = iota ) const ( @@ -291,9 +282,9 @@ const ( scmpError C.int = -1 // Comparison boundaries to check for architecture validity archStart ScmpArch = ArchNative - archEnd ScmpArch = ArchRISCV64 + archEnd ScmpArch = ArchPARISC64 // Comparison boundaries to check for action validity - actionStart ScmpAction = ActKillThread + actionStart ScmpAction = ActKill actionEnd ScmpAction = ActKillProcess // Comparison boundaries to check for comparison operator validity compareOpStart ScmpCompareOp = CompareNotEqual @@ -301,9 +292,8 @@ const ( ) var ( - // errBadFilter is thrown on bad filter context. - errBadFilter = errors.New("filter is invalid or uninitialized") - errDefAction = errors.New("requested action matches default action of filter") + // Error thrown on bad filter context + errBadFilter = fmt.Errorf("filter is invalid or uninitialized") // Constants representing library major, minor, and micro versions verMajor = uint(C.get_major_version()) verMinor = uint(C.get_minor_version()) @@ -312,28 +302,19 @@ var ( // Nonexported functions -// checkVersion returns an error if the libseccomp version being used -// is less than the one specified by major, minor, and micro arguments. -// Argument op is an arbitrary non-empty operation description, which -// is used as a part of the error message returned. -// -// Most users should use checkAPI instead. 
-func checkVersion(op string, major, minor, micro uint) error { - if (verMajor > major) || +// Check if library version is greater than or equal to the given one +func checkVersionAbove(major, minor, micro uint) bool { + return (verMajor > major) || (verMajor == major && verMinor > minor) || - (verMajor == major && verMinor == minor && verMicro >= micro) { - return nil - } - return &VersionError{ - op: op, - major: major, - minor: minor, - micro: micro, - } + (verMajor == major && verMinor == minor && verMicro >= micro) } +// Ensure that the library is supported, i.e. >= 2.2.0. func ensureSupportedVersion() error { - return checkVersion("seccomp", 2, 3, 1) + if !checkVersionAbove(2, 2, 0) { + return VersionError{} + } + return nil } // Get the API level @@ -425,10 +406,8 @@ func (f *ScmpFilter) addRuleWrapper(call ScmpSyscall, action ScmpAction, exact b switch e := errRc(retCode); e { case syscall.EFAULT: return fmt.Errorf("unrecognized syscall %#x", int32(call)) - // libseccomp >= v2.5.0 returns EACCES, older versions return EPERM. - // TODO: remove EPERM once libseccomp < v2.5.0 is not supported. 
- case syscall.EPERM, syscall.EACCES: - return errDefAction + case syscall.EPERM: + return fmt.Errorf("requested action matches default action of filter") case syscall.EINVAL: return fmt.Errorf("two checks on same syscall argument") default: @@ -453,6 +432,14 @@ func (f *ScmpFilter) addRuleGeneric(call ScmpSyscall, action ScmpAction, exact b return err } } else { + // We don't support conditional filtering in library version v2.1 + if !checkVersionAbove(2, 2, 1) { + return VersionError{ + message: "conditional filtering is not supported", + minimum: "2.2.1", + } + } + argsArr := C.make_arg_cmp_array(C.uint(len(conds))) if argsArr == nil { return fmt.Errorf("error allocating memory for conditions") @@ -549,8 +536,6 @@ func archFromNative(a C.uint32_t) (ScmpArch, error) { return ArchPARISC, nil case C.C_ARCH_PARISC64: return ArchPARISC64, nil - case C.C_ARCH_RISCV64: - return ArchRISCV64, nil default: return 0x0, fmt.Errorf("unrecognized architecture %#x", uint32(a)) } @@ -595,8 +580,6 @@ func (a ScmpArch) toNative() C.uint32_t { return C.C_ARCH_PARISC case ArchPARISC64: return C.C_ARCH_PARISC64 - case ArchRISCV64: - return C.C_ARCH_RISCV64 case ArchNative: return C.C_ARCH_NATIVE default: @@ -629,6 +612,8 @@ func (a ScmpCompareOp) toNative() C.int { func actionFromNative(a C.uint32_t) (ScmpAction, error) { aTmp := a & 0xFFFF switch a & 0xFFFF0000 { + case C.C_ACT_KILL: + return ActKill, nil case C.C_ACT_KILL_PROCESS: return ActKillProcess, nil case C.C_ACT_KILL_THREAD: @@ -653,6 +638,8 @@ func actionFromNative(a C.uint32_t) (ScmpAction, error) { // Only use with sanitized actions, no error handling func (a ScmpAction) toNative() C.uint32_t { switch a & 0xFFFF { + case ActKill: + return C.C_ACT_KILL case ActKillProcess: return C.C_ACT_KILL_PROCESS case ActKillThread: @@ -689,15 +676,15 @@ func (a scmpFilterAttr) toNative() uint32 { return uint32(C.C_ATTRIBUTE_LOG) case filterAttrSSB: return uint32(C.C_ATTRIBUTE_SSB) - case filterAttrOptimize: - return 
uint32(C.C_ATTRIBUTE_OPTIMIZE) - case filterAttrRawRC: - return uint32(C.C_ATTRIBUTE_SYSRAWRC) default: return 0x0 } } +func (a ScmpSyscall) toNative() C.uint32_t { + return C.uint32_t(a) +} + func syscallFromNative(a C.int) ScmpSyscall { return ScmpSyscall(a) } @@ -737,34 +724,9 @@ func (scmpResp *ScmpNotifResp) toNative(resp *C.struct_seccomp_notif_resp) { resp.flags = C.__u32(scmpResp.Flags) } -// checkAPI checks that both the API level and the seccomp version is equal to -// or greater than the specified minLevel and major, minor, micro, -// respectively, and returns an error otherwise. Argument op is an arbitrary -// non-empty operation description, used as a part of the error message -// returned. -func checkAPI(op string, minLevel uint, major, minor, micro uint) error { - // Ignore error from getAPI, as it returns level == 0 in case of error. - level, _ := getAPI() - if level >= minLevel { - return checkVersion(op, major, minor, micro) - } - return &VersionError{ - op: op, - curAPI: level, - minAPI: minLevel, - major: major, - minor: minor, - micro: micro, - } -} - // Userspace Notification API // Calls to C.seccomp_notify* hidden from seccomp.go -func notifSupported() error { - return checkAPI("seccomp notification", 6, 2, 5, 0) -} - func (f *ScmpFilter) getNotifFd() (ScmpFd, error) { f.lock.Lock() defer f.lock.Unlock() @@ -772,8 +734,11 @@ func (f *ScmpFilter) getNotifFd() (ScmpFd, error) { if !f.valid { return -1, errBadFilter } - if err := notifSupported(); err != nil { - return -1, err + + // Ignore error, if not supported returns apiLevel == 0 + apiLevel, _ := GetAPI() + if apiLevel < 6 { + return -1, fmt.Errorf("seccomp notification requires API level >= 6; current level = %d", apiLevel) } fd := C.seccomp_notify_fd(f.filterCtx) @@ -785,8 +750,10 @@ func notifReceive(fd ScmpFd) (*ScmpNotifReq, error) { var req *C.struct_seccomp_notif var resp *C.struct_seccomp_notif_resp - if err := notifSupported(); err != nil { - return nil, err + // Ignore error, if 
not supported returns apiLevel == 0 + apiLevel, _ := GetAPI() + if apiLevel < 6 { + return nil, fmt.Errorf("seccomp notification requires API level >= 6; current level = %d", apiLevel) } // we only use the request here; the response is unused @@ -822,11 +789,13 @@ func notifRespond(fd ScmpFd, scmpResp *ScmpNotifResp) error { var req *C.struct_seccomp_notif var resp *C.struct_seccomp_notif_resp - if err := notifSupported(); err != nil { - return err + // Ignore error, if not supported returns apiLevel == 0 + apiLevel, _ := GetAPI() + if apiLevel < 6 { + return fmt.Errorf("seccomp notification requires API level >= 6; current level = %d", apiLevel) } - // we only use the response here; the request is discarded + // we only use the reponse here; the request is discarded if retCode := C.seccomp_notify_alloc(&req, &resp); retCode != 0 { return errRc(retCode) } @@ -858,8 +827,10 @@ func notifRespond(fd ScmpFd, scmpResp *ScmpNotifResp) error { } func notifIDValid(fd ScmpFd, id uint64) error { - if err := notifSupported(); err != nil { - return err + // Ignore error, if not supported returns apiLevel == 0 + apiLevel, _ := GetAPI() + if apiLevel < 6 { + return fmt.Errorf("seccomp notification requires API level >= 6; current level = %d", apiLevel) } for { diff --git a/vendor/modules.txt b/vendor/modules.txt index 5271e38fb2d..8c2f382ef91 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -36,10 +36,10 @@ github.com/moby/sys/mountinfo # github.com/mrunalp/fileutils v0.5.0 ## explicit; go 1.13 github.com/mrunalp/fileutils -# github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417 +# github.com/opencontainers/runtime-spec v1.0.3-0.20210910115017-0d6cc581aeea ## explicit github.com/opencontainers/runtime-spec/specs-go -# github.com/opencontainers/selinux v1.10.1 +# github.com/opencontainers/selinux v1.9.1 ## explicit; go 1.13 github.com/opencontainers/selinux/go-selinux github.com/opencontainers/selinux/go-selinux/label @@ -48,7 +48,7 @@ 
github.com/opencontainers/selinux/pkg/pwalkdir # github.com/russross/blackfriday/v2 v2.0.1 ## explicit github.com/russross/blackfriday/v2 -# github.com/seccomp/libseccomp-golang v0.9.2-0.20220502022130-f33da4d89646 +# github.com/seccomp/libseccomp-golang v0.9.2-0.20210429002308-3879420cc921 ## explicit; go 1.14 github.com/seccomp/libseccomp-golang # github.com/shurcooL/sanitized_anchor_name v1.0.0