mirror of
https://github.com/opencontainers/runc.git
synced 2025-09-27 11:53:40 +08:00
libct: speedup process.Env handling
The current implementation sets all the environment variables passed in
Process.Env in the current process, one by one, then uses os.Environ to
read those back.
As pointed out in [1], this is slow, as runc calls os.Setenv for every
variable, and there may be a few thousands of those. Looking into how
os.Setenv is implemented, it is indeed slow, especially when cgo is
enabled.
Looking into why it was implemented the way it is, I found commit
9744d72c
and traced it to [2], which discusses the actual reasons.
It boils down to these two:
- HOME is not passed into container as it is set in setupUser by
os.Setenv and has no effect on config.Env;
- there is a need to deduplicate the environment variables.
Yet it was decided in [2] to not go ahead with this patch, but
later [3] was opened with the carry of this patch, and merged.
Now, from what I see:
1. Passing environment to exec is way faster than using os.Setenv and
os.Environ (tests show ~20x speed improvement in a simple Go test,
and ~3x improvement in real-world test, see below).
2. Setting environment variables in the runc context may result is some
ugly side effects (think GODEBUG, LD_PRELOAD, or _LIBCONTAINER_*).
3. Nothing in runtime spec says that the environment needs to be
deduplicated, or the order of preference (whether the first or the
last value of a variable with the same name is to be used). We should
stick to what we have in order to maintain backward compatibility.
So, this patch:
- switches to passing env directly to exec;
- adds deduplication mechanism to retain backward compatibility;
- takes care to set PATH from process.Env in the current process
(so that supplied PATH is used to find the binary to execute),
also to retain backward compatibility;
- adds HOME to process.Env if not set;
- ensures any StartContainer CommandHook entries with no environment
set explicitly are run with the same environment as before. Thanks
to @lifubang who noticed that peculiarity.
The benchmark added by the previous commit shows ~3x improvement:
│ before │ after │
│ sec/op │ sec/op vs base │
ExecInBigEnv-20 61.53m ± 1% 21.87m ± 16% -64.46% (p=0.000 n=10)
[1]: https://github.com/opencontainers/runc/pull/1983
[2]: https://github.com/docker-archive/libcontainer/pull/418
[3]: https://github.com/docker-archive/libcontainer/pull/432
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
This commit is contained in:
59
libcontainer/env.go
Normal file
59
libcontainer/env.go
Normal file
@@ -0,0 +1,59 @@
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"slices"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// prepareEnv processes a list of environment variables, preparing it
|
||||
// for direct consumption by unix.Exec. In particular, it:
|
||||
// - validates each variable is in the NAME=VALUE format and
|
||||
// contains no \0 (nil) bytes;
|
||||
// - removes any duplicates (keeping only the last value for each key)
|
||||
// - sets PATH for the current process, if found in the list.
|
||||
//
|
||||
// It returns the deduplicated environment, a flag telling whether HOME
|
||||
// is present in the input, and an error.
|
||||
func prepareEnv(env []string) ([]string, bool, error) {
|
||||
if env == nil {
|
||||
return nil, false, nil
|
||||
}
|
||||
// Deduplication code based on dedupEnv from Go 1.22 os/exec.
|
||||
|
||||
// Construct the output in reverse order, to preserve the
|
||||
// last occurrence of each key.
|
||||
out := make([]string, 0, len(env))
|
||||
saw := make(map[string]bool, len(env))
|
||||
for n := len(env); n > 0; n-- {
|
||||
kv := env[n-1]
|
||||
i := strings.IndexByte(kv, '=')
|
||||
if i == -1 {
|
||||
return nil, false, errors.New("invalid environment variable: missing '='")
|
||||
}
|
||||
if i == 0 {
|
||||
return nil, false, errors.New("invalid environment variable: name cannot be empty")
|
||||
}
|
||||
key := kv[:i]
|
||||
if saw[key] { // Duplicate.
|
||||
continue
|
||||
}
|
||||
saw[key] = true
|
||||
if strings.IndexByte(kv, 0) >= 0 {
|
||||
return nil, false, fmt.Errorf("invalid environment variable %q: contains nul byte (\\x00)", key)
|
||||
}
|
||||
if key == "PATH" {
|
||||
// Needs to be set as it is used for binary lookup.
|
||||
if err := os.Setenv("PATH", kv[i+1:]); err != nil {
|
||||
return nil, false, err
|
||||
}
|
||||
}
|
||||
out = append(out, kv)
|
||||
}
|
||||
// Restore the original order.
|
||||
slices.Reverse(out)
|
||||
|
||||
return out, saw["HOME"], nil
|
||||
}
|
40
libcontainer/env_test.go
Normal file
40
libcontainer/env_test.go
Normal file
@@ -0,0 +1,40 @@
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"slices"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestPrepareEnvDedup(t *testing.T) {
|
||||
tests := []struct {
|
||||
env, wantEnv []string
|
||||
}{
|
||||
{
|
||||
env: []string{},
|
||||
wantEnv: []string{},
|
||||
},
|
||||
{
|
||||
env: []string{"HOME=/root", "FOO=bar"},
|
||||
wantEnv: []string{"HOME=/root", "FOO=bar"},
|
||||
},
|
||||
{
|
||||
env: []string{"A=a", "A=b", "A=c"},
|
||||
wantEnv: []string{"A=c"},
|
||||
},
|
||||
{
|
||||
env: []string{"TERM=vt100", "HOME=/home/one", "HOME=/home/two", "TERM=xterm", "HOME=/home/three", "FOO=bar"},
|
||||
wantEnv: []string{"TERM=xterm", "HOME=/home/three", "FOO=bar"},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
env, _, err := prepareEnv(tc.env)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
continue
|
||||
}
|
||||
if !slices.Equal(env, tc.wantEnv) {
|
||||
t.Errorf("want %v, got %v", tc.wantEnv, env)
|
||||
}
|
||||
}
|
||||
}
|
@@ -11,7 +11,6 @@ import (
|
||||
"runtime"
|
||||
"runtime/debug"
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
"github.com/containerd/console"
|
||||
@@ -185,8 +184,8 @@ func startInitialization() (retErr error) {
|
||||
defer pidfdSocket.Close()
|
||||
}
|
||||
|
||||
// clear the current process's environment to clean any libcontainer
|
||||
// specific env vars.
|
||||
// From here on, we don't need current process environment. It is not
|
||||
// used directly anywhere below this point, but let's clear it anyway.
|
||||
os.Clearenv()
|
||||
|
||||
defer func() {
|
||||
@@ -209,9 +208,11 @@ func startInitialization() (retErr error) {
|
||||
}
|
||||
|
||||
func containerInit(t initType, config *initConfig, pipe *syncSocket, consoleSocket, pidfdSocket, fifoFile, logPipe *os.File) error {
|
||||
if err := populateProcessEnvironment(config.Env); err != nil {
|
||||
env, homeSet, err := prepareEnv(config.Env)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
config.Env = env
|
||||
|
||||
// Clean the RLIMIT_NOFILE cache in go runtime.
|
||||
// Issue: https://github.com/opencontainers/runc/issues/4195
|
||||
@@ -225,6 +226,7 @@ func containerInit(t initType, config *initConfig, pipe *syncSocket, consoleSock
|
||||
pidfdSocket: pidfdSocket,
|
||||
config: config,
|
||||
logPipe: logPipe,
|
||||
addHome: !homeSet,
|
||||
}
|
||||
return i.Init()
|
||||
case initStandard:
|
||||
@@ -236,36 +238,13 @@ func containerInit(t initType, config *initConfig, pipe *syncSocket, consoleSock
|
||||
config: config,
|
||||
fifoFile: fifoFile,
|
||||
logPipe: logPipe,
|
||||
addHome: !homeSet,
|
||||
}
|
||||
return i.Init()
|
||||
}
|
||||
return fmt.Errorf("unknown init type %q", t)
|
||||
}
|
||||
|
||||
// populateProcessEnvironment loads the provided environment variables into the
|
||||
// current processes's environment.
|
||||
func populateProcessEnvironment(env []string) error {
|
||||
for _, pair := range env {
|
||||
name, val, ok := strings.Cut(pair, "=")
|
||||
if !ok {
|
||||
return errors.New("invalid environment variable: missing '='")
|
||||
}
|
||||
if name == "" {
|
||||
return errors.New("invalid environment variable: name cannot be empty")
|
||||
}
|
||||
if strings.IndexByte(name, 0) >= 0 {
|
||||
return fmt.Errorf("invalid environment variable %q: name contains nul byte (\\x00)", name)
|
||||
}
|
||||
if strings.IndexByte(val, 0) >= 0 {
|
||||
return fmt.Errorf("invalid environment variable %q: value contains nul byte (\\x00)", name)
|
||||
}
|
||||
if err := os.Setenv(name, val); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// verifyCwd ensures that the current directory is actually inside the mount
|
||||
// namespace root of the current process.
|
||||
func verifyCwd() error {
|
||||
@@ -294,8 +273,8 @@ func verifyCwd() error {
|
||||
|
||||
// finalizeNamespace drops the caps, sets the correct user
|
||||
// and working dir, and closes any leaked file descriptors
|
||||
// before executing the command inside the namespace
|
||||
func finalizeNamespace(config *initConfig) error {
|
||||
// before executing the command inside the namespace.
|
||||
func finalizeNamespace(config *initConfig, addHome bool) error {
|
||||
// Ensure that all unwanted fds we may have accidentally
|
||||
// inherited are marked close-on-exec so they stay out of the
|
||||
// container
|
||||
@@ -341,7 +320,7 @@ func finalizeNamespace(config *initConfig) error {
|
||||
if err := system.SetKeepCaps(); err != nil {
|
||||
return fmt.Errorf("unable to set keep caps: %w", err)
|
||||
}
|
||||
if err := setupUser(config); err != nil {
|
||||
if err := setupUser(config, addHome); err != nil {
|
||||
return fmt.Errorf("unable to setup user: %w", err)
|
||||
}
|
||||
// Change working directory AFTER the user has been set up, if we haven't done it yet.
|
||||
@@ -459,8 +438,9 @@ func syncParentSeccomp(pipe *syncSocket, seccompFd int) error {
|
||||
return readSync(pipe, procSeccompDone)
|
||||
}
|
||||
|
||||
// setupUser changes the groups, gid, and uid for the user inside the container
|
||||
func setupUser(config *initConfig) error {
|
||||
// setupUser changes the groups, gid, and uid for the user inside the container,
|
||||
// and appends user's HOME to config.Env if addHome is true.
|
||||
func setupUser(config *initConfig, addHome bool) error {
|
||||
// Set up defaults.
|
||||
defaultExecUser := user.ExecUser{
|
||||
Uid: 0,
|
||||
@@ -541,11 +521,9 @@ func setupUser(config *initConfig) error {
|
||||
return err
|
||||
}
|
||||
|
||||
// if we didn't get HOME already, set it based on the user's HOME
|
||||
if envHome := os.Getenv("HOME"); envHome == "" {
|
||||
if err := os.Setenv("HOME", execUser.Home); err != nil {
|
||||
return err
|
||||
}
|
||||
// If we didn't get HOME already, set it based on the user's HOME.
|
||||
if addHome {
|
||||
config.Env = append(config.Env, "HOME="+execUser.Home)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
@@ -25,6 +25,7 @@ type linuxSetnsInit struct {
|
||||
pidfdSocket *os.File
|
||||
config *initConfig
|
||||
logPipe *os.File
|
||||
addHome bool
|
||||
}
|
||||
|
||||
func (l *linuxSetnsInit) getSessionRingName() string {
|
||||
@@ -101,7 +102,7 @@ func (l *linuxSetnsInit) Init() error {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := finalizeNamespace(l.config); err != nil {
|
||||
if err := finalizeNamespace(l.config, l.addHome); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil {
|
||||
@@ -154,5 +155,5 @@ func (l *linuxSetnsInit) Init() error {
|
||||
if err := utils.UnsafeCloseFrom(l.config.PassedFilesCount + 3); err != nil {
|
||||
return err
|
||||
}
|
||||
return system.Exec(name, l.config.Args, os.Environ())
|
||||
return system.Exec(name, l.config.Args, l.config.Env)
|
||||
}
|
||||
|
@@ -27,6 +27,7 @@ type linuxStandardInit struct {
|
||||
fifoFile *os.File
|
||||
logPipe *os.File
|
||||
config *initConfig
|
||||
addHome bool
|
||||
}
|
||||
|
||||
func (l *linuxStandardInit) getSessionRingParams() (string, uint32, uint32) {
|
||||
@@ -186,7 +187,7 @@ func (l *linuxStandardInit) Init() error {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := finalizeNamespace(l.config); err != nil {
|
||||
if err := finalizeNamespace(l.config, l.addHome); err != nil {
|
||||
return err
|
||||
}
|
||||
// finalizeNamespace can change user/group which clears the parent death
|
||||
@@ -194,6 +195,17 @@ func (l *linuxStandardInit) Init() error {
|
||||
if err := pdeath.Restore(); err != nil {
|
||||
return fmt.Errorf("can't restore pdeath signal: %w", err)
|
||||
}
|
||||
|
||||
// In case we have any StartContainer hooks to run, and they don't
|
||||
// have environment configured explicitly, make sure they will be run
|
||||
// with the same environment as container's init.
|
||||
//
|
||||
// NOTE the above described behavior is not part of runtime-spec, but
|
||||
// rather a de facto historical thing we afraid to change.
|
||||
if h := l.config.Config.Hooks[configs.StartContainer]; len(h) > 0 {
|
||||
h.SetDefaultEnv(l.config.Env)
|
||||
}
|
||||
|
||||
// Compare the parent from the initial start of the init process and make
|
||||
// sure that it did not change. if the parent changes that means it died
|
||||
// and we were reparented to something else so we should just kill ourself
|
||||
@@ -285,5 +297,5 @@ func (l *linuxStandardInit) Init() error {
|
||||
if err := utils.UnsafeCloseFrom(l.config.PassedFilesCount + 3); err != nil {
|
||||
return err
|
||||
}
|
||||
return system.Exec(name, l.config.Args, os.Environ())
|
||||
return system.Exec(name, l.config.Args, l.config.Env)
|
||||
}
|
||||
|
Reference in New Issue
Block a user