mirror of
				https://github.com/opencontainers/runc.git
				synced 2025-10-26 00:50:52 +08:00 
			
		
		
		
	Refactor init actions into separate types
Signed-off-by: Michael Crosby <crosbymichael@gmail.com>
This commit is contained in:
		
							
								
								
									
										75
									
								
								configs/validate/config.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										75
									
								
								configs/validate/config.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,75 @@ | ||||
| package validate | ||||
|  | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"path/filepath" | ||||
|  | ||||
| 	"github.com/docker/libcontainer/configs" | ||||
| ) | ||||
|  | ||||
| type Validator interface { | ||||
| 	Validate(*configs.Config) error | ||||
| } | ||||
|  | ||||
| func New() Validator { | ||||
| 	return &ConfigValidator{} | ||||
| } | ||||
|  | ||||
| type ConfigValidator struct { | ||||
| } | ||||
|  | ||||
| func (v *ConfigValidator) Validate(config *configs.Config) error { | ||||
| 	if err := v.rootfs(config); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	if err := v.network(config); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	if err := v.hostname(config); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	if err := v.security(config); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // rootfs validates that the rootfs is an absolute path and is not a symlink | ||||
| // to the container's root filesystem. | ||||
| func (v *ConfigValidator) rootfs(config *configs.Config) error { | ||||
| 	cleaned, err := filepath.Abs(config.Rootfs) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	if cleaned, err = filepath.EvalSymlinks(cleaned); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	if config.Rootfs != cleaned { | ||||
| 		return fmt.Errorf("%s is not an absolute path or is a symlink", config.Rootfs) | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func (v *ConfigValidator) network(config *configs.Config) error { | ||||
| 	if !config.Namespaces.Contains(configs.NEWNET) { | ||||
| 		if len(config.Networks) > 0 || len(config.Routes) > 0 { | ||||
| 			return fmt.Errorf("unable to apply network settings without a private NET namespace") | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func (v *ConfigValidator) hostname(config *configs.Config) error { | ||||
| 	if config.Hostname != "" && !config.Namespaces.Contains(configs.NEWUTS) { | ||||
| 		return fmt.Errorf("unable to set hostname without a private UTS namespace") | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func (v *ConfigValidator) security(config *configs.Config) error { | ||||
| 	// restrict sys without mount namespace | ||||
| 	if config.RestrictSys && !config.Namespaces.Contains(configs.NEWNS) { | ||||
| 		return fmt.Errorf("unable to restrict sys entries without a private MNT namespace") | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
							
								
								
									
										8
									
								
								error.go
									
									
									
									
									
								
							
							
						
						
									
										8
									
								
								error.go
									
									
									
									
									
								
							| @@ -54,3 +54,11 @@ type Error interface { | ||||
| 	// Returns the error code for this error. | ||||
| 	Code() ErrorCode | ||||
| } | ||||
|  | ||||
| type initError struct { | ||||
| 	Message string `json:"message,omitempty"` | ||||
| } | ||||
|  | ||||
| func (i initError) Error() string { | ||||
| 	return i.Message | ||||
| } | ||||
|   | ||||
| @@ -22,12 +22,8 @@ const ( | ||||
| 	EXIT_SIGNAL_OFFSET = 128 | ||||
| ) | ||||
|  | ||||
| type initError struct { | ||||
| 	Message string `json:"message,omitempty"` | ||||
| } | ||||
|  | ||||
| func (i initError) Error() string { | ||||
| 	return i.Message | ||||
| type pid struct { | ||||
| 	Pid int `json:"Pid"` | ||||
| } | ||||
|  | ||||
| type linuxContainer struct { | ||||
| @@ -97,6 +93,21 @@ func (c *linuxContainer) Start(process *Process) (int, error) { | ||||
| 	if err != nil { | ||||
| 		return -1, err | ||||
| 	} | ||||
| 	cmd := c.commandTemplate(process) | ||||
| 	if status != configs.Destroyed { | ||||
| 		// TODO: (crosbymichael) check out console use for execin | ||||
| 		return c.startNewProcess(cmd, process.Args) | ||||
| 		//return namespaces.ExecIn(process.Args, c.config.Env, "", cmd, c.config, c.state) | ||||
| 	} | ||||
| 	if err := c.startInitialProcess(cmd, process.Args); err != nil { | ||||
| 		return -1, err | ||||
| 	} | ||||
| 	return c.state.InitPid, nil | ||||
| } | ||||
|  | ||||
| // commandTemplate creates a template *exec.Cmd.  It uses the init arguments provided | ||||
| // to the factory and attaches IO to the process. | ||||
| func (c *linuxContainer) commandTemplate(process *Process) *exec.Cmd { | ||||
| 	cmd := exec.Command(c.initArgs[0], c.initArgs[1:]...) | ||||
| 	cmd.Stdin = process.Stdin | ||||
| 	cmd.Stdout = process.Stdout | ||||
| @@ -108,32 +119,26 @@ func (c *linuxContainer) Start(process *Process) (int, error) { | ||||
| 	} | ||||
| 	// TODO: add pdeath to config for a container | ||||
| 	cmd.SysProcAttr.Pdeathsig = syscall.SIGKILL | ||||
| 	if status != configs.Destroyed { | ||||
| 		glog.Info("start new container process") | ||||
| 		// TODO: (crosbymichael) check out console use for execin | ||||
| 		//return namespaces.ExecIn(process.Args, c.config.Env, "", cmd, c.config, c.state) | ||||
| 		return c.startNewProcess(cmd, process.Args) | ||||
| 	} | ||||
| 	if err := c.startInitProcess(cmd, process.Args); err != nil { | ||||
| 		return -1, err | ||||
| 	} | ||||
| 	return c.state.InitPid, nil | ||||
| 	return cmd | ||||
| } | ||||
|  | ||||
| // startNewProcess adds another process to an already running container | ||||
| func (c *linuxContainer) startNewProcess(cmd *exec.Cmd, args []string) (int, error) { | ||||
| 	var err error | ||||
| 	glog.Info("start new container process") | ||||
| 	parent, child, err := newInitPipe() | ||||
| 	if err != nil { | ||||
| 		return -1, err | ||||
| 	} | ||||
| 	defer parent.Close() | ||||
| 	cmd.ExtraFiles = []*os.File{child} | ||||
| 	cmd.Env = append(cmd.Env, fmt.Sprintf("_LIBCONTAINER_INITPID=%d", c.state.InitPid)) | ||||
| 	if err := cmd.Start(); err != nil { | ||||
| 	cmd.Env = append(cmd.Env, fmt.Sprintf("_LIBCONTAINER_INITPID=%d", c.state.InitPid), "_LIBCONTAINER_INITTYPE=setns") | ||||
|  | ||||
| 	// start the command | ||||
| 	err = cmd.Start() | ||||
| 	child.Close() | ||||
| 	if err != nil { | ||||
| 		return -1, err | ||||
| 	} | ||||
| 	child.Close() | ||||
| 	s, err := cmd.Process.Wait() | ||||
| 	if err != nil { | ||||
| 		return -1, err | ||||
| @@ -152,29 +157,28 @@ func (c *linuxContainer) startNewProcess(cmd *exec.Cmd, args []string) (int, err | ||||
| 	} | ||||
| 	terminate := func(terr error) (int, error) { | ||||
| 		// TODO: log the errors for kill and wait | ||||
| 		p.Kill() | ||||
| 		p.Wait() | ||||
| 		if err := p.Kill(); err != nil { | ||||
| 			glog.Warning(err) | ||||
| 		} | ||||
| 		if _, err := p.Wait(); err != nil { | ||||
| 			glog.Warning(err) | ||||
| 		} | ||||
| 		return -1, terr | ||||
| 	} | ||||
| 	// Enter cgroups. | ||||
| 	if err := c.enterCgroups(pid.Pid); err != nil { | ||||
| 		return terminate(err) | ||||
| 	} | ||||
| 	encoder := json.NewEncoder(parent) | ||||
| 	if err := encoder.Encode(c.config); err != nil { | ||||
| 		return terminate(err) | ||||
| 	} | ||||
| 	process := processArgs{ | ||||
| 	if err := json.NewEncoder(parent).Encode(&initConfig{ | ||||
| 		Config: c.config, | ||||
| 		Args:   args, | ||||
| 	} | ||||
| 	if err := encoder.Encode(process); err != nil { | ||||
| 	}); err != nil { | ||||
| 		return terminate(err) | ||||
| 	} | ||||
| 	return pid.Pid, nil | ||||
| } | ||||
|  | ||||
| func (c *linuxContainer) startInitProcess(cmd *exec.Cmd, args []string) error { | ||||
| func (c *linuxContainer) startInitialProcess(cmd *exec.Cmd, args []string) error { | ||||
| 	glog.Info("starting container initial process") | ||||
| 	// create a pipe so that we can synchronize with the namespaced process and | ||||
| 	// pass the state and configuration to the child process | ||||
| 	parent, child, err := newInitPipe() | ||||
| @@ -184,6 +188,9 @@ func (c *linuxContainer) startInitProcess(cmd *exec.Cmd, args []string) error { | ||||
| 	defer parent.Close() | ||||
| 	cmd.ExtraFiles = []*os.File{child} | ||||
| 	cmd.SysProcAttr.Cloneflags = c.config.Namespaces.CloneFlags() | ||||
| 	cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE=standard") | ||||
| 	// if the container is configured to use user namespaces we have to setup the | ||||
| 	// uid:gid mapping on the command. | ||||
| 	if c.config.Namespaces.Contains(configs.NEWUSER) { | ||||
| 		addUidGidMappings(cmd.SysProcAttr, c.config) | ||||
| 		// Default to root user when user namespaces are enabled. | ||||
| @@ -191,7 +198,6 @@ func (c *linuxContainer) startInitProcess(cmd *exec.Cmd, args []string) error { | ||||
| 			cmd.SysProcAttr.Credential = &syscall.Credential{} | ||||
| 		} | ||||
| 	} | ||||
| 	glog.Info("starting container init process") | ||||
| 	err = cmd.Start() | ||||
| 	child.Close() | ||||
| 	if err != nil { | ||||
| @@ -199,12 +205,15 @@ func (c *linuxContainer) startInitProcess(cmd *exec.Cmd, args []string) error { | ||||
| 	} | ||||
| 	wait := func() (*os.ProcessState, error) { | ||||
| 		ps, err := cmd.Process.Wait() | ||||
| 		if err != nil { | ||||
| 			return nil, newGenericError(err, SystemError) | ||||
| 		} | ||||
| 		// we should kill all processes in the cgroup once init has died if we use | ||||
| 		// the host PID namespace | ||||
| 		if !c.config.Namespaces.Contains(configs.NEWPID) { | ||||
| 			c.killAllPids() | ||||
| 		} | ||||
| 		return ps, newGenericError(err, SystemError) | ||||
| 		return ps, nil | ||||
| 	} | ||||
| 	terminate := func(terr error) error { | ||||
| 		// TODO: log the errors for kill and wait | ||||
| @@ -230,19 +239,19 @@ func (c *linuxContainer) startInitProcess(cmd *exec.Cmd, args []string) error { | ||||
| 	if err := c.initializeNetworking(cmd.Process.Pid, &networkState); err != nil { | ||||
| 		return terminate(err) | ||||
| 	} | ||||
| 	process := processArgs{ | ||||
| 	iconfig := &initConfig{ | ||||
| 		Args:         args, | ||||
| 		Config:       c.config, | ||||
| 		NetworkState: &networkState, | ||||
| 	} | ||||
| 	// Start the setup process to setup the init process | ||||
| 	if c.config.Namespaces.Contains(configs.NEWUSER) { | ||||
| 		if err = executeSetupCmd(cmd.Args, cmd.Process.Pid, c.config, &process, &networkState); err != nil { | ||||
| 		if err = executeSetupCmd(cmd.Args, cmd.Process.Pid, c.config, iconfig, &networkState); err != nil { | ||||
| 			return terminate(err) | ||||
| 		} | ||||
| 	} | ||||
| 	// send the state to the container's init process then shutdown writes for the parent | ||||
| 	if err := json.NewEncoder(parent).Encode(process); err != nil { | ||||
| 	if err := json.NewEncoder(parent).Encode(iconfig); err != nil { | ||||
| 		return terminate(err) | ||||
| 	} | ||||
| 	// shutdown writes for the parent side of the pipe | ||||
| @@ -258,12 +267,10 @@ func (c *linuxContainer) startInitProcess(cmd *exec.Cmd, args []string) error { | ||||
| 	if ierr != nil { | ||||
| 		return terminate(ierr) | ||||
| 	} | ||||
|  | ||||
| 	c.state.InitPid = cmd.Process.Pid | ||||
| 	c.state.InitStartTime = started | ||||
| 	c.state.NetworkState = networkState | ||||
| 	c.state.CgroupPaths = c.cgroupManager.GetPaths() | ||||
|  | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| @@ -386,7 +393,7 @@ func (c *linuxContainer) initializeNetworking(nspid int, networkState *configs.N | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func executeSetupCmd(args []string, ppid int, container *configs.Config, process *processArgs, networkState *configs.NetworkState) error { | ||||
| func executeSetupCmd(args []string, ppid int, container *configs.Config, process *initConfig, networkState *configs.NetworkState) error { | ||||
| 	command := exec.Command(args[0], args[1:]...) | ||||
| 	parent, child, err := newInitPipe() | ||||
| 	if err != nil { | ||||
| @@ -397,7 +404,7 @@ func executeSetupCmd(args []string, ppid int, container *configs.Config, process | ||||
| 	command.Dir = container.Rootfs | ||||
| 	command.Env = append(command.Env, | ||||
| 		fmt.Sprintf("_LIBCONTAINER_INITPID=%d", ppid), | ||||
| 		fmt.Sprintf("_LIBCONTAINER_USERNS=1")) | ||||
| 		fmt.Sprintf("_LIBCONTAINER_INITTYPE=userns_sidecar")) | ||||
| 	err = command.Start() | ||||
| 	child.Close() | ||||
| 	if err != nil { | ||||
| @@ -452,10 +459,6 @@ func executeSetupCmd(args []string, ppid int, container *configs.Config, process | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| type pid struct { | ||||
| 	Pid int `json:"Pid"` | ||||
| } | ||||
|  | ||||
| func (c *linuxContainer) enterCgroups(pid int) error { | ||||
| 	return cgroups.EnterPid(c.state.CgroupPaths, pid) | ||||
| } | ||||
|   | ||||
							
								
								
									
										509
									
								
								linux_factory.go
									
									
									
									
									
								
							
							
						
						
									
										509
									
								
								linux_factory.go
									
									
									
									
									
								
							| @@ -9,24 +9,12 @@ import ( | ||||
| 	"os" | ||||
| 	"path/filepath" | ||||
| 	"regexp" | ||||
| 	"strings" | ||||
| 	"syscall" | ||||
|  | ||||
| 	"github.com/golang/glog" | ||||
|  | ||||
| 	"github.com/docker/libcontainer/apparmor" | ||||
| 	cgroups "github.com/docker/libcontainer/cgroups/manager" | ||||
| 	"github.com/docker/libcontainer/configs" | ||||
| 	"github.com/docker/libcontainer/console" | ||||
| 	"github.com/docker/libcontainer/label" | ||||
| 	"github.com/docker/libcontainer/mount" | ||||
| 	"github.com/docker/libcontainer/netlink" | ||||
| 	"github.com/docker/libcontainer/network" | ||||
| 	"github.com/docker/libcontainer/security/capabilities" | ||||
| 	"github.com/docker/libcontainer/security/restrict" | ||||
| 	"github.com/docker/libcontainer/system" | ||||
| 	"github.com/docker/libcontainer/user" | ||||
| 	"github.com/docker/libcontainer/utils" | ||||
| 	"github.com/docker/libcontainer/configs/validate" | ||||
| ) | ||||
|  | ||||
| const ( | ||||
| @@ -39,13 +27,6 @@ var ( | ||||
| 	maxIdLen = 1024 | ||||
| ) | ||||
|  | ||||
| // Process is used for transferring parameters from Exec() to Init() | ||||
| type processArgs struct { | ||||
| 	Args         []string              `json:"args,omitempty"` | ||||
| 	Config       *configs.Config       `json:"config,omitempty"` | ||||
| 	NetworkState *configs.NetworkState `json:"network_state,omitempty"` | ||||
| } | ||||
|  | ||||
| // New returns a linux based container factory based in the root directory. | ||||
| func New(root string, initArgs []string) (Factory, error) { | ||||
| 	if root != "" { | ||||
| @@ -56,6 +37,7 @@ func New(root string, initArgs []string) (Factory, error) { | ||||
| 	return &linuxFactory{ | ||||
| 		root:      root, | ||||
| 		initArgs:  initArgs, | ||||
| 		validator: validate.New(), | ||||
| 	}, nil | ||||
| } | ||||
|  | ||||
| @@ -64,6 +46,7 @@ type linuxFactory struct { | ||||
| 	// root is the root directory | ||||
| 	root      string | ||||
| 	initArgs  []string | ||||
| 	validator validate.Validator | ||||
| } | ||||
|  | ||||
| func (l *linuxFactory) Create(id string, config *configs.Config) (Container, error) { | ||||
| @@ -73,6 +56,9 @@ func (l *linuxFactory) Create(id string, config *configs.Config) (Container, err | ||||
| 	if err := l.validateID(id); err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	if err := l.validator.Validate(config); err != nil { | ||||
| 		return nil, newGenericError(err, ConfigInvalid) | ||||
| 	} | ||||
| 	containerRoot := filepath.Join(l.root, id) | ||||
| 	if _, err := os.Stat(containerRoot); err == nil { | ||||
| 		return nil, newGenericError(fmt.Errorf("Container with id exists: %v", id), IdInUse) | ||||
| @@ -96,14 +82,13 @@ func (l *linuxFactory) Create(id string, config *configs.Config) (Container, err | ||||
| 		os.RemoveAll(containerRoot) | ||||
| 		return nil, newGenericError(err, SystemError) | ||||
| 	} | ||||
| 	cgroupManager := cgroups.NewCgroupManager(config.Cgroups) | ||||
| 	return &linuxContainer{ | ||||
| 		id:            id, | ||||
| 		root:          containerRoot, | ||||
| 		config:        config, | ||||
| 		initArgs:      l.initArgs, | ||||
| 		state:         &configs.State{}, | ||||
| 		cgroupManager: cgroupManager, | ||||
| 		cgroupManager: cgroups.NewCgroupManager(config.Cgroups), | ||||
| 	}, nil | ||||
| } | ||||
|  | ||||
| @@ -137,12 +122,13 @@ func (l *linuxFactory) Load(id string) (Container, error) { | ||||
| // StartInitialization loads a container by opening the pipe fd from the parent to read the configuration and state | ||||
| // This is a low level implementation detail of the reexec and should not be consumed externally | ||||
| func (l *linuxFactory) StartInitialization(pipefd uintptr) (err error) { | ||||
| 	pipe := os.NewFile(uintptr(pipefd), "pipe") | ||||
| 	setupUserns := os.Getenv("_LIBCONTAINER_USERNS") != "" | ||||
| 	pid := os.Getenv("_LIBCONTAINER_INITPID") | ||||
| 	if pid != "" && !setupUserns { | ||||
| 		return initIn(pipe) | ||||
| 	} | ||||
| 	var ( | ||||
| 		pipe = os.NewFile(uintptr(pipefd), "pipe") | ||||
| 		it   = initType(os.Getenv("_LIBCONTAINER_INITTYPE")) | ||||
| 	) | ||||
| 	// clear the current process's environment to clean any libcontainer | ||||
| 	// specific env vars. | ||||
| 	os.Clearenv() | ||||
| 	defer func() { | ||||
| 		// if we have an error during the initialization of the container's init then send it back to the | ||||
| 		// parent process in the form of an initError. | ||||
| @@ -159,27 +145,11 @@ func (l *linuxFactory) StartInitialization(pipefd uintptr) (err error) { | ||||
| 		// ensure that this pipe is always closed | ||||
| 		pipe.Close() | ||||
| 	}() | ||||
| 	uncleanRootfs, err := os.Getwd() | ||||
| 	i, err := newContainerInit(it, pipe) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	var process *processArgs | ||||
| 	// We always read this as it is a way to sync with the parent as well | ||||
| 	if err := json.NewDecoder(pipe).Decode(&process); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	if setupUserns { | ||||
| 		err = setupContainer(process) | ||||
| 		if err == nil { | ||||
| 			os.Exit(0) | ||||
| 		} else { | ||||
| 			os.Exit(1) | ||||
| 		} | ||||
| 	} | ||||
| 	if process.Config.Namespaces.Contains(configs.NEWUSER) { | ||||
| 		return l.initUserNs(uncleanRootfs, process) | ||||
| 	} | ||||
| 	return l.initDefault(uncleanRootfs, process) | ||||
| 	return i.Init() | ||||
| } | ||||
|  | ||||
| func (l *linuxFactory) loadContainerConfig(root string) (*configs.Config, error) { | ||||
| @@ -223,450 +193,3 @@ func (l *linuxFactory) validateID(id string) error { | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func (l *linuxFactory) initDefault(uncleanRootfs string, process *processArgs) (err error) { | ||||
| 	config := process.Config | ||||
| 	networkState := process.NetworkState | ||||
|  | ||||
| 	// TODO: move to validation | ||||
| 	/* | ||||
| 		rootfs, err := utils.ResolveRootfs(uncleanRootfs) | ||||
| 		if err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	*/ | ||||
|  | ||||
| 	// clear the current processes env and replace it with the environment | ||||
| 	// defined on the container | ||||
| 	if err := loadContainerEnvironment(config); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	// join any namespaces via a path to the namespace fd if provided | ||||
| 	if err := joinExistingNamespaces(config.Namespaces); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	if config.Console != "" { | ||||
| 		if err := console.OpenAndDup(config.Console); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 	if _, err := syscall.Setsid(); err != nil { | ||||
| 		return fmt.Errorf("setsid %s", err) | ||||
| 	} | ||||
| 	if config.Console != "" { | ||||
| 		if err := system.Setctty(); err != nil { | ||||
| 			return fmt.Errorf("setctty %s", err) | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	cloneFlags := config.Namespaces.CloneFlags() | ||||
| 	if (cloneFlags & syscall.CLONE_NEWNET) == 0 { | ||||
| 		if len(config.Networks) != 0 || len(config.Routes) != 0 { | ||||
| 			return fmt.Errorf("unable to apply network parameters without network namespace") | ||||
| 		} | ||||
| 	} else { | ||||
| 		if err := setupNetwork(config, networkState); err != nil { | ||||
| 			return fmt.Errorf("setup networking %s", err) | ||||
| 		} | ||||
| 		if err := setupRoute(config); err != nil { | ||||
| 			return fmt.Errorf("setup route %s", err) | ||||
| 		} | ||||
| 	} | ||||
| 	if err := setupRlimits(config); err != nil { | ||||
| 		return fmt.Errorf("setup rlimits %s", err) | ||||
| 	} | ||||
| 	label.Init() | ||||
| 	// InitializeMountNamespace() can be executed only for a new mount namespace | ||||
| 	if (cloneFlags & syscall.CLONE_NEWNS) != 0 { | ||||
| 		if err := mount.InitializeMountNamespace(config); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 	if config.Hostname != "" { | ||||
| 		// TODO: (crosbymichael) move this to pre spawn validation | ||||
| 		if (cloneFlags & syscall.CLONE_NEWUTS) == 0 { | ||||
| 			return fmt.Errorf("unable to set the hostname without UTS namespace") | ||||
| 		} | ||||
| 		if err := syscall.Sethostname([]byte(config.Hostname)); err != nil { | ||||
| 			return fmt.Errorf("unable to sethostname %q: %s", config.Hostname, err) | ||||
| 		} | ||||
| 	} | ||||
| 	if err := apparmor.ApplyProfile(config.AppArmorProfile); err != nil { | ||||
| 		return fmt.Errorf("set apparmor profile %s: %s", config.AppArmorProfile, err) | ||||
| 	} | ||||
| 	if err := label.SetProcessLabel(config.ProcessLabel); err != nil { | ||||
| 		return fmt.Errorf("set process label %s", err) | ||||
| 	} | ||||
| 	// TODO: (crosbymichael) make this configurable at the Config level | ||||
| 	if config.RestrictSys { | ||||
| 		if (cloneFlags & syscall.CLONE_NEWNS) == 0 { | ||||
| 			return fmt.Errorf("unable to restrict access to kernel files without mount namespace") | ||||
| 		} | ||||
| 		if err := restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus"); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 	pdeathSignal, err := system.GetParentDeathSignal() | ||||
| 	if err != nil { | ||||
| 		return fmt.Errorf("get parent death signal %s", err) | ||||
| 	} | ||||
| 	if err := finalizeNamespace(config); err != nil { | ||||
| 		return fmt.Errorf("finalize namespace %s", err) | ||||
| 	} | ||||
| 	// finalizeNamespace can change user/group which clears the parent death | ||||
| 	// signal, so we restore it here. | ||||
| 	if err := restoreParentDeathSignal(pdeathSignal); err != nil { | ||||
| 		return fmt.Errorf("restore parent death signal %s", err) | ||||
| 	} | ||||
| 	return system.Execv(process.Args[0], process.Args[0:], config.Env) | ||||
| } | ||||
|  | ||||
| func (l *linuxFactory) initUserNs(uncleanRootfs string, process *processArgs) (err error) { | ||||
| 	config := process.Config | ||||
| 	// clear the current processes env and replace it with the environment | ||||
| 	// defined on the config | ||||
| 	if err := loadContainerEnvironment(config); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	// join any namespaces via a path to the namespace fd if provided | ||||
| 	if err := joinExistingNamespaces(config.Namespaces); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	if config.Console != "" { | ||||
| 		if err := console.OpenAndDup("/dev/console"); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 	if _, err := syscall.Setsid(); err != nil { | ||||
| 		return fmt.Errorf("setsid %s", err) | ||||
| 	} | ||||
| 	if config.Console != "" { | ||||
| 		if err := system.Setctty(); err != nil { | ||||
| 			return fmt.Errorf("setctty %s", err) | ||||
| 		} | ||||
| 	} | ||||
| 	if config.WorkingDir == "" { | ||||
| 		config.WorkingDir = "/" | ||||
| 	} | ||||
| 	if err := setupRlimits(config); err != nil { | ||||
| 		return fmt.Errorf("setup rlimits %s", err) | ||||
| 	} | ||||
| 	cloneFlags := config.Namespaces.CloneFlags() | ||||
| 	if config.Hostname != "" { | ||||
| 		// TODO: move validation | ||||
| 		if (cloneFlags & syscall.CLONE_NEWUTS) == 0 { | ||||
| 			return fmt.Errorf("unable to set the hostname without UTS namespace") | ||||
| 		} | ||||
| 		if err := syscall.Sethostname([]byte(config.Hostname)); err != nil { | ||||
| 			return fmt.Errorf("unable to sethostname %q: %s", config.Hostname, err) | ||||
| 		} | ||||
| 	} | ||||
| 	if err := apparmor.ApplyProfile(config.AppArmorProfile); err != nil { | ||||
| 		return fmt.Errorf("set apparmor profile %s: %s", config.AppArmorProfile, err) | ||||
| 	} | ||||
| 	if err := label.SetProcessLabel(config.ProcessLabel); err != nil { | ||||
| 		return fmt.Errorf("set process label %s", err) | ||||
| 	} | ||||
| 	if config.RestrictSys { | ||||
| 		if (cloneFlags & syscall.CLONE_NEWNS) == 0 { | ||||
| 			return fmt.Errorf("unable to restrict access to kernel files without mount namespace") | ||||
| 		} | ||||
| 		if err := restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus"); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 	pdeathSignal, err := system.GetParentDeathSignal() | ||||
| 	if err != nil { | ||||
| 		return fmt.Errorf("get parent death signal %s", err) | ||||
| 	} | ||||
| 	if err := finalizeNamespace(config); err != nil { | ||||
| 		return fmt.Errorf("finalize namespace %s", err) | ||||
| 	} | ||||
| 	// finalizeNamespace can change user/group which clears the parent death | ||||
| 	// signal, so we restore it here. | ||||
| 	if err := restoreParentDeathSignal(pdeathSignal); err != nil { | ||||
| 		return fmt.Errorf("restore parent death signal %s", err) | ||||
| 	} | ||||
| 	return system.Execv(process.Args[0], process.Args[0:], config.Env) | ||||
| } | ||||
|  | ||||
| // restoreParentDeathSignal sets the parent death signal to old. | ||||
| func restoreParentDeathSignal(old int) error { | ||||
| 	if old == 0 { | ||||
| 		return nil | ||||
| 	} | ||||
| 	current, err := system.GetParentDeathSignal() | ||||
| 	if err != nil { | ||||
| 		return fmt.Errorf("get parent death signal %s", err) | ||||
| 	} | ||||
| 	if old == current { | ||||
| 		return nil | ||||
| 	} | ||||
| 	if err := system.ParentDeathSignal(uintptr(old)); err != nil { | ||||
| 		return fmt.Errorf("set parent death signal %s", err) | ||||
| 	} | ||||
| 	// Signal self if parent is already dead. Does nothing if running in a new | ||||
| 	// PID namespace, as Getppid will always return 0. | ||||
| 	if syscall.Getppid() == 1 { | ||||
| 		return syscall.Kill(syscall.Getpid(), syscall.SIGKILL) | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // setupUser changes the groups, gid, and uid for the user inside the container | ||||
| func setupUser(config *configs.Config) error { | ||||
| 	// Set up defaults. | ||||
| 	defaultExecUser := user.ExecUser{ | ||||
| 		Uid:  syscall.Getuid(), | ||||
| 		Gid:  syscall.Getgid(), | ||||
| 		Home: "/", | ||||
| 	} | ||||
| 	passwdPath, err := user.GetPasswdPath() | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	groupPath, err := user.GetGroupPath() | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	execUser, err := user.GetExecUserPath(config.User, &defaultExecUser, passwdPath, groupPath) | ||||
| 	if err != nil { | ||||
| 		return fmt.Errorf("get supplementary groups %s", err) | ||||
| 	} | ||||
| 	suppGroups := append(execUser.Sgids, config.AdditionalGroups...) | ||||
| 	if err := syscall.Setgroups(suppGroups); err != nil { | ||||
| 		return fmt.Errorf("setgroups %s", err) | ||||
| 	} | ||||
| 	if err := system.Setgid(execUser.Gid); err != nil { | ||||
| 		return fmt.Errorf("setgid %s", err) | ||||
| 	} | ||||
| 	if err := system.Setuid(execUser.Uid); err != nil { | ||||
| 		return fmt.Errorf("setuid %s", err) | ||||
| 	} | ||||
| 	// if we didn't get HOME already, set it based on the user's HOME | ||||
| 	if envHome := os.Getenv("HOME"); envHome == "" { | ||||
| 		if err := os.Setenv("HOME", execUser.Home); err != nil { | ||||
| 			return fmt.Errorf("set HOME %s", err) | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // setupNetwork uses the Network config if it is not nil to initialize | ||||
| // the new veth interface inside the container for use by changing the name to eth0 | ||||
| // setting the MTU and IP address along with the default gateway | ||||
| func setupNetwork(config *configs.Config, networkState *configs.NetworkState) error { | ||||
| 	for _, config := range config.Networks { | ||||
| 		strategy, err := network.GetStrategy(config.Type) | ||||
| 		if err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 		err1 := strategy.Initialize(config, networkState) | ||||
| 		if err1 != nil { | ||||
| 			return err1 | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func setupRoute(config *configs.Config) error { | ||||
| 	for _, config := range config.Routes { | ||||
| 		if err := netlink.AddRoute(config.Destination, config.Source, config.Gateway, config.InterfaceName); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func setupRlimits(config *configs.Config) error { | ||||
| 	for _, rlimit := range config.Rlimits { | ||||
| 		l := &syscall.Rlimit{Max: rlimit.Hard, Cur: rlimit.Soft} | ||||
| 		if err := syscall.Setrlimit(rlimit.Type, l); err != nil { | ||||
| 			return fmt.Errorf("error setting rlimit type %v: %v", rlimit.Type, err) | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // finalizeNamespace drops the caps, sets the correct user | ||||
| // and working dir, and closes any leaky file descriptors | ||||
| // before execing the command inside the namespace | ||||
| func finalizeNamespace(config *configs.Config) error { | ||||
| 	// Ensure that all non-standard fds we may have accidentally | ||||
| 	// inherited are marked close-on-exec so they stay out of the | ||||
| 	// container | ||||
| 	if err := utils.CloseExecFrom(3); err != nil { | ||||
| 		return fmt.Errorf("close open file descriptors %s", err) | ||||
| 	} | ||||
| 	// drop capabilities in bounding set before changing user | ||||
| 	if err := capabilities.DropBoundingSet(config.Capabilities); err != nil { | ||||
| 		return fmt.Errorf("drop bounding set %s", err) | ||||
| 	} | ||||
| 	// preserve existing capabilities while we change users | ||||
| 	if err := system.SetKeepCaps(); err != nil { | ||||
| 		return fmt.Errorf("set keep caps %s", err) | ||||
| 	} | ||||
| 	if err := setupUser(config); err != nil { | ||||
| 		return fmt.Errorf("setup user %s", err) | ||||
| 	} | ||||
| 	if err := system.ClearKeepCaps(); err != nil { | ||||
| 		return fmt.Errorf("clear keep caps %s", err) | ||||
| 	} | ||||
| 	// drop all other capabilities | ||||
| 	if err := capabilities.DropCapabilities(config.Capabilities); err != nil { | ||||
| 		return fmt.Errorf("drop capabilities %s", err) | ||||
| 	} | ||||
| 	if config.WorkingDir != "" { | ||||
| 		if err := syscall.Chdir(config.WorkingDir); err != nil { | ||||
| 			return fmt.Errorf("chdir to %s %s", config.WorkingDir, err) | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // loadContainerEnvironment clears the current process environment and then | ||||
| // sets every KEY=VALUE entry from config.Env. An entry that contains no '=' | ||||
| // is rejected with an error; the value is everything after the first '='. | ||||
| func loadContainerEnvironment(config *configs.Config) error { | ||||
| 	os.Clearenv() | ||||
| 	for _, entry := range config.Env { | ||||
| 		sep := strings.Index(entry, "=") | ||||
| 		if sep < 0 { | ||||
| 			return fmt.Errorf("invalid environment '%v'", entry) | ||||
| 		} | ||||
| 		key, value := entry[:sep], entry[sep+1:] | ||||
| 		if err := os.Setenv(key, value); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // joinExistingNamespaces walks the given namespaces and, for each one that | ||||
| // carries a path, opens that path read-only and calls setns on the resulting | ||||
| // fd so the current process joins it. Namespaces without a path are skipped. | ||||
| func joinExistingNamespaces(namespaces []configs.Namespace) error { | ||||
| 	for _, ns := range namespaces { | ||||
| 		if ns.Path == "" { | ||||
| 			continue | ||||
| 		} | ||||
| 		nsFile, err := os.Open(ns.Path) | ||||
| 		if err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 		setnsErr := system.Setns(nsFile.Fd(), uintptr(ns.Syscall())) | ||||
| 		// the fd is closed whether or not setns succeeded | ||||
| 		nsFile.Close() | ||||
| 		if setnsErr != nil { | ||||
| 			return setnsErr | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // setupContainer performs the mount and networking setup for a user | ||||
| // namespace enabled process, because root inside a user namespace does not | ||||
| // have permission to perform these operations itself. | ||||
| // The setup process joins every namespace of the user namespace enabled init | ||||
| // except the user namespace, so it runs as root in the root user namespace | ||||
| // while doing this work. | ||||
| func setupContainer(process *processArgs) error { | ||||
| 	cfg, netState := process.Config, process.NetworkState | ||||
|  | ||||
| 	// TODO : move to validation | ||||
| 	/* | ||||
| 		rootfs, err := utils.ResolveRootfs(container.Rootfs) | ||||
| 		if err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	*/ | ||||
|  | ||||
| 	// replace the current process environment with the one defined on the | ||||
| 	// container | ||||
| 	if err := loadContainerEnvironment(cfg); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
|  | ||||
| 	flags := cfg.Namespaces.CloneFlags() | ||||
| 	hasNetNS := (flags & syscall.CLONE_NEWNET) != 0 | ||||
| 	hasMountNS := (flags & syscall.CLONE_NEWNS) != 0 | ||||
|  | ||||
| 	switch { | ||||
| 	case hasNetNS: | ||||
| 		if err := setupNetwork(cfg, netState); err != nil { | ||||
| 			return fmt.Errorf("setup networking %s", err) | ||||
| 		} | ||||
| 		if err := setupRoute(cfg); err != nil { | ||||
| 			return fmt.Errorf("setup route %s", err) | ||||
| 		} | ||||
| 	case len(cfg.Networks) != 0 || len(cfg.Routes) != 0: | ||||
| 		// network settings were requested but there is no network namespace | ||||
| 		return fmt.Errorf("unable to apply network parameters without network namespace") | ||||
| 	} | ||||
|  | ||||
| 	label.Init() | ||||
|  | ||||
| 	// InitializeMountNamespace() can be executed only for a new mount namespace | ||||
| 	if hasMountNS { | ||||
| 		if err := mount.InitializeMountNamespace(cfg); err != nil { | ||||
| 			return fmt.Errorf("setup mount namespace %s", err) | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // initIn finishes entering an existing container and executes the requested | ||||
| // command. It decodes a configs.Config followed by a processArgs value as JSON | ||||
| // from pipe, runs finalizeSetns with the config, and then execs | ||||
| // process.Args[0] with config.Env as its environment. | ||||
| // | ||||
| // Any error is reported back over pipe as a JSON-encoded initError, and the | ||||
| // pipe is closed before the function returns. | ||||
| func initIn(pipe *os.File) (err error) { | ||||
| 	defer func() { | ||||
| 		// if we have an error during the initialization of the container's init then send it back to the | ||||
| 		// parent process in the form of an initError. | ||||
| 		if err != nil { | ||||
| 			// ensure that any data sent from the parent is consumed so it doesn't | ||||
| 			// receive ECONNRESET when the child writes to the pipe. | ||||
| 			ioutil.ReadAll(pipe) | ||||
| 			// NOTE: the err declared here shadows the named result; inside | ||||
| 			// the if body it is the encode error, not the init error. | ||||
| 			if err := json.NewEncoder(pipe).Encode(initError{ | ||||
| 				Message: err.Error(), | ||||
| 			}); err != nil { | ||||
| 				panic(err) | ||||
| 			} | ||||
| 		} | ||||
| 		// ensure that this pipe is always closed | ||||
| 		pipe.Close() | ||||
| 	}() | ||||
| 	// both values are read from the same decoder, config first | ||||
| 	decoder := json.NewDecoder(pipe) | ||||
| 	var config *configs.Config | ||||
| 	if err := decoder.Decode(&config); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	var process *processArgs | ||||
| 	if err := decoder.Decode(&process); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	if err := finalizeSetns(config); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	if err := system.Execv(process.Args[0], process.Args[0:], config.Env); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	// presumably Execv does not return on success, so this is only reached if | ||||
| 	// it returned nil without replacing the process | ||||
| 	panic("unreachable") | ||||
| } | ||||
|  | ||||
| // finalizeSetns expects that the setns calls have already been made and that | ||||
| // the process has joined the existing namespaces. It loads the container | ||||
| // environment, applies the rlimits, finalizes the namespace, applies the | ||||
| // AppArmor profile and, when one is set, the process label. | ||||
| func finalizeSetns(container *configs.Config) error { | ||||
| 	// clear the current processes env and replace it with the environment defined on the container | ||||
| 	if err := loadContainerEnvironment(container); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
|  | ||||
| 	if err := setupRlimits(container); err != nil { | ||||
| 		return fmt.Errorf("setup rlimits %s", err) | ||||
| 	} | ||||
|  | ||||
| 	if err := finalizeNamespace(container); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
|  | ||||
| 	if err := apparmor.ApplyProfile(container.AppArmorProfile); err != nil { | ||||
| 		return fmt.Errorf("set apparmor profile %s: %s", container.AppArmorProfile, err) | ||||
| 	} | ||||
|  | ||||
| 	// the process label is only applied when the config provides one | ||||
| 	if container.ProcessLabel != "" { | ||||
| 		if err := label.SetProcessLabel(container.ProcessLabel); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	return nil | ||||
| } | ||||
|   | ||||
							
								
								
									
										216
									
								
								linux_init.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										216
									
								
								linux_init.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,216 @@ | ||||
| // +build linux | ||||
|  | ||||
| package libcontainer | ||||
|  | ||||
| import ( | ||||
| 	"encoding/json" | ||||
| 	"fmt" | ||||
| 	"os" | ||||
| 	"strings" | ||||
| 	"syscall" | ||||
|  | ||||
| 	"github.com/docker/libcontainer/configs" | ||||
| 	"github.com/docker/libcontainer/netlink" | ||||
| 	"github.com/docker/libcontainer/network" | ||||
| 	"github.com/docker/libcontainer/security/capabilities" | ||||
| 	"github.com/docker/libcontainer/system" | ||||
| 	"github.com/docker/libcontainer/user" | ||||
| 	"github.com/docker/libcontainer/utils" | ||||
| ) | ||||
|  | ||||
| // initType identifies which init implementation newContainerInit builds. | ||||
| type initType string | ||||
|  | ||||
| // The known init types. newContainerInit maps initSetns to linuxSetnsInit, | ||||
| // initStandard to linuxStandardInit, initUserns to linuxUsernsInit and | ||||
| // initUsernsSideCar to linuxUsernsSideCar. | ||||
| const ( | ||||
| 	initSetns         initType = "setns" | ||||
| 	initStandard      initType = "standard" | ||||
| 	initUserns        initType = "userns" | ||||
| 	initUsernsSideCar initType = "userns_sidecar" | ||||
| ) | ||||
|  | ||||
| // initConfig is the JSON payload that newContainerInit decodes from the init | ||||
| // pipe. It carries the command arguments, the container configuration and the | ||||
| // network state that the init implementations consume. | ||||
| type initConfig struct { | ||||
| 	Args         []string              `json:"args,omitempty"` | ||||
| 	Config       *configs.Config       `json:"config,omitempty"` | ||||
| 	NetworkState *configs.NetworkState `json:"network_state,omitempty"` | ||||
| } | ||||
|  | ||||
| // initer is implemented by each init type returned from newContainerInit. | ||||
| type initer interface { | ||||
| 	// Init runs the initialization and reports any failure. | ||||
| 	Init() error | ||||
| } | ||||
|  | ||||
| // newContainerInit decodes an initConfig as JSON from pipe, loads the | ||||
| // configuration's environment variables into the current process, and returns | ||||
| // the initer that corresponds to t. | ||||
| // | ||||
| // It returns an error when the payload cannot be decoded, when it does not | ||||
| // contain a container configuration, when an environment entry is malformed, | ||||
| // or when t is not a known init type. | ||||
| func newContainerInit(t initType, pipe *os.File) (initer, error) { | ||||
| 	var config *initConfig | ||||
| 	if err := json.NewDecoder(pipe).Decode(&config); err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	// A JSON null payload, or one without the "config" key, decodes without | ||||
| 	// an error but leaves these pointers nil. Report that as an error rather | ||||
| 	// than panicking on the dereference below. | ||||
| 	if config == nil || config.Config == nil { | ||||
| 		return nil, fmt.Errorf("init config is missing the container configuration") | ||||
| 	} | ||||
| 	if err := populateProcessEnvironment(config.Config.Env); err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	switch t { | ||||
| 	case initSetns: | ||||
| 		return &linuxSetnsInit{ | ||||
| 			args:   config.Args, | ||||
| 			config: config.Config, | ||||
| 		}, nil | ||||
| 	case initUserns: | ||||
| 		return &linuxUsernsInit{ | ||||
| 			args:   config.Args, | ||||
| 			config: config.Config, | ||||
| 		}, nil | ||||
| 	case initUsernsSideCar: | ||||
| 		return &linuxUsernsSideCar{ | ||||
| 			config:  config.Config, | ||||
| 			network: config.NetworkState, | ||||
| 		}, nil | ||||
| 	case initStandard: | ||||
| 		// the standard init keeps the whole initConfig, not just its parts | ||||
| 		return &linuxStandardInit{ | ||||
| 			config: config, | ||||
| 		}, nil | ||||
| 	} | ||||
| 	return nil, fmt.Errorf("unknown init type %q", t) | ||||
| } | ||||
|  | ||||
| // populateProcessEnvironment sets each KEY=VALUE entry of env in the current | ||||
| // process's environment. The value is everything after the first '='; an | ||||
| // entry without any '=' is rejected with an error. | ||||
| func populateProcessEnvironment(env []string) error { | ||||
| 	for _, entry := range env { | ||||
| 		sep := strings.Index(entry, "=") | ||||
| 		if sep < 0 { | ||||
| 			return fmt.Errorf("invalid environment '%v'", entry) | ||||
| 		} | ||||
| 		key, value := entry[:sep], entry[sep+1:] | ||||
| 		if err := os.Setenv(key, value); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // finalizeNamespace performs the final privilege-related steps before the | ||||
| // command is exec'd inside the namespace. In order, it marks inherited file | ||||
| // descriptors close-on-exec, drops the capability bounding set, switches to | ||||
| // the configured user, drops all other capabilities, and changes to | ||||
| // config.WorkingDir when one is set. | ||||
| // | ||||
| // Errors from the individual steps are returned as-is; only the chdir failure | ||||
| // is wrapped with additional context. | ||||
| func finalizeNamespace(config *configs.Config) error { | ||||
| 	// Ensure that all non-standard fds we may have accidentally | ||||
| 	// inherited are marked close-on-exec so they stay out of the | ||||
| 	// container | ||||
| 	if err := utils.CloseExecFrom(3); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	// drop capabilities in bounding set before changing user | ||||
| 	if err := capabilities.DropBoundingSet(config.Capabilities); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	// preserve existing capabilities while we change users | ||||
| 	if err := system.SetKeepCaps(); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	if err := setupUser(config); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	// the user switch is done, so the keep-caps setting is cleared again | ||||
| 	if err := system.ClearKeepCaps(); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	// drop all other capabilities | ||||
| 	if err := capabilities.DropCapabilities(config.Capabilities); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	// an empty WorkingDir leaves the current directory untouched | ||||
| 	if config.WorkingDir != "" { | ||||
| 		if err := syscall.Chdir(config.WorkingDir); err != nil { | ||||
| 			return fmt.Errorf("chdir to %s %s", config.WorkingDir, err) | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // joinExistingNamespaces walks the given namespaces and, for each one that | ||||
| // carries a path, opens that path read-only and calls setns on the resulting | ||||
| // fd so the current process joins it. Namespaces without a path are skipped. | ||||
| func joinExistingNamespaces(namespaces []configs.Namespace) error { | ||||
| 	for _, ns := range namespaces { | ||||
| 		if ns.Path == "" { | ||||
| 			continue | ||||
| 		} | ||||
| 		nsFile, err := os.Open(ns.Path) | ||||
| 		if err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 		setnsErr := system.Setns(nsFile.Fd(), uintptr(ns.Syscall())) | ||||
| 		// the fd is closed whether or not setns succeeded | ||||
| 		nsFile.Close() | ||||
| 		if setnsErr != nil { | ||||
| 			return setnsErr | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // setupUser switches the process to the user described by config.User: it | ||||
| // sets the supplementary groups (including config.AdditionalGroups), then the | ||||
| // gid, then the uid. If HOME is not already set in the environment it is set | ||||
| // to the resolved user's home directory. | ||||
| func setupUser(config *configs.Config) error { | ||||
| 	// Fallback identity: the current uid/gid with "/" as the home directory. | ||||
| 	fallback := &user.ExecUser{ | ||||
| 		Uid:  syscall.Getuid(), | ||||
| 		Gid:  syscall.Getgid(), | ||||
| 		Home: "/", | ||||
| 	} | ||||
| 	passwd, err := user.GetPasswdPath() | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	group, err := user.GetGroupPath() | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	resolved, err := user.GetExecUserPath(config.User, fallback, passwd, group) | ||||
| 	if err != nil { | ||||
| 		return fmt.Errorf("get supplementary groups %s", err) | ||||
| 	} | ||||
| 	allGroups := append(resolved.Sgids, config.AdditionalGroups...) | ||||
| 	if err := syscall.Setgroups(allGroups); err != nil { | ||||
| 		return fmt.Errorf("setgroups %s", err) | ||||
| 	} | ||||
| 	if err := system.Setgid(resolved.Gid); err != nil { | ||||
| 		return fmt.Errorf("setgid %s", err) | ||||
| 	} | ||||
| 	if err := system.Setuid(resolved.Uid); err != nil { | ||||
| 		return fmt.Errorf("setuid %s", err) | ||||
| 	} | ||||
| 	// a HOME that is already present in the environment wins | ||||
| 	if os.Getenv("HOME") != "" { | ||||
| 		return nil | ||||
| 	} | ||||
| 	if err := os.Setenv("HOME", resolved.Home); err != nil { | ||||
| 		return fmt.Errorf("set HOME %s", err) | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // setupNetwork initializes every entry of config.Networks: it asks | ||||
| // network.GetStrategy for the strategy matching the entry's type and calls | ||||
| // its Initialize method with the entry and networkState. It stops at the | ||||
| // first error. | ||||
| func setupNetwork(config *configs.Config, networkState *configs.NetworkState) error { | ||||
| 	for _, netConf := range config.Networks { | ||||
| 		strategy, err := network.GetStrategy(netConf.Type) | ||||
| 		if err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 		if err := strategy.Initialize(netConf, networkState); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // setupRoute adds every route listed in config.Routes through | ||||
| // netlink.AddRoute and stops at the first failure. | ||||
| func setupRoute(config *configs.Config) error { | ||||
| 	for _, route := range config.Routes { | ||||
| 		err := netlink.AddRoute(route.Destination, route.Source, route.Gateway, route.InterfaceName) | ||||
| 		if err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // setupRlimits applies every resource limit in config.Rlimits to the current | ||||
| // process, using the entry's Soft value as the current limit and its Hard | ||||
| // value as the maximum. | ||||
| func setupRlimits(config *configs.Config) error { | ||||
| 	for _, rl := range config.Rlimits { | ||||
| 		limit := syscall.Rlimit{Cur: rl.Soft, Max: rl.Hard} | ||||
| 		if err := syscall.Setrlimit(rl.Type, &limit); err != nil { | ||||
| 			return fmt.Errorf("error setting rlimit type %v: %v", rl.Type, err) | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
							
								
								
									
										35
									
								
								linux_setns_init.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										35
									
								
								linux_setns_init.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,35 @@ | ||||
| // +build linux | ||||
|  | ||||
| package libcontainer | ||||
|  | ||||
| import ( | ||||
| 	"github.com/docker/libcontainer/apparmor" | ||||
| 	"github.com/docker/libcontainer/configs" | ||||
| 	"github.com/docker/libcontainer/label" | ||||
| 	"github.com/docker/libcontainer/system" | ||||
| ) | ||||
|  | ||||
| // linuxSetnsInit performs the container's initialization for running a new process | ||||
| // inside an existing container. | ||||
| type linuxSetnsInit struct { | ||||
| 	// args is the command to exec; args[0] is the program. | ||||
| 	args   []string | ||||
| 	// config is the container configuration applied before the exec. | ||||
| 	config *configs.Config | ||||
| } | ||||
|  | ||||
| // Init applies the rlimits, finalizes the namespace, applies the AppArmor | ||||
| // profile and, when one is set, the process label, and then execs args[0] | ||||
| // with the configured environment. | ||||
| func (l *linuxSetnsInit) Init() error { | ||||
| 	cfg := l.config | ||||
| 	if err := setupRlimits(cfg); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	if err := finalizeNamespace(cfg); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	if err := apparmor.ApplyProfile(cfg.AppArmorProfile); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	// the process label is only applied when the config provides one | ||||
| 	if cfg.ProcessLabel != "" { | ||||
| 		if err := label.SetProcessLabel(cfg.ProcessLabel); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 	program := l.args[0] | ||||
| 	return system.Execv(program, l.args, cfg.Env) | ||||
| } | ||||
							
								
								
									
										90
									
								
								linux_standard_init.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										90
									
								
								linux_standard_init.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,90 @@ | ||||
| // +build linux | ||||
|  | ||||
| package libcontainer | ||||
|  | ||||
| import ( | ||||
| 	"syscall" | ||||
|  | ||||
| 	"github.com/docker/libcontainer/apparmor" | ||||
| 	"github.com/docker/libcontainer/configs" | ||||
| 	consolepkg "github.com/docker/libcontainer/console" | ||||
| 	"github.com/docker/libcontainer/label" | ||||
| 	"github.com/docker/libcontainer/mount" | ||||
| 	"github.com/docker/libcontainer/security/restrict" | ||||
| 	"github.com/docker/libcontainer/system" | ||||
| ) | ||||
|  | ||||
| // linuxStandardInit is the initer that newContainerInit returns for | ||||
| // initStandard. It keeps the complete initConfig (arguments, container | ||||
| // configuration and network state). | ||||
| type linuxStandardInit struct { | ||||
| 	config *initConfig | ||||
| } | ||||
|  | ||||
| // Init runs the standard container initialization and then execs the | ||||
| // configured command. It joins any namespaces given by path, sets up the | ||||
| // console and a new session, configures networking, routes and rlimits, | ||||
| // initializes the mount namespace when a new one was requested, sets the | ||||
| // hostname, applies the AppArmor profile and process label, optionally | ||||
| // restricts parts of proc, finalizes the namespace and restores the parent | ||||
| // death signal before the exec. | ||||
| func (l *linuxStandardInit) Init() error { | ||||
| 	cfg := l.config.Config | ||||
| 	// join any namespaces via a path to the namespace fd if provided | ||||
| 	if err := joinExistingNamespaces(cfg.Namespaces); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	consolePath := cfg.Console | ||||
| 	hasConsole := consolePath != "" | ||||
| 	if hasConsole { | ||||
| 		if err := consolepkg.OpenAndDup(consolePath); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 	if _, err := syscall.Setsid(); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	if hasConsole { | ||||
| 		if err := system.Setctty(); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 	if err := setupNetwork(cfg, l.config.NetworkState); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	if err := setupRoute(cfg); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	if err := setupRlimits(cfg); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	label.Init() | ||||
| 	// InitializeMountNamespace() can be executed only for a new mount namespace | ||||
| 	if cfg.Namespaces.Contains(configs.NEWNS) { | ||||
| 		if err := mount.InitializeMountNamespace(cfg); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 	if cfg.Hostname != "" { | ||||
| 		if err := syscall.Sethostname([]byte(cfg.Hostname)); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 	if err := apparmor.ApplyProfile(cfg.AppArmorProfile); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	if err := label.SetProcessLabel(cfg.ProcessLabel); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	if cfg.RestrictSys { | ||||
| 		if err := restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus"); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 	// remember the parent death signal before the user/group change | ||||
| 	pdeath, err := system.GetParentDeathSignal() | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	if err := finalizeNamespace(cfg); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	// finalizeNamespace can change user/group which clears the parent death | ||||
| 	// signal, so we restore it here. | ||||
| 	if err := pdeath.Restore(); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	// Signal self if parent is already dead. Does nothing if running in a new | ||||
| 	// PID namespace, as Getppid will always return 0. | ||||
| 	if syscall.Getppid() == 1 { | ||||
| 		return syscall.Kill(syscall.Getpid(), syscall.SIGKILL) | ||||
| 	} | ||||
| 	argv := l.config.Args | ||||
| 	return system.Execv(argv[0], argv, cfg.Env) | ||||
| } | ||||
							
								
								
									
										80
									
								
								linux_userns_init.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										80
									
								
								linux_userns_init.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,80 @@ | ||||
| // +build linux | ||||
|  | ||||
| package libcontainer | ||||
|  | ||||
| import ( | ||||
| 	"syscall" | ||||
|  | ||||
| 	"github.com/docker/libcontainer/apparmor" | ||||
| 	"github.com/docker/libcontainer/configs" | ||||
| 	consolepkg "github.com/docker/libcontainer/console" | ||||
| 	"github.com/docker/libcontainer/label" | ||||
| 	"github.com/docker/libcontainer/security/restrict" | ||||
| 	"github.com/docker/libcontainer/system" | ||||
| ) | ||||
|  | ||||
| // linuxUsernsInit is the initer that newContainerInit returns for | ||||
| // initUserns. | ||||
| type linuxUsernsInit struct { | ||||
| 	// args is the command to exec; args[0] is the program. | ||||
| 	args   []string | ||||
| 	// config is the container configuration applied before the exec. | ||||
| 	config *configs.Config | ||||
| } | ||||
|  | ||||
| // Init runs the initialization for the userns init type and then execs the | ||||
| // configured command. It joins any namespaces given by path, sets up the | ||||
| // console and a new session, defaults the working directory to "/", applies | ||||
| // rlimits, hostname, AppArmor profile and process label, optionally restricts | ||||
| // parts of proc, finalizes the namespace and restores the parent death signal | ||||
| // before the exec. | ||||
| func (l *linuxUsernsInit) Init() error { | ||||
| 	cfg := l.config | ||||
| 	// join any namespaces via a path to the namespace fd if provided | ||||
| 	if err := joinExistingNamespaces(cfg.Namespaces); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	hasConsole := cfg.Console != "" | ||||
| 	if hasConsole { | ||||
| 		// the console is always opened at this fixed path here | ||||
| 		if err := consolepkg.OpenAndDup("/dev/console"); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 	if _, err := syscall.Setsid(); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	if hasConsole { | ||||
| 		if err := system.Setctty(); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 	// default the working directory on the shared config | ||||
| 	if cfg.WorkingDir == "" { | ||||
| 		cfg.WorkingDir = "/" | ||||
| 	} | ||||
| 	if err := setupRlimits(cfg); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	if cfg.Hostname != "" { | ||||
| 		if err := syscall.Sethostname([]byte(cfg.Hostname)); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 	if err := apparmor.ApplyProfile(cfg.AppArmorProfile); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	if err := label.SetProcessLabel(cfg.ProcessLabel); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	if cfg.RestrictSys { | ||||
| 		if err := restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus"); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 	// remember the parent death signal before the user/group change | ||||
| 	pdeath, err := system.GetParentDeathSignal() | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	if err := finalizeNamespace(cfg); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	// finalizeNamespace can change user/group which clears the parent death | ||||
| 	// signal, so we restore it here. | ||||
| 	if err := pdeath.Restore(); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	// Signal self if parent is already dead. Does nothing if running in a new | ||||
| 	// PID namespace, as Getppid will always return 0. | ||||
| 	if syscall.Getppid() == 1 { | ||||
| 		return syscall.Kill(syscall.Getpid(), syscall.SIGKILL) | ||||
| 	} | ||||
| 	program := l.args[0] | ||||
| 	return system.Execv(program, l.args, cfg.Env) | ||||
| } | ||||
							
								
								
									
										37
									
								
								linux_userns_sidecar_init.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										37
									
								
								linux_userns_sidecar_init.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,37 @@ | ||||
| // +build linux | ||||
|  | ||||
| package libcontainer | ||||
|  | ||||
| import ( | ||||
| 	"github.com/docker/libcontainer/configs" | ||||
| 	"github.com/docker/libcontainer/label" | ||||
| 	"github.com/docker/libcontainer/mount" | ||||
| ) | ||||
|  | ||||
| // linuxUsernsSideCar is run to set up mounts and networking related operations | ||||
| // for a user namespace enabled process, as a user namespace root doesn't | ||||
| // have permissions to perform these operations. | ||||
| // The setup process joins all the namespaces of the user namespace enabled init | ||||
| // except the user namespace, so it runs as root in the root user namespace | ||||
| // to perform these operations. | ||||
| type linuxUsernsSideCar struct { | ||||
| 	// config is the container configuration whose networks, routes and | ||||
| 	// mounts are set up. | ||||
| 	config  *configs.Config | ||||
| 	// network is the network state handed to setupNetwork. | ||||
| 	network *configs.NetworkState | ||||
| } | ||||
|  | ||||
| // Init performs the network, route, label and mount setup on behalf of a | ||||
| // user namespace enabled container. The mount namespace is only initialized | ||||
| // when the configuration requests a new mount namespace. | ||||
| func (l *linuxUsernsSideCar) Init() error { | ||||
| 	if err := setupNetwork(l.config, l.network); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	if err := setupRoute(l.config); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	label.Init() | ||||
| 	// InitializeMountNamespace() can be executed only for a new mount | ||||
| 	// namespace, so the check must be on NEWNS (the mount namespace) and not | ||||
| 	// on the network namespace. | ||||
| 	if l.config.Namespaces.Contains(configs.NEWNS) { | ||||
| 		if err := mount.InitializeMountNamespace(l.config); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
| @@ -22,14 +22,14 @@ struct clone_arg { | ||||
| 	 * Reserve some space for clone() to locate arguments | ||||
| 	 * and retcode in this place | ||||
| 	 */ | ||||
| 	char stack[4096] __attribute__((aligned (8))); | ||||
| 	char stack[4096] __attribute__ ((aligned(8))); | ||||
| 	char stack_ptr[0]; | ||||
| 	jmp_buf *env; | ||||
| }; | ||||
|  | ||||
| static int child_func(void *_arg) | ||||
| { | ||||
| 	struct clone_arg *arg = (struct clone_arg *) _arg; | ||||
| 	struct clone_arg *arg = (struct clone_arg *)_arg; | ||||
| 	longjmp(*arg->env, 1); | ||||
| } | ||||
|  | ||||
| @@ -47,8 +47,8 @@ int setns(int fd, int nstype) | ||||
| #endif | ||||
| #endif | ||||
|  | ||||
| static int clone_parent(jmp_buf *env) __attribute__ ((noinline)); | ||||
| static int clone_parent(jmp_buf *env) | ||||
| static int clone_parent(jmp_buf * env) __attribute__ ((noinline)); | ||||
| static int clone_parent(jmp_buf * env) | ||||
| { | ||||
| 	struct clone_arg ca; | ||||
| 	int child; | ||||
| @@ -100,7 +100,8 @@ void nsexec() | ||||
|  | ||||
| 		fd = openat(tfd, namespaces[i], O_RDONLY); | ||||
| 		if (fd == -1) { | ||||
| 			pr_perror("Failed to open ns file %s for ns %s", buf, namespaces[i]); | ||||
| 			pr_perror("Failed to open ns file %s for ns %s", buf, | ||||
| 				  namespaces[i]); | ||||
| 			exit(1); | ||||
| 		} | ||||
| 		// Set the namespace. | ||||
|   | ||||
| @@ -10,7 +10,6 @@ func Capture(userSkip int) Stacktrace { | ||||
| 		skip   = userSkip + 1 // add one for our own function | ||||
| 		frames []Frame | ||||
| 	) | ||||
|  | ||||
| 	for i := skip; ; i++ { | ||||
| 		pc, file, line, ok := runtime.Caller(i) | ||||
| 		if !ok { | ||||
| @@ -18,7 +17,6 @@ func Capture(userSkip int) Stacktrace { | ||||
| 		} | ||||
| 		frames = append(frames, NewFrame(pc, file, line)) | ||||
| 	} | ||||
|  | ||||
| 	return Stacktrace{ | ||||
| 		Frames: frames, | ||||
| 	} | ||||
|   | ||||
| @@ -8,6 +8,26 @@ import ( | ||||
| 	"unsafe" | ||||
| ) | ||||
|  | ||||
| // ParentDeathSignal is a parent death signal number as returned by | ||||
| // GetParentDeathSignal. | ||||
| type ParentDeathSignal int | ||||
|  | ||||
| // Restore sets p as the parent death signal again when the process's current | ||||
| // setting differs from it. A zero p means there is nothing to restore. | ||||
| func (p ParentDeathSignal) Restore() error { | ||||
| 	if p == 0 { | ||||
| 		return nil | ||||
| 	} | ||||
| 	switch current, err := GetParentDeathSignal(); { | ||||
| 	case err != nil: | ||||
| 		return err | ||||
| 	case current == p: | ||||
| 		// already in place | ||||
| 		return nil | ||||
| 	} | ||||
| 	return p.Set() | ||||
| } | ||||
|  | ||||
| // Set passes p to SetParentDeathSignal. | ||||
| func (p ParentDeathSignal) Set() error { | ||||
| 	return SetParentDeathSignal(uintptr(p)) | ||||
| } | ||||
|  | ||||
| func Execv(cmd string, args []string, env []string) error { | ||||
| 	name, err := exec.LookPath(cmd) | ||||
| 	if err != nil { | ||||
| @@ -17,23 +37,20 @@ func Execv(cmd string, args []string, env []string) error { | ||||
| 	return syscall.Exec(name, args, env) | ||||
| } | ||||
|  | ||||
| func ParentDeathSignal(sig uintptr) error { | ||||
| func SetParentDeathSignal(sig uintptr) error { | ||||
| 	if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_PDEATHSIG, sig, 0); err != 0 { | ||||
| 		return err | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func GetParentDeathSignal() (int, error) { | ||||
| func GetParentDeathSignal() (ParentDeathSignal, error) { | ||||
| 	var sig int | ||||
|  | ||||
| 	_, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_GET_PDEATHSIG, uintptr(unsafe.Pointer(&sig)), 0) | ||||
|  | ||||
| 	if err != 0 { | ||||
| 		return -1, err | ||||
| 	} | ||||
|  | ||||
| 	return sig, nil | ||||
| 	return ParentDeathSignal(sig), nil | ||||
| } | ||||
|  | ||||
| func SetKeepCaps() error { | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Michael Crosby
					Michael Crosby