Files
runc/libcontainer/state_linux.go
Kir Kolyshkin 9583b3d1c2 libct: move killing logic to container.Signal
By default, the container has its own PID namespace, and killing (with
SIGKILL) its init process from the parent PID namespace also kills all
the other processes.

Obviously, it does not work that way when the container is sharing its
PID namespace with the host or another container, since init is no
longer special (it's not PID 1). In this case, killing container's init
will result in a bunch of other processes left running (and thus the
inability to remove the cgroup).

The solution to the above problem is killing all the container
processes, not just init.

The problem with the current implementation is, the killing logic is
implemented in libcontainer's initProcess.wait, and thus only available
to libcontainer users, but not the runc kill command (which uses
nonChildProcess.kill and does not use wait at all). So, some workarounds
exist:
 - func destroy(c *Container) calls signalAllProcesses;
 - runc kill implements -a flag.

This code became very tangled over time. Let's simplify things by moving
the killing all processes from initProcess.wait to container.Signal,
and documents the new behavior.

In essence, this also makes `runc kill` to automatically kill all container
processes when the container does not have its own PID namespace.
Document that as well.

Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2023-06-08 09:29:25 -07:00

237 lines
4.7 KiB
Go

package libcontainer
import (
"fmt"
"os"
"path/filepath"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runtime-spec/specs-go"
"golang.org/x/sys/unix"
)
func newStateTransitionError(from, to containerState) error {
return &stateTransitionError{
From: from.status().String(),
To: to.status().String(),
}
}
// stateTransitionError is returned when an invalid state transition happens from one
// state to another.
type stateTransitionError struct {
From string
To string
}
func (s *stateTransitionError) Error() string {
return fmt.Sprintf("invalid state transition from %s to %s", s.From, s.To)
}
type containerState interface {
transition(containerState) error
destroy() error
status() Status
}
func destroy(c *Container) error {
err := c.cgroupManager.Destroy()
if c.intelRdtManager != nil {
if ierr := c.intelRdtManager.Destroy(); err == nil {
err = ierr
}
}
if rerr := os.RemoveAll(c.root); err == nil {
err = rerr
}
c.initProcess = nil
if herr := runPoststopHooks(c); err == nil {
err = herr
}
c.state = &stoppedState{c: c}
return err
}
func runPoststopHooks(c *Container) error {
hooks := c.config.Hooks
if hooks == nil {
return nil
}
s, err := c.currentOCIState()
if err != nil {
return err
}
s.Status = specs.StateStopped
if err := hooks[configs.Poststop].RunHooks(s); err != nil {
return err
}
return nil
}
// stoppedState represents a container is a stopped/destroyed state.
type stoppedState struct {
c *Container
}
func (b *stoppedState) status() Status {
return Stopped
}
func (b *stoppedState) transition(s containerState) error {
switch s.(type) {
case *runningState, *restoredState:
b.c.state = s
return nil
case *stoppedState:
return nil
}
return newStateTransitionError(b, s)
}
func (b *stoppedState) destroy() error {
return destroy(b.c)
}
// runningState represents a container that is currently running.
type runningState struct {
c *Container
}
func (r *runningState) status() Status {
return Running
}
func (r *runningState) transition(s containerState) error {
switch s.(type) {
case *stoppedState:
if r.c.runType() == Running {
return ErrRunning
}
r.c.state = s
return nil
case *pausedState:
r.c.state = s
return nil
case *runningState:
return nil
}
return newStateTransitionError(r, s)
}
func (r *runningState) destroy() error {
if r.c.runType() == Running {
return ErrRunning
}
return destroy(r.c)
}
type createdState struct {
c *Container
}
func (i *createdState) status() Status {
return Created
}
func (i *createdState) transition(s containerState) error {
switch s.(type) {
case *runningState, *pausedState, *stoppedState:
i.c.state = s
return nil
case *createdState:
return nil
}
return newStateTransitionError(i, s)
}
func (i *createdState) destroy() error {
_ = i.c.initProcess.signal(unix.SIGKILL)
return destroy(i.c)
}
// pausedState represents a container that is currently pause. It cannot be destroyed in a
// paused state and must transition back to running first.
type pausedState struct {
c *Container
}
func (p *pausedState) status() Status {
return Paused
}
func (p *pausedState) transition(s containerState) error {
switch s.(type) {
case *runningState, *stoppedState:
p.c.state = s
return nil
case *pausedState:
return nil
}
return newStateTransitionError(p, s)
}
func (p *pausedState) destroy() error {
t := p.c.runType()
if t != Running && t != Created {
if err := p.c.cgroupManager.Freeze(configs.Thawed); err != nil {
return err
}
return destroy(p.c)
}
return ErrPaused
}
// restoredState is the same as the running state but also has associated checkpoint
// information that maybe need destroyed when the container is stopped and destroy is called.
type restoredState struct {
imageDir string
c *Container
}
func (r *restoredState) status() Status {
return Running
}
func (r *restoredState) transition(s containerState) error {
switch s.(type) {
case *stoppedState, *runningState:
return nil
}
return newStateTransitionError(r, s)
}
func (r *restoredState) destroy() error {
if _, err := os.Stat(filepath.Join(r.c.root, "checkpoint")); err != nil {
if !os.IsNotExist(err) {
return err
}
}
return destroy(r.c)
}
// loadedState is used whenever a container is restored, loaded, or setting additional
// processes inside and it should not be destroyed when it is exiting.
type loadedState struct {
c *Container
s Status
}
func (n *loadedState) status() Status {
return n.s
}
func (n *loadedState) transition(s containerState) error {
n.c.state = s
return nil
}
func (n *loadedState) destroy() error {
if err := n.c.refreshState(); err != nil {
return err
}
return n.c.state.destroy()
}