mirror of
https://github.com/opencontainers/runc.git
synced 2025-10-03 22:56:47 +08:00
start: don't kill runc init too early
The stars can be aligned in a way that results in runc to leave a stale
bind mount in container's state directory, which manifests itself later,
while trying to remove the container, in an error like this:
> remove /run/runc/test2: unlinkat /run/runc/test2/runc.W24K2t: device or resource busy
The stale mount happens because runc start/run/exec kills runc init
while it is inside ensure_cloned_binary(). One such scenario is when
a unified cgroup resource is specified for cgroup v1, a cgroup manager's
Apply returns an error (as of commit b006f4a180
), and when
(*initProcess).start() kills runc init just after it was started.
One solution is NOT to kill runc init too early. To achieve that,
amend the libcontainer/nsenter code to send a \0 byte to signal
that it is past the initial setup, and make start() (for both
run/start and exec) wait for this byte before proceeding with
kill on an error path.
While at it, improve some error messages.
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
This commit is contained in:
@@ -3,6 +3,7 @@ package nsenter
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
@@ -48,6 +49,7 @@ func TestNsenterValidPaths(t *testing.T) {
|
||||
if err := cmd.Start(); err != nil {
|
||||
t.Fatalf("nsenter failed to start %v", err)
|
||||
}
|
||||
|
||||
// write cloneFlags
|
||||
r := nl.NewNetlinkRequest(int(libcontainer.InitMsg), 0)
|
||||
r.AddData(&libcontainer.Int32msg{
|
||||
@@ -62,6 +64,8 @@ func TestNsenterValidPaths(t *testing.T) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
initWaiter(t, parent)
|
||||
|
||||
decoder := json.NewDecoder(parent)
|
||||
var pid *pid
|
||||
|
||||
@@ -118,6 +122,7 @@ func TestNsenterInvalidPaths(t *testing.T) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
initWaiter(t, parent)
|
||||
if err := cmd.Wait(); err == nil {
|
||||
t.Fatalf("nsenter exits with a zero exit status")
|
||||
}
|
||||
@@ -158,6 +163,7 @@ func TestNsenterIncorrectPathType(t *testing.T) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
initWaiter(t, parent)
|
||||
if err := cmd.Wait(); err == nil {
|
||||
t.Fatalf("nsenter exits with a zero exit status")
|
||||
}
|
||||
@@ -206,6 +212,8 @@ func TestNsenterChildLogging(t *testing.T) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
initWaiter(t, parent)
|
||||
|
||||
logsDecoder := json.NewDecoder(logread)
|
||||
var logentry *logentry
|
||||
|
||||
@@ -235,3 +243,19 @@ func newPipe() (parent *os.File, child *os.File, err error) {
|
||||
}
|
||||
return os.NewFile(uintptr(fds[1]), "parent"), os.NewFile(uintptr(fds[0]), "child"), nil
|
||||
}
|
||||
|
||||
// initWaiter reads back the initial \0 from runc init
|
||||
func initWaiter(t *testing.T, r io.Reader) {
|
||||
inited := make([]byte, 1)
|
||||
n, err := r.Read(inited)
|
||||
if err == nil {
|
||||
if n < 1 {
|
||||
err = errors.New("short read")
|
||||
} else if inited[0] != 0 {
|
||||
err = fmt.Errorf("unexpected %d != 0", inited[0])
|
||||
} else {
|
||||
return
|
||||
}
|
||||
}
|
||||
t.Fatalf("waiting for init preliminary setup: %v", err)
|
||||
}
|
||||
|
@@ -598,6 +598,13 @@ void nsexec(void)
|
||||
if (ensure_cloned_binary() < 0)
|
||||
bail("could not ensure we are a cloned binary");
|
||||
|
||||
/*
|
||||
* Inform the parent we're past initial setup.
|
||||
* For the other side of this, see initWaiter.
|
||||
*/
|
||||
if (write(pipenum, "", 1) != 1)
|
||||
bail("could not inform the parent we are past initial setup");
|
||||
|
||||
write_log(DEBUG, "nsexec started");
|
||||
|
||||
/* Parse all of the netlink configuration. */
|
||||
|
@@ -98,23 +98,34 @@ func (p *setnsProcess) start() (retErr error) {
|
||||
if err != nil {
|
||||
return newSystemErrorWithCause(err, "starting setns process")
|
||||
}
|
||||
|
||||
waitInit := initWaiter(p.messageSockPair.parent)
|
||||
defer func() {
|
||||
if retErr != nil {
|
||||
if newOom, err := p.manager.OOMKillCount(); err == nil && newOom != oom {
|
||||
// Someone in this cgroup was killed, this _might_ be us.
|
||||
retErr = newSystemErrorWithCause(retErr, "possibly OOM-killed")
|
||||
}
|
||||
werr := <-waitInit
|
||||
if werr != nil {
|
||||
logrus.WithError(werr).Warn()
|
||||
}
|
||||
err := ignoreTerminateErrors(p.terminate())
|
||||
if err != nil {
|
||||
logrus.WithError(err).Warn("unable to terminate setnsProcess")
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
if p.bootstrapData != nil {
|
||||
if _, err := io.Copy(p.messageSockPair.parent, p.bootstrapData); err != nil {
|
||||
return newSystemErrorWithCause(err, "copying bootstrap data to pipe")
|
||||
}
|
||||
}
|
||||
err = <-waitInit
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := p.execSetns(); err != nil {
|
||||
return newSystemErrorWithCause(err, "executing setns process")
|
||||
}
|
||||
@@ -326,9 +337,13 @@ func (p *initProcess) start() (retErr error) {
|
||||
p.process.ops = nil
|
||||
return newSystemErrorWithCause(err, "starting init process command")
|
||||
}
|
||||
|
||||
waitInit := initWaiter(p.messageSockPair.parent)
|
||||
defer func() {
|
||||
if retErr != nil {
|
||||
// init might be killed by the kernel's OOM killer.
|
||||
// Find out if init is killed by the kernel's OOM killer.
|
||||
// Get the count before killing init as otherwise cgroup
|
||||
// might be removed by systemd.
|
||||
oom, err := p.manager.OOMKillCount()
|
||||
if err != nil {
|
||||
logrus.WithError(err).Warn("unable to get oom kill count")
|
||||
@@ -346,7 +361,12 @@ func (p *initProcess) start() (retErr error) {
|
||||
}
|
||||
}
|
||||
|
||||
// terminate the process to ensure we can remove cgroups
|
||||
werr := <-waitInit
|
||||
if werr != nil {
|
||||
logrus.WithError(werr).Warn()
|
||||
}
|
||||
|
||||
// Terminate the process to ensure we can remove cgroups.
|
||||
if err := ignoreTerminateErrors(p.terminate()); err != nil {
|
||||
logrus.WithError(err).Warn("unable to terminate initProcess")
|
||||
}
|
||||
@@ -372,6 +392,11 @@ func (p *initProcess) start() (retErr error) {
|
||||
if _, err := io.Copy(p.messageSockPair.parent, p.bootstrapData); err != nil {
|
||||
return newSystemErrorWithCause(err, "copying bootstrap data to pipe")
|
||||
}
|
||||
err = <-waitInit
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
childPid, err := p.getChildPid()
|
||||
if err != nil {
|
||||
return newSystemErrorWithCause(err, "getting the final child's pid from pipe")
|
||||
@@ -674,3 +699,28 @@ func (p *Process) InitializeIO(rootuid, rootgid int) (i *IO, err error) {
|
||||
}
|
||||
return i, nil
|
||||
}
|
||||
|
||||
// initWaiter returns a channel to wait on for making sure
|
||||
// runc init has finished the initial setup.
|
||||
func initWaiter(r io.Reader) chan error {
|
||||
ch := make(chan error, 1)
|
||||
go func() {
|
||||
defer close(ch)
|
||||
|
||||
inited := make([]byte, 1)
|
||||
n, err := r.Read(inited)
|
||||
if err == nil {
|
||||
if n < 1 {
|
||||
err = errors.New("short read")
|
||||
} else if inited[0] != 0 {
|
||||
err = fmt.Errorf("unexpected %d != 0", inited[0])
|
||||
} else {
|
||||
ch <- nil
|
||||
return
|
||||
}
|
||||
}
|
||||
ch <- newSystemErrorWithCause(err, "waiting for init preliminary setup")
|
||||
}()
|
||||
|
||||
return ch
|
||||
}
|
||||
|
Reference in New Issue
Block a user