Skip to content
This repository was archived by the owner on Oct 13, 2023. It is now read-only.

Commit fdfe818

Browse files
authored
Merge pull request #296 from thaJeztah/19.03_backport_exec_hang
[19.03 backport] Handle blocked I/O of exec'd processes Upstream-commit: 9a9ff4441831c7656e46639c1d8c60a99e488fb3 Component: engine
2 parents a2d782b + bb50508 commit fdfe818

6 files changed

Lines changed: 45 additions & 115 deletions

File tree

components/engine/container/container.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -730,7 +730,7 @@ func (i *rio) Close() error {
730730
}
731731

732732
func (i *rio) Wait() {
733-
i.sc.Wait()
733+
i.sc.Wait(context.Background())
734734

735735
i.IO.Wait()
736736
}

components/engine/container/stream/streams.go

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package stream // import "github.com/docker/docker/container/stream"
22

33
import (
4+
"context"
45
"fmt"
56
"io"
67
"io/ioutil"
@@ -24,11 +25,12 @@ import (
2425
// copied and delivered to all StdoutPipe and StderrPipe consumers, using
2526
// a kind of "broadcaster".
2627
type Config struct {
27-
sync.WaitGroup
28+
wg sync.WaitGroup
2829
stdout *broadcaster.Unbuffered
2930
stderr *broadcaster.Unbuffered
3031
stdin io.ReadCloser
3132
stdinPipe io.WriteCloser
33+
dio *cio.DirectIO
3234
}
3335

3436
// NewConfig creates a stream config and initializes
@@ -115,14 +117,15 @@ func (c *Config) CloseStreams() error {
115117

116118
// CopyToPipe connects streamconfig with a libcontainerd.IOPipe
117119
func (c *Config) CopyToPipe(iop *cio.DirectIO) {
120+
c.dio = iop
118121
copyFunc := func(w io.Writer, r io.ReadCloser) {
119-
c.Add(1)
122+
c.wg.Add(1)
120123
go func() {
121124
if _, err := pools.Copy(w, r); err != nil {
122125
logrus.Errorf("stream copy error: %v", err)
123126
}
124127
r.Close()
125-
c.Done()
128+
c.wg.Done()
126129
}()
127130
}
128131

@@ -144,3 +147,23 @@ func (c *Config) CopyToPipe(iop *cio.DirectIO) {
144147
}
145148
}
146149
}
150+
151+
// Wait for the stream to close
152+
// Wait supports timeouts via the context to unblock and forcefully
153+
// close the io streams
154+
func (c *Config) Wait(ctx context.Context) {
155+
done := make(chan struct{}, 1)
156+
go func() {
157+
c.wg.Wait()
158+
close(done)
159+
}()
160+
select {
161+
case <-done:
162+
case <-ctx.Done():
163+
if c.dio != nil {
164+
c.dio.Cancel()
165+
c.dio.Wait()
166+
c.dio.Close()
167+
}
168+
}
169+
}

components/engine/daemon/exec/exec.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package exec // import "github.com/docker/docker/daemon/exec"
22

33
import (
4+
"context"
45
"runtime"
56
"sync"
67

@@ -58,7 +59,7 @@ func (i *rio) Close() error {
5859
}
5960

6061
func (i *rio) Wait() {
61-
i.sc.Wait()
62+
i.sc.Wait(context.Background())
6263

6364
i.IO.Wait()
6465
}

components/engine/daemon/monitor.go

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,9 @@ func (daemon *Daemon) ProcessEvent(id string, e libcontainerdtypes.EventType, ei
5555
if err != nil {
5656
logrus.WithError(err).Warnf("failed to delete container %s from containerd", c.ID)
5757
}
58-
59-
c.StreamConfig.Wait()
58+
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
59+
c.StreamConfig.Wait(ctx)
60+
cancel()
6061
c.Reset(false)
6162

6263
exitStatus := container.ExitStatus{
@@ -124,7 +125,11 @@ func (daemon *Daemon) ProcessEvent(id string, e libcontainerdtypes.EventType, ei
124125
defer execConfig.Unlock()
125126
execConfig.ExitCode = &ec
126127
execConfig.Running = false
127-
execConfig.StreamConfig.Wait()
128+
129+
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
130+
execConfig.StreamConfig.Wait(ctx)
131+
cancel()
132+
128133
if err := execConfig.CloseStreams(); err != nil {
129134
logrus.Errorf("failed to cleanup exec %s streams: %s", c.ID, err)
130135
}

components/engine/integration-cli/docker_cli_exec_test.go

Lines changed: 0 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,13 @@ import (
1111
"reflect"
1212
"runtime"
1313
"sort"
14-
"strconv"
1514
"strings"
1615
"sync"
1716
"time"
1817

1918
"github.com/docker/docker/client"
2019
"github.com/docker/docker/integration-cli/cli"
2120
"github.com/docker/docker/integration-cli/cli/build"
22-
"github.com/docker/docker/pkg/parsers/kernel"
2321
"github.com/go-check/check"
2422
"gotest.tools/assert"
2523
is "gotest.tools/assert/cmp"
@@ -534,100 +532,3 @@ func (s *DockerSuite) TestExecEnvLinksHost(c *check.C) {
534532
assert.Check(c, is.Contains(out, "HOSTNAME=myhost"))
535533
assert.Check(c, is.Contains(out, "DB_NAME=/bar/db"))
536534
}
537-
538-
func (s *DockerSuite) TestExecWindowsOpenHandles(c *check.C) {
539-
testRequires(c, DaemonIsWindows)
540-
541-
if runtime.GOOS == "windows" {
542-
v, err := kernel.GetKernelVersion()
543-
assert.NilError(c, err)
544-
build, _ := strconv.Atoi(strings.Split(strings.SplitN(v.String(), " ", 3)[2][1:], ".")[0])
545-
if build >= 17743 {
546-
c.Skip("Temporarily disabled on RS5 17743+ builds due to platform bug")
547-
548-
// This is being tracked internally. @jhowardmsft. Summary of failure
549-
// from an email in early July 2018 below:
550-
//
551-
// Platform regression. In cmd.exe by the look of it. I can repro
552-
// it outside of CI. It fails the same on 17681, 17676 and even as
553-
// far back as 17663, over a month old. From investigating, I can see
554-
// what's happening in the container, but not the reason. The test
555-
// starts a long-running container based on the Windows busybox image.
556-
// It then adds another process (docker exec) to that container to
557-
// sleep. It loops waiting for two instances of busybox.exe running,
558-
// and cmd.exe to quit. What's actually happening is that the second
559-
// exec hangs indefinitely, and from docker top, I can see
560-
// "OpenWith.exe" running.
561-
562-
//Manual repro would be
563-
//# Start the first long-running container
564-
//docker run --rm -d --name test busybox sleep 300
565-
566-
//# In another window, docker top test. There should be a single instance of busybox.exe running
567-
//# In a third window, docker exec test cmd /c start sleep 10 NOTE THIS HANGS UNTIL 5 MIN TIMEOUT
568-
//# In the second window, run docker top test. Note that OpenWith.exe is running, one cmd.exe and only one busybox. I would expect no "OpenWith" and two busybox.exe's.
569-
}
570-
}
571-
572-
runSleepingContainer(c, "-d", "--name", "test")
573-
exec := make(chan bool)
574-
go func() {
575-
dockerCmd(c, "exec", "test", "cmd", "/c", "start sleep 10")
576-
exec <- true
577-
}()
578-
579-
count := 0
580-
for {
581-
top := make(chan string)
582-
var out string
583-
go func() {
584-
out, _ := dockerCmd(c, "top", "test")
585-
top <- out
586-
}()
587-
588-
select {
589-
case <-time.After(time.Second * 5):
590-
c.Fatal("timed out waiting for top while exec is exiting")
591-
case out = <-top:
592-
break
593-
}
594-
595-
if strings.Count(out, "busybox.exe") == 2 && !strings.Contains(out, "cmd.exe") {
596-
// The initial exec process (cmd.exe) has exited, and both sleeps are currently running
597-
break
598-
}
599-
count++
600-
if count >= 30 {
601-
c.Fatal("too many retries")
602-
}
603-
time.Sleep(1 * time.Second)
604-
}
605-
606-
inspect := make(chan bool)
607-
go func() {
608-
dockerCmd(c, "inspect", "test")
609-
inspect <- true
610-
}()
611-
612-
select {
613-
case <-time.After(time.Second * 5):
614-
c.Fatal("timed out waiting for inspect while exec is exiting")
615-
case <-inspect:
616-
break
617-
}
618-
619-
// Ensure the background sleep is still running
620-
out, _ := dockerCmd(c, "top", "test")
621-
assert.Equal(c, strings.Count(out, "busybox.exe"), 2)
622-
623-
// The exec should exit when the background sleep exits
624-
select {
625-
case <-time.After(time.Second * 15):
626-
c.Fatal("timed out waiting for async exec to exit")
627-
case <-exec:
628-
// Ensure the background sleep has actually exited
629-
out, _ := dockerCmd(c, "top", "test")
630-
assert.Equal(c, strings.Count(out, "busybox.exe"), 1)
631-
break
632-
}
633-
}

components/engine/libcontainerd/remote/client.go

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -652,13 +652,6 @@ func (c *client) processEvent(ctx context.Context, et libcontainerdtypes.EventTy
652652
}).Error("exit event")
653653
return
654654
}
655-
_, err = p.Delete(context.Background())
656-
if err != nil {
657-
c.logger.WithError(err).WithFields(logrus.Fields{
658-
"container": ei.ContainerID,
659-
"process": ei.ProcessID,
660-
}).Warn("failed to delete process")
661-
}
662655

663656
ctr, err := c.getContainer(ctx, ei.ContainerID)
664657
if err != nil {
@@ -672,11 +665,18 @@ func (c *client) processEvent(ctx context.Context, et libcontainerdtypes.EventTy
672665
c.logger.WithFields(logrus.Fields{
673666
"container": ei.ContainerID,
674667
"error": err,
675-
}).Error("failed to find container")
668+
}).Error("failed to get container labels")
676669
return
677670
}
678671
newFIFOSet(labels[DockerContainerBundlePath], ei.ProcessID, true, false).Close()
679672
}
673+
_, err = p.Delete(context.Background())
674+
if err != nil {
675+
c.logger.WithError(err).WithFields(logrus.Fields{
676+
"container": ei.ContainerID,
677+
"process": ei.ProcessID,
678+
}).Warn("failed to delete process")
679+
}
680680
}
681681
})
682682
}

0 commit comments

Comments
 (0)