Skip to content

Commit

Permalink
chore: various run local fixes (#5459)
Browse files Browse the repository at this point in the history
Fixes the following issues:
- Download the ARM version of session manager plugin in pause container if using ARM binary
- Considers all containers "essential" as we did previously - meaning, if a container stops, that is reported as an error
- Adds back status messages of what the orchestrator is doing after `Stop()`
- Passes `--init` to `docker run` for the pause container so it responds to `docker stop`

By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the Apache 2.0 License.
  • Loading branch information
dannyrandall authored Nov 9, 2023
1 parent c94bba3 commit 9d9e8d6
Show file tree
Hide file tree
Showing 4 changed files with 177 additions and 78 deletions.
5 changes: 5 additions & 0 deletions internal/pkg/docker/dockerengine/dockerengine.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ type RunOptions struct {
ContainerNetwork string // Optional. Network mode for the container.
LogOptions RunLogOptions // Optional. Configure logging for output from the container
AddLinuxCapabilities []string // Optional. Adds linux capabilities to the container.
Init bool // Optional. Adds an init process as an entrypoint.
}

// RunLogOptions holds the logging configuration for Run().
Expand Down Expand Up @@ -274,6 +275,10 @@ func (in *RunOptions) generateRunArguments() []string {
args = append(args, "--cap-add", cap)
}

if in.Init {
args = append(args, "--init")
}

args = append(args, in.ImageURI)

if in.Command != nil && len(in.Command) > 0 {
Expand Down
5 changes: 4 additions & 1 deletion internal/pkg/docker/orchestrator/Pause-Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
FROM public.ecr.aws/amazonlinux/amazonlinux:2023

RUN dnf install -y aws-cli iptables
RUN dnf install -y https://s3.amazonaws.com/session-manager-downloads/plugin/latest/linux_64bit/session-manager-plugin.rpm

ARG ARCH=64bit
# url from https://docs.aws.amazon.com/systems-manager/latest/userguide/install-plugin-linux.html
RUN dnf install -y https://s3.amazonaws.com/session-manager-downloads/plugin/latest/linux_${ARCH}/session-manager-plugin.rpm
43 changes: 29 additions & 14 deletions internal/pkg/docker/orchestrator/orchestrator.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@ import (
"maps"
"net"
"os"
"runtime"
"strconv"
"strings"
"sync"
"sync/atomic"
"time"
Expand Down Expand Up @@ -283,11 +285,6 @@ func (o *Orchestrator) setupProxyConnections(ctx context.Context, pauseContainer
return fmt.Errorf("modify iptables: %w", err)
}

err = o.docker.Exec(ctx, pauseContainer, io.Discard, "iptables-save")
if err != nil {
return fmt.Errorf("save iptables: %w", err)
}

err = o.docker.Exec(ctx, pauseContainer, io.Discard, "/bin/bash",
"-c", fmt.Sprintf(`echo %s %s >> /etc/hosts`, ip.String(), host.Name))
if err != nil {
Expand Down Expand Up @@ -338,10 +335,18 @@ func ipv4Increment(ip net.IP, network *net.IPNet) (net.IP, error) {
}

func (o *Orchestrator) buildPauseContainer(ctx context.Context) error {
arch := "64bit"
if strings.Contains(runtime.GOARCH, "arm") {
arch = "arm64"
}

return o.docker.Build(ctx, &dockerengine.BuildArguments{
URI: pauseCtrURI,
Tags: []string{pauseCtrTag},
DockerfileContent: pauseDockerfile,
Args: map[string]string{
"ARCH": arch,
},
}, os.Stderr)
}

Expand All @@ -351,6 +356,7 @@ func (o *Orchestrator) buildPauseContainer(ctx context.Context) error {
func (o *Orchestrator) Stop() {
o.stopOnce.Do(func() {
close(o.stopped)
fmt.Printf("\nStopping task...\n")
o.actions <- &stopAction{}
})
}
Expand All @@ -368,9 +374,11 @@ func (a *stopAction) Do(o *Orchestrator) error {
}

// stop pause container
fmt.Printf("Stopping %q\n", "pause")
if err := o.docker.Stop(context.Background(), o.containerID("pause")); err != nil {
errs = append(errs, fmt.Errorf("stop %q: %w", "pause", err))
}
fmt.Printf("Stopped %q\n", "pause")

return errors.Join(errs...)
}
Expand All @@ -386,10 +394,12 @@ func (o *Orchestrator) stopTask(ctx context.Context, task Task) error {
for name := range task.Containers {
name := name
go func() {
fmt.Printf("Stopping %q\n", name)
if err := o.docker.Stop(ctx, o.containerID(name)); err != nil {
errCh <- fmt.Errorf("stop %q: %w", name, err)
return
}
fmt.Printf("Stopped %q\n", name)
errCh <- nil
}()
}
Expand Down Expand Up @@ -456,6 +466,7 @@ func (o *Orchestrator) pauseRunOptions(t Task) dockerengine.RunOptions {
ContainerPorts: make(map[string]string),
Secrets: t.PauseSecrets,
AddLinuxCapabilities: []string{"NET_ADMIN"},
Init: true,
}

for _, ctr := range t.Containers {
Expand Down Expand Up @@ -486,18 +497,22 @@ func (o *Orchestrator) run(taskID int32, opts dockerengine.RunOptions) {
o.wg.Add(1)
go func() {
defer o.wg.Done()
err := o.docker.Run(context.Background(), &opts)

if err := o.docker.Run(context.Background(), &opts); err != nil {
curTaskID := o.curTaskID.Load()
if curTaskID == orchestratorStoppedTaskID {
return
}
// if the orchestrator has already stopped,
// we don't want to report the error
curTaskID := o.curTaskID.Load()
if curTaskID == orchestratorStoppedTaskID {
return
}

// the error is from the pause container
// or from the currently running task
if taskID == pauseCtrTaskID || taskID == curTaskID {
o.runErrs <- fmt.Errorf("run %q: %w", opts.ContainerName, err)
// the error is from the pause container
// or from the currently running task
if taskID == pauseCtrTaskID || taskID == curTaskID {
if err == nil {
err = errors.New("container stopped unexpectedly")
}
o.runErrs <- fmt.Errorf("run %q: %w", opts.ContainerName, err)
}
}()
}
Loading

0 comments on commit 9d9e8d6

Please sign in to comment.