diff --git a/Dockerfile b/Dockerfile
index eddeefc67d84f..532b8cd54797e 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -32,28 +32,39 @@ RUN echo deb http://ppa.launchpad.net/zfs-native/stable/ubuntu trusty main > /et
 # Packaged dependencies
 RUN apt-get update && apt-get install -y \
     apparmor \
+    asciidoc \
     aufs-tools \
     automake \
     bash-completion \
+    bsdmainutils \
     btrfs-tools \
     build-essential \
     curl \
     dpkg-sig \
     git \
     iptables \
+    libaio-dev \
     libapparmor-dev \
     libcap-dev \
+    libprotobuf-c0-dev \
+    libprotobuf-dev \
     libsqlite3-dev \
     mercurial \
     parallel \
+    pkg-config \
+    protobuf-compiler \
+    protobuf-c-compiler \
+    python-minimal \
     python-mock \
     python-pip \
+    python-protobuf \
     python-websocket \
     reprepro \
     ruby1.9.1 \
     ruby1.9.1-dev \
     s3cmd=1.1.0* \
     ubuntu-zfs \
+    xmlto \
     libzfs-dev \
     --no-install-recommends
@@ -78,6 +89,13 @@ RUN cd /usr/src/lxc \
     && make install \
     && ldconfig
 
+# Install Criu
+RUN mkdir -p /usr/src/criu \
+    && curl -sSL https://github.com/xemul/criu/archive/v1.6.tar.gz | tar -v -C /usr/src/criu/ -xz --strip-components=1
+RUN cd /usr/src/criu \
+    && make \
+    && make install
+
 # Install Go
 ENV GO_VERSION 1.4.2
 RUN curl -sSL https://golang.org/dl/go${GO_VERSION}.src.tar.gz | tar -v -C /usr/local -xz \
diff --git a/api/client/checkpoint.go b/api/client/checkpoint.go
new file mode 100644
index 0000000000000..8c681bcf9716f
--- /dev/null
+++ b/api/client/checkpoint.go
@@ -0,0 +1,52 @@
+package client
+
+import (
+    "fmt"
+
+    flag "github.com/docker/docker/pkg/mflag"
+    "github.com/docker/docker/runconfig"
+)
+
+func (cli *DockerCli) CmdCheckpoint(args ...string) error {
+    cmd := cli.Subcmd("checkpoint", []string{"CONTAINER [CONTAINER...]"}, "Checkpoint one or more running containers", true)
+    cmd.Require(flag.Min, 1)
+
+    var (
+        flImgDir       = cmd.String([]string{"-image-dir"}, "", "directory for storing checkpoint image files")
+        flWorkDir      = cmd.String([]string{"-work-dir"}, "", "directory for storing log file")
+        flLeaveRunning = cmd.Bool([]string{"-leave-running"}, false, "leave the container running after checkpoint")
+        flCheckTcp     = cmd.Bool([]string{"-allow-tcp"}, false, "allow checkpointing tcp connections")
+        flExtUnix      = cmd.Bool([]string{"-allow-ext-unix"}, false, "allow checkpointing external unix connections")
+        flShell        = cmd.Bool([]string{"-allow-shell"}, false, "allow checkpointing shell jobs")
+    )
+
+    if err := cmd.ParseFlags(args, true); err != nil {
+        return err
+    }
+
+    if cmd.NArg() < 1 {
+        cmd.Usage()
+        return nil
+    }
+
+    criuOpts := &runconfig.CriuConfig{
+        ImagesDirectory:         *flImgDir,
+        WorkDirectory:           *flWorkDir,
+        LeaveRunning:            *flLeaveRunning,
+        TcpEstablished:          *flCheckTcp,
+        ExternalUnixConnections: *flExtUnix,
+        ShellJob:                *flShell,
+    }
+
+    var encounteredError error
+    for _, name := range cmd.Args() {
+        _, _, err := readBody(cli.call("POST", "/containers/"+name+"/checkpoint", criuOpts, nil))
+        if err != nil {
+            fmt.Fprintf(cli.err, "%s\n", err)
+            encounteredError = fmt.Errorf("Error: failed to checkpoint one or more containers")
+        } else {
+            fmt.Fprintf(cli.out, "%s\n", name)
+        }
+    }
+    return encounteredError
+}
diff --git a/api/client/restore.go b/api/client/restore.go
new file mode 100644
index 0000000000000..0c4085fbbbd84
--- /dev/null
+++ b/api/client/restore.go
@@ -0,0 +1,54 @@
+package client
+
+import (
+    "fmt"
+
+    flag "github.com/docker/docker/pkg/mflag"
+    "github.com/docker/docker/runconfig"
+)
+
+func (cli *DockerCli) CmdRestore(args ...string) error {
checkpointed containers", true) + cmd.Require(flag.Min, 1) + + var ( + flImgDir = cmd.String([]string{"-image-dir"}, "", "directory to restore image files from") + flWorkDir = cmd.String([]string{"-work-dir"}, "", "directory for restore log") + flCheckTcp = cmd.Bool([]string{"-allow-tcp"}, false, "allow restoring tcp connections") + flExtUnix = cmd.Bool([]string{"-allow-ext-unix"}, false, "allow restoring external unix connections") + flShell = cmd.Bool([]string{"-allow-shell"}, false, "allow restoring shell jobs") + flForce = cmd.Bool([]string{"-force"}, false, "bypass checks for current container state") + ) + + if err := cmd.ParseFlags(args, true); err != nil { + return err + } + + if cmd.NArg() < 1 { + cmd.Usage() + return nil + } + + restoreOpts := &runconfig.RestoreConfig{ + CriuOpts: runconfig.CriuConfig{ + ImagesDirectory: *flImgDir, + WorkDirectory: *flWorkDir, + TcpEstablished: *flCheckTcp, + ExternalUnixConnections: *flExtUnix, + ShellJob: *flShell, + }, + ForceRestore: *flForce, + } + + var encounteredError error + for _, name := range cmd.Args() { + _, _, err := readBody(cli.call("POST", "/containers/"+name+"/restore", restoreOpts, nil)) + if err != nil { + fmt.Fprintf(cli.err, "%s\n", err) + encounteredError = fmt.Errorf("Error: failed to restore one or more containers") + } else { + fmt.Fprintf(cli.out, "%s\n", name) + } + } + return encounteredError +} diff --git a/api/server/server.go b/api/server/server.go index 0c165d024e198..b8c44b6531491 100644 --- a/api/server/server.go +++ b/api/server/server.go @@ -1348,6 +1348,48 @@ func (s *Server) postContainersCopy(version version.Version, w http.ResponseWrit return nil } +func (s *Server) postContainersCheckpoint(version version.Version, w http.ResponseWriter, r *http.Request, vars map[string]string) error { + if vars == nil { + return fmt.Errorf("Missing parameter") + } + if err := parseForm(r); err != nil { + return err + } + + criuOpts := &runconfig.CriuConfig{} + if err := json.NewDecoder(r.Body).Decode(criuOpts); err != nil { + return err + } + + if err := s.daemon.ContainerCheckpoint(vars["name"], criuOpts); err != nil { + return err + } + + w.WriteHeader(http.StatusNoContent) + return nil +} + +func (s *Server) postContainersRestore(version version.Version, w http.ResponseWriter, r *http.Request, vars map[string]string) error { + if vars == nil { + return fmt.Errorf("Missing parameter") + } + if err := parseForm(r); err != nil { + return err + } + + restoreOpts := runconfig.RestoreConfig{} + if err := json.NewDecoder(r.Body).Decode(&restoreOpts); err != nil { + return err + } + + if err := s.daemon.ContainerRestore(vars["name"], &restoreOpts.CriuOpts, restoreOpts.ForceRestore); err != nil { + return err + } + + w.WriteHeader(http.StatusNoContent) + return nil +} + func (s *Server) postContainerExecCreate(version version.Version, w http.ResponseWriter, r *http.Request, vars map[string]string) error { if err := parseForm(r); err != nil { return err @@ -1559,28 +1601,30 @@ func createRouter(s *Server) *mux.Router { "/exec/{id:.*}/json": s.getExecByID, }, "POST": { - "/auth": s.postAuth, - "/commit": s.postCommit, - "/build": s.postBuild, - "/images/create": s.postImagesCreate, - "/images/load": s.postImagesLoad, - "/images/{name:.*}/push": s.postImagesPush, - "/images/{name:.*}/tag": s.postImagesTag, - "/containers/create": s.postContainersCreate, - "/containers/{name:.*}/kill": s.postContainersKill, - "/containers/{name:.*}/pause": s.postContainersPause, - "/containers/{name:.*}/unpause": s.postContainersUnpause, - 
"/containers/{name:.*}/restart": s.postContainersRestart, - "/containers/{name:.*}/start": s.postContainersStart, - "/containers/{name:.*}/stop": s.postContainersStop, - "/containers/{name:.*}/wait": s.postContainersWait, - "/containers/{name:.*}/resize": s.postContainersResize, - "/containers/{name:.*}/attach": s.postContainersAttach, - "/containers/{name:.*}/copy": s.postContainersCopy, - "/containers/{name:.*}/exec": s.postContainerExecCreate, - "/exec/{name:.*}/start": s.postContainerExecStart, - "/exec/{name:.*}/resize": s.postContainerExecResize, - "/containers/{name:.*}/rename": s.postContainerRename, + "/auth": s.postAuth, + "/commit": s.postCommit, + "/build": s.postBuild, + "/images/create": s.postImagesCreate, + "/images/load": s.postImagesLoad, + "/images/{name:.*}/push": s.postImagesPush, + "/images/{name:.*}/tag": s.postImagesTag, + "/containers/create": s.postContainersCreate, + "/containers/{name:.*}/kill": s.postContainersKill, + "/containers/{name:.*}/pause": s.postContainersPause, + "/containers/{name:.*}/unpause": s.postContainersUnpause, + "/containers/{name:.*}/restart": s.postContainersRestart, + "/containers/{name:.*}/start": s.postContainersStart, + "/containers/{name:.*}/stop": s.postContainersStop, + "/containers/{name:.*}/wait": s.postContainersWait, + "/containers/{name:.*}/resize": s.postContainersResize, + "/containers/{name:.*}/attach": s.postContainersAttach, + "/containers/{name:.*}/copy": s.postContainersCopy, + "/containers/{name:.*}/exec": s.postContainerExecCreate, + "/exec/{name:.*}/start": s.postContainerExecStart, + "/exec/{name:.*}/resize": s.postContainerExecResize, + "/containers/{name:.*}/rename": s.postContainerRename, + "/containers/{name:.*}/checkpoint": s.postContainersCheckpoint, + "/containers/{name:.*}/restore": s.postContainersRestore, }, "DELETE": { "/containers/{name:.*}": s.deleteContainers, diff --git a/api/types/types.go b/api/types/types.go index a27755f69f589..4151d4ee39d28 100644 --- a/api/types/types.go +++ b/api/types/types.go @@ -194,16 +194,18 @@ type ExecStartCheck struct { } type ContainerState struct { - Running bool - Paused bool - Restarting bool - OOMKilled bool - Dead bool - Pid int - ExitCode int - Error string - StartedAt time.Time - FinishedAt time.Time + Running bool + Paused bool + Checkpointed bool + Restarting bool + OOMKilled bool + Dead bool + Pid int + ExitCode int + Error string + StartedAt time.Time + FinishedAt time.Time + CheckpointedAt time.Time } // GET "/containers/{name:.*}/json" diff --git a/daemon/checkpoint.go b/daemon/checkpoint.go new file mode 100644 index 0000000000000..a39662cc0f325 --- /dev/null +++ b/daemon/checkpoint.go @@ -0,0 +1,56 @@ +package daemon + +import ( + "fmt" + + "github.com/docker/docker/runconfig" +) + +// Checkpoint a running container. +func (daemon *Daemon) ContainerCheckpoint(name string, opts *runconfig.CriuConfig) error { + container, err := daemon.Get(name) + if err != nil { + return err + } + if !container.IsRunning() { + return fmt.Errorf("Container %s not running", name) + } + if err := container.Checkpoint(opts); err != nil { + return fmt.Errorf("Cannot checkpoint container %s: %s", name, err) + } + + container.LogEvent("checkpoint") + return nil +} + +// Restore a checkpointed container. 
+func (daemon *Daemon) ContainerRestore(name string, opts *runconfig.CriuConfig, forceRestore bool) error {
+    container, err := daemon.Get(name)
+    if err != nil {
+        return err
+    }
+
+    if !forceRestore {
+        // TODO: It's possible we only want to bypass the checkpointed check,
+        // I'm not sure how this will work if the container is already running
+        if container.IsRunning() {
+            return fmt.Errorf("Container %s already running", name)
+        }
+
+        if !container.IsCheckpointed() {
+            return fmt.Errorf("Container %s is not checkpointed", name)
+        }
+    } else {
+        if !container.HasBeenCheckpointed() && opts.ImagesDirectory == "" {
+            return fmt.Errorf("You must specify an image directory to restore from %s", name)
+        }
+    }
+
+    if err = container.Restore(opts, forceRestore); err != nil {
+        container.LogEvent("die")
+        return fmt.Errorf("Cannot restore container %s: %s", name, err)
+    }
+
+    container.LogEvent("restore")
+    return nil
+}
diff --git a/daemon/container.go b/daemon/container.go
index a06033c890df6..72cf56472e510 100644
--- a/daemon/container.go
+++ b/daemon/container.go
@@ -258,7 +258,7 @@ func (container *Container) Start() (err error) {
     if err := container.Mount(); err != nil {
         return err
     }
-    if err := container.initializeNetworking(); err != nil {
+    if err := container.initializeNetworking(false); err != nil {
         return err
     }
     linkedEnv, err := container.setupLinkedContainers()
@@ -344,7 +344,11 @@ func (container *Container) isNetworkAllocated() bool {
 // cleanup releases any network resources allocated to the container along with any rules
 // around how containers are linked together. It also unmounts the container's root filesystem.
 func (container *Container) cleanup() {
-    container.ReleaseNetwork()
+    if container.IsCheckpointed() {
+        logrus.Debugf("not calling ReleaseNetwork() for checkpointed container %s", container.ID)
+    } else {
+        container.ReleaseNetwork(false)
+    }
 
     disableAllActiveLinks(container)
diff --git a/daemon/container_checkpoint.go b/daemon/container_checkpoint.go
new file mode 100644
index 0000000000000..3fdf83e645f3b
--- /dev/null
+++ b/daemon/container_checkpoint.go
@@ -0,0 +1,84 @@
+package daemon
+
+import (
+    "fmt"
+
+    "github.com/docker/docker/pkg/promise"
+    "github.com/docker/docker/runconfig"
+)
+
+func (container *Container) Checkpoint(opts *runconfig.CriuConfig) error {
+    if err := container.daemon.Checkpoint(container, opts); err != nil {
+        return err
+    }
+
+    if !opts.LeaveRunning {
+        container.ReleaseNetwork(true)
+    }
+    return nil
+}
+
+func (container *Container) Restore(opts *runconfig.CriuConfig, forceRestore bool) error {
+    var err error
+    container.Lock()
+    defer container.Unlock()
+
+    defer func() {
+        if err != nil {
+            container.setError(err)
+            // if no one else has set it, make sure we don't leave it at zero
+            if container.ExitCode == 0 {
+                container.ExitCode = 128
+            }
+            container.toDisk()
+            container.cleanup()
+        }
+    }()
+
+    if err := container.Mount(); err != nil {
+        return err
+    }
+    if err = container.initializeNetworking(true); err != nil {
+        return err
+    }
+    linkedEnv, err := container.setupLinkedContainers()
+    if err != nil {
+        return err
+    }
+    if err = container.setupWorkingDirectory(); err != nil {
+        return err
+    }
+
+    env := container.createDaemonEnvironment(linkedEnv)
+    if err = populateCommand(container, env); err != nil {
+        return err
+    }
+
+    mounts, err := container.setupMounts()
+    if err != nil {
+        return err
+    }
+
+    container.command.Mounts = mounts
+    return container.waitForRestore(opts, forceRestore)
+}
+
+func (container *Container) waitForRestore(opts *runconfig.CriuConfig, forceRestore bool) error {
+    container.monitor = newContainerMonitor(container, container.hostConfig.RestartPolicy)
+
+    // After calling promise.Go() we'll have two goroutines:
+    // - The current goroutine that will block in the select
+    //   below until restore is done.
+    // - A new goroutine that will restore the container and
+    //   wait for it to exit.
+    select {
+    case <-container.monitor.restoreSignal:
+        if container.ExitCode != 0 {
+            return fmt.Errorf("restore process failed")
+        }
+    case err := <-promise.Go(func() error { return container.monitor.Restore(opts, forceRestore) }):
+        return err
+    }
+
+    return nil
+}
diff --git a/daemon/container_linux.go b/daemon/container_linux.go
index 8c5b15be0a382..316dffeda188d 100644
--- a/daemon/container_linux.go
+++ b/daemon/container_linux.go
@@ -663,7 +663,7 @@ func (container *Container) UpdateNetwork() error {
     return nil
 }
 
-func (container *Container) buildCreateEndpointOptions() ([]libnetwork.EndpointOption, error) {
+func (container *Container) buildCreateEndpointOptions(restoring bool) ([]libnetwork.EndpointOption, error) {
     var (
         portSpecs = make(nat.PortSet)
         bindings  = make(nat.PortMap)
@@ -734,6 +734,14 @@ func (container *Container) buildCreateEndpointOptions() ([]libnetwork.EndpointO
         createOptions = append(createOptions, libnetwork.EndpointOptionGeneric(genericOption))
     }
 
+    /*if restoring && container.NetworkSettings.IPAddress != "" {
+        genericOption := options.Generic{
+            netlabel.IPAddress: net.ParseIP(container.NetworkSettings.IPAddress),
+        }
+
+        createOptions = append(createOptions, libnetwork.EndpointOptionGeneric(genericOption))
+    }*/
+
     return createOptions, nil
 }
 
@@ -787,7 +795,7 @@ func (container *Container) secondaryNetworkRequired(primaryNetworkType string)
     return false
 }
 
-func (container *Container) AllocateNetwork() error {
+func (container *Container) AllocateNetwork(isRestoring bool) error {
     mode := container.hostConfig.NetworkMode
     controller := container.daemon.netController
     if container.Config.NetworkDisabled || mode.IsContainer() {
@@ -823,19 +831,19 @@ func (container *Container) AllocateNetwork() error {
 
     if container.secondaryNetworkRequired(networkDriver) {
         // Configure Bridge as secondary network for port binding purposes
-        if err := container.configureNetwork("bridge", service, "bridge", false); err != nil {
+        if err := container.configureNetwork("bridge", service, "bridge", false, isRestoring); err != nil {
             return err
         }
     }
 
-    if err := container.configureNetwork(networkName, service, networkDriver, mode.IsDefault()); err != nil {
+    if err := container.configureNetwork(networkName, service, networkDriver, mode.IsDefault(), isRestoring); err != nil {
         return err
     }
 
     return container.WriteHostConfig()
 }
 
-func (container *Container) configureNetwork(networkName, service, networkDriver string, canCreateNetwork bool) error {
+func (container *Container) configureNetwork(networkName, service, networkDriver string, canCreateNetwork bool, isRestoring bool) error {
     controller := container.daemon.netController
     n, err := controller.NetworkByName(networkName)
     if err != nil {
@@ -848,23 +856,38 @@ func (container *Container) configureNetwork(networkName, service, networkDriver
         }
     }
 
-    ep, err := n.EndpointByName(service)
-    if err != nil {
-        if _, ok := err.(libnetwork.ErrNoSuchEndpoint); !ok {
-            return err
-        }
+    var ep libnetwork.Endpoint
 
-        createOptions, err := container.buildCreateEndpointOptions()
-        if err != nil {
-            return err
+    if isRestoring {
+        // Use existing endpoint for a checkpointed container
+        for _, endpoint := range n.Endpoints() {
+            if endpoint.ID() == container.NetworkSettings.EndpointID {
+                ep = endpoint
+            }
         }
-
-        ep, err = n.CreateEndpoint(service, createOptions...)
+        if ep == nil {
+            return fmt.Errorf("Failed to find the endpoint for the checkpointed container")
+        }
+    } else {
+        ep, err = n.EndpointByName(service)
         if err != nil {
-            return err
+            if _, ok := err.(libnetwork.ErrNoSuchEndpoint); !ok {
+                return err
+            }
+
+            createOptions, err := container.buildCreateEndpointOptions(isRestoring)
+            if err != nil {
+                return err
+            }
+
+            ep, err = n.CreateEndpoint(service, createOptions...)
+            if err != nil {
+                return err
+            }
         }
     }
+
     if err := container.updateNetworkSettings(n, ep); err != nil {
         return err
     }
@@ -885,7 +908,7 @@ func (container *Container) configureNetwork(networkName, service, networkDriver
     return nil
 }
 
-func (container *Container) initializeNetworking() error {
+func (container *Container) initializeNetworking(restoring bool) error {
     var err error
 
     // Make sure NetworkMode has an acceptable value before
@@ -921,7 +944,7 @@ func (container *Container) initializeNetworking() error {
 
     }
 
-    if err := container.AllocateNetwork(); err != nil {
+    if err := container.AllocateNetwork(restoring); err != nil {
         return err
     }
 
@@ -1004,7 +1027,7 @@ func (container *Container) getNetworkedContainer() (*Container, error) {
     }
 }
 
-func (container *Container) ReleaseNetwork() {
+func (container *Container) ReleaseNetwork(isCheckpoint bool) {
     if container.hostConfig.NetworkMode.IsContainer() || container.Config.NetworkDisabled {
         return
     }
@@ -1012,7 +1035,6 @@ func (container *Container) ReleaseNetwork() {
     eid := container.NetworkSettings.EndpointID
     nid := container.NetworkSettings.NetworkID
-    container.NetworkSettings = &network.Settings{}
 
     if nid == "" || eid == "" {
         return
@@ -1043,6 +1065,13 @@ func (container *Container) ReleaseNetwork() {
         }
     }
 
+    if isCheckpoint {
+        return
+    }
+
+    container.NetworkSettings = &network.Settings{}
+
     // In addition to leaving all endpoints, delete implicitly created endpoint
     if container.Config.PublishService == "" {
         if err := ep.Delete(); err != nil {
diff --git a/daemon/container_windows.go b/daemon/container_windows.go
index 8b8ef3e0401c0..c7288f335dd68 100644
--- a/daemon/container_windows.go
+++ b/daemon/container_windows.go
@@ -53,7 +53,7 @@ func (container *Container) createDaemonEnvironment(linkedEnv []string) []string
     return container.Config.Env
 }
 
-func (container *Container) initializeNetworking() error {
+func (container *Container) initializeNetworking(restoring bool) error {
     return nil
 }
diff --git a/daemon/daemon.go b/daemon/daemon.go
index 549a1c188ead3..a0b05024a3adf 100644
--- a/daemon/daemon.go
+++ b/daemon/daemon.go
@@ -277,6 +277,18 @@ func (daemon *Daemon) restore() error {
             logrus.Debugf("Loaded container %v", container.ID)
 
             containers[container.ID] = &cr{container: container}
+
+            // If the container was checkpointed, we need to reserve
+            // the IP address that it was using.
+            //
+            // XXX We should also reserve host ports (if any).
+            if container.IsCheckpointed() {
+                /*err = bridge.ReserveIP(container.ID, container.NetworkSettings.IPAddress)
+                if err != nil {
+                    log.Errorf("Failed to reserve IP %s for container %s",
+                        container.ID, container.NetworkSettings.IPAddress)
+                }*/
+            }
         } else {
             logrus.Debugf("Cannot load container %s because it was created with another graph driver.", container.ID)
         }
@@ -812,6 +824,25 @@ func (daemon *Daemon) Run(c *Container, pipes *execdriver.Pipes, startCallback e
     return daemon.execDriver.Run(c.command, pipes, startCallback)
 }
 
+func (daemon *Daemon) Checkpoint(c *Container, opts *runconfig.CriuConfig) error {
+    if err := daemon.execDriver.Checkpoint(c.command, opts); err != nil {
+        return err
+    }
+    c.SetCheckpointed(opts.LeaveRunning)
+    return nil
+}
+
+func (daemon *Daemon) Restore(c *Container, pipes *execdriver.Pipes, restoreCallback execdriver.RestoreCallback, opts *runconfig.CriuConfig, forceRestore bool) (execdriver.ExitStatus, error) {
+    // Mount the container's filesystem (daemon/graphdriver/aufs/aufs.go).
+    _, err := daemon.driver.Get(c.ID, c.GetMountLabel())
+    if err != nil {
+        return execdriver.ExitStatus{ExitCode: 0}, err
+    }
+
+    exitCode, err := daemon.execDriver.Restore(c.command, pipes, restoreCallback, opts, forceRestore)
+    return exitCode, err
+}
+
 func (daemon *Daemon) Kill(c *Container, sig int) error {
     return daemon.execDriver.Kill(c.command, sig)
 }
diff --git a/daemon/execdriver/driver.go b/daemon/execdriver/driver.go
index eca77e921eaac..80d775bac2640 100644
--- a/daemon/execdriver/driver.go
+++ b/daemon/execdriver/driver.go
@@ -8,6 +8,7 @@ import (
 
     // TODO Windows: Factor out ulimit
     "github.com/docker/docker/pkg/ulimit"
+    "github.com/docker/docker/runconfig"
     "github.com/docker/libcontainer"
     "github.com/docker/libcontainer/configs"
 )
@@ -24,6 +25,7 @@ var (
 )
 
 type StartCallback func(*ProcessConfig, int)
+type RestoreCallback func(*ProcessConfig, int)
 
 // Driver specific information based on
 // processes registered with the driver
@@ -59,6 +61,8 @@ type Driver interface {
     Kill(c *Command, sig int) error
     Pause(c *Command) error
     Unpause(c *Command) error
+    Checkpoint(c *Command, opts *runconfig.CriuConfig) error
+    Restore(c *Command, pipes *Pipes, restoreCallback RestoreCallback, opts *runconfig.CriuConfig, forceRestore bool) (ExitStatus, error)
     Name() string                                 // Driver name
     Info(id string) Info                          // "temporary" hack (until we move state from core to plugins)
     GetPidsForContainer(id string) ([]int, error) // Returns a list of pids for the given container.
diff --git a/daemon/execdriver/lxc/driver.go b/daemon/execdriver/lxc/driver.go
index 692b9f356f3aa..6116a1e87ce42 100644
--- a/daemon/execdriver/lxc/driver.go
+++ b/daemon/execdriver/lxc/driver.go
@@ -25,6 +25,7 @@ import (
     sysinfo "github.com/docker/docker/pkg/system"
     "github.com/docker/docker/pkg/term"
     "github.com/docker/docker/pkg/version"
+    "github.com/docker/docker/runconfig"
     "github.com/docker/libcontainer"
     "github.com/docker/libcontainer/cgroups"
     "github.com/docker/libcontainer/configs"
@@ -547,6 +548,14 @@ func (d *driver) Unpause(c *execdriver.Command) error {
     return err
 }
 
+func (d *driver) Checkpoint(c *execdriver.Command, opts *runconfig.CriuConfig) error {
+    return fmt.Errorf("Checkpointing lxc containers not supported yet")
+}
+
+func (d *driver) Restore(c *execdriver.Command, pipes *execdriver.Pipes, restoreCallback execdriver.RestoreCallback, opts *runconfig.CriuConfig, forceRestore bool) (execdriver.ExitStatus, error) {
+    return execdriver.ExitStatus{ExitCode: 0}, fmt.Errorf("Restoring lxc containers not supported yet")
+}
+
 func (d *driver) Terminate(c *execdriver.Command) error {
     return KillLxc(c.ID, 9)
 }
diff --git a/daemon/execdriver/native/driver.go b/daemon/execdriver/native/driver.go
index 2017a9c65ca7e..4176b14b13454 100644
--- a/daemon/execdriver/native/driver.go
+++ b/daemon/execdriver/native/driver.go
@@ -20,6 +20,7 @@ import (
     "github.com/docker/docker/pkg/reexec"
     sysinfo "github.com/docker/docker/pkg/system"
     "github.com/docker/docker/pkg/term"
+    "github.com/docker/docker/runconfig"
     "github.com/docker/libcontainer"
    "github.com/docker/libcontainer/apparmor"
     "github.com/docker/libcontainer/cgroups/systemd"
@@ -141,8 +142,11 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
     d.activeContainers[c.ID] = cont
     d.Unlock()
     defer func() {
-        cont.Destroy()
-        d.cleanContainer(c.ID)
+        status, err := cont.Status()
+        if err != nil || status != libcontainer.Checkpointed {
+            cont.Destroy()
+            d.cleanContainer(c.ID)
+        }
     }()
 
     if err := cont.Start(p); err != nil {
@@ -277,6 +281,112 @@ func (d *driver) Unpause(c *execdriver.Command) error {
     return active.Resume()
 }
 
+func libcontainerCriuOpts(runconfigOpts *runconfig.CriuConfig) *libcontainer.CriuOpts {
+    return &libcontainer.CriuOpts{
+        ImagesDirectory:         runconfigOpts.ImagesDirectory,
+        WorkDirectory:           runconfigOpts.WorkDirectory,
+        LeaveRunning:            runconfigOpts.LeaveRunning,
+        TcpEstablished:          runconfigOpts.TcpEstablished,
+        ExternalUnixConnections: runconfigOpts.ExternalUnixConnections,
+        ShellJob:                runconfigOpts.ShellJob,
+    }
+}
+
+func (d *driver) Checkpoint(c *execdriver.Command, opts *runconfig.CriuConfig) error {
+    d.Lock()
+    defer d.Unlock()
+    active := d.activeContainers[c.ID]
+    if active == nil {
+        return fmt.Errorf("active container for %s does not exist", c.ID)
+    }
+
+    err := active.Checkpoint(libcontainerCriuOpts(opts))
+    if err != nil {
+        return err
+    }
+
+    return nil
+}
+
+func (d *driver) Restore(c *execdriver.Command, pipes *execdriver.Pipes, restoreCallback execdriver.RestoreCallback, opts *runconfig.CriuConfig, forceRestore bool) (execdriver.ExitStatus, error) {
+    var (
+        cont libcontainer.Container
+        err  error
+    )
+
+    cont, err = d.factory.Load(c.ID)
+    if err != nil {
+        if forceRestore {
+            var config *configs.Config
+            config, err = d.createContainer(c)
+            if err != nil {
+                return execdriver.ExitStatus{ExitCode: -1}, err
+            }
+            cont, err = d.factory.Create(c.ID, config)
+            if err != nil {
+                return execdriver.ExitStatus{ExitCode: -1}, err
+            }
+        } else {
+            return execdriver.ExitStatus{ExitCode: -1}, err
+        }
+    }
+
+    p := &libcontainer.Process{
+        Args: append([]string{c.ProcessConfig.Entrypoint}, c.ProcessConfig.Arguments...),
+        Env:  c.ProcessConfig.Env,
+        Cwd:  c.WorkingDir,
+        User: c.ProcessConfig.User,
+    }
+
+    config := cont.Config()
+    if err := setupPipes(&config, &c.ProcessConfig, p, pipes); err != nil {
+        return execdriver.ExitStatus{ExitCode: -1}, err
+    }
+
+    d.Lock()
+    d.activeContainers[c.ID] = cont
+    d.Unlock()
+    defer func() {
+        cont.Destroy()
+        d.cleanContainer(c.ID)
+    }()
+
+    if err := cont.Restore(p, libcontainerCriuOpts(opts)); err != nil {
+        return execdriver.ExitStatus{ExitCode: -1}, err
+    }
+
+    // FIXME: no idea if any of this is needed...
+    if restoreCallback != nil {
+        pid, err := p.Pid()
+        if err != nil {
+            p.Signal(os.Kill)
+            p.Wait()
+            return execdriver.ExitStatus{ExitCode: -1}, err
+        }
+        restoreCallback(&c.ProcessConfig, pid)
+    }
+
+    oom := notifyOnOOM(cont)
+    waitF := p.Wait
+    if nss := cont.Config().Namespaces; !nss.Contains(configs.NEWPID) {
+        // we need such hack for tracking processes with inherited fds,
+        // because cmd.Wait() waiting for all streams to be copied
+        waitF = waitInPIDHost(p, cont)
+    }
+    ps, err := waitF()
+    if err != nil {
+        execErr, ok := err.(*exec.ExitError)
+        if !ok {
+            return execdriver.ExitStatus{ExitCode: -1}, err
+        }
+        ps = execErr.ProcessState
+    }
+
+    cont.Destroy()
+    _, oomKill := <-oom
+    return execdriver.ExitStatus{ExitCode: utils.ExitStatus(ps.Sys().(syscall.WaitStatus)), OOMKilled: oomKill}, nil
+}
+
 func (d *driver) Terminate(c *execdriver.Command) error {
     defer d.cleanContainer(c.ID)
     container, err := d.factory.Load(c.ID)
diff --git a/daemon/execdriver/windows/windows.go b/daemon/execdriver/windows/windows.go
index 9837270235d5c..d49f76a95cf93 100644
--- a/daemon/execdriver/windows/windows.go
+++ b/daemon/execdriver/windows/windows.go
@@ -13,6 +13,7 @@ import (
     "fmt"
 
     "github.com/docker/docker/daemon/execdriver"
+    "github.com/docker/docker/runconfig"
 )
 
 const (
@@ -95,3 +96,11 @@ func (d *driver) Stats(id string) (*execdriver.ResourceStats, error) {
 func (d *driver) Exec(c *execdriver.Command, processConfig *execdriver.ProcessConfig, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (int, error) {
     return 0, nil
 }
+
+func (d *driver) Checkpoint(c *execdriver.Command, opts *runconfig.CriuConfig) error {
+    return fmt.Errorf("Windows: Containers cannot be checkpointed")
+}
+
+func (d *driver) Restore(c *execdriver.Command, pipes *execdriver.Pipes, restoreCallback execdriver.RestoreCallback, opts *runconfig.CriuConfig, forceRestore bool) (execdriver.ExitStatus, error) {
+    return execdriver.ExitStatus{ExitCode: 0}, fmt.Errorf("Windows: Containers cannot be restored")
+}
diff --git a/daemon/inspect.go b/daemon/inspect.go
index 73b394ca24c0f..e9d302f1bc9b1 100644
--- a/daemon/inspect.go
+++ b/daemon/inspect.go
@@ -64,16 +64,18 @@ func (daemon *Daemon) getInspectData(container *Container) (*types.ContainerJSON
     }
 
     containerState := &types.ContainerState{
-        Running:    container.State.Running,
-        Paused:     container.State.Paused,
-        Restarting: container.State.Restarting,
-        OOMKilled:  container.State.OOMKilled,
-        Dead:       container.State.Dead,
-        Pid:        container.State.Pid,
-        ExitCode:   container.State.ExitCode,
-        Error:      container.State.Error,
-        StartedAt:  container.State.StartedAt,
-        FinishedAt: container.State.FinishedAt,
+        Running:        container.State.Running,
+        Paused:         container.State.Paused,
+        Checkpointed:   container.State.Checkpointed,
+        Restarting:     container.State.Restarting,
+        OOMKilled:      container.State.OOMKilled,
+        Dead:           container.State.Dead,
+        Pid:            container.State.Pid,
+        ExitCode:       container.State.ExitCode,
+        Error:          container.State.Error,
+        StartedAt:      container.State.StartedAt,
+        FinishedAt:     container.State.FinishedAt,
+        CheckpointedAt: container.State.CheckpointedAt,
     }
 
     volumes := make(map[string]string)
diff --git a/daemon/monitor.go b/daemon/monitor.go
index ff173c8f0324e..c4abcfe68af3b 100644
--- a/daemon/monitor.go
+++ b/daemon/monitor.go
@@ -44,6 +44,9 @@ type containerMonitor struct {
     // left waiting for nothing to happen during this time
     stopChan chan struct{}
 
+    // like startSignal but for restoring a container
+    restoreSignal chan struct{}
+
     // timeIncrement is the amount of time to wait between restarts
     // this is in milliseconds
     timeIncrement int
@@ -61,6 +64,7 @@ func newContainerMonitor(container *Container, policy runconfig.RestartPolicy) *
         timeIncrement: defaultTimeIncrement,
         stopChan:      make(chan struct{}),
         startSignal:   make(chan struct{}),
+        restoreSignal: make(chan struct{}),
     }
 }
 
@@ -181,6 +185,51 @@ func (m *containerMonitor) Start() error {
     }
 }
 
+// Like Start() but for restoring a container.
+func (m *containerMonitor) Restore(opts *runconfig.CriuConfig, forceRestore bool) error {
+    var (
+        err error
+        // XXX The following line should be changed to
+        //     exitStatus execdriver.ExitStatus to match Start()
+        exitCode     execdriver.ExitStatus
+        afterRestore bool
+    )
+    defer func() {
+        if afterRestore {
+            m.container.Lock()
+            m.container.setStopped(&execdriver.ExitStatus{exitCode.ExitCode, false})
+            defer m.container.Unlock()
+        }
+        m.Close()
+    }()
+
+    // FIXME: right now if we startLogging again we get double logs after a restore
+    if m.container.logCopier == nil {
+        if err := m.container.startLogging(); err != nil {
+            m.resetContainer(false)
+            return err
+        }
+    }
+
+    pipes := execdriver.NewPipes(m.container.stdin, m.container.stdout, m.container.stderr, m.container.Config.OpenStdin)
+
+    m.container.LogEvent("restore")
+    m.lastStartTime = time.Now()
+    if exitCode, err = m.container.daemon.Restore(m.container, pipes, m.restoreCallback, opts, forceRestore); err != nil {
+        logrus.Errorf("Error restoring container: %s, exitCode=%d", err, exitCode)
+        m.container.ExitCode = -1
+        m.resetContainer(false)
+        return err
+    }
+    afterRestore = true
+
+    m.container.ExitCode = exitCode.ExitCode
+    m.resetMonitor(err == nil && exitCode.ExitCode == 0)
+    m.container.LogEvent("die")
+    m.resetContainer(true)
+    return err
+}
+
 // resetMonitor resets the stateful fields on the containerMonitor based on the
 // previous runs success or failure. Regardless of success, if the container had
 // an execution time of more than 10s then reset the timer back to the default
@@ -267,6 +316,29 @@ func (m *containerMonitor) callback(processConfig *execdriver.ProcessConfig, pid
     }
 }
 
+// Like callback() but for restoring a container.
+func (m *containerMonitor) restoreCallback(processConfig *execdriver.ProcessConfig, restorePid int) {
+    // If restorePid is 0, it means that restore failed.
+    if restorePid != 0 {
+        m.container.setRunning(restorePid)
+    }
+
+    // Unblock the goroutine waiting in waitForRestore().
+    select {
+    case <-m.restoreSignal:
+    default:
+        close(m.restoreSignal)
+    }
+
+    if restorePid != 0 {
+        // Write config.json and hostconfig.json files
+        // to /var/lib/docker/containers/.
+        if err := m.container.ToDisk(); err != nil {
+            logrus.Debugf("%s", err)
+        }
+    }
+}
+
 // resetContainer resets the container's IO and ensures that the command is able to be executed again
 // by copying the data into a new struct
 // if lock is true, then container locked during reset
diff --git a/daemon/state.go b/daemon/state.go
index 0270c88e88774..f6ef8977695b0 100644
--- a/daemon/state.go
+++ b/daemon/state.go
@@ -14,6 +14,7 @@ type State struct {
     Running    bool
     Paused     bool
     Restarting bool
+    Checkpointed bool
     OOMKilled  bool
     removalInProgress bool // Not need for this to be persistent on disk.
     Dead       bool
@@ -22,6 +23,7 @@ type State struct {
     Error      string // contains last known error when starting the container
     StartedAt  time.Time
     FinishedAt time.Time
+    CheckpointedAt time.Time
     waitChan   chan struct{}
 }
@@ -48,6 +50,10 @@ func (s *State) String() string {
         return "Removal In Progress"
     }
 
+    if s.Checkpointed {
+        return fmt.Sprintf("Checkpointed %s ago", units.HumanDuration(time.Now().UTC().Sub(s.CheckpointedAt)))
+    }
+
     if s.Dead {
         return "Dead"
     }
@@ -75,6 +81,10 @@ func (s *State) StateString() string {
         return "running"
     }
 
+    if s.Checkpointed {
+        return "checkpointed"
+    }
+
     if s.Dead {
         return "dead"
     }
@@ -166,6 +176,7 @@ func (s *State) setRunning(pid int) {
     s.Error = ""
     s.Running = true
     s.Paused = false
+    s.Checkpointed = false
     s.Restarting = false
     s.ExitCode = 0
     s.Pid = pid
@@ -262,3 +273,24 @@ func (s *State) SetDead() {
     s.Dead = true
     s.Unlock()
 }
+
+func (s *State) SetCheckpointed(leaveRunning bool) {
+    s.Lock()
+    s.CheckpointedAt = time.Now().UTC()
+    s.Checkpointed = !leaveRunning
+    s.Running = leaveRunning
+    s.Paused = false
+    s.Restarting = false
+    // XXX Not sure if we need to close and recreate waitChan.
+    // close(s.waitChan)
+    // s.waitChan = make(chan struct{})
+    s.Unlock()
+}
+
+func (s *State) HasBeenCheckpointed() bool {
+    return s.CheckpointedAt != time.Time{}
+}
+
+func (s *State) IsCheckpointed() bool {
+    return s.Checkpointed
+}
diff --git a/docker/flags.go b/docker/flags.go
index 969469c176214..1f34079a6eb53 100644
--- a/docker/flags.go
+++ b/docker/flags.go
@@ -31,6 +31,7 @@ var (
     dockerCommands = []command{
         {"attach", "Attach to a running container"},
         {"build", "Build an image from a Dockerfile"},
+        {"checkpoint", "Checkpoint one or more running containers"},
         {"commit", "Create a new image from a container's changes"},
         {"cp", "Copy files/folders from a container to a HOSTDIR or to STDOUT"},
         {"create", "Create a new container"},
@@ -55,6 +56,7 @@ var (
         {"push", "Push an image or a repository to a registry"},
         {"rename", "Rename a container"},
         {"restart", "Restart a running container"},
+        {"restore", "Restore one or more checkpointed containers"},
         {"rm", "Remove one or more containers"},
         {"rmi", "Remove one or more images"},
         {"run", "Run a command in a new container"},
@@ -131,7 +133,7 @@ func init() {
     sort.Sort(byName(dockerCommands))
 
     for _, cmd := range dockerCommands {
-        help += fmt.Sprintf("    %-10.10s%s\n", cmd.name, cmd.description)
+        help += fmt.Sprintf("    %-11.11s%s\n", cmd.name, cmd.description)
     }
 
     help += "\nRun 'docker COMMAND --help' for more information on a command."
diff --git a/integration-cli/docker_cli_checkpoint_test.go b/integration-cli/docker_cli_checkpoint_test.go
new file mode 100644
index 0000000000000..e19ef524efd43
--- /dev/null
+++ b/integration-cli/docker_cli_checkpoint_test.go
@@ -0,0 +1,37 @@
+package main
+
+import (
+    "os/exec"
+    "strings"
+
+    "github.com/go-check/check"
+)
+
+func (s *DockerSuite) TestCheckpointAndRestore(c *check.C) {
+    defer unpauseAllContainers()
+
+    runCmd := exec.Command(dockerBinary, "run", "-d", "busybox", "top")
+    out, _, err := runCommandWithOutput(runCmd)
+    if err != nil {
+        c.Fatalf("failed to run container: %v, output: %q", err, out)
+    }
+
+    containerID := strings.TrimSpace(out)
+
+    checkpointCmd := exec.Command(dockerBinary, "checkpoint", containerID)
+    out, _, err = runCommandWithOutput(checkpointCmd)
+    if err != nil {
+        c.Fatalf("failed to checkpoint container: %v, output: %q", err, out)
+    }
+
+    out, err = inspectField(containerID, "State.Checkpointed")
+    c.Assert(out, check.Equals, "true")
+
+    restoreCmd := exec.Command(dockerBinary, "restore", containerID)
+    out, _, _, err = runCommandWithStdoutStderr(restoreCmd)
+    if err != nil {
+        c.Fatalf("failed to restore container: %v, output: %q", err, out)
+    }
+
+    out, err = inspectField(containerID, "State.Checkpointed")
+    c.Assert(out, check.Equals, "false")
+}
diff --git a/integration-cli/docker_cli_help_test.go b/integration-cli/docker_cli_help_test.go
index ce9f54e69af84..4eb515f02a9a7 100644
--- a/integration-cli/docker_cli_help_test.go
+++ b/integration-cli/docker_cli_help_test.go
@@ -229,7 +229,7 @@ func (s *DockerSuite) TestHelpTextVerify(c *check.C) {
 
         }
 
-        expected := 39
+        expected := 41
         if len(cmds) != expected {
             c.Fatalf("Wrong # of cmds(%d), it should be: %d\nThe list:\n%q", len(cmds), expected, cmds)
diff --git a/project/PACKAGERS.md b/project/PACKAGERS.md
index fd2156c5855bf..da0d2da9ca0f2 100644
--- a/project/PACKAGERS.md
+++ b/project/PACKAGERS.md
@@ -304,6 +304,9 @@ by having support for them in the kernel or userspace. A few examples include:
   least the "auplink" utility from aufs-tools)
 * BTRFS graph driver (requires BTRFS support enabled in the kernel)
 * ZFS graph driver (requires userspace zfs-utils and a corresponding kernel module)
+* Checkpoint/Restore containers:
+  - requires criu version 1.5.2 or later (criu.org)
+  - requires kernel version 3.19 or later if using overlay-fs
 
 ## Daemon Init Script
diff --git a/runconfig/restore.go b/runconfig/restore.go
new file mode 100644
index 0000000000000..22f8b0ab0a096
--- /dev/null
+++ b/runconfig/restore.go
@@ -0,0 +1,15 @@
+package runconfig
+
+type CriuConfig struct {
+    ImagesDirectory         string
+    WorkDirectory           string
+    LeaveRunning            bool
+    TcpEstablished          bool
+    ExternalUnixConnections bool
+    ShellJob                bool
+}
+
+type RestoreConfig struct {
+    CriuOpts     CriuConfig
+    ForceRestore bool
+}
diff --git a/vendor/src/github.com/docker/libcontainer/container_linux.go b/vendor/src/github.com/docker/libcontainer/container_linux.go
index 215f35d38d8c6..f944bfded779b 100644
--- a/vendor/src/github.com/docker/libcontainer/container_linux.go
+++ b/vendor/src/github.com/docker/libcontainer/container_linux.go
@@ -386,7 +386,7 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
         return err
     }
 
-    err = c.criuSwrk(nil, &req, criuOpts)
+    err = c.criuSwrk(nil, &req, criuOpts.LeaveRunning)
     if err != nil {
         return err
     }
@@ -513,14 +513,14 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
         }
     }
 
-    err = c.criuSwrk(process, &req, criuOpts)
+    err = c.criuSwrk(process, &req, false)
     if err != nil {
         return err
     }
     return nil
 }
 
-func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *CriuOpts) error {
+func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, leaveRunning bool) error {
     fds, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_SEQPACKET|syscall.SOCK_CLOEXEC, 0)
     if err != nil {
         return err
@@ -595,7 +595,7 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
         t := resp.GetType()
         switch {
         case t == criurpc.CriuReqType_NOTIFY:
-            if err := c.criuNotifications(resp, process, opts, extFds); err != nil {
+            if err := c.criuNotifications(resp, process, leaveRunning, extFds); err != nil {
                 return err
             }
             t = criurpc.CriuReqType_NOTIFY
@@ -662,7 +662,7 @@ func unlockNetwork(config *configs.Config) error {
     return nil
 }
 
-func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Process, opts *CriuOpts, fds []string) error {
+func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Process, leaveRunning bool, fds []string) error {
     notify := resp.GetNotify()
     if notify == nil {
         return fmt.Errorf("invalid response: %s", resp.String())
@@ -670,7 +670,7 @@ func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Proc
     switch {
     case notify.GetScript() == "post-dump":
-        if !opts.LeaveRunning {
+        if !leaveRunning {
             f, err := os.Create(filepath.Join(c.root, "checkpoint"))
             if err != nil {
                 return err
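
For reference, a minimal usage sketch of the workflow this patch enables, assuming a daemon built from this branch with CRIU >= 1.5.2 installed. The flag and field names come from the CLI code above; the busybox image and the /tmp/cp-* image directory are only illustrative:

    # start a long-running container
    cid=$(docker run -d busybox top)

    # checkpoint it; the CRIU image files go to --image-dir
    # (add --leave-running to keep the container alive after the dump)
    docker checkpoint --image-dir=/tmp/cp-$cid $cid

    # the daemon now reports the checkpointed state added in api/types
    docker inspect -f '{{.State.Checkpointed}}' $cid

    # restore the container from the same image directory
    docker restore --image-dir=/tmp/cp-$cid $cid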