Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reuse the endpoint of the checkpointed container when restore #7

Open
wants to merge 29 commits into
base: cr-combined
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
08507cf
Checkpoint/Restore Support: add exec driver methods
Feb 6, 2015
5d19d1a
Update checkpoint/restore support to match docker/master
boucher May 25, 2015
de50b10
Checkpoint/Restore Support: add functionality to daemon
Feb 6, 2015
55fcafa
Release the network resource during checkpoint
huikang May 19, 2015
7c45210
Update daemon and cli support for checkpoint and restore.
boucher Jun 1, 2015
5638d05
Add compilation steps for Criu to the Dockerfile
boucher Jun 2, 2015
931ab3c
Add optional dependency info to the PACKAGERS file.
boucher Jun 16, 2015
25627f6
Don't destroy/delete the container if it has been checkpointed.
boucher Jun 18, 2015
637d7ac
Reuse the endpoint of the checkpointed container when restore
huikang Jun 19, 2015
56cd5a1
Checkpoint/Restore Support: add exec driver methods
Feb 6, 2015
456ecbf
Update checkpoint/restore support to match docker/master
boucher May 25, 2015
34bd07f
Checkpoint/Restore Support: add functionality to daemon
Feb 6, 2015
ca085a8
Release the network resource during checkpoint
huikang May 19, 2015
d06dedf
Update daemon and cli support for checkpoint and restore.
boucher Jun 1, 2015
87179b4
Add compilation steps for Criu to the Dockerfile
boucher Jun 2, 2015
b8cebfc
Add optional dependency info to the PACKAGERS file.
boucher Jun 16, 2015
c1a59ac
Don't destroy/delete the container if it has been checkpointed.
boucher Jun 18, 2015
297994e
Reuse endpoint for 1.8.0-dev
huikang Jun 29, 2015
b654a74
Checkpoint/Restore Support: add exec driver methods
Feb 6, 2015
90f5917
Update checkpoint/restore support to match docker/master
boucher May 25, 2015
a3b970f
Checkpoint/Restore Support: add functionality to daemon
Feb 6, 2015
b3c2a8e
Release the network resource during checkpoint
huikang May 19, 2015
10003bf
Update daemon and cli support for checkpoint and restore.
boucher Jun 1, 2015
b7c8e38
Add compilation steps for Criu to the Dockerfile
boucher Jun 2, 2015
28d3e84
Add optional dependency info to the PACKAGERS file.
boucher Jun 16, 2015
590ae6b
Don't destroy/delete the container if it has been checkpointed.
boucher Jun 18, 2015
ff844f8
Move checkpoint methods into a separate container_checkpoint file.
boucher Jul 9, 2015
e1348e6
Merge remote-tracking branch 'upstream/cr-combined' into cr-combined
huikang Jul 10, 2015
a7559b4
Update endpoint fix code
huikang Jul 10, 2015
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -32,28 +32,39 @@ RUN echo deb http://ppa.launchpad.net/zfs-native/stable/ubuntu trusty main > /et
# Packaged dependencies
RUN apt-get update && apt-get install -y \
apparmor \
asciidoc \
aufs-tools \
automake \
bash-completion \
bsdmainutils \
btrfs-tools \
build-essential \
curl \
dpkg-sig \
git \
iptables \
libaio-dev \
libapparmor-dev \
libcap-dev \
libprotobuf-c0-dev \
libprotobuf-dev \
libsqlite3-dev \
mercurial \
parallel \
pkg-config \
protobuf-compiler \
protobuf-c-compiler \
python-minimal \
python-mock \
python-pip \
python-protobuf \
python-websocket \
reprepro \
ruby1.9.1 \
ruby1.9.1-dev \
s3cmd=1.1.0* \
ubuntu-zfs \
xmlto \
libzfs-dev \
--no-install-recommends

Expand All @@ -78,6 +89,13 @@ RUN cd /usr/src/lxc \
&& make install \
&& ldconfig

# Install Criu
# Fetch the CRIU v1.6 source tarball from GitHub and unpack it straight into
# /usr/src/criu; --strip-components=1 drops the archive's top-level directory.
RUN mkdir -p /usr/src/criu \
&& curl -sSL https://github.com/xemul/criu/archive/v1.6.tar.gz | tar -v -C /usr/src/criu/ -xz --strip-components=1
# Build CRIU from the unpacked sources and install it into the image.
RUN cd /usr/src/criu \
&& make \
&& make install

# Install Go
ENV GO_VERSION 1.4.2
RUN curl -sSL https://golang.org/dl/go${GO_VERSION}.src.tar.gz | tar -v -C /usr/local -xz \
Expand Down
52 changes: 52 additions & 0 deletions api/client/checkpoint.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
package client

import (
"fmt"

flag "github.com/docker/docker/pkg/mflag"
"github.com/docker/docker/runconfig"
)

// CmdCheckpoint checkpoints one or more running containers.
//
// The parsed flags are collected into a runconfig.CriuConfig, which is POSTed
// to /containers/<name>/checkpoint once for every container argument. The
// name of each container whose request succeeds is written to cli.out; a
// failing request has its error written to cli.err and the remaining
// containers are still attempted. A generic error is returned when at least
// one request failed.
func (cli *DockerCli) CmdCheckpoint(args ...string) error {
	cmd := cli.Subcmd("checkpoint", []string{"CONTAINER [CONTAINER...]"}, "Checkpoint one or more running containers", true)
	cmd.Require(flag.Min, 1)

	imagesDir := cmd.String([]string{"-image-dir"}, "", "directory for storing checkpoint image files")
	workDir := cmd.String([]string{"-work-dir"}, "", "directory for storing log file")
	leaveRunning := cmd.Bool([]string{"-leave-running"}, false, "leave the container running after checkpoint")
	allowTCP := cmd.Bool([]string{"-allow-tcp"}, false, "allow checkpointing tcp connections")
	allowExtUnix := cmd.Bool([]string{"-allow-ext-unix"}, false, "allow checkpointing external unix connections")
	allowShell := cmd.Bool([]string{"-allow-shell"}, false, "allow checkpointing shell jobs")

	if parseErr := cmd.ParseFlags(args, true); parseErr != nil {
		return parseErr
	}

	// Without any container argument there is nothing to do but show usage.
	if cmd.NArg() < 1 {
		cmd.Usage()
		return nil
	}

	// The same option set is sent for every container named on the command line.
	opts := &runconfig.CriuConfig{
		ImagesDirectory:         *imagesDir,
		WorkDirectory:           *workDir,
		LeaveRunning:            *leaveRunning,
		TcpEstablished:          *allowTCP,
		ExternalUnixConnections: *allowExtUnix,
		ShellJob:                *allowShell,
	}

	var failure error
	for _, target := range cmd.Args() {
		endpoint := "/containers/" + target + "/checkpoint"
		if _, _, callErr := readBody(cli.call("POST", endpoint, opts, nil)); callErr != nil {
			// Report the failure but keep going with the other containers.
			fmt.Fprintf(cli.err, "%s\n", callErr)
			failure = fmt.Errorf("Error: failed to checkpoint one or more containers")
			continue
		}
		fmt.Fprintf(cli.out, "%s\n", target)
	}
	return failure
}
54 changes: 54 additions & 0 deletions api/client/restore.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
package client

import (
"fmt"

flag "github.com/docker/docker/pkg/mflag"
"github.com/docker/docker/runconfig"
)

// CmdRestore restores one or more checkpointed containers.
//
// The parsed flags are collected into a runconfig.RestoreConfig, which is
// POSTed to /containers/<name>/restore once for every container argument. The
// name of each container whose request succeeds is written to cli.out; a
// failing request has its error written to cli.err and the remaining
// containers are still attempted. A generic error is returned when at least
// one request failed.
func (cli *DockerCli) CmdRestore(args ...string) error {
	cmd := cli.Subcmd("restore", []string{"CONTAINER [CONTAINER...]"}, "Restore one or more checkpointed containers", true)
	cmd.Require(flag.Min, 1)

	imagesDir := cmd.String([]string{"-image-dir"}, "", "directory to restore image files from")
	workDir := cmd.String([]string{"-work-dir"}, "", "directory for restore log")
	allowTCP := cmd.Bool([]string{"-allow-tcp"}, false, "allow restoring tcp connections")
	allowExtUnix := cmd.Bool([]string{"-allow-ext-unix"}, false, "allow restoring external unix connections")
	allowShell := cmd.Bool([]string{"-allow-shell"}, false, "allow restoring shell jobs")
	force := cmd.Bool([]string{"-force"}, false, "bypass checks for current container state")

	if parseErr := cmd.ParseFlags(args, true); parseErr != nil {
		return parseErr
	}

	// Without any container argument there is nothing to do but show usage.
	if cmd.NArg() < 1 {
		cmd.Usage()
		return nil
	}

	// The same option set is sent for every container named on the command line.
	criu := runconfig.CriuConfig{
		ImagesDirectory:         *imagesDir,
		WorkDirectory:           *workDir,
		TcpEstablished:          *allowTCP,
		ExternalUnixConnections: *allowExtUnix,
		ShellJob:                *allowShell,
	}
	opts := &runconfig.RestoreConfig{
		CriuOpts:     criu,
		ForceRestore: *force,
	}

	var failure error
	for _, target := range cmd.Args() {
		endpoint := "/containers/" + target + "/restore"
		if _, _, callErr := readBody(cli.call("POST", endpoint, opts, nil)); callErr != nil {
			// Report the failure but keep going with the other containers.
			fmt.Fprintf(cli.err, "%s\n", callErr)
			failure = fmt.Errorf("Error: failed to restore one or more containers")
			continue
		}
		fmt.Fprintf(cli.out, "%s\n", target)
	}
	return failure
}
88 changes: 66 additions & 22 deletions api/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -1348,6 +1348,48 @@ func (s *Server) postContainersCopy(version version.Version, w http.ResponseWrit
return nil
}

// postContainersCheckpoint serves POST /containers/{name}/checkpoint.
//
// The request body is decoded as a JSON runconfig.CriuConfig and passed to
// the daemon together with the container name found in vars["name"]. The
// handler replies with 204 No Content on success; a nil vars map, a form
// parsing failure, a JSON decoding failure or a daemon error is returned to
// the caller as-is.
func (s *Server) postContainersCheckpoint(version version.Version, w http.ResponseWriter, r *http.Request, vars map[string]string) error {
	if vars == nil {
		return fmt.Errorf("Missing parameter")
	}
	if formErr := parseForm(r); formErr != nil {
		return formErr
	}

	var opts runconfig.CriuConfig
	body := json.NewDecoder(r.Body)
	if decodeErr := body.Decode(&opts); decodeErr != nil {
		return decodeErr
	}

	target := vars["name"]
	if cpErr := s.daemon.ContainerCheckpoint(target, &opts); cpErr != nil {
		return cpErr
	}

	// Nothing to send back beyond the success status.
	w.WriteHeader(http.StatusNoContent)
	return nil
}

// postContainersRestore serves POST /containers/{name}/restore.
//
// The request body is decoded as a JSON runconfig.RestoreConfig; its CRIU
// options and force flag are passed to the daemon together with the container
// name found in vars["name"]. The handler replies with 204 No Content on
// success; a nil vars map, a form parsing failure, a JSON decoding failure or
// a daemon error is returned to the caller as-is.
func (s *Server) postContainersRestore(version version.Version, w http.ResponseWriter, r *http.Request, vars map[string]string) error {
	if vars == nil {
		return fmt.Errorf("Missing parameter")
	}
	if formErr := parseForm(r); formErr != nil {
		return formErr
	}

	var cfg runconfig.RestoreConfig
	body := json.NewDecoder(r.Body)
	if decodeErr := body.Decode(&cfg); decodeErr != nil {
		return decodeErr
	}

	target := vars["name"]
	if restoreErr := s.daemon.ContainerRestore(target, &cfg.CriuOpts, cfg.ForceRestore); restoreErr != nil {
		return restoreErr
	}

	// Nothing to send back beyond the success status.
	w.WriteHeader(http.StatusNoContent)
	return nil
}

func (s *Server) postContainerExecCreate(version version.Version, w http.ResponseWriter, r *http.Request, vars map[string]string) error {
if err := parseForm(r); err != nil {
return err
Expand Down Expand Up @@ -1559,28 +1601,30 @@ func createRouter(s *Server) *mux.Router {
"/exec/{id:.*}/json": s.getExecByID,
},
"POST": {
"/auth": s.postAuth,
"/commit": s.postCommit,
"/build": s.postBuild,
"/images/create": s.postImagesCreate,
"/images/load": s.postImagesLoad,
"/images/{name:.*}/push": s.postImagesPush,
"/images/{name:.*}/tag": s.postImagesTag,
"/containers/create": s.postContainersCreate,
"/containers/{name:.*}/kill": s.postContainersKill,
"/containers/{name:.*}/pause": s.postContainersPause,
"/containers/{name:.*}/unpause": s.postContainersUnpause,
"/containers/{name:.*}/restart": s.postContainersRestart,
"/containers/{name:.*}/start": s.postContainersStart,
"/containers/{name:.*}/stop": s.postContainersStop,
"/containers/{name:.*}/wait": s.postContainersWait,
"/containers/{name:.*}/resize": s.postContainersResize,
"/containers/{name:.*}/attach": s.postContainersAttach,
"/containers/{name:.*}/copy": s.postContainersCopy,
"/containers/{name:.*}/exec": s.postContainerExecCreate,
"/exec/{name:.*}/start": s.postContainerExecStart,
"/exec/{name:.*}/resize": s.postContainerExecResize,
"/containers/{name:.*}/rename": s.postContainerRename,
"/auth": s.postAuth,
"/commit": s.postCommit,
"/build": s.postBuild,
"/images/create": s.postImagesCreate,
"/images/load": s.postImagesLoad,
"/images/{name:.*}/push": s.postImagesPush,
"/images/{name:.*}/tag": s.postImagesTag,
"/containers/create": s.postContainersCreate,
"/containers/{name:.*}/kill": s.postContainersKill,
"/containers/{name:.*}/pause": s.postContainersPause,
"/containers/{name:.*}/unpause": s.postContainersUnpause,
"/containers/{name:.*}/restart": s.postContainersRestart,
"/containers/{name:.*}/start": s.postContainersStart,
"/containers/{name:.*}/stop": s.postContainersStop,
"/containers/{name:.*}/wait": s.postContainersWait,
"/containers/{name:.*}/resize": s.postContainersResize,
"/containers/{name:.*}/attach": s.postContainersAttach,
"/containers/{name:.*}/copy": s.postContainersCopy,
"/containers/{name:.*}/exec": s.postContainerExecCreate,
"/exec/{name:.*}/start": s.postContainerExecStart,
"/exec/{name:.*}/resize": s.postContainerExecResize,
"/containers/{name:.*}/rename": s.postContainerRename,
"/containers/{name:.*}/checkpoint": s.postContainersCheckpoint,
"/containers/{name:.*}/restore": s.postContainersRestore,
},
"DELETE": {
"/containers/{name:.*}": s.deleteContainers,
Expand Down
22 changes: 12 additions & 10 deletions api/types/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -194,16 +194,18 @@ type ExecStartCheck struct {
}

type ContainerState struct {
Running bool
Paused bool
Restarting bool
OOMKilled bool
Dead bool
Pid int
ExitCode int
Error string
StartedAt time.Time
FinishedAt time.Time
Running bool
Paused bool
Checkpointed bool
Restarting bool
OOMKilled bool
Dead bool
Pid int
ExitCode int
Error string
StartedAt time.Time
FinishedAt time.Time
CheckpointedAt time.Time
}

// GET "/containers/{name:.*}/json"
Expand Down
56 changes: 56 additions & 0 deletions daemon/checkpoint.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
package daemon

import (
"fmt"

"github.com/docker/docker/runconfig"
)

// ContainerCheckpoint checkpoints the container identified by name with the
// given CRIU options.
//
// An error is returned when the container cannot be looked up, when it is not
// running, or when the checkpoint operation itself fails. A "checkpoint"
// event is logged on the container once the operation has succeeded.
func (daemon *Daemon) ContainerCheckpoint(name string, opts *runconfig.CriuConfig) error {
	target, lookupErr := daemon.Get(name)
	if lookupErr != nil {
		return lookupErr
	}

	// Only a running container can be checkpointed.
	if !target.IsRunning() {
		return fmt.Errorf("Container %s not running", name)
	}

	if cpErr := target.Checkpoint(opts); cpErr != nil {
		return fmt.Errorf("Cannot checkpoint container %s: %s", name, cpErr)
	}

	target.LogEvent("checkpoint")
	return nil
}

// ContainerRestore restores the container identified by name with the given
// CRIU options.
//
// Without forceRestore the container must not be running and must currently
// be checkpointed. With forceRestore those state checks are skipped, but an
// image directory has to be supplied in opts if the container has never been
// checkpointed. A "restore" event is logged on success; if the restore itself
// fails a "die" event is logged and the failure is returned.
func (daemon *Daemon) ContainerRestore(name string, opts *runconfig.CriuConfig, forceRestore bool) error {
	target, lookupErr := daemon.Get(name)
	if lookupErr != nil {
		return lookupErr
	}

	switch {
	case forceRestore:
		// Forced restore: there may be no recorded checkpoint to fall back
		// on, so the caller has to say where the images live.
		if !target.HasBeenCheckpointed() && opts.ImagesDirectory == "" {
			return fmt.Errorf("You must specify an image directory to restore from %s", name)
		}
	// TODO: forcing may only need to bypass the checkpointed check below;
	// it is unclear how a restore behaves when the container is already
	// running.
	case target.IsRunning():
		return fmt.Errorf("Container %s already running", name)
	case !target.IsCheckpointed():
		return fmt.Errorf("Container %s is not checkpointed", name)
	}

	if restoreErr := target.Restore(opts, forceRestore); restoreErr != nil {
		target.LogEvent("die")
		return fmt.Errorf("Cannot restore container %s: %s", name, restoreErr)
	}

	target.LogEvent("restore")
	return nil
}
8 changes: 6 additions & 2 deletions daemon/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ func (container *Container) Start() (err error) {
if err := container.Mount(); err != nil {
return err
}
if err := container.initializeNetworking(); err != nil {
if err := container.initializeNetworking(false); err != nil {
return err
}
linkedEnv, err := container.setupLinkedContainers()
Expand Down Expand Up @@ -344,7 +344,11 @@ func (container *Container) isNetworkAllocated() bool {
// cleanup releases any network resources allocated to the container along with any rules
// around how containers are linked together. It also unmounts the container's root filesystem.
func (container *Container) cleanup() {
container.ReleaseNetwork()
if container.IsCheckpointed() {
logrus.Debugf("not calling ReleaseNetwork() for checkpointed container %s", container.ID)
} else {
container.ReleaseNetwork(false)
}

disableAllActiveLinks(container)

Expand Down
Loading