From 501e70eec17077a97b837eb6b970671c41245831 Mon Sep 17 00:00:00 2001 From: steiler Date: Tue, 15 Oct 2024 15:32:19 +0200 Subject: [PATCH 1/3] Adding HostExec for stages and RestartPolicy --- clab/config.go | 1 + clab/dependency_manager/dependency_node.go | 72 +++++++++++++++------- nodes/host/host.go | 4 ++ nodes/linux/linux.go | 5 ++ runtime/docker/docker.go | 15 ++++- types/node_definition.go | 8 +++ types/stages.go | 47 +++++++++++--- types/topology.go | 12 ++++ types/types.go | 3 +- 9 files changed, 135 insertions(+), 32 deletions(-) diff --git a/clab/config.go b/clab/config.go index 3928628c5..e45f9e761 100644 --- a/clab/config.go +++ b/clab/config.go @@ -197,6 +197,7 @@ func (c *CLab) createNodeCfg(nodeName string, nodeDef *types.NodeDefinition, idx Memory: c.Config.Topology.GetNodeMemory(nodeName), StartupDelay: c.Config.Topology.GetNodeStartupDelay(nodeName), AutoRemove: c.Config.Topology.GetNodeAutoRemove(nodeName), + RestartPolicy: c.Config.Topology.GetRestartPolicy(nodeName), Extras: c.Config.Topology.GetNodeExtras(nodeName), DNS: c.Config.Topology.GetNodeDns(nodeName), Certificate: c.Config.Topology.GetCertificateConfig(nodeName), diff --git a/clab/dependency_manager/dependency_node.go b/clab/dependency_manager/dependency_node.go index 77a5e6c3a..4d2bc7cfb 100644 --- a/clab/dependency_manager/dependency_node.go +++ b/clab/dependency_manager/dependency_node.go @@ -8,6 +8,7 @@ import ( log "github.com/sirupsen/logrus" "github.com/srl-labs/containerlab/clab/exec" "github.com/srl-labs/containerlab/nodes" + "github.com/srl-labs/containerlab/nodes/host" "github.com/srl-labs/containerlab/types" ) @@ -61,22 +62,40 @@ func (d *DependencyNode) getStageWG(n types.WaitForStage) *sync.WaitGroup { return d.stageWG[n] } -func (d *DependencyNode) getExecs(p types.WaitForStage, t types.CommandType) ([]*exec.ExecCmd, error) { +func (d *DependencyNode) getExecs(p types.WaitForStage, t types.CommandType, target types.CommandTarget) ([]*exec.ExecCmd, error) { + + var sb types.StageBase switch p { case types.WaitForCreate: - return d.Config().Stages.Create.GetExecCommands(t) + sb = d.Config().Stages.Create.StageBase case types.WaitForCreateLinks: - return d.Config().Stages.CreateLinks.GetExecCommands(t) + sb = d.Config().Stages.CreateLinks.StageBase case types.WaitForConfigure: - return d.Config().Stages.Configure.GetExecCommands(t) + sb = d.Config().Stages.Configure.StageBase case types.WaitForHealthy: - return d.Config().Stages.Healthy.GetExecCommands(t) + sb = d.Config().Stages.Healthy.StageBase case types.WaitForExit: - return d.Config().Stages.Exit.GetExecCommands(t) + sb = d.Config().Stages.Exit.StageBase + default: + return nil, fmt.Errorf("stage %s unknown", p) + } + + var e types.Execs + switch target { + case types.CommandTargetContainer: + e = sb.Execs + case types.CommandTargetHost: + e = sb.HostExecs } - return nil, fmt.Errorf("stage %s unknown", p) + + return e.GetExecCommands(t) } +const ( + execsConst = "execs" + hostExecsConst = "host-execs" +) + // EnterStage is called by a node that is meant to enter the specified stage. // The call will be blocked until all dependencies for the node to enter the stage are met. func (d *DependencyNode) EnterStage(ctx context.Context, p types.WaitForStage) { @@ -87,26 +106,35 @@ func (d *DependencyNode) EnterStage(ctx context.Context, p types.WaitForStage) { } func (d *DependencyNode) runExecs(ctx context.Context, ct types.CommandType, p types.WaitForStage) { - execs, err := d.getExecs(p, ct) - if err != nil { - log.Errorf("error getting exec commands defined for %s: %v", d.GetShortName(), err) - } - if len(execs) == 0 { - return - } - - // exec the commands - execResultCollection := exec.NewExecCollection() + for _, target := range []types.CommandTarget{types.CommandTargetContainer, types.CommandTargetHost} { - for _, exec := range execs { - execResult, err := d.RunExec(ctx, exec) + execs, err := d.getExecs(p, ct, target) if err != nil { - log.Errorf("error on exec in node %s for stage %s: %v", d.GetShortName(), p, err) + log.Errorf("error getting exec commands defined for %s: %v", d.GetShortName(), err) + } + + if len(execs) == 0 { + continue } - execResultCollection.Add(d.GetShortName(), execResult) + + // exec the commands + execResultCollection := exec.NewExecCollection() + var execResult *exec.ExecResult + for _, exec := range execs { + if target == types.CommandTargetContainer { + execResult, err = d.RunExec(ctx, exec) + } else { + execResult, err = host.RunExec(ctx, exec) + } + if err != nil { + log.Errorf("error on exec in node %s for stage %s: %v", d.GetShortName(), p, err) + } + execResultCollection.Add(d.GetShortName(), execResult) + } + execResultCollection.Log() } - execResultCollection.Log() + } // Done is called by a node that has finished all tasks for the provided stage. diff --git a/nodes/host/host.go b/nodes/host/host.go index 131c8c87e..1be522d1a 100644 --- a/nodes/host/host.go +++ b/nodes/host/host.go @@ -109,6 +109,10 @@ func (*host) GetContainers(_ context.Context) ([]runtime.GenericContainer, error // RunExec runs commands on the container host. func (*host) RunExec(ctx context.Context, e *cExec.ExecCmd) (*cExec.ExecResult, error) { + return RunExec(ctx, e) +} + +func RunExec(ctx context.Context, e *cExec.ExecCmd) (*cExec.ExecResult, error) { // retireve the command with its arguments command := e.GetCmd() diff --git a/nodes/linux/linux.go b/nodes/linux/linux.go index 7bc1c5503..5a93e0c01 100644 --- a/nodes/linux/linux.go +++ b/nodes/linux/linux.go @@ -37,6 +37,11 @@ func (n *linux) Init(cfg *types.NodeConfig, opts ...nodes.NodeOption) error { // Init DefaultNode n.DefaultNode = *nodes.NewDefaultNode(n) n.Cfg = cfg + + if n.Cfg.RestartPolicy == "" { + n.Cfg.RestartPolicy = "always" + } + for _, o := range opts { o(n) } diff --git a/runtime/docker/docker.go b/runtime/docker/docker.go index 27bce9cc6..b952150d8 100644 --- a/runtime/docker/docker.go +++ b/runtime/docker/docker.go @@ -502,8 +502,19 @@ func (d *DockerRuntime) CreateContainer(ctx context.Context, node *types.NodeCon // regular linux containers may benefit from automatic restart on failure // note, that veth pairs added to this container (outside of eth0) will be lost on restart - if node.Kind == "linux" && !node.AutoRemove { - containerHostConfig.RestartPolicy.Name = "on-failure" + if !node.AutoRemove && node.RestartPolicy != "" { + var rp container.RestartPolicyMode + switch node.RestartPolicy { + case "no": + rp = container.RestartPolicyDisabled + case "always": + rp = container.RestartPolicyAlways + case "on-failure": + rp = container.RestartPolicyOnFailure + case "unless-stopped": + rp = container.RestartPolicyUnlessStopped + } + containerHostConfig.RestartPolicy.Name = rp } cont, err := d.Client.ContainerCreate( diff --git a/types/node_definition.go b/types/node_definition.go index c9947ec63..f3fbb1435 100644 --- a/types/node_definition.go +++ b/types/node_definition.go @@ -22,6 +22,7 @@ type NodeDefinition struct { EnforceStartupConfig *bool `yaml:"enforce-startup-config,omitempty"` SuppressStartupConfig *bool `yaml:"suppress-startup-config,omitempty"` AutoRemove *bool `yaml:"auto-remove,omitempty"` + RestartPolicy string `yaml:"restart-policy,omitempty"` Config *ConfigDispatcher `yaml:"config,omitempty"` Image string `yaml:"image,omitempty"` ImagePullPolicy string `yaml:"image-pull-policy,omitempty"` @@ -169,6 +170,13 @@ func (n *NodeDefinition) GetAutoRemove() *bool { return n.AutoRemove } +func (n *NodeDefinition) GetRestartPolicy() string { + if n == nil { + return "" + } + return n.RestartPolicy +} + func (n *NodeDefinition) GetConfigDispatcher() *ConfigDispatcher { if n == nil { return nil diff --git a/types/stages.go b/types/stages.go index a4dca48b4..481c8cdad 100644 --- a/types/stages.go +++ b/types/stages.go @@ -33,27 +33,32 @@ func NewStages() *Stages { return &Stages{ Create: &StageCreate{ StageBase: StageBase{ - Execs: Execs{}, + Execs: Execs{}, + HostExecs: Execs{}, }, }, CreateLinks: &StageCreateLinks{ StageBase: StageBase{ - Execs: Execs{}, + Execs: Execs{}, + HostExecs: Execs{}, }, }, Configure: &StageConfigure{ StageBase: StageBase{ - Execs: Execs{}, + Execs: Execs{}, + HostExecs: Execs{}, }, }, Healthy: &StageHealthy{ StageBase: StageBase{ - Execs: Execs{}, + Execs: Execs{}, + HostExecs: Execs{}, }, }, Exit: &StageExit{ StageBase: StageBase{ - Execs: Execs{}, + Execs: Execs{}, + HostExecs: Execs{}, }, }, } @@ -135,6 +140,15 @@ const ( CommandTypeExit ) +type CommandTarget uint + +const ( + // CommandTargetContainer determines that the commands are meant to be executed within the container + CommandTargetContainer CommandTarget = iota + // CommandTargetHost determines that the commands are meant to be executed on the host system + CommandTargetHost +) + // GetExecCommands returns a list of exec commands to be executed. func (e *Execs) GetExecCommands(ct CommandType) ([]*exec.ExecCmd, error) { var commands []string @@ -233,8 +247,9 @@ func (s *StageExit) Merge(other *StageExit) error { // StageBase represents a common configuration stage. // Other stages embed this type to inherit its configuration options. type StageBase struct { - WaitFor WaitForList `yaml:"wait-for,omitempty"` - Execs `yaml:"exec,omitempty"` + WaitFor WaitForList `yaml:"wait-for,omitempty"` + Execs Execs `yaml:"exec,omitempty"` + HostExecs Execs `yaml:"host-exec,omitempty"` } // WaitForList is a list of WaitFor configurations. @@ -284,6 +299,24 @@ func (s *StageBase) Merge(sc *StageBase) error { s.Execs.CommandsOnExit = append(s.Execs.CommandsOnExit, cmd) } + for _, cmd := range sc.HostExecs.CommandsOnEnter { + // prevent adding the same dependency twice + if slices.Contains(s.HostExecs.CommandsOnEnter, cmd) { + continue + } + + s.HostExecs.CommandsOnEnter = append(s.HostExecs.CommandsOnEnter, cmd) + } + + for _, cmd := range sc.HostExecs.CommandsOnExit { + // prevent adding the same dependency twice + if slices.Contains(s.HostExecs.CommandsOnExit, cmd) { + continue + } + + s.HostExecs.CommandsOnExit = append(s.HostExecs.CommandsOnExit, cmd) + } + return nil } diff --git a/types/topology.go b/types/topology.go index 035f06bee..4669eb24c 100644 --- a/types/topology.go +++ b/types/topology.go @@ -245,6 +245,18 @@ func (t *Topology) GetNodeAutoRemove(name string) bool { return false } +func (t *Topology) GetRestartPolicy(name string) string { + if ndef, ok := t.Nodes[name]; ok { + if l := ndef.GetRestartPolicy(); l != "" { + return l + } + if l := t.GetKind(t.GetNodeKind(name)).GetRestartPolicy(); l != "" { + return l + } + } + return t.GetDefaults().GetRestartPolicy() +} + func (t *Topology) GetNodeLicense(name string) string { if ndef, ok := t.Nodes[name]; ok { if l := ndef.GetLicense(); l != "" { diff --git a/types/types.go b/types/types.go index 439f27b4b..bc5f78285 100644 --- a/types/types.go +++ b/types/types.go @@ -114,7 +114,8 @@ type NodeConfig struct { // when set to true will prevent creation of a startup-config, for auto-provisioning testing (ZTP) SuppressStartupConfig bool `json:"suppress-startup-config,omitempty"` // when set to true will auto-remove a stopped/failed container - AutoRemove bool `json:"auto-remove,omitempty"` + AutoRemove bool `json:"auto-remove,omitempty"` + RestartPolicy string `json:"restart-policy,omitempty"` // path to config file that is actually mounted to the container and is a result of templation ResStartupConfig string `json:"startup-config-abs-path,omitempty"` Config *ConfigDispatcher `json:"config,omitempty"` From 8f7c41c44cde92f4c0cad816d0134d064d01edf8 Mon Sep 17 00:00:00 2001 From: steiler Date: Tue, 15 Oct 2024 15:48:39 +0200 Subject: [PATCH 2/3] cleanup --- clab/dependency_manager/dependency_node.go | 5 ----- 1 file changed, 5 deletions(-) diff --git a/clab/dependency_manager/dependency_node.go b/clab/dependency_manager/dependency_node.go index 4d2bc7cfb..85aefebd7 100644 --- a/clab/dependency_manager/dependency_node.go +++ b/clab/dependency_manager/dependency_node.go @@ -91,11 +91,6 @@ func (d *DependencyNode) getExecs(p types.WaitForStage, t types.CommandType, tar return e.GetExecCommands(t) } -const ( - execsConst = "execs" - hostExecsConst = "host-execs" -) - // EnterStage is called by a node that is meant to enter the specified stage. // The call will be blocked until all dependencies for the node to enter the stage are met. func (d *DependencyNode) EnterStage(ctx context.Context, p types.WaitForStage) { From 5ea2aa0c5eef213cc947f90045ee04d817845b6b Mon Sep 17 00:00:00 2001 From: Roman Dodin Date: Thu, 17 Oct 2024 14:08:40 +0300 Subject: [PATCH 3/3] added docs and test --- clab/dependency_manager/dependency_node.go | 2 +- docs/manual/nodes.md | 44 ++++++++++++++++++++++ nodes/linux/linux.go | 4 ++ schemas/clab.schema.json | 17 ++++++++- tests/01-smoke/18-stages.robot | 4 ++ tests/01-smoke/stages.clab.yml | 3 ++ 6 files changed, 72 insertions(+), 2 deletions(-) diff --git a/clab/dependency_manager/dependency_node.go b/clab/dependency_manager/dependency_node.go index 85aefebd7..2f3d30da9 100644 --- a/clab/dependency_manager/dependency_node.go +++ b/clab/dependency_manager/dependency_node.go @@ -102,7 +102,7 @@ func (d *DependencyNode) EnterStage(ctx context.Context, p types.WaitForStage) { func (d *DependencyNode) runExecs(ctx context.Context, ct types.CommandType, p types.WaitForStage) { - for _, target := range []types.CommandTarget{types.CommandTargetContainer, types.CommandTargetHost} { + for _, target := range []types.CommandTarget{types.CommandTargetHost, types.CommandTargetContainer} { execs, err := d.getExecs(p, ct, target) if err != nil { diff --git a/docs/manual/nodes.md b/docs/manual/nodes.md index 0a9de97d8..2dc1b0856 100644 --- a/docs/manual/nodes.md +++ b/docs/manual/nodes.md @@ -88,6 +88,32 @@ topology: image-pull-policy: Always ``` +### restart-policy + +With `restart-policy` a user defines the restart policy of a container as per [docker docs](https://docs.docker.com/engine/containers/start-containers-automatically/). + +Valid values are: + +- `no` - Don't automatically restart the container. +- `on-failure` - Restart the container if it exits due to an error, which manifests as a non-zero exit code. The on-failure policy only prompts a restart if the container exits with a failure. It doesn't restart the container if the daemon restarts. +- `always` - Always restart the container if it stops. If it's manually stopped, it's restarted only when Docker daemon restarts or the container itself is manually restarted. +- `unless-stopped` Similar to always, except that when the container is stopped (manually or otherwise), it isn't restarted even after Docker daemon restarts. + +`no` is the default restart policy value for all kinds, but `linux`. Linux kind defaults to `always`. + +```yaml +topology: + nodes: + srl: + image: ghcr.io/nokia/srlinux + kind: nokia_srlinux + restart-policy: always + alpine: + kind: linux + image: alpine + restart-policy: "no" +``` + ### license Some containerized NOSes require a license to operate or can leverage a license to lift-off limitations of an unlicensed version. With `license` property a user sets a path to a license file that a node will use. The license file will then be mounted to the container by the path that is defined by the `kind/type` of the node. @@ -758,6 +784,24 @@ Per-stage command execution gives you additional flexibility in terms of when th +##### Host exec + +The stage's `exec` property runs the commands in the container namespace and therefore targets the container node itself. This is super useful in itself, but sometimes you need to run a command on the host as a reaction to a stage enter/exit event. + +This is what `host-exec` is designed for. It runs the command in the host namespace and therefore targets the host itself. + +```yaml +nodes: + node1: + stages: + create-links: + host-exec: + on-enter: + - touch /tmp/hello +``` + +In the example above, containerlab will run `touch /tmp/hello` command when the `node1` is about to enter the `create-links` stage. You can use `host-exec` in every stage. + ### certificate To automatically generate a TLS certificate for a node and sign it with the Certificate Authority created by containerlab, use `certificate.issue: true` parameter. diff --git a/nodes/linux/linux.go b/nodes/linux/linux.go index 5a93e0c01..85009f783 100644 --- a/nodes/linux/linux.go +++ b/nodes/linux/linux.go @@ -38,6 +38,10 @@ func (n *linux) Init(cfg *types.NodeConfig, opts ...nodes.NodeOption) error { n.DefaultNode = *nodes.NewDefaultNode(n) n.Cfg = cfg + // linux kind uses `always` as a default restart policy + // since often they run auxiliary services that might fail because + // of the wrong configuration or other reasons. + // Usually we want those services to automatically restart. if n.Cfg.RestartPolicy == "" { n.Cfg.RestartPolicy = "always" } diff --git a/schemas/clab.schema.json b/schemas/clab.schema.json index 22fb6f497..50b31559b 100644 --- a/schemas/clab.schema.json +++ b/schemas/clab.schema.json @@ -15,7 +15,7 @@ }, "image-pull-policy": { "type": "string", - "description": "policy for pulling the referenced cotnainer image", + "description": "policy for pulling the referenced container image", "markdownDescription": "container [image-pull-policy](https://containerlab.dev/manual/nodes/#image-pull-policy) to use for this node", "enum": [ "always", @@ -26,6 +26,21 @@ "IfNotPresent" ] }, + "restart-policy": { + "type": "string", + "description": "restart policy for the referenced container image", + "markdownDescription": "container [restart-policy](https://containerlab.dev/manual/nodes/#restart-policy) to use for this node", + "enum": [ + "no", + "No", + "on-failure", + "On-failure", + "Always", + "always", + "unless-stopped", + "Unless-stopped" + ] + }, "kind": { "type": "string", "description": "kind of this node", diff --git a/tests/01-smoke/18-stages.robot b/tests/01-smoke/18-stages.robot index 387213626..8ba25f350 100644 --- a/tests/01-smoke/18-stages.robot +++ b/tests/01-smoke/18-stages.robot @@ -92,6 +92,10 @@ Ensure node1 executed on-enter commands for its create-links stage and this outp Log ${match} +Ensure host-exec file is created with the right content + ${content} = Get File /tmp/host-exec-test + Should Contain ${content} foo msg=File does not contain the expected string + Deploy ${lab-name} lab with a single worker Run Keyword Teardown diff --git a/tests/01-smoke/stages.clab.yml b/tests/01-smoke/stages.clab.yml index 3577d1ee5..7bfa46571 100644 --- a/tests/01-smoke/stages.clab.yml +++ b/tests/01-smoke/stages.clab.yml @@ -17,6 +17,9 @@ topology: - node: node2 stage: create create-links: + host-exec: + on-enter: + - bash -c 'echo foo > /tmp/host-exec-test' exec: on-enter: - ls /sys/class/net/