Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce Actions; fully support multi-node with n>0 workers #164

Merged
merged 7 commits into from
Jan 8, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions Gopkg.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions Gopkg.toml
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,7 @@ required = [
name = "k8s.io/code-generator"
branch = "master"


[[constraint]]
branch = "master"
name = "k8s.io/utils"
25 changes: 24 additions & 1 deletion cmd/kind/create/cluster/createcluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,10 +74,33 @@ func runE(flags *flagpole, cmd *cobra.Command, args []string) error {
return fmt.Errorf("aborting due to invalid configuration")
}

// TODO(fabrizio pandini): this check is temporary / WIP
// kind v1alpha config fully supports multi nodes, but the cluster creation logic implemented in
// pkg/cluster/contex.go does it only partially (yet).
// As soon a external load-balancer and external etcd is implemented in pkg/cluster, this should go away

if cfg.ExternalLoadBalancer() != nil {
return fmt.Errorf("multi node support is still a work in progress, currently external load balancer node is not supported")
}

if cfg.SecondaryControlPlanes() != nil {
return fmt.Errorf("multi node support is still a work in progress, currently only single control-plane node are supported")
}

if cfg.ExternalEtcd() != nil {
return fmt.Errorf("multi node support is still a work in progress, currently external etcd node is not supported")
}

// create a cluster context and create the cluster
ctx := cluster.NewContext(flags.Name)
if flags.ImageName != "" {
cfg.Image = flags.ImageName
// Apply image override to all the Nodes defined in Config
// TODO(fabrizio pandini): this should be reconsidered when implementing
// https://github.com/kubernetes-sigs/kind/issues/133
fabriziopandini marked this conversation as resolved.
Show resolved Hide resolved
for _, n := range cfg.Nodes() {
n.Image = flags.ImageName
}

err := cfg.Validate()
if err != nil {
log.Errorf("Invalid flags, configuration failed validation: %v", err)
Expand Down
1 change: 0 additions & 1 deletion hack/update-generated.sh
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ deepcopy-gen -i ./pkg/cluster/config/ -O zz_generated.deepcopy --go-header-file

deepcopy-gen -i ./pkg/cluster/config/v1alpha1 -O zz_generated.deepcopy --go-header-file hack/boilerplate.go.txt
defaulter-gen -i ./pkg/cluster/config/v1alpha1 -O zz_generated.default --go-header-file hack/boilerplate.go.txt
conversion-gen -i ./pkg/cluster/config/v1alpha1 -O zz_generated.conversion --go-header-file hack/boilerplate.go.txt

deepcopy-gen -i ./pkg/cluster/config/v1alpha2 -O zz_generated.deepcopy --go-header-file hack/boilerplate.go.txt
defaulter-gen -i ./pkg/cluster/config/v1alpha2 -O zz_generated.default --go-header-file hack/boilerplate.go.txt
Expand Down
232 changes: 232 additions & 0 deletions pkg/cluster/actions.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,232 @@
/*
Copyright 2018 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package cluster

import (
"fmt"
"sort"
"sync"

"sigs.k8s.io/kind/pkg/cluster/config"
)

// Action defines a set of tasks to be executed on a `kind` cluster.
// Using actions allows defining repetitive, high-level abstractions/workflows
// by composing lower-level tasks.
type Action interface {
	// Tasks returns the list of tasks that make up this action.
	// Please note that the order of tasks is important, and it will be
	// respected during execution.
	Tasks() []Task
}
fabriziopandini marked this conversation as resolved.
Show resolved Hide resolved

// Task defines a logical step of an action to be executed on a `kind` cluster.
// At exec time the logical step is applied to the current cluster
// topology, and is planned for execution zero, one or many times accordingly
// (once per node matched by TargetNodes).
type Task struct {
	// Description is a human-readable description of the task.
	Description string
	// TargetNodes is a function that identifies the nodes where this
	// task should be executed.
	TargetNodes NodeSelector
	// Run is the func that implements the task action; it receives the
	// execution context and the concrete node the task was planned for.
	Run func(*execContext, *config.Node) error
}

// NodeSelector defines a function returning the subset of nodes (from the
// given cluster config) where tasks should be planned.
type NodeSelector func(*config.Config) config.NodeList

// PlannedTask defines a Task planned for execution on a given node.
type PlannedTask struct {
// task to be executed
Task Task
// node where the task should be executed
Node *config.Node
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PlannedTask has a Node reference, but also a Task and this Task holds a list of Nodes.

ideally an Action should have a list of Tasks.
and a Task should execute on a list of Nodes.

we should think of how to omit the PlannedTasks object.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@neolit123 The distinction between Task and PlannedTask is the key to manage variable list of actions in a way that can adapt to different cluster topologies

Task are defined at programming time - before knowing the cluster topology -, and they have a NodeSelector function, not a list of nodes.

PlannedTask is the "runtime version" of a Task, that is created when assigning a "logical task" to a Node in the actual cluster topology (a node that matches NodeSelector criteria).


// PlannedTask should respects the given order of actions and tasks
actionIndex int
taskIndex int
}

// ExecutionPlan contains an ordered list of planned tasks.
// Please note that the planning order is critical for providing a
// predictable, "kubeadm friendly" and consistent execution order.
type ExecutionPlan []*PlannedTask
fabriziopandini marked this conversation as resolved.
Show resolved Hide resolved

// actionImpls is the internal registry of named Action builder functions;
// the embedded mutex guards concurrent access to the map.
var actionImpls = struct {
	impls map[string]func() Action
	sync.Mutex
}{
	impls: map[string]func() Action{},
}

// RegisterAction registers a named Action builder function for later
// retrieval via GetAction; a builder registered under an existing name
// replaces the previous one.
func RegisterAction(name string, actionBuilderFunc func() Action) {
	actionImpls.Lock()
	defer actionImpls.Unlock()
	actionImpls.impls[name] = actionBuilderFunc
}

// GetAction returns a fresh instance of the action registered under the
// given name, or an error when no such action exists.
func GetAction(name string) (Action, error) {
	// Look up the builder under the lock, but invoke it after releasing
	// the lock so builders are free to call back into the registry.
	actionImpls.Lock()
	builder, known := actionImpls.impls[name]
	actionImpls.Unlock()
	if !known {
		return nil, fmt.Errorf("no Action implementation with name: %s", name)
	}
	return builder(), nil
}

// NewExecutionPlan creates an execution plan by applying the logical
// steps/tasks defined for each named action to the actual cluster topology.
// As a result a task could be executed zero, one or more times according to
// the target-nodes selector defined for it.
// The execution plan is ordered, providing a predictable, "kubeadm friendly"
// and consistent execution order; with this regard please note that the order
// of actions is important, and it will be respected by planning.
// TODO(fabrizio pandini): probably it will be necessary to add another
// criterion for ordering planned tasks for the most complex workflows (e.g.
// init-join-upgrade and then join again) — e.g. something like "action
// groups" where each group is a list of actions.
func NewExecutionPlan(cfg *config.Config, actionNames []string) (ExecutionPlan, error) {
	plan := ExecutionPlan{}
	for actionIdx, actionName := range actionNames {
		// resolve the registered action implementation by name
		action, err := GetAction(actionName)
		if err != nil {
			return nil, err
		}
		// expand each of the action's logical tasks into one planned
		// task per matching node in the current topology
		for taskIdx, task := range action.Tasks() {
			for _, node := range task.TargetNodes(cfg) {
				plan = append(plan, &PlannedTask{
					Node:        node,
					Task:        task,
					actionIndex: actionIdx,
					taskIndex:   taskIdx,
				})
			}
		}
	}

	// sort the planned tasks to ensure a predictable, "kubeadm friendly"
	// and consistent execution order
	sort.Sort(plan)
	return plan, nil
}

// Len returns the number of planned tasks in the ExecutionPlan.
// It is required for making ExecutionPlan sortable (sort.Interface).
func (t ExecutionPlan) Len() int {
	return len(t)
}

// Less return the lower between two elements of the ExecutionPlan, where the
// lower element should be executed before the other.
// It is required for making ExecutionPlan sortable.
func (t ExecutionPlan) Less(i, j int) bool {
return t[i].ExecutionOrder() < t[j].ExecutionOrder()

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't it a bit strange to relay on string sorting here given that ExecutionOrder() returns a string? Could we have a utility function that takes into account the node ProvisioningOrder() and the actionIndex and taskIndex? https://play.golang.org/p/z8ZGpyl_EPt

Maybe it's minor as long as the order is always preserved.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the order is always preserved

The ordering is 100% deterministic, and this is covered by unit tests as well

Nevertheless, I'm ready to change implementation if this one seems counter-intuitive, but I'm not 100% sure the alternative reads better. see https://gist.github.com/fabriziopandini/e544842b8c38c90974b215167a77d934

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm going to simplify actions quite a bit in a follow-up O(very soon)

}

// ExecutionOrder returns a string that can be used for sorting planned tasks
// into a predictable, "kubeadm friendly" and consistent order.
// NB. we are using a string to combine all the item considered into something
// that can be easily sorted using a lexicographical order
func (p *PlannedTask) ExecutionOrder() string {
return fmt.Sprintf("Node.ProvisioningOrder: %d - Node.Name: %s - actionIndex: %d - taskIndex: %d",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i might need a diagram to better understand how this work j/k.
in all seriousness, i think the sorting introduces a non-deterministic factor that could be avoided.

some questions:

  • why do we even need the sorting?
  • what if we want to execute tasks with an order not based on the node names?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why do we even need the sorting?
what if we want to execute tasks with an order not based on the node names?

Sorting is used to provide a predictable, "kubeadm friendly" and consistent task exection order that can automatically adapt to any requested list of actions and to any cluster topology supported by kind.

Such order is not based on the node name only, but on a combination of 4 attributes considered with a well known priority: node provisioning order, node name, action order, task order (I improved comments to make this more clear).

By combining above parameters you get a task execution order consistent with all the major kubeadm workflows (init, join control-plane, join) or (upgrade apply, upgrade node control-plane, upgrade node).

See discussion below with @ereslibre about implementation details, but in any case the ordering is 100% deterministic and it is already covered by unit tests signal.

As noted in NewExecutionPlan comments, I already know that this won't be enough for complex CI workflows, but this is a good step in that direction and this is why I'm putting order into this PR.

// Then PlannedTask are grouped by machines, respecting the kubeadm node
// ProvisioningOrder: first complete provisioning on bootstrap control
// plane, then complete provisioning of secondary control planes, and
// finally provision worker nodes.
p.Node.ProvisioningOrder(),
// Node name is considered in order to get a predictable/repeatable ordering
// in case of many nodes with the same ProvisioningOrder
p.Node.Name,
// If both the two criteria above are equal, the given order of actions will
// be respected and, for each action, the predefined order of tasks
// will be used
p.actionIndex,
p.taskIndex,
)
}

// Swap exchanges two elements of the ExecutionPlan.
// It is required for making ExecutionPlan sortable (sort.Interface).
func (t ExecutionPlan) Swap(i, j int) {
	t[i], t[j] = t[j], t[i]
}

// SelectAllNodes is a NodeSelector that returns all the nodes defined in
// the `kind` Config.
func SelectAllNodes(cfg *config.Config) config.NodeList {
	return cfg.Nodes()
}

// SelectControlPlaneNodes is a NodeSelector that returns all the nodes
// with control-plane role.
func SelectControlPlaneNodes(cfg *config.Config) config.NodeList {
	return cfg.ControlPlanes()
}

// SelectBootstrapControlPlaneNode is a NodeSelector that returns the
// first node with control-plane role, or nil when the config has none.
func SelectBootstrapControlPlaneNode(cfg *config.Config) config.NodeList {
	bootstrap := cfg.BootStrapControlPlane()
	if bootstrap == nil {
		return nil
	}
	return config.NodeList{bootstrap}
}

// SelectSecondaryControlPlaneNodes is a NodeSelector that returns all
// the nodes with control-plane role except the BootStrapControlPlane
// node, if any.
func SelectSecondaryControlPlaneNodes(cfg *config.Config) config.NodeList {
	return cfg.SecondaryControlPlanes()
}

// SelectWorkerNodes is a NodeSelector that returns all the nodes with
// worker role, if any.
func SelectWorkerNodes(cfg *config.Config) config.NodeList {
	return cfg.Workers()
}

// SelectExternalEtcdNode is a NodeSelector that returns the node with
// external-etcd role, or nil when no such node is defined.
func SelectExternalEtcdNode(cfg *config.Config) config.NodeList {
	etcd := cfg.ExternalEtcd()
	if etcd == nil {
		return nil
	}
	return config.NodeList{etcd}
}

// SelectExternalLoadBalancerNode is a NodeSelector that returns the node
// with external-load-balancer role, or nil when no such node is defined.
func SelectExternalLoadBalancerNode(cfg *config.Config) config.NodeList {
	lb := cfg.ExternalLoadBalancer()
	if lb == nil {
		return nil
	}
	return config.NodeList{lb}
}
Loading