diff --git a/.github/.spelling b/.github/.spelling index d79ec3e8fd..b5ead0c58e 100644 --- a/.github/.spelling +++ b/.github/.spelling @@ -146,7 +146,6 @@ SPIDERPOOL_CLI_PORT SPIDERPOOL_UPDATE_CR_MAX_RETRIES SPIDERPOOL_WORKLOADENDPOINT_MAX_HISTORY_RECORDS SPIDERPOOL_IPPOOL_MAX_ALLOCATED_IPS -SPIDERPOOL_GC_TERMINATING_POD_IP_ENABLED SPIDERPOOL_GC_DEFAULT_INTERVAL_DURATION kubernetes vlan @@ -294,3 +293,5 @@ auto_pool_scale_max_duration_seconds auto_pool_scale_min_duration_seconds auto_pool_scale_latest_duration_seconds auto_pool_scale_duration_seconds_histogram +SPIDERPOOL_GC_STATELESS_TERMINATING_POD_ON_READY_NODE_ENABLED +SPIDERPOOL_GC_STATELESS_TERMINATING_POD_ON_NOT_READY_NODE_ENABLED diff --git a/charts/spiderpool/README.md b/charts/spiderpool/README.md index e8e5946911..1c63967ab2 100644 --- a/charts/spiderpool/README.md +++ b/charts/spiderpool/README.md @@ -127,18 +127,19 @@ helm install spiderpool spiderpool/spiderpool --wait --namespace kube-system \ ### ipam parameters -| Name | Description | Value | -| -------------------------------------- | --------------------------------------------------------------------------- | ------ | -| `ipam.enableIPv4` | enable ipv4 | `true` | -| `ipam.enableIPv6` | enable ipv6 | `true` | -| `ipam.enableStatefulSet` | the network mode | `true` | -| `ipam.enableKubevirtStaticIP` | the feature to keep kubevirt vm pod static IP | `true` | -| `ipam.enableSpiderSubnet` | SpiderSubnet feature gate. | `true` | -| `ipam.subnetDefaultFlexibleIPNumber` | the default flexible IP number of SpiderSubnet feature auto-created IPPools | `1` | -| `ipam.gc.enabled` | enable retrieve IP in spiderippool CR | `true` | -| `ipam.gc.gcAll.intervalInSecond` | the gc all interval duration | `600` | -| `ipam.gc.GcDeletingTimeOutPod.enabled` | enable retrieve IP for the pod who times out of deleting graceful period | `true` | -| `ipam.gc.GcDeletingTimeOutPod.delay` | the gc delay seconds after the pod times out of deleting graceful period | `0` | +| Name | Description | Value | +| ------------------------------------------------------ | -------------------------------------------------------------------------------------------------------- | ------ | +| `ipam.enableIPv4` | enable ipv4 | `true` | +| `ipam.enableIPv6` | enable ipv6 | `true` | +| `ipam.enableStatefulSet` | the network mode | `true` | +| `ipam.enableKubevirtStaticIP` | the feature to keep kubevirt vm pod static IP | `true` | +| `ipam.enableSpiderSubnet` | SpiderSubnet feature gate. 
| `true` |
+| `ipam.subnetDefaultFlexibleIPNumber`                      | the default flexible IP number of SpiderSubnet feature auto-created IPPools                                | `1`    |
+| `ipam.gc.enabled`                                          | enable retrieve IP in spiderippool CR                                                                      | `true` |
+| `ipam.gc.gcAll.intervalInSecond`                           | the gc all interval duration                                                                               | `600`  |
+| `ipam.gc.enableGcStatelessTerminatingPodOnReadyNode`       | enable reclaim IP for the stateless pod that times out of deleting graceful period with its node ready    | `true` |
+| `ipam.gc.enableGcStatelessTerminatingPodOnNotReadyNode`    | enable reclaim IP for the stateless pod that times out of deleting graceful period with its node not ready | `true` |
+| `ipam.gc.gcDeletingTimeOutPodDelay`                        | the gc delay seconds after the pod times out of deleting graceful period                                   | `0`    |
 
 ### grafanaDashboard parameters
 
diff --git a/charts/spiderpool/templates/deployment.yaml b/charts/spiderpool/templates/deployment.yaml
index 777ec9a3a0..68650c6354 100644
--- a/charts/spiderpool/templates/deployment.yaml
+++ b/charts/spiderpool/templates/deployment.yaml
@@ -161,10 +161,12 @@ spec:
               value: {{ .Values.spiderpoolController.httpPort | quote }}
             - name: SPIDERPOOL_GC_IP_ENABLED
               value: {{ .Values.ipam.gc.enabled | quote }}
-            - name: SPIDERPOOL_GC_TERMINATING_POD_IP_ENABLED
-              value: {{ .Values.ipam.gc.GcDeletingTimeOutPod.enabled | quote }}
+            - name: SPIDERPOOL_GC_STATELESS_TERMINATING_POD_ON_READY_NODE_ENABLED
+              value: {{ .Values.ipam.gc.enableGcStatelessTerminatingPodOnReadyNode | quote }}
+            - name: SPIDERPOOL_GC_STATELESS_TERMINATING_POD_ON_NOT_READY_NODE_ENABLED
+              value: {{ .Values.ipam.gc.enableGcStatelessTerminatingPodOnNotReadyNode | quote }}
             - name: SPIDERPOOL_GC_ADDITIONAL_GRACE_DELAY
-              value: {{ .Values.ipam.gc.GcDeletingTimeOutPod.delay | quote }}
+              value: {{ .Values.ipam.gc.gcDeletingTimeOutPodDelay | quote }}
            - name: SPIDERPOOL_GC_DEFAULT_INTERVAL_DURATION
              value: {{ .Values.ipam.gc.gcAll.intervalInSecond | quote }}
            - name: SPIDERPOOL_MULTUS_CONFIG_ENABLED
diff --git a/charts/spiderpool/values.yaml b/charts/spiderpool/values.yaml
index 142c9d0672..82104f5abd 100644
--- a/charts/spiderpool/values.yaml
+++ b/charts/spiderpool/values.yaml
@@ -67,12 +67,14 @@ ipam:
       ## @param ipam.gc.gcAll.intervalInSecond the gc all interval duration
       intervalInSecond: 600
 
-    GcDeletingTimeOutPod:
-      ## @param ipam.gc.GcDeletingTimeOutPod.enabled enable retrieve IP for the pod who times out of deleting graceful period
-      enabled: true
+    ## @param ipam.gc.enableGcStatelessTerminatingPodOnReadyNode enable reclaim IP for the stateless pod that times out of deleting graceful period with its node ready
+    enableGcStatelessTerminatingPodOnReadyNode: true
 
-      ## @param ipam.gc.GcDeletingTimeOutPod.delay the gc delay seconds after the pod times out of deleting graceful period
-      delay: 0
+    ## @param ipam.gc.enableGcStatelessTerminatingPodOnNotReadyNode enable reclaim IP for the stateless pod that times out of deleting graceful period with its node not ready
+    enableGcStatelessTerminatingPodOnNotReadyNode: true
+
+    ## @param ipam.gc.gcDeletingTimeOutPodDelay the gc delay seconds after the pod times out of deleting graceful period
+    gcDeletingTimeOutPodDelay: 0
 
 ## @section grafanaDashboard parameters
 ##
diff --git a/cmd/spiderpool-controller/cmd/config.go b/cmd/spiderpool-controller/cmd/config.go
index b48c91fbc1..cd8f302c13 100644
--- a/cmd/spiderpool-controller/cmd/config.go
+++ b/cmd/spiderpool-controller/cmd/config.go
@@ -64,7 +64,8 @@ var envInfo = []envConf{
 	{"SPIDERPOOL_PYROSCOPE_PUSH_SERVER_ADDRESS", "", false, &controllerContext.Cfg.PyroscopeAddress, nil, nil},
{"SPIDERPOOL_GC_IP_ENABLED", "true", true, nil, &gcIPConfig.EnableGCIP, nil}, - {"SPIDERPOOL_GC_TERMINATING_POD_IP_ENABLED", "true", true, nil, &gcIPConfig.EnableGCForTerminatingPod, nil}, + {"SPIDERPOOL_GC_STATELESS_TERMINATING_POD_ON_READY_NODE_ENABLED", "true", true, nil, &gcIPConfig.EnableGCStatelessTerminatingPodOnReadyNode, nil}, + {"SPIDERPOOL_GC_STATELESS_TERMINATING_POD_ON_NOT_READY_NODE_ENABLED", "true", true, nil, &gcIPConfig.EnableGCStatelessTerminatingPodOnNotReadyNode, nil}, {"SPIDERPOOL_GC_IP_WORKER_NUM", "3", true, nil, nil, &gcIPConfig.ReleaseIPWorkerNum}, {"SPIDERPOOL_GC_CHANNEL_BUFFER", "5000", true, nil, nil, &gcIPConfig.GCIPChannelBuffer}, {"SPIDERPOOL_GC_MAX_PODENTRY_DB_CAP", "100000", true, nil, nil, &gcIPConfig.MaxPodEntryDatabaseCap}, diff --git a/cmd/spiderpool-controller/cmd/daemon.go b/cmd/spiderpool-controller/cmd/daemon.go index 90d4bef2dc..38393b85e2 100644 --- a/cmd/spiderpool-controller/cmd/daemon.go +++ b/cmd/spiderpool-controller/cmd/daemon.go @@ -380,6 +380,7 @@ func initGCManager(ctx context.Context) { controllerContext.PodManager, controllerContext.StsManager, controllerContext.KubevirtManager, + controllerContext.NodeManager, controllerContext.Leader, ) if nil != err { diff --git a/docs/concepts/coordinator-zh_CN.md b/docs/concepts/coordinator-zh_CN.md index a9ebe5f3fa..27c9c2d66d 100644 --- a/docs/concepts/coordinator-zh_CN.md +++ b/docs/concepts/coordinator-zh_CN.md @@ -24,24 +24,24 @@ Spiderpool 内置一个叫 `coordinator` 的 CNI meta-plugin, 它在 Main CNI ## CNI 配置字段说明 -| Field | Description | Schema | Validation | Default | -|-----------|---------------------------------------------------|--------|------------|---------| -| type | CNI 的类型 | 字符串 | required |coordinator | -| mode | coordinator 运行的模式. "auto": coordinator 自动判断运行在 Underlay 或者 Overlay; "underlay": 为 Pod 创建一对 Veth 设备,用于转发集群东西向流量。由 Pod 的 Underlay 网卡转发南北向流量; "overlay": 不额外创建 veth 设备,运行在多网卡模式。由 overlay 类型的 CNI(calico,cilium) 转发集群东西向流量,由 underlay 网卡转发南北向流量; "disable": 禁用 coordinator | 字符串 | optional | auto | -| tunePodRoutes | Pod 多网卡模式下,是否调协 Pod 的路由,解决访问来回路径不一致的问题 | 布尔型 | optional | true | -| podDefaultRouteNic | Pod 多网卡时,配置 Pod 的默认路由网卡。默认为 "", 其 value 实际为 Pod 第一张拥有默认路由的网卡| 字符串 | optional | "" | -| podDefaultCniNic | K8s 中 Pod 默认的第一张网卡 | 布尔型 | optional | eth0 | -| detectGateway | 创建 Pod 时是否检查网关是否可达 | 布尔型 | optional | false | -| detectIPConflict | 创建 Pod 时是否检查 Pod 的 IP 是否可达 | 布尔型 | optional | false | -| podMACPrefix | 是否固定 Pod 的 Mac 地址前缀 | 字符串 | optional | "" | -| overlayPodCIDR | 默认的集群 Pod 的子网,会注入到 Pod 中。不需要配置,自动从 Spidercoordinator default 中获取 | []stirng | optional | 默认从 Spidercoordinator default 中获取 | -| serviceCIDR | 默认的集群 Service 子网, 会注入到 Pod 中。不需要配置,自动从 Spidercoordinator default 中获取 | []stirng | optional | 默认从 Spidercoordinator default 中获取 | -| hijackCIDR | 额外的需要从主机转发的子网路由。比如nodelocaldns 的地址: 169.254.20.10/32 | []stirng | optional | 空 | -| hostRuleTable | 策略路由表号,同主机与 Pod 通信的路由将会存放于这个表号 | 整数型 | optional | 500 | -| hostRPFilter | 设置主机上的 sysctl 参数 rp_filter | 整数型 | optional | 0 | -| txQueueLen | 设置 Pod 的网卡传输队列 | 整数型 | optional | 0 | -| detectOptions | 检测地址冲突和网关可达性的高级配置项: 包括重试次数(默认为 3 次), 探测间隔(默认为 1s) 和 超时时间(默认为 1s) | 对象类型 | optional | 空 | -| logOptions | 日志配置,包括 logLevel(默认为 debug) 和 logFile(默认为 /var/log/spidernet/coordinator.log) | 对象类型 | optional | - | +| Field | Description | Schema | Validation | Default | 
+|--------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|------------|-----------------------------------| +| type | CNI 的类型 | 字符串 | required | coordinator | +| mode | coordinator 运行的模式. "auto": coordinator 自动判断运行在 Underlay 或者 Overlay; "underlay": 为 Pod 创建一对 Veth 设备,用于转发集群东西向流量。由 Pod 的 Underlay 网卡转发南北向流量; "overlay": 不额外创建 veth 设备,运行在多网卡模式。由 overlay 类型的 CNI(calico,cilium) 转发集群东西向流量,由 underlay 网卡转发南北向流量; "disable": 禁用 coordinator | 字符串 | optional | auto | +| tunePodRoutes | Pod 多网卡模式下,是否调协 Pod 的路由,解决访问来回路径不一致的问题 | 布尔型 | optional | true | +| podDefaultRouteNic | Pod 多网卡时,配置 Pod 的默认路由网卡。默认为 "", 其 value 实际为 Pod 第一张拥有默认路由的网卡 | 字符串 | optional | "" | +| podDefaultCniNic | K8s 中 Pod 默认的第一张网卡 | 布尔型 | optional | eth0 | +| detectGateway | 创建 Pod 时是否检查网关是否可达 | 布尔型 | optional | false | +| detectIPConflict | 创建 Pod 时是否检查 Pod 的 IP 是否冲突 | 布尔型 | optional | false | +| podMACPrefix | 是否固定 Pod 的 Mac 地址前缀 | 字符串 | optional | "" | +| overlayPodCIDR | 默认的集群 Pod 的子网,会注入到 Pod 中。不需要配置,自动从 Spidercoordinator default 中获取 | []stirng | optional | 默认从 Spidercoordinator default 中获取 | +| serviceCIDR | 默认的集群 Service 子网, 会注入到 Pod 中。不需要配置,自动从 Spidercoordinator default 中获取 | []stirng | optional | 默认从 Spidercoordinator default 中获取 | +| hijackCIDR | 额外的需要从主机转发的子网路由。比如nodelocaldns 的地址: 169.254.20.10/32 | []stirng | optional | 空 | +| hostRuleTable | 策略路由表号,同主机与 Pod 通信的路由将会存放于这个表号 | 整数型 | optional | 500 | +| hostRPFilter | 设置主机上的 sysctl 参数 rp_filter | 整数型 | optional | 0 | +| txQueueLen | 设置 Pod 的网卡传输队列 | 整数型 | optional | 0 | +| detectOptions | 检测地址冲突和网关可达性的高级配置项: 包括重试次数(默认为 3 次), 探测间隔(默认为 1s) 和 超时时间(默认为 1s) | 对象类型 | optional | 空 | +| logOptions | 日志配置,包括 logLevel(默认为 debug) 和 logFile(默认为 /var/log/spidernet/coordinator.log) | 对象类型 | optional | - | > 如果您通过 `SpinderMultusConfig CR` 帮助创建 NetworkAttachmentDefinition CR,您可以在 `SpinderMultusConfig` 中配置 `coordinator` (所有字段)。参考: [SpinderMultusConfig](../reference/crd-spidermultusconfig.md)。 > @@ -75,6 +75,8 @@ spec: detectIPConflict: true # Enable detectIPConflict ``` +> 若 IP 冲突检查发现某 IP 已被占用,请检查是否被集群中其他处于 `Terminating` 阶段的 **无状态** Pod 所占用,并配合 [IP 回收机制](./ipam-des-zh_CN.md#ip-回收机制) 相关参数进行配置。 + ## 支持检测 Pod 的网关是否可达(alpha) 在 Underlay 网络下,Pod 访问外部需要通过网关转发。如果网关不可达,那么在外界看来,这个 Pod 实际是失联的。有时候我们希望创建 Pod 时,其网关是可达的。 我们可借助 `coordinator` 检测 Pod 的网关是否可达, diff --git a/docs/concepts/coordinator.md b/docs/concepts/coordinator.md index 85965a8d79..91ef067dd5 100644 --- a/docs/concepts/coordinator.md +++ b/docs/concepts/coordinator.md @@ -25,24 +25,24 @@ Let's delve into how coordinator implements these features. ## CNI fields description -| Field | Description | Schema | Validation | Default | -|-----------|---------------------------------------------------|--------|------------|---------| -| type | The name of this Spidercoordinators resource | string | required |coordinator | -| mode | the mode in which the coordinator run. "auto": Automatically determine if it's overlay or underlay; "underlay": All NICs for pods are underlay NICs, and in this case the coordinator will create veth-pairs device to solve the problem of underlay pods accessing services; "overlay": The coordinator does not create veth-pair devices, but the first NIC of the pod cannot be an underlay NIC, which is created by overlay CNI (e.g. calico, cilium). 
Solve the problem of pod access to service through the first NIC; "disable": The coordinator does nothing and exits directly | string | optional | auto | -| tunePodRoutes | Tune the pod's routing tables while a pod is in multi-NIC mode | bool | optional | true | -| podDefaultRouteNic | Configure the default routed NIC for the pod while a pod is in multi-NIC mode, The default value is 0, indicate that the first network interface of the pod has the default route. | string | optional | "" | -| podDefaultCniNic | The name of the pod's first NIC defaults to eth0 in kubernetes | bool | optional | eth0 | -| detectGateway | Enable gateway detection while creating pods, which prevent pod creation if the gateway is unreachable | bool | optional | false | -| detectIPConflict | Enable IP conflicting checking for pods, which prevent pod creation if the pod's ip is conflicting | bool | optional | false | -| podMACPrefix | Enable fixing MAC address prefixes for pods. empty value is mean to disable | string | optional | "" | -| overlayPodCIDR | The default cluster CIDR for the cluster. It doesn't need to be configured, and it collected automatically by SpiderCoordinator | []stirng | optional | []string{} | -| serviceCIDR | The default service CIDR for the cluster. It doesn't need to be configured, and it collected automatically by SpiderCoordinator | []stirng | optional | []string{} | -| hijackCIDR | The CIDR that need to be forwarded via the host network, For example, the address of nodelocaldns(169.254.20.10/32 by default) | []stirng | optional | []string{} | -| hostRuleTable | The routes on the host that communicates with the pod's underlay IPs will belong to this routing table number | int | optional | 500 | -| hostRPFilter | Set the rp_filter sysctl parameter on the host, which is recommended to be set to 0 | int | optional | 0 | -| txQueueLen | set txqueuelen(Transmit Queue Length) of the pod's interface | int | optional | 0 | -| detectOptions | The advanced configuration of detectGateway and detectIPConflict, including retry numbers(default is 3), interval(default is 1s) and timeout(default is 1s) | obejct | optional | nil | -| logOptions | The configuration of logging, including logLevel(default is debug) and logFile(default is /var/log/spidernet/coordinator.log) | obejct | optional | nil | +| Field | Description | Schema | Validation | Default | +|--------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|------------|-------------| +| type | The name of this Spidercoordinators resource | string | required | coordinator | +| mode | the mode in which the coordinator run. "auto": Automatically determine if it's overlay or underlay; "underlay": All NICs for pods are underlay NICs, and in this case the coordinator will create veth-pairs device to solve the problem of underlay pods accessing services; "overlay": The coordinator does not create veth-pair devices, but the first NIC of the pod cannot be an underlay NIC, which is created by overlay CNI (e.g. 
calico, cilium). Solve the problem of pod access to service through the first NIC; "disable": The coordinator does nothing and exits directly | string | optional | auto | +| tunePodRoutes | Tune the pod's routing tables while a pod is in multi-NIC mode | bool | optional | true | +| podDefaultRouteNic | Configure the default routed NIC for the pod while a pod is in multi-NIC mode, The default value is 0, indicate that the first network interface of the pod has the default route. | string | optional | "" | +| podDefaultCniNic | The name of the pod's first NIC defaults to eth0 in kubernetes | bool | optional | eth0 | +| detectGateway | Enable gateway detection while creating pods, which prevent pod creation if the gateway is unreachable | bool | optional | false | +| detectIPConflict | Enable IP conflicting checking for pods, which prevent pod creation if the pod's ip is conflicting | bool | optional | false | +| podMACPrefix | Enable fixing MAC address prefixes for pods. empty value is mean to disable | string | optional | "" | +| overlayPodCIDR | The default cluster CIDR for the cluster. It doesn't need to be configured, and it collected automatically by SpiderCoordinator | []stirng | optional | []string{} | +| serviceCIDR | The default service CIDR for the cluster. It doesn't need to be configured, and it collected automatically by SpiderCoordinator | []stirng | optional | []string{} | +| hijackCIDR | The CIDR that need to be forwarded via the host network, For example, the address of nodelocaldns(169.254.20.10/32 by default) | []stirng | optional | []string{} | +| hostRuleTable | The routes on the host that communicates with the pod's underlay IPs will belong to this routing table number | int | optional | 500 | +| hostRPFilter | Set the rp_filter sysctl parameter on the host, which is recommended to be set to 0 | int | optional | 0 | +| txQueueLen | set txqueuelen(Transmit Queue Length) of the pod's interface | int | optional | 0 | +| detectOptions | The advanced configuration of detectGateway and detectIPConflict, including retry numbers(default is 3), interval(default is 1s) and timeout(default is 1s) | obejct | optional | nil | +| logOptions | The configuration of logging, including logLevel(default is debug) and logFile(default is /var/log/spidernet/coordinator.log) | obejct | optional | nil | > You can configure `coordinator` by specifying all the relevant fields in `SpinderMultusConfig` if a NetworkAttachmentDefinition CR is created via `SpinderMultusConfig CR`. For more information, please refer to [SpinderMultusConfig](../reference/crd-spidermultusconfig.md). > @@ -73,6 +73,8 @@ spec: detectIPConflict: true # Enable detectIPConflict ``` +> If the IP address conflict check indicates that an IP address is occupied, please check it whether is occupied by another **stateless** Pod in `Terminating` phase in the cluster, please refer to [IP garbage collection](./ipam-des.md#ip-garbage-collection). + ## Detect Pod gateway reachability(alpha) Under the underlay network, pod access to the outside needs to be forwarded through the gateway. If the gateway is unreachable, then the pod is actually lost. Sometimes we want to create a pod with a gateway reachable. We can use the 'coordinator' to check if the pod's gateway is reachable. 
diff --git a/docs/concepts/ipam-des-zh_CN.md b/docs/concepts/ipam-des-zh_CN.md
index 27fc7553b1..8d7bb41540 100644
--- a/docs/concepts/ipam-des-zh_CN.md
+++ b/docs/concepts/ipam-des-zh_CN.md
@@ -181,7 +181,8 @@ NOTE:
 在集群中 `delete Pod` 时,但由于`网络异常`或 `cni 二进制 crash` 等问题,导致调用 `cni delete` 失败,从而导致 IP 地址无法被 cni 回收。
 
 - 在 `cni delete 失败` 等故障场景,如果一个曾经分配了 IP 的 Pod 被销毁后,但在 IPAM 中还记录分配着IP 地址,形成了僵尸 IP 的现象。Spiderpool 针对这种问题,会基于周期和事件扫描机制,自动回收这些僵尸 IP 地址。
+- 因其他意外导致 **无状态** Pod 一直处于 `Terminating` 阶段,Spiderpool 将在 Pod 的 `spec.terminationGracePeriodSeconds` 后,自动释放其 IP 地址。该功能可通过环境变量 `SPIDERPOOL_GC_STATELESS_TERMINATING_POD_ON_READY_NODE_ENABLED` 来控制。该能力能够用以解决 `节点正常但 Pod 删除失败` 的故障场景。
 
-节点意外宕机后,集群中的 Pod 永久处于 `deleting` 状态,Pod 占用的 IP 地址无法被释放。
+节点意外宕机后,集群中的 Pod 永久处于 `Terminating` 阶段,Pod 占用的 IP 地址无法被释放。
 
-- 对处于 `Terminating` 状态的 Pod,Spiderpool 将在 Pod 的 `spec.terminationGracePeriodSecond` 后,自动释放其 IP 地址。该功能可通过环境变量 `SPIDERPOOL_GC_TERMINATING_POD_IP_ENABLED` 来控制。该能力能够用以解决 `节点意外宕机` 的故障场景。
+- 对处于 `Terminating` 阶段的 **无状态** Pod,Spiderpool 将在 Pod 的 `spec.terminationGracePeriodSeconds` 后,自动释放其 IP 地址。该功能可通过环境变量 `SPIDERPOOL_GC_STATELESS_TERMINATING_POD_ON_NOT_READY_NODE_ENABLED` 来控制。该能力能够用以解决 `节点意外宕机` 的故障场景。
diff --git a/docs/concepts/ipam-des.md b/docs/concepts/ipam-des.md
index 0f8b6f1f8e..3a796fb3df 100644
--- a/docs/concepts/ipam-des.md
+++ b/docs/concepts/ipam-des.md
@@ -224,7 +224,8 @@ The IP addresses assigned to Pods are recorded in IPAM, but these Pods no longer
 When `deleting Pod` in the cluster, but due to problems such as `network exception` or `cni binary crash`, the call to `cni delete` fails, resulting in the IP address not being reclaimed by cni.
 
 - In failure scenarios such as `cni delete failure`, if a Pod that has been assigned an IP is destroyed, but the IP address is still recorded in the IPAM, a phenomenon of zombie IP is formed. For this kind of problem, Spiderpool will automatically recycle these zombie IP addresses based on the cycle and event scanning mechanism.
+- In other unexpected cases, a **stateless** Pod may be stuck in the `Terminating` phase; Spiderpool will automatically release its IP address after the Pod's `spec.terminationGracePeriodSeconds` has elapsed. This feature can be controlled by the environment variable `SPIDERPOOL_GC_STATELESS_TERMINATING_POD_ON_READY_NODE_ENABLED`. This capability can be used to solve the failure scenario where `the node is ready but the Pod fails to be deleted`.
 
-After a node goes down unexpectedly, the Pod in the cluster is permanently in the `deleting` state, and the IP address occupied by the Pod cannot be released.
+After a node goes down unexpectedly, the Pod in the cluster is permanently in the `Terminating` phase, and the IP address occupied by the Pod cannot be released.
 
-- For a Pod in `Terminating` state, Spiderpool will automatically release its IP address after the Pod's `spec.terminationGracePeriodSecond`. This feature can be controlled by the environment variable `SPIDERPOOL_GC_TERMINATING_POD_IP_ENABLED`. This capability can be used to solve the failure scenario of `unexpected node downtime`.
+- For a **stateless** Pod in the `Terminating` phase, Spiderpool will automatically release its IP address after the Pod's `spec.terminationGracePeriodSeconds` has elapsed. This feature can be controlled by the environment variable `SPIDERPOOL_GC_STATELESS_TERMINATING_POD_ON_NOT_READY_NODE_ENABLED`. This capability can be used to solve the failure scenario of `unexpected node downtime`.
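The two bullets above describe one mechanism gated by two switches; which switch applies is decided by the readiness of the node hosting the terminating Pod. A minimal sketch of that decision, assuming the `GarbageCollectionConfig` field names introduced by this change (`shouldTraceTerminatingPod` is a hypothetical helper; the actual check lives in `pkg/gcmanager/pod_cache.go` further down in this diff):

```go
package gcsketch

import (
	corev1 "k8s.io/api/core/v1"

	"github.com/spidernet-io/spiderpool/pkg/nodemanager"
)

// gcConfig mirrors the two new switches added to GarbageCollectionConfig in this change.
type gcConfig struct {
	EnableGCStatelessTerminatingPodOnReadyNode    bool
	EnableGCStatelessTerminatingPodOnNotReadyNode bool
}

// shouldTraceTerminatingPod (hypothetical helper) reports whether a stateless
// terminating Pod should be traced for IP GC: the switch matching the
// readiness of the Pod's node must be enabled.
func shouldTraceTerminatingPod(cfg gcConfig, node *corev1.Node) bool {
	if nodemanager.IsNodeReady(node) {
		return cfg.EnableGCStatelessTerminatingPodOnReadyNode
	}
	return cfg.EnableGCStatelessTerminatingPodOnNotReadyNode
}
```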
diff --git a/docs/reference/spiderpool-controller.md b/docs/reference/spiderpool-controller.md index ea4c131167..01449315b5 100644 --- a/docs/reference/spiderpool-controller.md +++ b/docs/reference/spiderpool-controller.md @@ -14,23 +14,23 @@ Run the spiderpool controller daemon. ### ENV -| env | default | description | -|---------------------------------------------|---------|------------------------------------------------------------------------------------| -| SPIDERPOOL_LOG_LEVEL | info | Log level, optional values are "debug", "info", "warn", "error", "fatal", "panic". | -| SPIDERPOOL_ENABLED_METRIC | false | Enable/disable metrics. | -| SPIDERPOOL_ENABLED_DEBUG_METRIC | false | Enable spiderpool agent to collect debug level metrics. | -| SPIDERPOOL_METRIC_HTTP_PORT | false | The metrics port of spiderpool agent. | -| SPIDERPOOL_GOPS_LISTEN_PORT | 5724 | The gops port of spiderpool Controller. | -| SPIDERPOOL_WEBHOOK_PORT | 5722 | Webhook HTTP server port. | -| SPIDERPOOL_HEALTH_PORT | 5720 | The http Port for spiderpoolController, for health checking and http service. | -| SPIDERPOOL_GC_IP_ENABLED | true | Enable/disable IP GC. | -| SPIDERPOOL_GC_TERMINATING_POD_IP_ENABLED | true | Enable/disable IP GC for Terminating pod. | -| SPIDERPOOL_GC_ADDITIONAL_GRACE_DELAY | true | The gc delay seconds after the pod times out of deleting graceful period. | -| SPIDERPOOL_GC_DEFAULT_INTERVAL_DURATION | true | The gc all interval duration. | -| SPIDERPOOL_MULTUS_CONFIG_ENABLED | true | Enable/disable SpiderMultusConfig. | -| SPIDERPOOL_CNI_CONFIG_DIR | true | The host path of the cni config directory. | -| SPIDERPOOL_CILIUM_CONFIGMAP_NAMESPACE_NAME | true | The cilium's configMap, default is kube-system/cilium-config. | - +| env | default | description | +|-------------------------------------------------------------------|---------|--------------------------------------------------------------------------------------------------| +| SPIDERPOOL_LOG_LEVEL | info | Log level, optional values are "debug", "info", "warn", "error", "fatal", "panic". | +| SPIDERPOOL_ENABLED_METRIC | false | Enable/disable metrics. | +| SPIDERPOOL_ENABLED_DEBUG_METRIC | false | Enable spiderpool agent to collect debug level metrics. | +| SPIDERPOOL_METRIC_HTTP_PORT | false | The metrics port of spiderpool agent. | +| SPIDERPOOL_GOPS_LISTEN_PORT | 5724 | The gops port of spiderpool Controller. | +| SPIDERPOOL_WEBHOOK_PORT | 5722 | Webhook HTTP server port. | +| SPIDERPOOL_HEALTH_PORT | 5720 | The http Port for spiderpoolController, for health checking and http service. | +| SPIDERPOOL_GC_IP_ENABLED | true | Enable/disable IP GC. | +| SPIDERPOOL_GC_STATELESS_TERMINATING_POD_ON_READY_NODE_ENABLED | true | Enable/disable IP GC for stateless Terminating pod when the pod corresponding node is ready. | +| SPIDERPOOL_GC_STATELESS_TERMINATING_POD_ON_NOT_READY_NODE_ENABLED | true | Enable/disable IP GC for stateless Terminating pod when the pod corresponding node is not ready. | +| SPIDERPOOL_GC_ADDITIONAL_GRACE_DELAY | true | The gc delay seconds after the pod times out of deleting graceful period. | +| SPIDERPOOL_GC_DEFAULT_INTERVAL_DURATION | true | The gc all interval duration. | +| SPIDERPOOL_MULTUS_CONFIG_ENABLED | true | Enable/disable SpiderMultusConfig. | +| SPIDERPOOL_CNI_CONFIG_DIR | true | The host path of the cni config directory. | +| SPIDERPOOL_CILIUM_CONFIGMAP_NAMESPACE_NAME | true | The cilium's configMap, default is kube-system/cilium-config. 
| ## spiderpool-controller shutdown diff --git a/pkg/gcmanager/gc_manager.go b/pkg/gcmanager/gc_manager.go index d896638337..e38ac555d9 100644 --- a/pkg/gcmanager/gc_manager.go +++ b/pkg/gcmanager/gc_manager.go @@ -17,16 +17,18 @@ import ( "github.com/spidernet-io/spiderpool/pkg/kubevirtmanager" "github.com/spidernet-io/spiderpool/pkg/limiter" "github.com/spidernet-io/spiderpool/pkg/logutils" + "github.com/spidernet-io/spiderpool/pkg/nodemanager" "github.com/spidernet-io/spiderpool/pkg/podmanager" "github.com/spidernet-io/spiderpool/pkg/statefulsetmanager" "github.com/spidernet-io/spiderpool/pkg/workloadendpointmanager" ) type GarbageCollectionConfig struct { - EnableGCIP bool - EnableGCForTerminatingPod bool - EnableStatefulSet bool - EnableKubevirtStaticIP bool + EnableGCIP bool + EnableGCStatelessTerminatingPodOnReadyNode bool + EnableGCStatelessTerminatingPodOnNotReadyNode bool + EnableStatefulSet bool + EnableKubevirtStaticIP bool ReleaseIPWorkerNum int GCIPChannelBuffer int @@ -69,6 +71,7 @@ type SpiderGC struct { podMgr podmanager.PodManager stsMgr statefulsetmanager.StatefulSetManager kubevirtMgr kubevirtmanager.KubevirtManager + nodeMgr nodemanager.NodeManager leader election.SpiderLeaseElector informerFactory informers.SharedInformerFactory @@ -81,6 +84,7 @@ func NewGCManager(clientSet *kubernetes.Clientset, config *GarbageCollectionConf podManager podmanager.PodManager, stsManager statefulsetmanager.StatefulSetManager, kubevirtMgr kubevirtmanager.KubevirtManager, + nodeMgr nodemanager.NodeManager, spiderControllerLeader election.SpiderLeaseElector) (GCManager, error) { if clientSet == nil { return nil, fmt.Errorf("k8s ClientSet must be specified") @@ -121,6 +125,7 @@ func NewGCManager(clientSet *kubernetes.Clientset, config *GarbageCollectionConf podMgr: podManager, stsMgr: stsManager, kubevirtMgr: kubevirtMgr, + nodeMgr: nodeMgr, leader: spiderControllerLeader, gcLimiter: limiter.NewLimiter(limiter.LimiterConfig{}), diff --git a/pkg/gcmanager/pod_cache.go b/pkg/gcmanager/pod_cache.go index 88a60b3684..34705bc9ff 100644 --- a/pkg/gcmanager/pod_cache.go +++ b/pkg/gcmanager/pod_cache.go @@ -17,6 +17,7 @@ import ( "github.com/spidernet-io/spiderpool/pkg/constant" "github.com/spidernet-io/spiderpool/pkg/lock" "github.com/spidernet-io/spiderpool/pkg/logutils" + "github.com/spidernet-io/spiderpool/pkg/nodemanager" "github.com/spidernet-io/spiderpool/pkg/types" ) @@ -224,9 +225,19 @@ func (s *SpiderGC) buildPodEntry(oldPod, currentPod *corev1.Pod, deleted bool) ( } if isBuildTerminatingPodEntry { - // disable for gc terminating pod - if !s.gcConfig.EnableGCForTerminatingPod { - logger.Sugar().Debugf("IP gc already turn off 'EnableGCForTerminatingPod' configuration, disacrd tracing pod '%s/%s'", currentPod.Namespace, currentPod.Name) + // check terminating Pod corresponding Node status + node, err := s.nodeMgr.GetNodeByName(ctx, currentPod.Spec.NodeName, constant.UseCache) + if nil != err { + return nil, fmt.Errorf("failed to get terminating Pod '%s/%s' corredponing Node '%s', error: %v", currentPod.Namespace, currentPod.Name, currentPod.Spec.NodeName, err) + } + // disable for gc terminating pod with Node Ready + if nodemanager.IsNodeReady(node) && !s.gcConfig.EnableGCStatelessTerminatingPodOnReadyNode { + logger.Sugar().Debugf("IP GC already turn off 'EnableGCForTerminatingPodWithNodeReady' configuration, disacrd tracing pod '%s/%s'", currentPod.Namespace, currentPod.Name) + return nil, nil + } + // disable for gc terminating pod with Node NotReady + if !nodemanager.IsNodeReady(node) 
&& !s.gcConfig.EnableGCStatelessTerminatingPodOnNotReadyNode { + logger.Sugar().Debugf("IP GC already turn off 'EnableGCForTerminatingPodWithNodeNotReady' configuration, disacrd tracing pod '%s/%s'", currentPod.Namespace, currentPod.Name) return nil, nil } diff --git a/pkg/nodemanager/utils.go b/pkg/nodemanager/utils.go new file mode 100644 index 0000000000..e403fce9a8 --- /dev/null +++ b/pkg/nodemanager/utils.go @@ -0,0 +1,18 @@ +// Copyright 2023 Authors of spidernet-io +// SPDX-License-Identifier: Apache-2.0 + +package nodemanager + +import corev1 "k8s.io/api/core/v1" + +func IsNodeReady(node *corev1.Node) bool { + var readyCondition corev1.NodeCondition + for _, tmpCondition := range node.Status.Conditions { + if tmpCondition.Type == corev1.NodeReady { + readyCondition = tmpCondition + break + } + } + + return readyCondition.Status == corev1.ConditionTrue +} diff --git a/pkg/nodemanager/utils_test.go b/pkg/nodemanager/utils_test.go new file mode 100644 index 0000000000..03bc1bad13 --- /dev/null +++ b/pkg/nodemanager/utils_test.go @@ -0,0 +1,48 @@ +// Copyright 2023 Authors of spidernet-io +// SPDX-License-Identifier: Apache-2.0 + +package nodemanager + +import ( + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +var _ = Describe("NodeManager utils", Label("node_manager_utils_test"), func() { + Describe("IsNodeReady", func() { + var node *corev1.Node + BeforeEach(func() { + node = &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "master", + }, + Spec: corev1.NodeSpec{}, + Status: corev1.NodeStatus{ + Conditions: []corev1.NodeCondition{ + { + Type: corev1.NodeMemoryPressure, + Status: corev1.ConditionFalse, + }, + { + Type: corev1.NodeReady, + Status: corev1.ConditionTrue, + }, + }, + }, + } + }) + + It("Node is ready", func() { + isNodeReady := IsNodeReady(node) + Expect(isNodeReady).To(BeTrue()) + }) + + It("Node is not ready", func() { + node.Status.Conditions[1].Status = corev1.ConditionUnknown + isNodeReady := IsNodeReady(node) + Expect(isNodeReady).To(BeFalse()) + }) + }) +}) diff --git a/test/e2e/ippoolcr/ippoolcr_test.go b/test/e2e/ippoolcr/ippoolcr_test.go index 634cad18fd..4de306abb7 100644 --- a/test/e2e/ippoolcr/ippoolcr_test.go +++ b/test/e2e/ippoolcr/ippoolcr_test.go @@ -999,7 +999,7 @@ var _ = Describe("test ippool CR", Label("ippoolCR"), func() { return fmt.Errorf("IPPool %s is not controlled by subnet, wait for Subnet's reconcile to take in", poolName) } return nil - }).WithTimeout(time.Minute * 3).WithPolling(time.Second * 5).Should(BeNil()) + }).WithTimeout(time.Minute * 5).WithPolling(time.Second * 5).Should(BeNil()) GinkgoWriter.Println("check whether the IPPool inherits the Subnet properties") Expect(demoSpiderIPPool.Spec.Gateway).To(Equal(demoSpiderSubnet.Spec.Gateway)) @@ -1011,7 +1011,7 @@ var _ = Describe("test ippool CR", Label("ippoolCR"), func() { }() } - // for IPv4, create Subnet first and create IPPool later + // for IPv6, create Subnet first and create IPPool later if frame.Info.IpV6Enabled { wg.Add(1) go func() { @@ -1073,7 +1073,7 @@ var _ = Describe("test ippool CR", Label("ippoolCR"), func() { return fmt.Errorf("IPPool %s is not controlled by subnet, wait for Subnet's reconcile to take in", poolName) } return nil - }).WithTimeout(time.Minute * 3).WithPolling(time.Second * 5).Should(BeNil()) + }).WithTimeout(time.Minute * 5).WithPolling(time.Second * 5).Should(BeNil()) GinkgoWriter.Println("check whether the IPPool inherits the Subnet properties") 
Expect(demoSpiderIPPool.Spec.Gateway).To(Equal(demoSpiderSubnet.Spec.Gateway))
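A note on the new `IsNodeReady` helper shown above: when a Node carries no `NodeReady` condition at all, the zero-value condition (empty `Status`) is compared against `ConditionTrue`, so the node is treated as not ready. A small self-contained sketch of that edge case (the `main` wrapper is only for illustration):

```go
package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"

	"github.com/spidernet-io/spiderpool/pkg/nodemanager"
)

func main() {
	// A Node that has not reported any conditions yet, e.g. one that was
	// just registered and whose kubelet has not posted a status.
	node := &corev1.Node{}

	// IsNodeReady finds no NodeReady condition, falls back to the zero-value
	// condition, and therefore reports the node as not ready.
	fmt.Println(nodemanager.IsNodeReady(node)) // prints: false
}
```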