Prepare the node list files used by pdsh
cat << 'EOF' > server
mn[01-03]
EOF
cat << 'EOF' > agent
gn001
EOF
# "all" covers both server and agent nodes; later steps reference it as ^all
cat server agent > all
# Install haproxy and keepalived
pdsh -w ^server apt install -y haproxy keepalived
# Prepare the haproxy configuration file
cat << 'EOF' > haproxy.cfg
frontend k3s-frontend
    bind *:7443
    mode tcp
    option tcplog
    default_backend k3s-backend

backend k3s-backend
    mode tcp
    option tcp-check
    balance roundrobin
    default-server inter 10s downinter 5s
    server mn01 10.128.0.1:6443 check
    server mn02 10.128.0.2:6443 check
    server mn03 10.128.0.3:6443 check
EOF
pdcp -w ^server haproxy.cfg /etc/haproxy/haproxy.cfg
pdsh -w ^server systemctl restart haproxy
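Before moving on, it can be worth confirming that haproxy is listening on the frontend port on every server node; the check below is a minimal sketch (the backends will stay DOWN until k3s is installed, which is expected at this point).
# All three server nodes should show a listener on *:7443
pdsh -w ^server "ss -ltn | grep 7443"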
# Prepare the keepalived configuration file
# * Pick virtual_router_id at random from 0-255; if other keepalived instances run in the same network segment, this value must not collide with theirs, otherwise VRRP conflicts will occur
# * 10.128.0.10 is the VIP (its netmask must match the physical network, otherwise the VIP may be unreachable); use a free IP in the LAN, or ask the administrator for one in a data center
cat << 'EOF' > keepalived.conf
vrrp_script chk_haproxy {
    script 'killall -0 haproxy' # faster than pidof
    interval 2
}

vrrp_instance haproxy-vip {
    interface enp3s0 # change it
    state MASTER # MASTER on mn01, BACKUP on mn02 and mn03
    priority 100 # 100 on mn01, 90 on mn02, 80 on mn03
    virtual_router_id 52
    virtual_ipaddress {
        10.128.0.10/16
    }
    track_script {
        chk_haproxy
    }
}
EOF
# Copy the configuration file to all server nodes
pdcp -w ^server keepalived.conf /etc/keepalived/keepalived.conf
# Switch mn02 and mn03 to BACKUP and lower their priorities
pdsh -w mn02,mn03 sed -i 's/MASTER/BACKUP/g' /etc/keepalived/keepalived.conf
pdsh -w mn02 "sed -i 's/priority 100/priority 90/' /etc/keepalived/keepalived.conf"
pdsh -w mn03 "sed -i 's/priority 100/priority 80/' /etc/keepalived/keepalived.conf"
# Restart keepalived
pdsh -w ^server systemctl restart keepalived
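At this point the VIP should be held by mn01. A minimal sanity check, assuming the interface is enp3s0 as in the example configuration above, is:
# The VIP 10.128.0.10 should appear on mn01 only
pdsh -w ^server "ip -4 addr show enp3s0 | grep 10.128.0.10"
pdsh -w ^server "systemctl is-active haproxy keepalived"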
Generate a token and use it to replace the token value in the configuration files below
echo $(tr -dc a-z0-9 </dev/urandom | head -c 32)
zhhbdjwwite7o0wtbu1pxowqqod15bwu
# zhhbdjwwite7o0wtbu1pxowqqod15bwu is only an example output; replace it with your own token
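If you prefer not to edit the files by hand, a small sketch like the one below can splice your own token into the config files created in the following steps; the K3S_TOKEN variable name and the sed call are assumptions, not part of the original flow.
K3S_TOKEN=$(tr -dc a-z0-9 </dev/urandom | head -c 32)
# run this after server.yaml and agent.yaml have been created in the steps below
sed -i "s/zhhbdjwwite7o0wtbu1pxowqqod15bwu/${K3S_TOKEN}/" server.yaml agent.yaml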
# Prepare the scheduler configuration file; MostAllocated scoring makes the scheduler fill up one GPU node before spilling onto the next
cat << 'EOF' > scheduler.yaml
apiVersion: kubescheduler.config.k8s.io/v1
kind: KubeSchedulerConfiguration
clientConnection:
  kubeconfig: /etc/rancher/k3s/k3s.yaml
profiles:
- pluginConfig:
  - args:
      scoringStrategy:
        resources:
        - name: cpu
          weight: 1
        - name: memory
          weight: 1
        - name: nvidia.com/gpu
          weight: 3
        type: MostAllocated
    name: NodeResourcesFit
EOF
# Prepare the server configuration file
# * Replace token with the token generated above
# * cluster-cidr and service-cidr set the Pod and Service IP ranges; check with the user whether these ranges clash with existing networks
# * tls-san lists the IPs or domain names to sign into the certificate, usually the VIP and any external IP used to reach k3s; if unset, the kubeconfig must skip TLS verification
# * disable turns off services that k3s deploys by default; later steps deploy `nginx` and `metallb` as replacements
cat << 'EOF' > server.yaml
token: zhhbdjwwite7o0wtbu1pxowqqod15bwu
cluster-cidr: 172.24.0.0/14
service-cidr: 172.29.0.0/16
tls-san:
- 10.128.0.10
disable:
- traefik
- servicelb
kubelet-arg:
- runtime-request-timeout=15m
- container-log-max-files=3
- container-log-max-size=10Mi
kube-scheduler-arg:
- authentication-tolerate-lookup-failure=false
- config=/etc/rancher/k3s/scheduler.yaml
embedded-registry: true
EOF
# Copy the configuration files to all server nodes
pdsh -w ^server mkdir -p /etc/rancher/k3s
pdcp -w ^server scheduler.yaml /etc/rancher/k3s/scheduler.yaml
pdcp -w ^server server.yaml /etc/rancher/k3s/config.yaml
# Initialize the cluster on mn01
# * If the k3s server does not need HA, drop `--cluster-init`; every node other than mn01 then joins the cluster as an agent using the steps below
curl -sfL https://rancher-mirror.rancher.cn/k3s/k3s-install.sh \
| INSTALL_K3S_MIRROR=cn INSTALL_K3S_VERSION=v1.31.4+k3s1 sh -s - server \
--cluster-init
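A quick check that the first server came up cleanly before joining the others (run on mn01):
# mn01 should report Ready with control-plane roles
k3s kubectl get node
systemctl is-active k3s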
# Join the remaining mn[02-03] nodes to the cluster as servers
curl -sfL https://rancher-mirror.rancher.cn/k3s/k3s-install.sh \
| INSTALL_K3S_MIRROR=cn INSTALL_K3S_VERSION=v1.31.4+k3s1 sh -s - server \
--server https://10.128.0.10:7443
- For production environments, pin the release with `INSTALL_K3S_VERSION`; available versions can be looked up in the k3s release channels.
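Once all three servers have joined, the control plane can be checked from any of them; a minimal sketch:
# all of mn[01-03] should show up as Ready control-plane nodes
k3s kubectl get node -o wide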
# Prepare the agent configuration file; the token must match the server token
cat << 'EOF' > agent.yaml
token: zhhbdjwwite7o0wtbu1pxowqqod15bwu
kubelet-arg:
- runtime-request-timeout=15m
- container-log-max-files=3
- container-log-max-size=10Mi
EOF
pdsh -w ^agent mkdir -p /etc/rancher/k3s
pdcp -w ^agent agent.yaml /etc/rancher/k3s/config.yaml
# Join the non-mn (agent) nodes to the cluster
curl -sfL https://rancher-mirror.rancher.cn/k3s/k3s-install.sh \
| INSTALL_K3S_MIRROR=cn INSTALL_K3S_VERSION=v1.31.4+k3s1 sh -s - agent \
--server https://10.128.0.10:7443
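After the agent install finishes, the new node should appear within a minute or so; a minimal check from any server node:
# gn001 should show up as Ready (agents carry no role label by default)
k3s kubectl get node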
- Inside the cluster, kubectl works directly on the server nodes.
- From a machine on the LAN but outside the cluster:
  cp /etc/rancher/k3s/k3s.yaml ~/.kube/config
  sed -i 's/127.0.0.1:6443/10.128.0.10:7443/g' ~/.kube/config
  kubectl get node
- From the internet, assuming port 7443 of any mn node is reachable via the public IP 1.2.3.4:
  cp /etc/rancher/k3s/k3s.yaml ~/.kube/config
  sed -i 's/127.0.0.1:6443/1.2.3.4:7443/g' ~/.kube/config
  kubectl get node
- If the IP you connect to is not listed in `--tls-san`, TLS verification has to be skipped: remove `certificate-authority-data: xxx` from the kubeconfig and add `insecure-skip-tls-verify: true`.
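The kubeconfig edit in the last bullet can also be done with kubectl itself; the sketch below assumes the cluster entry is named `default`, which is what k3s writes into k3s.yaml.
# remove the CA data and mark the cluster as insecure (only when the access IP is not in --tls-san)
kubectl config unset clusters.default.certificate-authority-data
kubectl config set-cluster default --insecure-skip-tls-verify=true
kubectl get node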
- Proxy containerd image pulls (this must be configured, otherwise images cannot be pulled); a pull test follows after this block.
# Set a registry mirror for docker.io, because docker.io rate-limits pulls
cat << 'EOF' > registries.yaml
mirrors:
  docker.io:
    endpoint:
      - "https://mirror.gcr.io"
EOF
pdcp -w ^all registries.yaml /etc/rancher/k3s
# Configure the containerd proxy
# * assume 10.128.0.90 is the proxy server IP and 3128 is its port
# * NO_PROXY can also bypass domains, e.g. `*.example.com`; this is usually needed when running a private harbor registry
cat << 'EOF' > k3s.service.env
CONTAINERD_HTTP_PROXY=http://10.128.0.90:3128
CONTAINERD_HTTPS_PROXY=http://10.128.0.90:3128
CONTAINERD_NO_PROXY=127.0.0.0/8,10.128.0.0/8,172.16.0.0/12,192.168.0.0/16,100.64.0.0/10,*.example.com
EOF
# Copy the env files to all server and agent nodes, then restart k3s
cp k3s.service.env k3s-agent.service.env
pdcp -w ^server k3s.service.env /etc/systemd/system
pdcp -w ^agent k3s-agent.service.env /etc/systemd/system
pdsh -w ^all systemctl daemon-reload
pdsh -w ^server systemctl restart k3s
pdsh -w ^agent systemctl restart k3s-agent
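A simple way to confirm the mirror and proxy settings took effect is to pull a small image through containerd on one node; `k3s crictl` is the crictl bundled with k3s, and busybox is just an arbitrary test image.
# should succeed via mirror.gcr.io / the proxy without hitting docker.io rate limits
pdsh -w mn01 "k3s crictl pull docker.io/library/busybox:latest"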
- Fix `kubectl logs` failing with `too many open files`
cat << 'EOF' > 80-inotify.conf
fs.inotify.max_user_instances=1280
fs.inotify.max_user_watches=655360
EOF
pdcp -w ^all 80-inotify.conf /etc/sysctl.d
pdsh -w ^all sysctl --system
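To confirm the new limits are live on every node:
pdsh -w ^all "sysctl fs.inotify.max_user_instances fs.inotify.max_user_watches"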
- Fix `kubectl port-forward` failing with `unable to do port forwarding: socat not found`
pdsh -w ^all apt install -y socat
- Fix the inability to allocate large locked memory (memlock) in training workloads
pdsh -w ^server "sed -i '/LimitCORE/a LimitMEMLOCK=infinity' /etc/systemd/system/k3s.service"
pdsh -w ^agent "sed -i '/LimitCORE/a LimitMEMLOCK=infinity' /etc/systemd/system/k3s-agent.service"
pdsh -w ^all systemctl daemon-reload
pdsh -w ^server systemctl restart k3s
pdsh -w ^agent systemctl restart k3s-agent
# Uninstall: remove k3s from all nodes (this destroys the cluster)
pdsh -w ^agent k3s-agent-uninstall.sh
pdsh -w ^server k3s-uninstall.sh