# Common Kubernetes Issues
## Cluster Setup
### 1. kubectl get node shows the node in NotReady status
Wait a while longer for the node to finish initializing.
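If the node stays NotReady, the node's conditions and the kubelet logs usually explain why; a minimal check, assuming shell access to the affected node:

```bash
# The Conditions and Events sections report why the node is NotReady
# (commonly the CNI plugin is not yet initialized).
kubectl describe node <node-name>

# On the node itself, watch the kubelet logs for errors.
journalctl -u kubelet -f
```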
### 2. kubectl get cs reports components as Unhealthy
Case:
- When running this command on a previously built cluster, the controller-manager and scheduler components reported Unhealthy:
```
[root@liuxiaolu-master ~]# kubectl get cs
Warning: v1 ComponentStatus is deprecated in v1.19+
NAME                 STATUS      MESSAGE                                                                                       ERROR
controller-manager   Unhealthy   Get "http://127.0.0.1:10252/healthz": dial tcp 127.0.0.1:10252: connect: connection refused
scheduler            Unhealthy   Get "http://127.0.0.1:10251/healthz": dial tcp 127.0.0.1:10251: connect: connection refused
etcd-0               Healthy     {"health":"true"}
```
- Fix: delete the `--port=0` line from the controller-manager and scheduler manifests. Both files are shown below; a non-interactive sketch follows after them.
```
[root@liuxiaolu-master ~]# cat /etc/kubernetes/manifests/kube-controller-manager.yaml
apiVersion: v1
kind: Pod
metadata:
  creationTimestamp: null
  labels:
    component: kube-controller-manager
    tier: control-plane
  name: kube-controller-manager
  namespace: kube-system
spec:
  containers:
  - command:
    ......
    - --node-cidr-mask-size=24
    #### this line ↓↓↓↓here↓↓↓↓
    - --port=0
    - --requestheader-client-ca-file=/etc/kubernetes/pki/front-proxy-ca.crt
    ......
```
```
[root@liuxiaolu-master ~]# cat /etc/kubernetes/manifests/kube-scheduler.yaml
apiVersion: v1
kind: Pod
metadata:
  creationTimestamp: null
  labels:
    component: kube-scheduler
    tier: control-plane
  name: kube-scheduler
  namespace: kube-system
spec:
  containers:
  - command:
    ......
    - --leader-elect=true
    #### this line ↓↓↓↓here↓↓↓↓
    - --port=0
    ......
```
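A non-interactive sketch of the same fix, assuming the default kubeadm manifest paths shown above (back up both files first):

```bash
# Delete the "- --port=0" line from both static Pod manifests;
# kubelet watches this directory and recreates the Pods on change.
sed -i '/- --port=0/d' /etc/kubernetes/manifests/kube-controller-manager.yaml
sed -i '/- --port=0/d' /etc/kubernetes/manifests/kube-scheduler.yaml
```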
- Restart kubelet (note: kubelet, not kubectl):

```
systemctl daemon-reload && systemctl restart kubelet
```
- Query the component status again; all components should now report Healthy:
```
[root@liuxiaolu-master ~]# kubectl get cs
Warning: v1 ComponentStatus is deprecated in v1.19+
NAME                 STATUS    MESSAGE   ERROR
scheduler            Healthy   ok
controller-manager   Healthy   ok
etcd-0               Healthy   {"health":"true"}
```
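Since ComponentStatus is deprecated in v1.19+, the API server's own health endpoints are an alternative check; a sketch, assuming a reasonably recent cluster:

```bash
# Per-check health detail from the apiserver itself.
kubectl get --raw='/readyz?verbose'

# The control-plane Pods can also be inspected directly
# (tier=control-plane is the label shown in the manifests above).
kubectl get pods -n kube-system -l tier=control-plane
```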
### 3. Calico fails to initialize
#### 3.1 Image pull failures
```
[root@k8s-master ~]# kubectl get pod -n kube-system
NAME                                       READY   STATUS              RESTARTS   AGE
calico-kube-controllers-6fd7b9848d-x2xqk   0/1     ContainerCreating   0          29m
calico-node-2s2cx                          0/1     ErrImagePull        0          11m
calico-node-h25hc                          0/1     Init:0/3            0          29m
calico-node-hcj9k                          0/1     ImagePullBackOff    0          29m
coredns-545d6fc579-4z7ll                   0/1     ContainerCreating   0          39m
coredns-545d6fc579-v4h6l                   0/1     ContainerCreating   0          39m
etcd-k8s-master                            1/1     Running             0          40m
kube-apiserver-k8s-master                  1/1     Running             0          40m
kube-controller-manager-k8s-master         1/1     Running             0          26m
kube-proxy-dwc9p                           1/1     Running             0          39m
kube-proxy-pblfd                           1/1     Running             0          31m
kube-proxy-spl6v                           1/1     Running             0          35m
kube-scheduler-k8s-master                  1/1     Running             0          26m
```
- Run `kubectl describe pod calico-node-hcj9k -n kube-system` to see which image failed to pull.
- Pull that image manually on each node; see the sketch below.
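A minimal sketch, assuming Docker is the container runtime and the failing images are the calico/cni and calico/node images; the exact names and tags come from the describe output, so the tag below is hypothetical:

```bash
# Find the failing image in the pod's events.
kubectl describe pod calico-node-hcj9k -n kube-system | grep -i image

# On each node, pull the reported image manually
# (v3.19.1 is a hypothetical tag; use the one from the events).
docker pull docker.io/calico/cni:v3.19.1
docker pull docker.io/calico/node:v3.19.1
```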
### 4. CoreDNS reports an image pull failure
Edit the coredns Deployment and change the image from `registry.aliyuncs.com/google_containers/coredns/coredns:v1.8.0` to `registry.aliyuncs.com/google_containers/coredns:1.7.0`:

```
kubectl -n kube-system edit deploy/coredns
```

This command opens the Deployment in an editor; find the image field, change it to `registry.aliyuncs.com/google_containers/coredns:1.7.0`, then save and exit. The change takes effect automatically:
```yaml
....
spec:
  containers:
  - args:
    - -conf
    - /etc/coredns/Corefile
    image: registry.aliyuncs.com/google_containers/coredns:1.7.0
....
```
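A non-interactive sketch of the same change, assuming the container inside the Deployment is named coredns (the kubeadm default):

```bash
# Patch the image in place instead of editing interactively.
kubectl -n kube-system set image deploy/coredns \
  coredns=registry.aliyuncs.com/google_containers/coredns:1.7.0
```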