# Kubernetes常见问题

# 集群搭建类

# 1. 使用kubectl get node命令发现status是notReady状态

多等待一会,等node初始化完成(可以用kubectl describe node <节点名>查看Conditions和Events,确认初始化进度和失败原因)

# 2. 使用kubectl get cs命令发现组件是不健康的状态

案例

  1. 在使用这个命令的时候,我发现了之前搭建的集群的controller-manager组件和scheduler是不健康的状态,信息如下
[root@liuxiaolu-master ~]# kubectl get cs
Warning: v1 ComponentStatus is deprecated in v1.19+
NAME                 STATUS      MESSAGE                                                                                       ERROR
controller-manager   Unhealthy   Get "http://127.0.0.1:10252/healthz": dial tcp 127.0.0.1:10252: connect: connection refused
scheduler            Unhealthy   Get "http://127.0.0.1:10251/healthz": dial tcp 127.0.0.1:10251: connect: connection refused
etcd-0               Healthy     {"health":"true"}
  2. 将controller-manager和scheduler配置文件中的--port=0这行删掉。
[root@liuxiaolu-master ~]# cat /etc/kubernetes/manifests/kube-controller-manager.yaml
apiVersion: v1
kind: Pod
metadata:
  creationTimestamp: null
  labels:
    component: kube-controller-manager
    tier: control-plane
  name: kube-controller-manager
  namespace: kube-system
spec:
  containers:
  - command:
  ......
    - --node-cidr-mask-size=24
    #### 这一行 ↓↓↓↓here↓↓↓↓
    - --port=0
    - --requestheader-client-ca-file=/etc/kubernetes/pki/front-proxy-ca.crt
  ......

[root@liuxiaolu-master ~]# cat /etc/kubernetes/manifests/kube-scheduler.yaml
apiVersion: v1
kind: Pod
metadata:
  creationTimestamp: null
  labels:
    component: kube-scheduler
    tier: control-plane
  name: kube-scheduler
  namespace: kube-system
spec:
  containers:
  - command:
   ......
    - --leader-elect=true
    #### 这一行 ↓↓↓↓here↓↓↓↓
    - --port=0
   ......
  3. 重启kubelet
systemctl daemon-reload && systemctl restart kubelet
  4. 再次查询各组件健康状态,就可以看到各组件已经处于健康状态了
[root@liuxiaolu-master ~]# kubectl get cs
Warning: v1 ComponentStatus is deprecated in v1.19+
NAME                 STATUS    MESSAGE             ERROR
scheduler            Healthy   ok
controller-manager   Healthy   ok
etcd-0               Healthy   {"health":"true"}

# 3. Calico初始化不成功

# 3.1 镜像拉取失败

[root@k8s-master ~]# kubectl get pod -n kube-system
NAME                                       READY   STATUS              RESTARTS   AGE
calico-kube-controllers-6fd7b9848d-x2xqk   0/1     ContainerCreating   0          29m
calico-node-2s2cx                          0/1     ErrImagePull        0          11m
calico-node-h25hc                          0/1     Init:0/3            0          29m
calico-node-hcj9k                          0/1     ImagePullBackOff    0          29m
coredns-545d6fc579-4z7ll                   0/1     ContainerCreating   0          39m
coredns-545d6fc579-v4h6l                   0/1     ContainerCreating   0          39m
etcd-k8s-master                            1/1     Running             0          40m
kube-apiserver-k8s-master                  1/1     Running             0          40m
kube-controller-manager-k8s-master         1/1     Running             0          26m
kube-proxy-dwc9p                           1/1     Running             0          39m
kube-proxy-pblfd                           1/1     Running             0          31m
kube-proxy-spl6v                           1/1     Running             0          35m
kube-scheduler-k8s-master                  1/1     Running             0          26m
  1. 使用kubectl describe pod calico-node-hcj9k -n kube-system查看是哪个镜像下载不成功
  2. 在各node节点都手动下载指定镜像即可

# 4. core dns 提示镜像下载失败

修改deploy coredns,将镜像registry.aliyuncs.com/google_containers/coredns/coredns:v1.8.0修改为registry.aliyuncs.com/google_containers/coredns:1.7.0

kubectl -n kube-system edit deploy/coredns

执行这个命令后,就会在image栏发现我们要修改的镜像名称,改成registry.aliyuncs.com/google_containers/coredns:1.7.0即可,退出后自动生效。

....
    spec:
      containers:
      - args:
        - -conf
        - /etc/coredns/Corefile
        image: registry.aliyuncs.com/google_containers/coredns:1.7.0
....
Last Updated: 3/30/2022, 5:47:50 PM