728x90
반응형

1. kube-state-metrics 설치

$ git clone https://github.com/kubernetes/kube-state-metrics.git
$ cd kube-state-metrics
$ kubectl apply -f examples/standard

2. namespace 생성

$ kubectl create ns monitoring

3. RBAC(ClusterRole) 생성

$ vi prometheus-cluster-role.yaml
---
# ClusterRole that gives Prometheus read-only access to the API objects it
# discovers and scrapes (nodes, services, endpoints, pods, ingresses) plus the
# non-resource /metrics endpoint.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  # ClusterRole is cluster-scoped, so metadata.namespace is not set.
  name: prometheus
rules:
- apiGroups: [""]
  resources:
  - nodes
  # nodes/proxy is needed because kubelet/cAdvisor metrics are scraped
  # through the API server proxy path (/api/v1/nodes/<node>/proxy/...).
  - nodes/proxy
  - services
  - endpoints
  - pods
  verbs: ["get", "list", "watch"]
# Ingress is served from networking.k8s.io on current clusters; the legacy
# "extensions" group is kept only for backward compatibility with old ones.
- apiGroups:
  - extensions
  - networking.k8s.io
  resources:
  - ingresses
  verbs: ["get", "list", "watch"]
- nonResourceURLs: ["/metrics"]
  verbs: ["get"]
---
# ServiceAccount named "prometheus" in the monitoring namespace.
kind: ServiceAccount
apiVersion: v1
metadata:
  namespace: monitoring
  name: prometheus
---
# Binds the "prometheus" ClusterRole to the "prometheus" ServiceAccount in the
# monitoring namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  # ClusterRoleBinding is cluster-scoped, so metadata.namespace is not set;
  # only the subject below carries a namespace.
  name: prometheus
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus
subjects:
- kind: ServiceAccount
  name: prometheus
  namespace: monitoring
---

$ kubectl apply -f prometheus-cluster-role.yaml

4. PV, PVC설정

$ vi prometheus-pv.yaml
---
# PersistentVolume backed by the hostPath directory /opt/prometheus, pinned to
# a single node through nodeAffinity.
apiVersion: v1
kind: PersistentVolume
metadata:
  # PersistentVolume is cluster-scoped, so metadata.namespace is not set.
  name: prometheus-pv
  labels:
    type: local
    app: prometheus
spec:
  capacity:
    storage: 2Gi
  accessModes:
  - ReadWriteOnce
  # Keep the data even after the claim is released.
  persistentVolumeReclaimPolicy: Retain
  # Set this when a storageClass is used.
  storageClassName: manual
  hostPath:
    path: /opt/prometheus
    type: DirectoryOrCreate
  nodeAffinity:
    required:
      nodeSelectorTerms:
      - matchExpressions:
        - key: kubernetes.io/hostname
          operator: In
          values:
          # Host that owns the directory.
          - kube-worker-1
---

$ vi prometheus-pvc.yaml 
---
# Claim for 1Gi of ReadWriteOnce filesystem storage in the monitoring
# namespace, restricted by label selector to volumes labelled
# app=prometheus, type=local.
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  namespace: monitoring
  name: prometheus-pvc
  labels:
    app: prometheus
    type: local
spec:
  volumeMode: Filesystem
  accessModes:
  - ReadWriteOnce
  # Set this when a storageClass is used (must equal the value on the PV).
  storageClassName: manual
  selector:
    matchLabels:
      type: local
      app: prometheus
  resources:
    requests:
      storage: 1Gi
---

$ kubectl apply -f prometheus-pv.yaml
$ kubectl apply -f prometheus-pvc.yaml

5. Prometheus 설정값 작성

$ vim prometheus.rules
---
# Alerting rules loaded by Prometheus through rule_files.
# Both expressions aggregate with sum() and no "by" clause, so each alert is a
# single cluster-wide ratio.
groups:
- name: container memory alert
  rules:
  - alert: container memory usage rate is very high( > 55%)
    # kube-state-metrics v2 exposes allocatable memory as
    # kube_node_status_allocatable{resource="memory"}; the older
    # kube_node_status_allocatable_memory_bytes series no longer exists.
    expr: sum(container_memory_working_set_bytes{pod!="", name=""}) / sum(kube_node_status_allocatable{resource="memory"}) * 100 > 55
    for: 1m
    labels:
      severity: fatal
    annotations:
      # NOTE(review): sum() without "by" drops every label, so
      # $labels.instance / $labels.job render as empty strings here.
      summary: "High Memory Usage on {{ $labels.instance }}"
      identifier: "{{ $labels.instance }}"
      description: "{{ $labels.job }} Memory Usage: {{ $value }}"
- name: container CPU alert
  rules:
  - alert: container CPU usage rate is very high( > 10%)
    expr: sum (rate (container_cpu_usage_seconds_total{pod!=""}[1m])) / sum (machine_cpu_cores) * 100 > 10
    for: 1m
    labels:
      severity: fatal
    annotations:
      summary: High Cpu Usage
---


$ vim prometheus.yml
#필요한 metric의 경우 scrape_configs: 아래 형식에 맞게 작성
---
# Global timing: scrape targets and evaluate rules every 10 seconds.
global:
  evaluation_interval: 10s
  scrape_interval: 10s
# Rule file path inside the container.
rule_files:
- /etc/prometheus/prometheus.rules
# Alerts are sent over plain HTTP to a statically configured Alertmanager.
# NOTE(review): presumably a Service named "alertmanager" exists in the
# monitoring namespace - confirm, it is not created in the visible steps.
alerting:
  alertmanagers:
  - static_configs:
    - targets:
      - "alertmanager.monitoring.svc:9093"
    scheme: http
 
scrape_configs:
  # API server: discover all endpoints, then keep only those whose
  # namespace;service;port-name triple equals default;kubernetes;https.
  - job_name: 'kubernetes-apiservers'
    scheme: https
    bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
    tls_config:
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
    kubernetes_sd_configs:
    - role: endpoints
    relabel_configs:
    - action: keep
      source_labels:
      - __meta_kubernetes_namespace
      - __meta_kubernetes_service_name
      - __meta_kubernetes_endpoint_port_name
      regex: default;kubernetes;https
 
  # Node metrics, fetched through the API server proxy instead of contacting
  # each node directly.
  - job_name: 'kubernetes-nodes'
    kubernetes_sd_configs:
    - role: node
    scheme: https
    bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
    tls_config:
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
    relabel_configs:
    # Copy every node label onto the target.
    - regex: __meta_kubernetes_node_label_(.+)
      action: labelmap
    # Send all scrapes to the in-cluster API server address.
    - replacement: kubernetes.default.svc:443
      target_label: __address__
    # Build the per-node proxy path from the node name.
    - source_labels:
      - __meta_kubernetes_node_name
      regex: (.+)
      replacement: /api/v1/nodes/${1}/proxy/metrics
      target_label: __metrics_path__
 
 
  # Pods that opt in through prometheus.io/* annotations.
  - job_name: 'kubernetes-pods'
    kubernetes_sd_configs:
    - role: pod
    relabel_configs:
    # Keep only pods annotated prometheus.io/scrape: "true".
    - source_labels:
      - __meta_kubernetes_pod_annotation_prometheus_io_scrape
      regex: true
      action: keep
    # Optional prometheus.io/path annotation overrides the metrics path.
    - source_labels:
      - __meta_kubernetes_pod_annotation_prometheus_io_path
      regex: (.+)
      action: replace
      target_label: __metrics_path__
    # Optional prometheus.io/port annotation overrides the scrape port.
    - source_labels:
      - __address__
      - __meta_kubernetes_pod_annotation_prometheus_io_port
      regex: ([^:]+)(?::\d+)?;(\d+)
      action: replace
      replacement: $1:$2
      target_label: __address__
    # Copy every pod label onto the target.
    - regex: __meta_kubernetes_pod_label_(.+)
      action: labelmap
    # Expose namespace and pod name as regular labels.
    - source_labels:
      - __meta_kubernetes_namespace
      action: replace
      target_label: kubernetes_namespace
    - source_labels:
      - __meta_kubernetes_pod_name
      action: replace
      target_label: kubernetes_pod_name
 
  # kube-state-metrics, addressed statically by its Service DNS name in
  # kube-system on port 8080.
  - job_name: 'kube-state-metrics'
    static_configs:
    - targets:
      - 'kube-state-metrics.kube-system.svc:8080'
 
  # cAdvisor container metrics, fetched through the API server proxy path
  # /api/v1/nodes/<node>/proxy/metrics/cadvisor.
  - job_name: 'kubernetes-cadvisor'
    kubernetes_sd_configs:
    - role: node
    scheme: https
    bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
    tls_config:
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
    relabel_configs:
    # Copy every node label onto the target.
    - regex: __meta_kubernetes_node_label_(.+)
      action: labelmap
    # Send all scrapes to the in-cluster API server address.
    - replacement: kubernetes.default.svc:443
      target_label: __address__
    # Build the per-node cAdvisor proxy path from the node name.
    - source_labels:
      - __meta_kubernetes_node_name
      regex: (.+)
      replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
      target_label: __metrics_path__
 
  # Endpoints of Services that opt in through prometheus.io/* annotations.
  - job_name: 'kubernetes-service-endpoints'
    kubernetes_sd_configs:
    - role: endpoints
    relabel_configs:
    # Keep only Services annotated prometheus.io/scrape: "true".
    - source_labels:
      - __meta_kubernetes_service_annotation_prometheus_io_scrape
      regex: true
      action: keep
    # Optional prometheus.io/scheme annotation (http or https).
    - source_labels:
      - __meta_kubernetes_service_annotation_prometheus_io_scheme
      regex: (https?)
      action: replace
      target_label: __scheme__
    # Optional prometheus.io/path annotation overrides the metrics path.
    - source_labels:
      - __meta_kubernetes_service_annotation_prometheus_io_path
      regex: (.+)
      action: replace
      target_label: __metrics_path__
    # Optional prometheus.io/port annotation overrides the scrape port.
    - source_labels:
      - __address__
      - __meta_kubernetes_service_annotation_prometheus_io_port
      regex: ([^:]+)(?::\d+)?;(\d+)
      action: replace
      replacement: $1:$2
      target_label: __address__
    # Copy every Service label onto the target.
    - regex: __meta_kubernetes_service_label_(.+)
      action: labelmap
    # Expose namespace and Service name as regular labels.
    - source_labels:
      - __meta_kubernetes_namespace
      action: replace
      target_label: kubernetes_namespace
    - source_labels:
      - __meta_kubernetes_service_name
      action: replace
      target_label: kubernetes_name
 ---
 
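# 두 파일(prometheus.yml, prometheus.rules)을 하나로 합쳐 prometheus-config라는 이름의 configmap 생성
# (--from-file=./ 는 현재 디렉터리의 다른 파일까지 모두 포함하므로 두 파일을 명시적으로 지정)

$ kubectl create configmap prometheus-config -n monitoring --from-file=prometheus.yml --from-file=prometheus.rules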

6. Deployment 작성 후 배포, 트러블 슈팅

$ vi prometheus-deployment.yaml
---
# Single-replica Prometheus server. Configuration is mounted from the
# prometheus-config ConfigMap and TSDB data is stored on the prometheus-pvc
# claim.
kind: Deployment
apiVersion: apps/v1
metadata:
  namespace: monitoring
  name: prometheus-deployment
  labels:
    app: prometheus-server
spec:
  replicas: 1
  selector:
    matchLabels:
      app: prometheus-server
  template:
    metadata:
      labels:
        app: prometheus-server
    spec:
      # ServiceAccount whose token is used for API discovery and scraping.
      serviceAccountName: prometheus
      # Schedule onto the node named in the PV's nodeAffinity.
      nodeSelector:
        kubernetes.io/hostname: kube-worker-1
      volumes:
      - name: prometheus-config-volume
        configMap:
          name: prometheus-config
          # 420 decimal == 0644 octal.
          defaultMode: 420
      - name: prometheus-storage-volume
        persistentVolumeClaim:
          claimName: prometheus-pvc
      containers:
      - name: prometheus
        image: prom/prometheus
        args:
        - "--config.file=/etc/prometheus/prometheus.yml"
        - "--storage.tsdb.path=/prometheus/"
        ports:
        - containerPort: 9090
        volumeMounts:
        - mountPath: /etc/prometheus/
          name: prometheus-config-volume
        - mountPath: /prometheus/
          name: prometheus-storage-volume
---

$ kubectl apply -f prometheus-deployment.yaml

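#PV 쪽 권한 에러가 발생하므로, PV를 생성한 host(예시에서는 kube-worker-1)에서 다음 명령어를 입력
#(prom/prometheus 컨테이너는 nobody(UID 65534) 사용자로 실행되므로 해당 사용자에게 소유권을 부여.
# chmod 757 /opt/prometheus 로도 해결되지만 모든 사용자에게 쓰기 권한이 열리므로 권장하지 않음)
$ chown -R 65534:65534 /opt/prometheus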

7. 서비스 배포 및 node-exporter 배포

$ vim prometheus-service.yaml
---
# NodePort Service for the Prometheus server: Service port 8080 forwards to
# container port 9090 and is published on node port 30003.
kind: Service
apiVersion: v1
metadata:
  namespace: monitoring
  name: prometheus-service
  # Scrape opt-in annotations; the port value is the container port.
  annotations:
    prometheus.io/port: '9090'
    prometheus.io/scrape: 'true'
spec:
  type: NodePort
  selector:
    app: prometheus-server
  ports:
  - nodePort: 30003
    port: 8080
    targetPort: 9090
---

$ kubectl apply -f prometheus-service.yaml

$ vim prometheus-node-exporter.yaml
---
# DaemonSet running one node-exporter pod per node in the monitoring
# namespace, exposing container port 9100 under the name "http".
# NOTE(review): no host mounts or hostNetwork/hostPID are configured -
# confirm the reported metrics reflect the host as intended.
kind: DaemonSet
apiVersion: apps/v1
metadata:
  namespace: monitoring
  name: node-exporter
  labels:
    k8s-app: node-exporter
spec:
  selector:
    matchLabels:
      k8s-app: node-exporter
  template:
    metadata:
      labels:
        k8s-app: node-exporter
    spec:
      containers:
      - name: node-exporter
        image: prom/node-exporter
        ports:
        - name: http
          protocol: TCP
          containerPort: 9100
---
# NodePort Service in front of the node-exporter pods (port 9100, published on
# node port 31672).
apiVersion: v1
kind: Service
metadata:
  labels:
    k8s-app: node-exporter
  name: node-exporter
  # A Service only selects pods in its own namespace, so it must be in the
  # same namespace as the node-exporter DaemonSet (monitoring).
  namespace: monitoring
  # Opt in to annotation-based scraping; the kubernetes-service-endpoints job
  # keeps only Services annotated prometheus.io/scrape: 'true'.
  annotations:
    prometheus.io/scrape: 'true'
    prometheus.io/port: '9100'
spec:
  ports:
  - name: http
    port: 9100
    nodePort: 31672
    protocol: TCP
  type: NodePort
  selector:
    k8s-app: node-exporter
---

  $ kubectl apply -f prometheus-node-exporter.yaml

8. 확인

브라우저에서 http://<K8s 워커노드 IP>:30003 으로 접속하여 Prometheus UI가 열리는지 확인

반응형

+ Recent posts