728x90
반응형
1. kube-state-metirc 설치
$ git clone https://github.com/kubernetes/kube-state-metrics.git
$ cd kube-state-metrics
$ kubectl apply -f examples/standard
2. namespace 생성
$ kubectl create ns monitoring
3. RBAC(ClusterRole) 생성
$ vi prometheus-cluster-role.yaml
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: prometheus
namespace: monitoring
rules:
- apiGroups: [""]
resources:
- nodes
- nodes/proxy
- services
- endpoints
- pods
verbs: ["get", "list", "watch"]
- apiGroups:
- extensions
resources:
- ingresses
verbs: ["get", "list", "watch"]
- nonResourceURLs: ["/metrics"]
verbs: ["get"]
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: prometheus
namespace: monitoring
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: prometheus
namespace: monitoring
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: prometheus
subjects:
- kind: ServiceAccount
name: prometheus
namespace: monitoring
---
$ kubectl apply -f prometheus-cluster-role.yaml
4. PV, PVC설정
$ vi prometheus-pv.yaml
---
apiVersion: v1
kind: PersistentVolume
metadata:
name: prometheus-pv
namespace: monitoring
labels:
type: local
app: prometheus
spec:
capacity:
storage: 2Gi
accessModes:
- ReadWriteOnce
#연결 해제되어도 값 보존
persistentVolumeReclaimPolicy: Retain
#storageClass가 있는경우 작성
storageClassName: manual
hostPath:
path: /opt/prometheus
type: DirectoryOrCreate
nodeAffinity:
required:
nodeSelectorTerms:
- matchExpressions:
- key: kubernetes.io/hostname
operator: In
values:
# host 지정
- kube-worker-1
---
$ vi prometheus-pvc.yaml
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: prometheus-pvc
namespace: monitoring
labels:
type: local
app: prometheus
spec:
#storageClass가 있는경우 작성(PV와 값이 똑같아야함)
storageClassName: manual
accessModes:
- ReadWriteOnce
volumeMode: Filesystem
resources:
requests:
storage: 1Gi
selector:
matchLabels:
app: prometheus
type: local
---
$ kubectl apply -f prometheus-pvc.yaml
5. Prometheus 설정값 작성
$ vim prometheus.rules
---
groups:
- name: container memory alert
rules:
- alert: container memory usage rate is very high( > 55%)
expr: sum(container_memory_working_set_bytes{pod!="", name=""}) / sum (kube_node_status_allocatable_memory_bytes) * 100 > 55
for: 1m
labels:
severity: fatal
annotations:
summary: High Memory Usage on {{ $labels.instance }}
identifier: "{{ $labels.instance }}"
description: "{{ $labels.job }} Memory Usage: {{ $value }}"
- name: container CPU alert
rules:
- alert: container CPU usage rate is very high( > 10%)
expr: sum (rate (container_cpu_usage_seconds_total{pod!=""}[1m])) / sum (machine_cpu_cores) * 100 > 10
for: 1m
labels:
severity: fatal
annotations:
summary: High Cpu Usage
---
$ vim prometheus.yml
#필요한 metric의 경우 scrape_configs: 아래 형식에 맞게 작성
---
global:
scrape_interval: 10s
evaluation_interval: 10s
rule_files:
- /etc/prometheus/prometheus.rules
alerting:
alertmanagers:
- scheme: http
static_configs:
- targets:
- "alertmanager.monitoring.svc:9093"
scrape_configs:
- job_name: 'kubernetes-apiservers'
kubernetes_sd_configs:
- role: endpoints
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
relabel_configs:
- source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
action: keep
regex: default;kubernetes;https
- job_name: 'kubernetes-nodes'
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: __address__
replacement: kubernetes.default.svc:443
- source_labels: [__meta_kubernetes_node_name]
regex: (.+)
target_label: __metrics_path__
replacement: /api/v1/nodes/${1}/proxy/metrics
- job_name: 'kubernetes-pods'
kubernetes_sd_configs:
- role: pod
relabel_configs:
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
action: keep
regex: true
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
action: replace
target_label: __metrics_path__
regex: (.+)
- source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
action: replace
regex: ([^:]+)(?::\d+)?;(\d+)
replacement: $1:$2
target_label: __address__
- action: labelmap
regex: __meta_kubernetes_pod_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: kubernetes_namespace
- source_labels: [__meta_kubernetes_pod_name]
action: replace
target_label: kubernetes_pod_name
- job_name: 'kube-state-metrics'
static_configs:
- targets: ['kube-state-metrics.kube-system.svc:8080']
- job_name: 'kubernetes-cadvisor'
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: __address__
replacement: kubernetes.default.svc:443
- source_labels: [__meta_kubernetes_node_name]
regex: (.+)
target_label: __metrics_path__
replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
- job_name: 'kubernetes-service-endpoints'
kubernetes_sd_configs:
- role: endpoints
relabel_configs:
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
action: keep
regex: true
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
action: replace
target_label: __scheme__
regex: (https?)
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
action: replace
target_label: __metrics_path__
regex: (.+)
- source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
action: replace
target_label: __address__
regex: ([^:]+)(?::\d+)?;(\d+)
replacement: $1:$2
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: kubernetes_namespace
- source_labels: [__meta_kubernetes_service_name]
action: replace
target_label: kubernetes_name
---
# 두 파일을 하나로 합쳐 prometheus-config라는 이름의 configmap 생성
$ kubectl create configmap prometheus-config -n monitoring --from-file=./
6. Deployment 작성 후 배포, 트러블 슈팅
vi prometheus-deployment.yaml
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: prometheus-deployment
namespace: monitoring
labels:
app: prometheus-server
spec:
replicas: 1
selector:
matchLabels:
app: prometheus-server
template:
metadata:
labels:
app: prometheus-server
spec:
serviceAccountName: prometheus
containers:
- name: prometheus
image: prom/prometheus
args:
- "--config.file=/etc/prometheus/prometheus.yml"
- "--storage.tsdb.path=/prometheus/"
ports:
- containerPort: 9090
volumeMounts:
- name: prometheus-config-volume
mountPath: /etc/prometheus/
- name: prometheus-storage-volume
mountPath: /prometheus/
nodeSelector:
kubernetes.io/hostname: kube-worker-1
volumes:
- name: prometheus-config-volume
configMap:
defaultMode: 420
name: prometheus-config
- name: prometheus-storage-volume
persistentVolumeClaim:
claimName: prometheus-pvc
---
$ kubectl apply -f prometheus-deployment.yaml
#PV 쪽 권한 에러가 발생하므로, PV생성한 host에(예시에서는 kube-worker-1) 다음 명령어를 입력
$ chmod 757 /opt/prometheus
7. 서비스 배포 및 node-exporter 배포
$ vim prometheus-service.yml
---
apiVersion: v1
kind: Service
metadata:
name: prometheus-service
namespace: monitoring
annotations:
prometheus.io/scrape: 'true'
prometheus.io/port: '9090'
spec:
selector:
app: prometheus-server
type: NodePort
ports:
- port: 8080
targetPort: 9090
nodePort: 30003
---
$ kubectl apply -f prometheus-service.yaml
$ vim prometheus-node-exporter.yaml
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: node-exporter
namespace: monitoring
labels:
k8s-app: node-exporter
spec:
selector:
matchLabels:
k8s-app: node-exporter
template:
metadata:
labels:
k8s-app: node-exporter
spec:
containers:
- image: prom/node-exporter
name: node-exporter
ports:
- containerPort: 9100
protocol: TCP
name: http
---
apiVersion: v1
kind: Service
metadata:
labels:
k8s-app: node-exporter
name: node-exporter
namespace: kube-system
spec:
ports:
- name: http
port: 9100
nodePort: 31672
protocol: TCP
type: NodePort
selector:
k8s-app: node-exporter
---
$ kubectl apply -f prometheus-node-exporter.yaml
8. 확인
K8s 워커노드 ip:30003
반응형
'엔지니어링 > 모니터링' 카테고리의 다른 글
K8s Nginx Ingress Controller 모니터링 (0) | 2022.04.24 |
---|---|
Prometheus docker-swarm 모니터링 테스트(mariadb) (0) | 2022.04.11 |
Prometheus 모니터링 테스트(mysql) (0) | 2022.04.04 |