kubernetes使用prometheus的相关配置文件

﹏ヽ暗。殇╰゛Y 2022-05-25 05:19 278阅读 0赞

本文只是简单记录一下在 Kubernetes 中运行 Prometheus 所用的配置文件：prometheus.yaml、告警规则文件以及 Deployment 清单。
prometheus.yaml

  # Prometheus scrape configuration for a Kubernetes cluster.
  #
  # Cluster components (API server, node) and services are scraped by separate
  # jobs so that each job can use its own authentication config.
  #
  # Kubernetes labels are added to metrics as Prometheus labels through the
  # `labelmap` relabeling action.
  #
  # On Kubernetes 1.7.2 or earlier, read the comments on the
  # kubernetes-cadvisor job: that job has to be edited or removed.

  # Rule files are picked up by glob.
  # NOTE(review): the directory listing further down in this document also
  # shows `ceres-dev.rule.yml`, which `*.rule` does not match - confirm
  # whether that file is meant to be loaded.
  rule_files:
  - '/etc/prometheus_rules/*.rule'

  scrape_configs:
  # Scrape config for API servers.
  #
  # Kubernetes exposes the API servers as endpoints of the default/kubernetes
  # service, so the job uses the `endpoints` role and relabelling to keep only
  # the endpoints of that service on its default named port `https`. This
  # works for single API server deployments as well as HA API server
  # deployments.
  - job_name: kubernetes-apiservers
    # Scrape over https by default; drop this line or change it to `http` if
    # required.
    scheme: https
    # The bearer token and TLS settings below are used to connect to the
    # scrape endpoints of cluster components. Discovery auth is configured
    # separately because discovery and scraping are two separate concerns in
    # Prometheus: it is automatic when Prometheus runs inside the cluster,
    # otherwise more options have to be provided in <kubernetes_sd_config>.
    bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
    tls_config:
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      # If the node certificates are self-signed or issued by a CA other than
      # the master CA, certificate verification can be disabled by
      # uncommenting the line below. Verification is an integral part of a
      # secure infrastructure, so only do this in a controlled environment.
      #
      # insecure_skip_verify: true
    kubernetes_sd_configs:
    - role: endpoints
    # Keep only the default/kubernetes service endpoints for the https port.
    # Kubernetes adds one endpoint to that service per API server, so each
    # API server becomes a target.
    relabel_configs:
    - action: keep
      regex: default;kubernetes;https
      source_labels:
      - __meta_kubernetes_namespace
      - __meta_kubernetes_service_name
      - __meta_kubernetes_endpoint_port_name
  # Scrape config for nodes (kubelet).
  #
  # The scrape is proxied through the Kubernetes apiserver instead of
  # connecting to the node directly, so it also works when Prometheus runs
  # out of cluster or cannot reach the nodes for some other reason (e.g.
  # because of firewalling).
  - job_name: kubernetes-nodes
    # Scrape over https by default; drop this line or change it to `http` if
    # required.
    scheme: https
    # The bearer token and TLS settings below are used to connect to the
    # scrape endpoints of cluster components. Discovery auth is configured
    # separately because discovery and scraping are two separate concerns in
    # Prometheus: it is automatic when Prometheus runs inside the cluster,
    # otherwise more options have to be provided in <kubernetes_sd_config>.
    bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
    tls_config:
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
    kubernetes_sd_configs:
    - role: node
    relabel_configs:
    # Map Kubernetes node labels onto the target.
    - regex: __meta_kubernetes_node_label_(.+)
      action: labelmap
    # Point the scrape at the apiserver address ...
    - replacement: kubernetes.default.svc:443
      target_label: __address__
    # ... and use the per-node proxy path as the metrics path.
    - source_labels:
      - __meta_kubernetes_node_name
      regex: (.+)
      replacement: '/api/v1/nodes/${1}/proxy/metrics'
      target_label: __metrics_path__
  # Scrape config for Kubelet cAdvisor.
  #
  # Required for Kubernetes 1.7.3 and later, where the cAdvisor metrics
  # (names beginning with 'container_') were removed from the Kubelet metrics
  # endpoint. This job scrapes the cAdvisor endpoint to retrieve them.
  #
  # In Kubernetes 1.7.0-1.7.2 these metrics are only exposed on the cAdvisor
  # HTTP endpoint; in that case use
  # "replacement: /api/v1/nodes/${1}:4194/proxy/metrics" (and make sure the
  # cAdvisor HTTP server has not been disabled with the --cadvisor-port=0
  # Kubelet flag).
  #
  # On Kubernetes 1.6 and earlier this job is not necessary and should be
  # removed, or the metrics will be scraped twice.
  - job_name: kubernetes-cadvisor
    # Scrape over https by default; drop this line or change it to `http` if
    # required.
    scheme: https
    # The bearer token and TLS settings below are used to connect to the
    # scrape endpoints of cluster components. Discovery auth is configured
    # separately because discovery and scraping are two separate concerns in
    # Prometheus: it is automatic when Prometheus runs inside the cluster,
    # otherwise more options have to be provided in <kubernetes_sd_config>.
    bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
    tls_config:
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
    kubernetes_sd_configs:
    - role: node
    relabel_configs:
    # Map Kubernetes node labels onto the target.
    - regex: __meta_kubernetes_node_label_(.+)
      action: labelmap
    # Point the scrape at the apiserver address ...
    - replacement: kubernetes.default.svc:443
      target_label: __address__
    # ... and use the per-node cAdvisor proxy path as the metrics path.
    - source_labels:
      - __meta_kubernetes_node_name
      regex: (.+)
      replacement: '/api/v1/nodes/${1}/proxy/metrics/cadvisor'
      target_label: __metrics_path__
  # Scrape config for service endpoints.
  #
  # The relabeling lets a service configure how its endpoints are scraped
  # through these annotations:
  #
  # * `prometheus.io/scrape`: only services with the value `true` are scraped.
  # * `prometheus.io/scheme`: set to `https` if the metrics endpoint is
  #   secured; the job's `tls_config` most likely has to be set as well.
  # * `prometheus.io/path`: override if the metrics path is not `/metrics`.
  # * `prometheus.io/port`: set if the metrics are exposed on a different
  #   port to the service.
  - job_name: kubernetes-service-endpoints
    kubernetes_sd_configs:
    - role: endpoints
    relabel_configs:
    # prometheus.io/scrape
    - action: keep
      regex: true
      source_labels:
      - __meta_kubernetes_service_annotation_prometheus_io_scrape
    # prometheus.io/scheme
    - action: replace
      regex: '(https?)'
      source_labels:
      - __meta_kubernetes_service_annotation_prometheus_io_scheme
      target_label: __scheme__
    # prometheus.io/path
    - action: replace
      regex: '(.+)'
      source_labels:
      - __meta_kubernetes_service_annotation_prometheus_io_path
      target_label: __metrics_path__
    # prometheus.io/port: swap the port of the discovered address.
    - action: replace
      regex: '([^:]+)(?::\d+)?;(\d+)'
      replacement: '$1:$2'
      source_labels:
      - __address__
      - __meta_kubernetes_service_annotation_prometheus_io_port
      target_label: __address__
    # Map Kubernetes service labels onto the target.
    - action: labelmap
      regex: __meta_kubernetes_service_label_(.+)
    - action: replace
      source_labels:
      - __meta_kubernetes_namespace
      target_label: kubernetes_namespace
    - action: replace
      source_labels:
      - __meta_kubernetes_service_name
      target_label: kubernetes_name
  # Example scrape config for probing services via the Blackbox Exporter.
  #
  # The relabeling allows probing to be switched on per service via the
  # following annotation:
  #
  # * `prometheus.io/probe`: Only probe services that have a value of `true`
  - job_name: 'kubernetes-services'
    # Probe results are served by the exporter's /probe endpoint, selected by
    # the `module` and `target` query parameters. Scraping /metrics here
    # would return the exporter's own metrics for every service instead of
    # probe results. Same settings as the kubernetes-ingresses job.
    metrics_path: /probe
    params:
      module: [http_2xx]
    kubernetes_sd_configs:
    - role: service
    relabel_configs:
    - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe]
      action: keep
      regex: true
    # Pass the discovered service address to the exporter as `?target=...`.
    - source_labels: [__address__]
      target_label: __param_target
    # Send the scrape itself to the exporter.
    # NOTE(review): `blackbox-exporter.example.com` looks like a placeholder;
    # replace it with the real Blackbox Exporter address.
    - target_label: __address__
      replacement: blackbox-exporter.example.com:9115
    # Report the probed service, not the exporter, as the instance.
    - source_labels: [__param_target]
      target_label: instance
    - action: labelmap
      regex: __meta_kubernetes_service_label_(.+)
    - source_labels: [__meta_kubernetes_namespace]
      target_label: kubernetes_namespace
    - source_labels: [__meta_kubernetes_service_name]
      target_label: kubernetes_name
  # Example scrape config for probing ingresses via the Blackbox Exporter.
  #
  # The relabeling allows probing to be switched on per ingress via the
  # following annotation:
  #
  # * `prometheus.io/probe`: only ingresses with the value `true` are probed.
  - job_name: kubernetes-ingresses
    metrics_path: /probe
    params:
      module:
      - http_2xx
    kubernetes_sd_configs:
    - role: ingress
    relabel_configs:
    - action: keep
      regex: true
      source_labels:
      - __meta_kubernetes_ingress_annotation_prometheus_io_probe
    # Build the probe URL <scheme>://<address><path> as the `target` param.
    - source_labels:
      - __meta_kubernetes_ingress_scheme
      - __address__
      - __meta_kubernetes_ingress_path
      regex: '(.+);(.+);(.+)'
      replacement: '${1}://${2}${3}'
      target_label: __param_target
    # Send the scrape itself to the exporter.
    - replacement: blackbox-exporter.example.com:9115
      target_label: __address__
    - source_labels:
      - __param_target
      target_label: instance
    # Map Kubernetes ingress labels onto the target.
    - regex: __meta_kubernetes_ingress_label_(.+)
      action: labelmap
    - source_labels:
      - __meta_kubernetes_namespace
      target_label: kubernetes_namespace
    - source_labels:
      - __meta_kubernetes_ingress_name
      target_label: kubernetes_name
  # Example scrape config for pods.
  #
  # The relabeling lets a pod configure how it is scraped through these
  # annotations:
  #
  # * `prometheus.io/scrape`: only pods with the value `true` are scraped.
  # * `prometheus.io/path`: override if the metrics path is not `/metrics`.
  # * `prometheus.io/port`: scrape the pod on the indicated port instead of
  #   the pod's declared ports (default is a port-free target if none are
  #   declared).
  - job_name: kubernetes-pods
    kubernetes_sd_configs:
    - role: pod
    relabel_configs:
    # prometheus.io/scrape
    - action: keep
      regex: true
      source_labels:
      - __meta_kubernetes_pod_annotation_prometheus_io_scrape
    # prometheus.io/path
    - action: replace
      regex: '(.+)'
      source_labels:
      - __meta_kubernetes_pod_annotation_prometheus_io_path
      target_label: __metrics_path__
    # prometheus.io/port: swap the port of the discovered address.
    - action: replace
      regex: '([^:]+)(?::\d+)?;(\d+)'
      replacement: '$1:$2'
      source_labels:
      - __address__
      - __meta_kubernetes_pod_annotation_prometheus_io_port
      target_label: __address__
    # Map Kubernetes pod labels onto the target.
    - action: labelmap
      regex: __meta_kubernetes_pod_label_(.+)
    - action: replace
      source_labels:
      - __meta_kubernetes_namespace
      target_label: kubernetes_namespace
    - action: replace
      source_labels:
      - __meta_kubernetes_pod_name
      target_label: kubernetes_pod_name
  # Statically configured Alertmanager target.
  alerting:
    alertmanagers:
    - static_configs:
      - targets: ['alertmanager:9093']

规则文件

  1. /etc/prometheus_rules $ ls -l
  2. total 0
  3. lrwxrwxrwx 1 root root 17 Apr 12 13:50 admin.rule -> ..data/admin.rule
  4. lrwxrwxrwx 1 root root 20 Apr 12 13:50 automoto.rule -> ..data/automoto.rule
  5. lrwxrwxrwx 1 root root 25 Apr 12 13:50 ceres-dev.rule.yml -> ..data/ceres-dev.rule.yml
  6. lrwxrwxrwx 1 root root 19 Apr 12 13:50 cicd-17.rule -> ..data/cicd-17.rule
  7. lrwxrwxrwx 1 root root 19 Apr 17 10:18 default.rule -> ..data/default.rule
  8. lrwxrwxrwx 1 root root 18 Apr 12 13:50 devops.rule -> ..data/devops.rule
  9. lrwxrwxrwx 1 root root 20 Apr 12 13:50 dujingya.rule -> ..data/dujingya.rule
  10. lrwxrwxrwx 1 root root 19 Apr 12 13:50 earth-2.rule -> ..data/earth-2.rule
  11. lrwxrwxrwx 1 root root 19 Apr 12 13:50 earth-3.rule -> ..data/earth-3.rule
  12. lrwxrwxrwx 1 root root 17 Apr 12 13:50 earth.rule -> ..data/earth.rule
  13. lrwxrwxrwx 1 root root 20 Apr 12 13:50 guoyiqin.rule -> ..data/guoyiqin.rule
  14. lrwxrwxrwx 1 root root 25 Apr 12 13:50 lijiaob-space.rule -> ..data/lijiaob-space.rule
  15. lrwxrwxrwx 1 root root 19 Apr 12 13:50 newcicd.rule -> ..data/newcicd.rule
  16. lrwxrwxrwx 1 root root 19 Apr 12 13:50 newteam.rule -> ..data/newteam.rule
  17. lrwxrwxrwx 1 root root 18 Apr 12 13:50 qateam.rule -> ..data/qateam.rule
  18. lrwxrwxrwx 1 root root 22 Apr 12 13:50 taijianxin.rule -> ..data/taijianxin.rule
  19. lrwxrwxrwx 1 root root 23 Apr 12 13:50 weihongweic.rule -> ..data/weihongweic.rule
  20. lrwxrwxrwx 1 root root 19 Apr 12 13:50 yaoweig.rule -> ..data/yaoweig.rule
  21. lrwxrwxrwx 1 root root 23 Apr 12 13:50 zhanghongyi.rule -> ..data/zhanghongyi.rule
  22. lrwxrwxrwx 1 root root 20 Apr 12 13:50 zhanghub.rule -> ..data/zhanghub.rule
  23. /etc/prometheus_rules $ pwd
  24. /etc/prometheus_rules
  25. /etc/prometheus_rules $ cat admin.rule
  # Contents of admin.rule: alert rules for the `admin` namespace.
  groups:
  - name: admin.rule
    rules:
    # Fires when the summed 5m rate of container_cpu_usage_seconds_total over
    # the matching pods, scaled to percent and rounded up to two decimals,
    # is above 90.
    - alert: container_cpu_usage_seconds_total_gt_9000_788a0a004db0bdc447d8f0ed31125bb7
      expr: >-
        (ceil(sum(rate(container_cpu_usage_seconds_total{
        namespace="admin",pod_name=~"^fffff-[0-9]{5,15}-[0-9a-zA-Z]{5}$"}[5m]))
        * 100 * 100)/100) > 90
      labels:
        tenxClusterID: CID-ca4135da3326
        tenxNamespace: admin
        tenxStrategyID: STRAID-L3PPd77yxNR6
        tenxStrategyName: tsfffzz
        tenxTargetName: fffff
        tenxTargetType: service
      annotations:
        condition: 'CPU利用率 > 90%'
        # Quoted so the value stays a string and is not read as a timestamp.
        createTime: '2017-11-28T15:56:40+08:00'
        # `{{ $value }}` must stay on one line with no space between the
        # braces, otherwise it is not a template expression.
        currentValue: 'CPU利用率 {{ $value }}%'
        tenxMetricType: cpu/usage_rate
        tokenMD5: '9f834d0df08163e97b1ebb8423003292'
    # NOTE(review): this rule is identical to the previous one (same alert
    # name, expr and labels) except for `createTime` - confirm whether the
    # duplicate is intended.
    - alert: container_cpu_usage_seconds_total_gt_9000_788a0a004db0bdc447d8f0ed31125bb7
      expr: >-
        (ceil(sum(rate(container_cpu_usage_seconds_total{
        namespace="admin",pod_name=~"^fffff-[0-9]{5,15}-[0-9a-zA-Z]{5}$"}[5m]))
        * 100 * 100)/100) > 90
      labels:
        tenxClusterID: CID-ca4135da3326
        tenxNamespace: admin
        tenxStrategyID: STRAID-L3PPd77yxNR6
        tenxStrategyName: tsfffzz
        tenxTargetName: fffff
        tenxTargetType: service
      annotations:
        condition: 'CPU利用率 > 90%'
        createTime: '2017-11-28T15:56:12+08:00'
        currentValue: 'CPU利用率 {{ $value }}%'
        tenxMetricType: cpu/usage_rate
        tokenMD5: '9f834d0df08163e97b1ebb8423003292'
  # Deployment manifest for the Prometheus server. This is a separate file
  # from the admin.rule contents shown directly above.
  #
  # `apps/v1` replaces `extensions/v1beta1`, which is no longer served for
  # Deployments as of Kubernetes 1.16. `apps/v1` requires `spec.selector`,
  # which is already set below.
  apiVersion: apps/v1
  kind: Deployment
  metadata:
    labels:
      app: prometheus
      plugin: prometheus
    name: prometheus
    namespace: kube-system
  spec:
    replicas: 1
    selector:
      matchLabels:
        app: prometheus
        plugin: prometheus
    strategy:
      rollingUpdate:
        maxSurge: 1
        maxUnavailable: 1
      type: RollingUpdate
    template:
      metadata:
        labels:
          app: prometheus
          plugin: prometheus
      spec:
        containers:
        # Prometheus server.
        - args:
          - --config.file=/etc/prometheus/prometheus.yaml
          - --storage.tsdb.path=/prometheus
          - --storage.tsdb.retention=7d
          # Exposes the /-/reload endpoint that the notify sidecar calls.
          - --web.enable-lifecycle
          command:
          - /bin/prometheus
          image: harbor.enncloud.cn/enncloud/prometheus:2.0
          imagePullPolicy: IfNotPresent
          name: prometheus
          ports:
          - containerPort: 9090
            protocol: TCP
          resources:
            limits:
              cpu: "2"
              memory: 3000Mi
            requests:
              cpu: 10m
              memory: 10Mi
          volumeMounts:
          - mountPath: /prometheus
            name: data
          - mountPath: /etc/prometheus
            name: config-volume
          - mountPath: /etc/prometheus_rules
            name: rules-volume
        # Sidecar: on any change under the rules volume's ..data directory,
        # POST to the Prometheus reload endpoint.
        # NOTE(review): no `command` is given, so this relies on the image's
        # entrypoint running the argument as a shell script - confirm.
        - args:
          - 'while inotifywait -qq -e modify,create,delete /etc/prometheus_rules/..data/; do sh -c "curl -X POST http://localhost:9090/-/reload"; done; '
          image: harbor.enncloud.cn/tenx_containers/inotify:1.0
          imagePullPolicy: IfNotPresent
          name: notify
          volumeMounts:
          - mountPath: /etc/prometheus_rules
            name: rules-volume
        dnsPolicy: ClusterFirst
        # The pod is pinned to one node; its TSDB data lives on that node's
        # hostPath volume below.
        nodeName: test-slave-116
        restartPolicy: Always
        volumes:
        - hostPath:
            path: /paas/prometheus_data
          name: data
        - configMap:
            name: prometheus-config
          name: config-volume
        - configMap:
            name: prometheus-rules
          name: rules-volume

发表评论

表情:
评论列表 (有 0 条评论,278人围观)

还没有评论,来说两句吧...

相关阅读