Skip to content
Snippets Groups Projects
Verified Commit e6c0f1fd authored by Rafael László's avatar Rafael László :speech_balloon:
Browse files

Update nginx monitoring

parent 454391ce
No related branches found
No related tags found
No related merge requests found
Pipeline #38977 passed
......@@ -49,58 +49,56 @@ ingress-nginx:
type: LoadBalancer
loadBalancerIP: 152.66.192.38
# TODO
# metrics:
# enabled: true
# service:
# annotations: {} # With prometheus operator you need a ServiceMonitor
# serviceMonitor:
# enabled: true
# namespace: "monitoring"
# additionalLabels:
# release: monitoring
# # TODO
# prometheusRule:
# enabled: true
# additionalLabels:
# release: monitoring
# namespace: "monitoring"
# rules:
# - alert: NGINXConfigFailed
# expr: count(nginx_ingress_controller_config_last_reload_successful == 0) > 0
# for: 1s
# labels:
# severity: critical
# annotations:
# description: bad ingress config - nginx config test failed
# summary: uninstall the latest ingress changes to allow config reloads to resume
# - alert: NGINXCertificateExpiry
# expr: (avg(nginx_ingress_controller_ssl_expire_time_seconds) by (host) - time()) < 604800
# for: 1s
# labels:
# severity: critical
# annotations:
# description: ssl certificate(s) will expire in less than a week
# summary: renew expiring certificates to avoid downtime
# - alert: NGINXTooMany500s
# expr: 100 * ( sum( nginx_ingress_controller_requests{status=~"5.+"} ) / sum(nginx_ingress_controller_requests) ) > 5
# for: 1m
# labels:
# severity: warning
# annotations:
# description: Too many 5XXs
# summary: More than 5% of all requests returned 5XX, this requires your attention
# - alert: NGINXTooMany400s
# expr: 100 * ( sum( nginx_ingress_controller_requests{status=~"4.+"} ) / sum(nginx_ingress_controller_requests) ) > 30
# for: 1m
# labels:
# severity: warning
# annotations:
# description: Too many 4XXs
# summary: More than 30% of all requests returned 4XX, this requires your attention
# Prometheus monitoring for the ingress controller: exposes the metrics
# endpoint, registers a ServiceMonitor, and installs alerting rules.
metrics:
  enabled: true
  service:
    annotations: {}  # With prometheus operator you need a ServiceMonitor
  serviceMonitor:
    enabled: true
    namespace: "monitoring"
    # NOTE(review): presumably this label must match the Prometheus
    # operator's serviceMonitorSelector - confirm against the monitoring release.
    additionalLabels:
      release: monitoring
  prometheusRule:
    enabled: true
    # NOTE(review): presumably this label must match the Prometheus
    # operator's ruleSelector - confirm against the monitoring release.
    additionalLabels:
      release: monitoring
    namespace: "monitoring"
    rules:
      # Fires when at least one series reports a failed config reload.
      - alert: NGINXConfigFailed
        expr: 'count(nginx_ingress_controller_config_last_reload_successful == 0) > 0'
        for: 1s
        labels:
          severity: critical
        annotations:
          description: bad ingress config - nginx config test failed
          summary: uninstall the latest ingress changes to allow config reloads to resume
      # Fires when a host's certificate expires within 604800 s (7 days).
      - alert: NGINXCertificateExpiry
        expr: '(avg(nginx_ingress_controller_ssl_expire_time_seconds) by (host) - time()) < 604800'
        for: 1s
        labels:
          severity: critical
        annotations:
          description: ssl certificate(s) will expire in less than a week
          summary: renew expiring certificates to avoid downtime
      # Fires when 5XX responses exceed 5% of all requests.
      # NOTE(review): the ratio is taken over raw series values rather than
      # rate(); confirm that a since-start ratio is what is intended.
      - alert: NGINXTooMany500s
        expr: '100 * ( sum( nginx_ingress_controller_requests{status=~"5.+"} ) / sum(nginx_ingress_controller_requests) ) > 5'
        for: 1m
        labels:
          severity: warning
        annotations:
          description: Too many 5XXs
          summary: More than 5% of all requests returned 5XX, this requires your attention
      # Fires when 4XX responses exceed 30% of all requests; the summary
      # text must state the same threshold as the expression.
      - alert: NGINXTooMany400s
        expr: '100 * ( sum( nginx_ingress_controller_requests{status=~"4.+"} ) / sum(nginx_ingress_controller_requests) ) > 30'
        for: 1m
        labels:
          severity: warning
        annotations:
          description: Too many 4XXs
          summary: More than 30% of all requests returned 4XX, this requires your attention
priorityClassName: "infra-ingress"
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment