diff --git a/infra/ingress-nginx/values.yaml b/infra/ingress-nginx/values.yaml index ab69dc165342a17a9616c233cf1b75cbeff90cbf..cd95fb98a24c0d57674a2155ee0c8d6077781b7e 100644 --- a/infra/ingress-nginx/values.yaml +++ b/infra/ingress-nginx/values.yaml @@ -49,58 +49,56 @@ ingress-nginx: type: LoadBalancer loadBalancerIP: 152.66.192.38 - # TODO - # metrics: - # enabled: true - - # service: - # annotations: {} # With prometheus operator you need a ServiceMonitor - - # serviceMonitor: - # enabled: true - # namespace: "monitoring" - # additionalLabels: - # release: monitoring - - # # TODO - # prometheusRule: - # enabled: true - # additionalLabels: - # release: monitoring - # namespace: "monitoring" - # rules: - # - alert: NGINXConfigFailed - # expr: count(nginx_ingress_controller_config_last_reload_successful == 0) > 0 - # for: 1s - # labels: - # severity: critical - # annotations: - # description: bad ingress config - nginx config test failed - # summary: uninstall the latest ingress changes to allow config reloads to resume - # - alert: NGINXCertificateExpiry - # expr: (avg(nginx_ingress_controller_ssl_expire_time_seconds) by (host) - time()) < 604800 - # for: 1s - # labels: - # severity: critical - # annotations: - # description: ssl certificate(s) will expire in less then a week - # summary: renew expiring certificates to avoid downtime - # - alert: NGINXTooMany500s - # expr: 100 * ( sum( nginx_ingress_controller_requests{status=~"5.+"} ) / sum(nginx_ingress_controller_requests) ) > 5 - # for: 1m - # labels: - # severity: warning - # annotations: - # description: Too many 5XXs - # summary: More than 5% of all requests returned 5XX, this requires your attention - # - alert: NGINXTooMany400s - # expr: 100 * ( sum( nginx_ingress_controller_requests{status=~"4.+"} ) / sum(nginx_ingress_controller_requests) ) > 30 - # for: 1m - # labels: - # severity: warning - # annotations: - # description: Too many 4XXs - # summary: More than 5% of all requests returned 4XX, this 
requires your attention + metrics: + enabled: true + + service: + annotations: {} # With prometheus operator you need a ServiceMonitor + + serviceMonitor: + enabled: true + namespace: "monitoring" + additionalLabels: + release: monitoring + + prometheusRule: + enabled: true + additionalLabels: + release: monitoring + namespace: "monitoring" + rules: + - alert: NGINXConfigFailed + expr: count(nginx_ingress_controller_config_last_reload_successful == 0) > 0 + for: 1s + labels: + severity: critical + annotations: + description: bad ingress config - nginx config test failed + summary: uninstall the latest ingress changes to allow config reloads to resume + - alert: NGINXCertificateExpiry + expr: (avg(nginx_ingress_controller_ssl_expire_time_seconds) by (host) - time()) < 604800 + for: 1s + labels: + severity: critical + annotations: + description: ssl certificate(s) will expire in less than a week + summary: renew expiring certificates to avoid downtime + - alert: NGINXTooMany500s + expr: 100 * ( sum( nginx_ingress_controller_requests{status=~"5.+"} ) / sum(nginx_ingress_controller_requests) ) > 5 + for: 1m + labels: + severity: warning + annotations: + description: Too many 5XXs + summary: More than 5% of all requests returned 5XX, this requires your attention + - alert: NGINXTooMany400s + expr: 100 * ( sum( nginx_ingress_controller_requests{status=~"4.+"} ) / sum(nginx_ingress_controller_requests) ) > 30 + for: 1m + labels: + severity: warning + annotations: + description: Too many 4XXs + summary: More than 30% of all requests returned 4XX, this requires your attention priorityClassName: "infra-ingress"