|
|
@@ -0,0 +1,85 @@
|
|
|
+---
|
|
|
+# Configure cluster monitoring:
|
|
|
+# - storage for Prometheus and AlertManager
|
|
|
+# - retention settings
|
|
|
+# - UWM (user workload monitoring)
|
|
|
+# - storage for Prometheus, AlertManager, and ThanosRuler
|
|
|
+# - retention settings
|
|
|
+# - user RBAC to allow them to query and view metrics (cluster-monitoring-view) TODO
|
|
|
+#
|
|
|
+# Required variables:
|
|
|
+#
|
|
|
+# NONE
|
|
|
+#
|
|
|
+# Optional variables:
|
|
|
+#
|
|
|
+# kubeadmin_config the administrator kubeconfig file (tmp/kubeconfig-ocp4)
|
|
|
+#
|
|
|
+# enable_user_workload defaults to true
|
|
|
+# main_prom_pvc prometheusK8s PVC size (100Gi)
|
|
|
+# main_prom_sc prometheusK8s PVC storage class (odf-cluster-ceph-rbd)
|
|
|
+# main_prom_retain_time system metric retention time (21d)
|
|
|
+# main_prom_retain_size system metric retention size in GiB (80GiB)
|
|
|
+# main_alrt_pvc alertmanagerMain PVC size (10Gi)
|
|
|
+# main_alrt_sc alertmanagerMain PVC storage class (odf-cluster-ceph-rbd)
|
|
|
+# user_prom_pvc prometheus PVC size (40Gi)
|
|
|
+# user_prom_sc prometheus PVC storage class (odf-cluster-ceph-rbd)
|
|
|
+# user_prom_retain_time user metric retention time (21d)
|
|
|
+# user_prom_retain_size user metric retention size in GiB (30GiB)
|
|
|
+# user_alrt_pvc alertmanager PVC size (10Gi)
|
|
|
+# user_alrt_sc alertmanager PVC storage class (odf-cluster-ceph-rbd)
|
|
|
+# user_thanos_pvc thanos ruler PVC size (10Gi)
|
|
|
+# user_thanos_sc thanos ruler PVC storage class (odf-cluster-ceph-rbd)
|
|
|
+# user_thanos_retain_time thanos ruler retention time (21d)
|
|
|
+#
|
|
|
+# OPTIONAL TODOs:
|
|
|
+# - nodeSelector
|
|
|
+# - taints
|
|
|
+# - tolerations
|
|
|
+#
|
|
|
+# NOTES:
|
|
|
+# symptoms: disk pressure, https://access.redhat.com/solutions/5341801 and
|
|
|
+# https://access.redhat.com/solutions/6738851
|
|
|
+#
|
|
|
+# Create or update the cluster-monitoring-config ConfigMap in the
|
|
|
+# openshift-monitoring namespace from templates/cluster-monitoring.yml.j2.
|
|
|
+- name: Apply cluster monitoring configmap
|
|
|
+  kubernetes.core.k8s:
|
|
|
+    kubeconfig: "{{ kubeadmin_config }}"
|
|
|
+    # NOTE(review): certificate validation is turned off -- confirm intended.
|
|
|
+    validate_certs: false
|
|
|
+    api_version: v1
|
|
|
+    kind: ConfigMap
|
|
|
+    namespace: openshift-monitoring
|
|
|
+    name: cluster-monitoring-config
|
|
|
+    template: templates/cluster-monitoring.yml.j2
|
|
|
+
|
|
|
+# When user workload monitoring is enabled: wait until the single pod labelled
|
|
|
+# app.kubernetes.io/component=controller in openshift-user-workload-monitoring
|
|
|
+# reports Ready, then apply the user-workload-monitoring-config ConfigMap.
|
|
|
+- name: Apply user monitoring settings if required
|
|
|
+  block:
|
|
|
+    - name: Wait for UWM operator pod to become ready
|
|
|
+      kubernetes.core.k8s_info:
|
|
|
+        kubeconfig: "{{ kubeadmin_config }}"
|
|
|
+        # NOTE(review): certificate validation is turned off -- confirm intended.
|
|
|
+        validate_certs: false
|
|
|
+        api_version: v1
|
|
|
+        kind: Pod
|
|
|
+        namespace: openshift-user-workload-monitoring
|
|
|
+        label_selectors:
|
|
|
+          - app.kubernetes.io/component=controller
|
|
|
+      register: uwm_op_ready
|
|
|
+      # All conditions must hold; the query is re-run up to 6 times, 5 seconds apart.
|
|
|
+      until:
|
|
|
+        - uwm_op_ready.resources is defined
|
|
|
+        - uwm_op_ready.resources | length == 1
|
|
|
+        - uwm_op_ready.resources[0].status is defined
|
|
|
+        # A missing conditions list or missing Ready entry yields '' (retry) rather than an error.
|
|
|
+        - (uwm_op_ready.resources[0].status | community.general.json_query('conditions[?type==`Ready`].status') | default([], true) | first | default('')) == 'True'
|
|
|
+      retries: 6
|
|
|
+      delay: 5
|
|
|
+
|
|
|
+    - name: Apply user monitoring configmap if required
|
|
|
+      kubernetes.core.k8s:
|
|
|
+        kubeconfig: "{{ kubeadmin_config }}"
|
|
|
+        # NOTE(review): certificate validation is turned off -- confirm intended.
|
|
|
+        validate_certs: false
|
|
|
+        api_version: v1
|
|
|
+        kind: ConfigMap
|
|
|
+        namespace: openshift-user-workload-monitoring
|
|
|
+        name: user-workload-monitoring-config
|
|
|
+        template: templates/user-monitoring.yml.j2
|
|
|
+
|
|
|
+  # enable_user_workload is documented as defaulting to true; the bool filter
|
|
|
+  # also accepts string values such as "true"/"false" passed as extra vars.
|
|
|
+  when: enable_user_workload | default(true) | bool
|
|
|
+...
|