---
# Federate a secured cluster into Central (looks like StackRox/RHACS — it
# talks to a /v1/clusters API and waits for sensor/admission-control/collector
# pods; confirm against the enclosing role):
#   1. sanity-check required variables (cluster, clusters, api_ep, api_token)
#   2. skip provisioning if Central already reports the cluster as checked in
#   3. otherwise provision via the per-cluster method and wait for its pods
#   4. unstick Pending pods, re-check Central, scale admission-control

# some data sanity checks
- name: ensure cluster is defined
  ansible.builtin.assert:
    that: cluster is defined
    fail_msg: "ERROR: Variable cluster is not defined, but is required."
    success_msg: "OK, cluster is defined - federating {{ cluster }}"

- name: ensure clusters is defined and contains the target cluster
  ansible.builtin.assert:
    that: clusters is defined and (clusters.keys() | length) > 0 and clusters[cluster] is defined
    fail_msg: "ERROR: Variable clusters is not defined or is missing cluster {{ cluster }}, but is required."
    success_msg: "OK, clusters are defined and cluster is found."

- name: ensure api_ep is defined
  ansible.builtin.assert:
    that: api_ep is defined
    fail_msg: "ERROR: Variable api_ep is not defined, but is required."
    success_msg: "OK, api_ep is defined."

- name: ensure api_token is defined
  ansible.builtin.assert:
    that: api_token is defined
    fail_msg: "ERROR: Variable api_token is not defined, but is required."
    success_msg: "OK, api_token is defined."

# is there anything to do?
- name: check for cluster definitions in central
  ansible.builtin.uri:
    method: GET
    return_content: true
    validate_certs: false
    url: "https://{{ api_ep }}/v1/clusters"
    headers:
      Authorization: "Bearer {{ api_token }}"
      Accept: application/json
  register: cluster_query

- name: assume cluster isn't found in the result
  ansible.builtin.set_fact:
    cluster_found: false

- name: unless found
  ansible.builtin.set_fact:
    cluster_found: true
  when:
    - cluster_query.json.clusters | length > 0
    - (cluster_query.json.clusters | items2dict(key_name='name', value_name='status'))[clusters[cluster].name] is defined
    # this last condition is because roxctl creates a cluster record but
    # leaves its status at null until services check in
    - ((cluster_query.json.clusters | items2dict(key_name='name', value_name='status'))[clusters[cluster].name]).sensorVersion is defined

# step 1: we could have lots of fun (authentication in place)
- name: check for missing init bundles
  ansible.builtin.include_tasks:
    file: init-bundles.yml
  when:
    # no init bundles for method 'roxctl'
    - clusters[cluster].method in ['operator', 'helm']
    - not cluster_found

# step 2: there's so much we can do (not really, just make sure artifacts are either present or created)
- name: use corresponding method to provision the cluster
  ansible.builtin.include_tasks:
    file: "{{ clusters[cluster].method }}.yml"
  when:
    - not cluster_found

# step 3: there is just you and me (wait for pods to pop up)
- name: wait for sensor to show up
  kubernetes.core.k8s_info:
    kubeconfig: "{{ ansible_facts['user_dir'] }}/kubeconfig-{{ cluster }}"
    validate_certs: false
    api_version: v1
    kind: Pod
    namespace: "{{ clusters[cluster].namespace }}"
    label_selectors:
      - app=sensor
  register: sensor_pod
  until:
    - sensor_pod.resources is defined
    - (sensor_pod.resources | length) > 0
  retries: 30
  delay: 5

- name: wait for admission-control to show up
  kubernetes.core.k8s_info:
    kubeconfig: "{{ ansible_facts['user_dir'] }}/kubeconfig-{{ cluster }}"
    validate_certs: false
    api_version: v1
    kind: Pod
    namespace: "{{ clusters[cluster].namespace }}"
    label_selectors:
      - app=admission-control
  register: admctl_pod
  until:
    - admctl_pod.resources is defined
    - (admctl_pod.resources | length) > 0
  retries: 30
  delay: 5

- name: wait for collector to show up
  kubernetes.core.k8s_info:
    kubeconfig: "{{ ansible_facts['user_dir'] }}/kubeconfig-{{ cluster }}"
    validate_certs: false
    api_version: v1
    kind: Pod
    namespace: "{{ clusters[cluster].namespace }}"
    label_selectors:
      - app=collector
  register: collect_pod
  until:
    - collect_pod.resources is defined
    - (collect_pod.resources | length) > 0
  retries: 30
  delay: 5

# step 4: i can give you more (any sort of corrections needed? pending pods?)
- name: any pending pods?
  kubernetes.core.k8s_info:
    kubeconfig: "{{ ansible_facts['user_dir'] }}/kubeconfig-{{ cluster }}"
    validate_certs: false
    api_version: v1
    kind: Pod
    namespace: "{{ clusters[cluster].namespace }}"
    field_selectors:
      - status.phase=Pending
  register: pending_pods

- name: fix pending sensor by decreasing requests
  kubernetes.core.k8s_json_patch:
    kubeconfig: "{{ ansible_facts['user_dir'] }}/kubeconfig-{{ cluster }}"
    validate_certs: false
    api_version: apps/v1
    kind: Deployment
    name: sensor
    namespace: "{{ clusters[cluster].namespace }}"
    patch:
      - op: replace
        path: /spec/template/spec/containers/0/resources/requests/cpu
        value: 750m
  when:
    - (pending_pods.resources | length) > 0
    - pending_pods.resources[0].metadata.labels.app == 'sensor'

- name: fix pending collectors by deleting random operators
  kubernetes.core.k8s:
    kubeconfig: "{{ ansible_facts['user_dir'] }}/kubeconfig-{{ cluster }}"
    validate_certs: false
    api_version: apps/v1
    kind: Deployment
    name: "{{ item.name }}"
    namespace: "{{ item.namespace }}"
    state: absent
  loop:
    - name: machine-api-operator
      namespace: openshift-machine-api
    - name: cluster-autoscaler-operator
      namespace: openshift-machine-api
    - name: cluster-baremetal-operator
      namespace: openshift-machine-api
    - name: csi-snapshot-controller-operator
      namespace: openshift-cluster-storage-operator
    - name: csi-snapshot-controller
      namespace: openshift-cluster-storage-operator
    - name: cluster-monitoring-operator
      namespace: openshift-monitoring
  when:
    - (pending_pods.resources | length) > 0
    - pending_pods.resources[0].metadata.labels.app == 'collector'

# step 5: don't you know the time has arrived (just recheck the cluster in central - it should be healthy)
- name: check that the cluster is marked as discovered
  ansible.builtin.uri:
    method: GET
    return_content: true
    validate_certs: false
    url: "https://{{ api_ep }}/v1/clusters"
    headers:
      Authorization: "Bearer {{ api_token }}"
      Accept: application/json
  register: cluster_query_fin
  until:
    - cluster_query_fin.json.clusters | length > 0
    - (cluster_query_fin.json.clusters | items2dict(key_name='name', value_name='status'))[clusters[cluster].name] is defined
    - ((cluster_query_fin.json.clusters | items2dict(key_name='name', value_name='status'))[clusters[cluster].name]).sensorVersion is defined
  retries: 30
  delay: 5

- name: scale admission controller to desired number
  kubernetes.core.k8s:
    kubeconfig: "{{ ansible_facts['user_dir'] }}/kubeconfig-{{ cluster }}"
    validate_certs: false
    api_version: apps/v1
    merge_type: merge
    kind: Deployment
    name: admission-control
    namespace: "{{ clusters[cluster].namespace }}"
    resource_definition:
      spec:
        replicas: "{{ (clusters[cluster].admission_instances | default(3)) | int }}"
...