123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179 |
---
# some data sanity checks
# Fail early, with a clear message, when a required input is missing.
# The tasks further down read cluster, clusters, api_ep and api_token.

# cluster is the key used to look up this run's entry in clusters.
- name: verify that cluster is defined
  ansible.builtin.assert:
    that:
      - cluster is defined
    fail_msg: "ERROR: Variable cluster is not defined, but is required."
    success_msg: "OK, cluster is defined - federating {{ cluster }}"

# clusters must be a non-empty mapping that has an entry for cluster.
# Conditions are checked in order, so the lookup is only attempted once
# clusters itself is known to exist.
- name: verify that clusters is defined and contains the cluster
  ansible.builtin.assert:
    that:
      - clusters is defined
      - (clusters.keys() | length) > 0
      - clusters[cluster] is defined
    fail_msg: "ERROR: Variable clusters is not defined or is missing cluster {{ cluster }}, but is required."
    success_msg: "OK, clusters are defined and cluster is found."

# api_ep is the host part of the https://.../v1/clusters URL queried later.
- name: verify that api_ep is defined
  ansible.builtin.assert:
    that:
      - api_ep is defined
    fail_msg: "ERROR: Variable api_ep is not defined, but is required."
    success_msg: "OK, api_ep is defined."

# api_token is sent as the bearer token on those same requests.
- name: verify that api_token is defined
  ansible.builtin.assert:
    that:
      - api_token is defined
    fail_msg: "ERROR: Variable api_token is not defined, but is required."
    success_msg: "OK, api_token is defined."
# is there anything to do?
# Ask central which clusters it already knows about. The reply is expected
# to carry a "clusters" list whose items have "name" and "status" keys.
# NOTE(review): validate_certs is off, so central's TLS certificate is not
# verified - confirm this is intentional.
- name: check for cluster definitions in central
  ansible.builtin.uri:
    method: GET
    return_content: true
    validate_certs: false
    url: "https://{{ api_ep }}/v1/clusters"
    headers:
      Authorization: "Bearer {{ api_token }}"
      Accept: application/json
  register: cluster_query

- name: assume cluster isn't found in the result
  ansible.builtin.set_fact:
    cluster_found: false

# Flip cluster_found only when central lists a cluster with our name AND its
# status carries a sensorVersion. default([]) makes a reply without a
# "clusters" key count as "not found" instead of raising an undefined error.
- name: unless found
  ansible.builtin.set_fact:
    cluster_found: true
  when:
    - cluster_query.json.clusters | default([]) | length > 0
    - >-
      (cluster_query.json.clusters
      | items2dict(key_name='name', value_name='status'))[clusters[cluster].name]
      is defined
    - >-
      ((cluster_query.json.clusters
      | items2dict(key_name='name', value_name='status'))[clusters[cluster].name]).sensorVersion
      is defined
# (this last one is because roxctl creates a cluster record but leaves its status at null until services check in)
# step 1: authentication in place
# Run init-bundles.yml, but only for the 'operator' and 'helm' methods and
# only when central does not report the cluster yet. Any other method
# (e.g. 'roxctl') gets no init bundles.
- ansible.builtin.include_tasks: init-bundles.yml
  when: >-
    clusters[cluster].method in ['operator', 'helm']
    and not cluster_found
# step 2: load the task file named after the cluster's method
# ("<method>.yml"), skipped when central already reports the cluster.
# NOTE(review): presumably those files make sure the artifacts are either
# present or created - confirm in the per-method task files.
- ansible.builtin.include_tasks:
    file: "{{ clusters[cluster].method ~ '.yml' }}"
  when: not cluster_found
# step 3: wait for pods to pop up
# Poll the cluster's namespace until at least one pod labelled app=sensor
# is listed (up to 30 retries, 5 seconds apart). Only the pod's existence is
# checked here, not its phase or readiness.
# The module is addressed by its fully qualified name, like the other
# kubernetes.core modules in this file.
- name: wait for sensor to show up
  kubernetes.core.k8s_info:
    kubeconfig: "{{ ansible_facts['user_dir'] }}/kubeconfig-{{ cluster }}"
    validate_certs: false
    api_version: v1
    kind: pod
    namespace: "{{ clusters[cluster].namespace }}"
    label_selectors:
      - app=sensor
  register: sensor_pod
  until:
    - sensor_pod.resources is defined
    - (sensor_pod.resources | length) > 0
  retries: 30
  delay: 5
# Poll the cluster's namespace until at least one pod labelled
# app=admission-control is listed (up to 30 retries, 5 seconds apart).
# Only existence is checked, not phase or readiness.
# The module is addressed by its fully qualified name, like the other
# kubernetes.core modules in this file.
- name: wait for admission-control to show up
  kubernetes.core.k8s_info:
    kubeconfig: "{{ ansible_facts['user_dir'] }}/kubeconfig-{{ cluster }}"
    validate_certs: false
    api_version: v1
    kind: pod
    namespace: "{{ clusters[cluster].namespace }}"
    label_selectors:
      - app=admission-control
  register: admctl_pod
  until:
    - admctl_pod.resources is defined
    - (admctl_pod.resources | length) > 0
  retries: 30
  delay: 5
# Poll the cluster's namespace until at least one pod labelled app=collector
# is listed (up to 30 retries, 5 seconds apart). Only existence is checked,
# not phase or readiness - pods stuck in Pending are handled afterwards.
# The module is addressed by its fully qualified name, like the other
# kubernetes.core modules in this file.
- name: wait for collector to show up
  kubernetes.core.k8s_info:
    kubeconfig: "{{ ansible_facts['user_dir'] }}/kubeconfig-{{ cluster }}"
    validate_certs: false
    api_version: v1
    kind: pod
    namespace: "{{ clusters[cluster].namespace }}"
    label_selectors:
      - app=collector
  register: collect_pod
  until:
    - collect_pod.resources is defined
    - (collect_pod.resources | length) > 0
  retries: 30
  delay: 5
# step 4: any sort of corrections needed? pending pods?
# List every pod in the cluster's namespace whose phase is Pending. The
# result is registered as pending_pods and drives the two fix-up tasks
# that follow.
# The module is addressed by its fully qualified name, like the other
# kubernetes.core modules in this file.
- name: any pending pods?
  kubernetes.core.k8s_info:
    kubeconfig: "{{ ansible_facts['user_dir'] }}/kubeconfig-{{ cluster }}"
    validate_certs: false
    api_version: v1
    kind: pod
    namespace: "{{ clusters[cluster].namespace }}"
    field_selectors:
      - status.phase=Pending
  register: pending_pods
# Set the CPU request of the first container in the sensor deployment to
# 750m when a sensor pod is stuck in Pending.
# NOTE(review): presumably 750m is lower than the shipped request, as the
# task name says - confirm against the deployed manifest.
# The condition scans ALL pending pods for the app=sensor label; looking
# only at the first entry would miss a pending sensor listed after another
# pod, and would raise on a first pod that has no "app" label.
- name: fix pending sensor by decreasing requests
  kubernetes.core.k8s_json_patch:
    kubeconfig: "{{ ansible_facts['user_dir'] }}/kubeconfig-{{ cluster }}"
    validate_certs: false
    api_version: apps/v1
    kind: deployment
    name: sensor
    namespace: "{{ clusters[cluster].namespace }}"
    patch:
      - op: replace
        path: /spec/template/spec/containers/0/resources/requests/cpu
        value: 750m
  when: >-
    pending_pods.resources
    | selectattr('metadata.labels.app', 'defined')
    | selectattr('metadata.labels.app', 'equalto', 'sensor')
    | list | length > 0
# When a collector pod is stuck in Pending, delete the four deployments
# listed in the loop.
# NOTE(review): this is destructive - it removes operator deployments from
# openshift-machine-api and openshift-cluster-storage-operator, presumably
# to free up capacity for the collectors. Confirm this is acceptable on the
# target clusters.
# The condition scans ALL pending pods for the app=collector label; looking
# only at the first entry would miss a pending collector listed after
# another pod, and would raise on a first pod that has no "app" label.
- name: fix pending collectors by deleting random operators
  kubernetes.core.k8s:
    kubeconfig: "{{ ansible_facts['user_dir'] }}/kubeconfig-{{ cluster }}"
    validate_certs: false
    api_version: apps/v1
    kind: deployment
    name: "{{ item.name }}"
    namespace: "{{ item.namespace }}"
    state: absent
  loop:
    - name: cluster-autoscaler-operator
      namespace: openshift-machine-api
    - name: cluster-baremetal-operator
      namespace: openshift-machine-api
    - name: csi-snapshot-controller-operator
      namespace: openshift-cluster-storage-operator
    - name: csi-snapshot-controller
      namespace: openshift-cluster-storage-operator
  when: >-
    pending_pods.resources
    | selectattr('metadata.labels.app', 'defined')
    | selectattr('metadata.labels.app', 'equalto', 'collector')
    | list | length > 0
# step 5: just recheck the cluster in central - it should be healthy
# Re-query central until it lists a cluster with our name whose status
# carries a sensorVersion (up to 30 retries, 5 seconds apart).
# The first two conditions guard the lookups below them: a reply that has no
# parsed JSON body or no "clusters" key then counts as "not yet" and is
# retried, instead of raising an undefined-variable error inside until.
# NOTE(review): validate_certs is off, so central's TLS certificate is not
# verified - confirm this is intentional.
- name: check that the cluster is marked as discovered
  ansible.builtin.uri:
    method: GET
    return_content: true
    validate_certs: false
    url: "https://{{ api_ep }}/v1/clusters"
    headers:
      Authorization: "Bearer {{ api_token }}"
      Accept: application/json
  register: cluster_query_fin
  until:
    - cluster_query_fin.json is defined
    - cluster_query_fin.json.clusters | default([]) | length > 0
    - >-
      (cluster_query_fin.json.clusters
      | items2dict(key_name='name', value_name='status'))[clusters[cluster].name]
      is defined
    - >-
      ((cluster_query_fin.json.clusters
      | items2dict(key_name='name', value_name='status'))[clusters[cluster].name]).sensorVersion
      is defined
  retries: 30
  delay: 5
...
|