# main.yml
---
  # Data sanity checks: fail early, with a readable message, when a required
  # input is missing instead of erroring half-way through provisioning.
  - name: ensure the target cluster is selected
    ansible.builtin.assert:
      that:
        - cluster is defined
      fail_msg: "ERROR: Variable cluster is not defined, but is required."
      # default('') keeps this message renderable when cluster is undefined, so
      # argument templating cannot abort before fail_msg gets a chance to show.
      success_msg: "OK, cluster is defined - federating {{ cluster | default('') }}"

  - name: ensure the selected cluster is described in clusters
    ansible.builtin.assert:
      # Listed separately (evaluated in order) so a failure names the exact
      # condition that was not met.
      that:
        - clusters is defined
        - (clusters.keys() | length) > 0
        - clusters[cluster] is defined
      fail_msg: "ERROR: Variable clusters is not defined or is missing cluster {{ cluster }}, but is required."
      success_msg: "OK, clusters are defined and cluster is found."

  - name: ensure the cluster entry carries the keys used by later tasks
    ansible.builtin.assert:
      # name, method and namespace are all dereferenced by the tasks that follow.
      that:
        - clusters[cluster].name is defined
        - clusters[cluster].method is defined
        - clusters[cluster].namespace is defined
      fail_msg: "ERROR: Cluster {{ cluster }} must define name, method and namespace."
      success_msg: "OK, cluster entry is complete."

  - name: ensure the API endpoint is known
    ansible.builtin.assert:
      that:
        - api_ep is defined
      fail_msg: "ERROR: Variable api_ep is not defined, but is required."
      success_msg: "OK, api_ep is defined."

  - name: ensure an API token is available
    ansible.builtin.assert:
      that:
        - api_token is defined
      fail_msg: "ERROR: Variable api_token is not defined, but is required."
      success_msg: "OK, api_token is defined."
  # Is there anything to do? Fetch the cluster list from the API endpoint and
  # keep the reply in cluster_query for the decision made by the next tasks.
  # NOTE(review): certificate validation is disabled while a bearer token is
  # sent - confirm this is intended (e.g. self-signed endpoint).
  - name: check for cluster definitions in central
    ansible.builtin.uri:
      url: "https://{{ api_ep }}/v1/clusters"
      method: GET
      headers:
        Accept: "application/json"
        Authorization: "Bearer {{ api_token }}"
      validate_certs: false
      return_content: true
    register: cluster_query
  # Start from "not found"; the follow-up task flips the flag only when the
  # reply lists this cluster by name and its status carries a sensorVersion.
  - name: assume cluster isn't found in the result
    ansible.builtin.set_fact:
      cluster_found: false

  - name: unless found
    ansible.builtin.set_fact:
      cluster_found: true
    when:
      - cluster_query.json.clusters | length > 0
      - >-
        (cluster_query.json.clusters
        | items2dict(key_name='name', value_name='status'))[clusters[cluster].name]
        is defined
      # The sensorVersion check exists because roxctl creates a cluster record
      # but leaves its status at null until services check in.
      - >-
        ((cluster_query.json.clusters
        | items2dict(key_name='name', value_name='status'))[clusters[cluster].name]).sensorVersion
        is defined
  # Step 1 (authentication in place): include init-bundles.yml for clusters
  # that are not yet known and use the 'operator' or 'helm' method.
  # There are no init bundles for method 'roxctl', so it is skipped here.
  - name: check for missing init bundles
    ansible.builtin.include_tasks:
      file: init-bundles.yml
    when: >-
      clusters[cluster].method in ['operator', 'helm']
      and not cluster_found
  # Step 2 (make sure artifacts are either present or created): hand over to
  # the task file named after the cluster's method, e.g. "<method>.yml".
  # Skipped entirely when the cluster was already found.
  - name: use corresponding method to provision the cluster
    ansible.builtin.include_tasks:
      file: "{{ clusters[cluster].method }}.yml"
    when: not cluster_found
  # Step 3 (wait for pods to pop up): poll the target namespace until at least
  # one pod labelled app=sensor is listed, retrying up to 30 times with a
  # 5 second pause. Only the existence of the pod object is checked here.
  - name: wait for sensor to show up
    # Fully qualified name, matching the other kubernetes.core modules used
    # in this file.
    kubernetes.core.k8s_info:
      kubeconfig: "{{ ansible_facts['user_dir'] }}/kubeconfig-{{ cluster }}"
      validate_certs: false
      api_version: v1
      kind: Pod
      namespace: "{{ clusters[cluster].namespace }}"
      label_selectors:
        - app=sensor
    register: sensor_pod
    until:
      - sensor_pod.resources is defined
      - (sensor_pod.resources | length) > 0
    retries: 30
    delay: 5
  # Poll the target namespace until at least one pod labelled
  # app=admission-control is listed (up to 30 retries, 5 seconds apart).
  # Only the existence of the pod object is checked here.
  - name: wait for admission-control to show up
    # Fully qualified name, matching the other kubernetes.core modules used
    # in this file.
    kubernetes.core.k8s_info:
      kubeconfig: "{{ ansible_facts['user_dir'] }}/kubeconfig-{{ cluster }}"
      validate_certs: false
      api_version: v1
      kind: Pod
      namespace: "{{ clusters[cluster].namespace }}"
      label_selectors:
        - app=admission-control
    register: admctl_pod
    until:
      - admctl_pod.resources is defined
      - (admctl_pod.resources | length) > 0
    retries: 30
    delay: 5
  # Poll the target namespace until at least one pod labelled app=collector
  # is listed (up to 30 retries, 5 seconds apart). Only the existence of the
  # pod object is checked here.
  - name: wait for collector to show up
    # Fully qualified name, matching the other kubernetes.core modules used
    # in this file.
    kubernetes.core.k8s_info:
      kubeconfig: "{{ ansible_facts['user_dir'] }}/kubeconfig-{{ cluster }}"
      validate_certs: false
      api_version: v1
      kind: Pod
      namespace: "{{ clusters[cluster].namespace }}"
      label_selectors:
        - app=collector
    register: collect_pod
    until:
      - collect_pod.resources is defined
      - (collect_pod.resources | length) > 0
    retries: 30
    delay: 5
  # Step 4 (any sort of corrections needed?): list every pod in the target
  # namespace whose phase is Pending; the result feeds the two fix-up tasks
  # that follow.
  - name: any pending pods?
    # Fully qualified name, matching the other kubernetes.core modules used
    # in this file.
    kubernetes.core.k8s_info:
      kubeconfig: "{{ ansible_facts['user_dir'] }}/kubeconfig-{{ cluster }}"
      validate_certs: false
      api_version: v1
      kind: Pod
      namespace: "{{ clusters[cluster].namespace }}"
      field_selectors:
        - status.phase=Pending
    register: pending_pods
  # Lower the CPU request of the first container in the sensor deployment to
  # 750m when a sensor pod is stuck in Pending.
  - name: fix pending sensor by decreasing requests
    kubernetes.core.k8s_json_patch:
      kubeconfig: "{{ ansible_facts['user_dir'] }}/kubeconfig-{{ cluster }}"
      validate_certs: false
      api_version: apps/v1
      kind: Deployment
      name: sensor
      namespace: "{{ clusters[cluster].namespace }}"
      patch:
        - op: replace
          path: /spec/template/spec/containers/0/resources/requests/cpu
          value: 750m
    # Look at every pending pod, not just the first one returned: the API
    # gives no ordering guarantee, and pods without an "app" label are
    # filtered out instead of raising an undefined-variable error.
    when: >-
      pending_pods.resources
      | selectattr('metadata.labels.app', 'defined')
      | selectattr('metadata.labels.app', 'equalto', 'sensor')
      | list | length > 0
  # Delete a fixed set of operator deployments when a collector pod is stuck
  # in Pending.
  # NOTE(review): presumably this frees node capacity so the collector pods
  # can be scheduled - confirm that removing these deployments is acceptable.
  - name: fix pending collectors by deleting random operators
    kubernetes.core.k8s:
      kubeconfig: "{{ ansible_facts['user_dir'] }}/kubeconfig-{{ cluster }}"
      validate_certs: false
      api_version: apps/v1
      kind: Deployment
      name: "{{ item.name }}"
      namespace: "{{ item.namespace }}"
      state: absent
    loop:
      - name: machine-api-operator
        namespace: openshift-machine-api
      - name: cluster-autoscaler-operator
        namespace: openshift-machine-api
      - name: cluster-baremetal-operator
        namespace: openshift-machine-api
      - name: csi-snapshot-controller-operator
        namespace: openshift-cluster-storage-operator
      - name: csi-snapshot-controller
        namespace: openshift-cluster-storage-operator
      - name: cluster-monitoring-operator
        namespace: openshift-monitoring
    # Look at every pending pod, not just the first one returned: the API
    # gives no ordering guarantee, and pods without an "app" label are
    # filtered out instead of raising an undefined-variable error.
    when: >-
      pending_pods.resources
      | selectattr('metadata.labels.app', 'defined')
      | selectattr('metadata.labels.app', 'equalto', 'collector')
      | list | length > 0
  # Step 5 (recheck the cluster in central - it should be healthy): poll the
  # cluster list until this cluster appears with a status that carries a
  # sensorVersion, retrying up to 30 times with a 5 second pause.
  # NOTE(review): certificate validation is disabled while a bearer token is
  # sent - confirm this is intended (e.g. self-signed endpoint).
  - name: check that the cluster is marked as discovered
    ansible.builtin.uri:
      method: GET
      return_content: true
      validate_certs: false
      url: "https://{{ api_ep }}/v1/clusters"
      headers:
        Authorization: "Bearer {{ api_token }}"
        Accept: application/json
    register: cluster_query_fin
    until:
      # Guard first: a failed or non-JSON reply has no .json/.clusters, and
      # dereferencing it would error the conditional instead of retrying.
      - cluster_query_fin.json is defined
      - cluster_query_fin.json.clusters is defined
      - cluster_query_fin.json.clusters | length > 0
      - >-
        (cluster_query_fin.json.clusters
        | items2dict(key_name='name', value_name='status'))[clusters[cluster].name]
        is defined
      - >-
        ((cluster_query_fin.json.clusters
        | items2dict(key_name='name', value_name='status'))[clusters[cluster].name]).sensorVersion
        is defined
    retries: 30
    delay: 5
...