main.yml 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203
  1. ---
  # This "role" is meant to be included, not applied on its own. It requires
  # the "clusters" data structure and a "cluster" variable that names one of
  # the members of "clusters"; that cluster is then set up in accordance with
  # its entry in the data structure. The "api_ep" and "api_token" variables
  # (endpoint and bearer token for the central API) are required as well.
  #
  # Sanity checks: fail early, with a readable message, when a required input
  # is missing. Conditions are listed one per line so that a failure reports
  # the exact check that did not hold (assert stops at the first false one).
  - name: verify that the target cluster is named
    ansible.builtin.assert:
      that:
        - cluster is defined
      fail_msg: "ERROR: Variable cluster is not defined, but is required."
      success_msg: "OK, cluster is defined - federating {{ cluster }}"

  - name: verify that the cluster is described in the clusters structure
    ansible.builtin.assert:
      that:
        - clusters is defined
        - (clusters.keys() | length) > 0
        - clusters[cluster] is defined
      fail_msg: "ERROR: Variable clusters is not defined or is missing cluster {{ cluster }}, but is required."
      success_msg: "OK, clusters are defined and cluster is found."

  # name, method and namespace are all dereferenced by the tasks further down,
  # so a missing attribute is reported here rather than as a templating error.
  - name: verify that the cluster entry carries the attributes used below
    ansible.builtin.assert:
      that:
        - clusters[cluster].name is defined
        - clusters[cluster].method is defined
        - clusters[cluster].namespace is defined
      fail_msg: "ERROR: clusters[{{ cluster }}] must define name, method and namespace."
      success_msg: "OK, cluster {{ cluster }} defines name, method and namespace."

  - name: verify that the central API endpoint is known
    ansible.builtin.assert:
      that:
        - api_ep is defined
      fail_msg: "ERROR: Variable api_ep is not defined, but is required."
      success_msg: "OK, api_ep is defined."

  - name: verify that an API token is available
    ansible.builtin.assert:
      that:
        - api_token is defined
      fail_msg: "ERROR: Variable api_token is not defined, but is required."
      success_msg: "OK, api_token is defined."
  # Is there anything to do? Fetch the list of clusters central already knows
  # about; the answer is kept in cluster_query for the tasks that follow.
  - name: check for cluster definitions in central
    ansible.builtin.uri:
      url: "https://{{ api_ep }}/v1/clusters"
      method: GET
      headers:
        Accept: "application/json"
        Authorization: "Bearer {{ api_token }}"
      return_content: true
      # certificate verification is switched off for this request
      validate_certs: false
    register: cluster_query
  # Start from "not found"; the second task flips the fact only when central
  # lists the cluster by name and its status carries a sensorVersion.
  - name: assume cluster isn't found in the result
    ansible.builtin.set_fact:
      cluster_found: false

  # The sensorVersion test is there because roxctl creates a cluster record
  # but leaves its status at null until services check in.
  - name: unless found
    when:
      - (cluster_query.json.clusters | length) > 0
      - >-
        (cluster_query.json.clusters
        | items2dict(key_name='name', value_name='status'))[clusters[cluster].name]
        is defined
      - >-
        ((cluster_query.json.clusters
        | items2dict(key_name='name', value_name='status'))[clusters[cluster].name]).sensorVersion
        is defined
    ansible.builtin.set_fact:
      cluster_found: true
  # step 1: authentication in place. Only the 'operator' and 'helm' methods
  # go through init-bundles.yml; there are no init bundles for method 'roxctl'.
  # Skipped entirely when central already reports the cluster.
  - name: check for missing init bundles
    when:
      - clusters[cluster].method in ['operator', 'helm']
      - not cluster_found
    ansible.builtin.include_tasks:
      file: init-bundles.yml
  # step 2: make sure artifacts are either present or created. The work is
  # delegated to the task file named after the cluster's method
  # ("<method>.yml"), and only when central does not know the cluster yet.
  - name: use corresponding method to provision the cluster
    when: not cluster_found
    ansible.builtin.include_tasks:
      file: "{{ clusters[cluster].method }}.yml"
  # step 3: wait for pods to pop up. Polls the cluster's namespace for pods
  # labelled app=sensor, up to 30 times with 5 seconds between attempts, until
  # at least one is returned.
  - name: wait for sensor to show up
    kubernetes.core.k8s_info:
      kubeconfig: "{{ ansible_facts['user_dir'] }}/kubeconfig-{{ cluster }}"
      validate_certs: false
      namespace: "{{ clusters[cluster].namespace }}"
      api_version: v1
      kind: pod
      label_selectors:
        - "app=sensor"
    register: sensor_pod
    retries: 30
    delay: 5
    until: (sensor_pod.resources | default([]) | length) > 0
  # Polls the cluster's namespace for pods labelled app=admission-control, up
  # to 30 times with 5 seconds between attempts, until at least one is returned.
  - name: wait for admission-control to show up
    kubernetes.core.k8s_info:
      kubeconfig: "{{ ansible_facts['user_dir'] }}/kubeconfig-{{ cluster }}"
      validate_certs: false
      namespace: "{{ clusters[cluster].namespace }}"
      api_version: v1
      kind: pod
      label_selectors:
        - "app=admission-control"
    register: admctl_pod
    retries: 30
    delay: 5
    until: (admctl_pod.resources | default([]) | length) > 0
  # Polls the cluster's namespace for pods labelled app=collector, up to 30
  # times with 5 seconds between attempts, until at least one is returned.
  - name: wait for collector to show up
    kubernetes.core.k8s_info:
      kubeconfig: "{{ ansible_facts['user_dir'] }}/kubeconfig-{{ cluster }}"
      validate_certs: false
      namespace: "{{ clusters[cluster].namespace }}"
      api_version: v1
      kind: pod
      label_selectors:
        - "app=collector"
    register: collect_pod
    retries: 30
    delay: 5
    until: (collect_pod.resources | default([]) | length) > 0
  # step 4: any sort of corrections needed? List the pods in the cluster's
  # namespace whose phase is Pending; the result is kept in pending_pods for
  # the corrective tasks that follow.
  - name: any pending pods?
    kubernetes.core.k8s_info:
      kubeconfig: "{{ ansible_facts['user_dir'] }}/kubeconfig-{{ cluster }}"
      validate_certs: false
      namespace: "{{ clusters[cluster].namespace }}"
      api_version: v1
      kind: pod
      field_selectors:
        - "status.phase=Pending"
    register: pending_pods
  # Lower the CPU request of the first container in the sensor deployment to
  # 750m when a sensor pod is stuck in Pending. Every pending pod is inspected
  # (not only the first one returned), and pods without an "app" label are
  # skipped instead of breaking the condition.
  - name: fix pending sensor by decreasing requests
    kubernetes.core.k8s_json_patch:
      kubeconfig: "{{ ansible_facts['user_dir'] }}/kubeconfig-{{ cluster }}"
      validate_certs: false
      api_version: apps/v1
      kind: deployment
      name: sensor
      namespace: "{{ clusters[cluster].namespace }}"
      patch:
        # "replace" requires the cpu request to be present on container 0
        - op: replace
          path: /spec/template/spec/containers/0/resources/requests/cpu
          value: 750m
    when: >-
      pending_pods.resources
      | selectattr('metadata.labels.app', 'defined')
      | selectattr('metadata.labels.app', 'equalto', 'sensor')
      | list | length > 0
  # Remove the listed operator deployments when a collector pod is stuck in
  # Pending. Every pending pod is inspected (not only the first one returned),
  # and pods without an "app" label are skipped instead of breaking the
  # condition.
  # NOTE(review): presumably the deletions free up schedulable capacity for
  # the collectors - confirm; this is destructive for the target cluster.
  - name: fix pending collectors by deleting random operators
    kubernetes.core.k8s:
      kubeconfig: "{{ ansible_facts['user_dir'] }}/kubeconfig-{{ cluster }}"
      validate_certs: false
      api_version: apps/v1
      kind: deployment
      name: "{{ item.name }}"
      namespace: "{{ item.namespace }}"
      state: absent
    loop:
      - name: machine-api-operator
        namespace: openshift-machine-api
      - name: cluster-autoscaler-operator
        namespace: openshift-machine-api
      - name: cluster-baremetal-operator
        namespace: openshift-machine-api
      - name: csi-snapshot-controller-operator
        namespace: openshift-cluster-storage-operator
      - name: csi-snapshot-controller
        namespace: openshift-cluster-storage-operator
      - name: cluster-monitoring-operator
        namespace: openshift-monitoring
    when: >-
      pending_pods.resources
      | selectattr('metadata.labels.app', 'defined')
      | selectattr('metadata.labels.app', 'equalto', 'collector')
      | list | length > 0
  # step 5: recheck the cluster in central - it should be healthy by now.
  # The query is repeated up to 30 times, 5 seconds apart, until central lists
  # the cluster by name with a sensorVersion in its status.
  - name: check that the cluster is marked as discovered
    ansible.builtin.uri:
      url: "https://{{ api_ep }}/v1/clusters"
      method: GET
      headers:
        Accept: "application/json"
        Authorization: "Bearer {{ api_token }}"
      return_content: true
      # certificate verification is switched off for this request
      validate_certs: false
    register: cluster_query_fin
    retries: 30
    delay: 5
    until:
      - (cluster_query_fin.json.clusters | length) > 0
      - >-
        (cluster_query_fin.json.clusters
        | items2dict(key_name='name', value_name='status'))[clusters[cluster].name]
        is defined
      - >-
        ((cluster_query_fin.json.clusters
        | items2dict(key_name='name', value_name='status'))[clusters[cluster].name]).sensorVersion
        is defined
  # Set the replica count of the admission-control deployment from the
  # cluster's admission_instances value, falling back to 3 when it is not set.
  - name: scale admission controller to desired number
    kubernetes.core.k8s:
      kubeconfig: "{{ ansible_facts['user_dir'] }}/kubeconfig-{{ cluster }}"
      validate_certs: false
      namespace: "{{ clusters[cluster].namespace }}"
      api_version: apps/v1
      kind: deployment
      name: admission-control
      merge_type: merge
      resource_definition:
        spec:
          replicas: "{{ (clusters[cluster].admission_instances | default(3)) | int }}"
  186. ...