Browse files

added a cluster federation role

Grega Bremec 3 years ago
parent
commit
183f728f32

+ 57 - 0
federate-clusters.yml

@@ -0,0 +1,57 @@
+---
+- name: Make sure all secured clusters are federated
+  hosts: workstation.lab.example.com
+  gather_subset: min
+  become: no
+  vars:
+    clusters:
+      ocp4:
+        name: rhacs-cluster
+        namespace: rhacs
+        method: operator
+      sno1:
+        name: sno1
+        namespace: stackrox
+        method: roxctl
+      k3s:
+        name: k3s
+        namespace: stackrox
+        method: helm
+    pull_user: '|uhc-pool-9f005376-36a2-42a1-a540-0473dc873633'
+    pull_pass: 'eyJhbGciOiJSUzUxMiJ9.eyJzdWIiOiI5ODg5YWExYzBmOWI0ZjU3Yjk3NmI5ZWEyMzIwZjU0MCJ9.sZd9TMQo0WDG65G9BMNnknbPcFB36hrDXd18_u3KxqZs7e8maC_PxAQxjpuY4aS6TDHnLC5jFb4Q5qXTJVn2B8a8p1ncO-3n2HnPt876i-UACSyeukioy8hr4WwufHQxUX2lqXXXv3znQ7jmTECAsnkZCQHU5tSiFsTdxEdfdyN6gm17ujckadnMlPYq6_SR6mKKiJTtT7HYCYrAVNsgK_4adgc-EpelKGlcDEi34hXo1jlB3DDrQi1ILBWE0fGWoW3g-YS1F0YD_74nmWINfPMc3nT8DZAit8L4VQONvgPNubu_MUFPhj_oUR1wUDtkPQ6KIvo6Qf2FGp2wK3PzbtADQsEVSd9HO47kDJtahnOya1fFgjeY5lSq-moOdjRWBgu6_3H_nedLIGYPDtAfzypbuxvupGu3hXZusyctiDmGm7JDyEnJv1uDVfaWnSa3I_MpTRUW2ee5D_Bjrey7eSb4lJFrjux-gcbUhqlXbYsizk7WXzoFkkTYLuqCCQoKRt8WR7U3Nhwst6rEwxANibENSsQPw0h8_CCFnjLqRN_6qjSsKiydFOkGTYbOKZNKZIUabFEN4aaTUbiXMWOKa2jMq-Hpk0L4J0RkN3bdAUjZkDDq4dV5eQcusHy_KcogwUJJ6x03Nns8oLAv4IgtJyyqra5aBGJLQxscEuR740Y'
+  tasks:
+    - name: find api endpoint
+      stat:
+        path: "{{ ansible_facts['user_dir'] }}/api-endpoint"
+      register: api_endpoint_is_there
+
+    - assert:
+        that: api_endpoint_is_there.stat.exists
+        fail_msg: "ERROR: api-endpoint file not found. This usually means prerequisites (such as central deployment) are not met."
+        success_msg: "OK: api-endpoint found."
+
+    - name: load api endpoint
+      set_fact:
+        api_ep: "{{ lookup('file', ansible_facts['user_dir'] + '/api-endpoint') }}"
+
+    - name: find api token
+      stat:
+        path: "{{ ansible_facts['user_dir'] }}/api-token"
+      register: api_token_is_there
+
+    - assert:
+        that: api_token_is_there.stat.exists
+        fail_msg: "ERROR: api-token file not found. This usually means prerequisites (such as central deployment) are not met."
+        success_msg: "OK: api-token found."
+
+    - name: load api token
+      set_fact:
+        api_token: "{{ lookup('file', ansible_facts['user_dir'] + '/api-token') }}"
+
+    - include_role:
+        name: federate-clusters
+      tags: federate
+      loop: "{{ clusters.keys() | list }}"
+      loop_control:
+        loop_var: cluster
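+    # Usage (a sketch; inventory and runtime specifics are not part of this
+    # commit): ansible-playbook federate-clusters.yml
+    # Requires the kubernetes.core collection and, on workstation, the files
+    # ~/api-endpoint, ~/api-token, and ~/kubeconfig-<cluster> for each cluster.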
+...

+ 375 - 0
roles/federate-clusters/tasks/main.yml

@@ -0,0 +1,375 @@
+---
+# some data sanity checks
+- assert:
+    that: cluster is defined
+    fail_msg: "ERROR: Variable cluster is not defined, but is required."
+    success_msg: "OK, cluster is defined - federating {{ cluster }}"
+
+- assert:
+    that: clusters is defined and (clusters.keys() | length) > 0 and clusters[cluster] is defined
+    fail_msg: "ERROR: Variable clusters is not defined or is missing cluster {{ cluster }}, but is required."
+    success_msg: "OK, clusters are defined and cluster is found."
+
+- assert:
+    that: api_ep is defined
+    fail_msg: "ERROR: Variable api_ep is not defined, but is required."
+    success_msg: "OK, api_ep is defined."
+
+- assert:
+    that: api_token is defined
+    fail_msg: "ERROR: Variable api_token is not defined, but is required."
+    success_msg: "OK, api_token is defined."
+
+# is there anything to do?
+- name: check for cluster definitions in central
+  uri:
+    method: GET
+    return_content: true
+    validate_certs: false
+    url: "https://{{ api_ep }}/v1/clusters"
+    headers:
+      Authorization: Bearer {{ api_token }}
+      Accept: application/json
+  register: cluster_query
+
+- name: assume cluster isn't found in the result
+  set_fact:
+    cluster_found: false
+
+- name: unless found
+  set_fact:
+    cluster_found: true
+  when:
+    - cluster_query.json.clusters | length > 0
+    - (cluster_query.json.clusters | items2dict(key_name='name', value_name='status'))[clusters[cluster].name] is defined
+    - ((cluster_query.json.clusters | items2dict(key_name='name', value_name='status'))[clusters[cluster].name]).sensorVersion is defined
+    # (this last one is because roxctl creates a cluster record but leaves its status at null until services check in)
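+    # For reference, the (abridged) response shape this parsing relies on:
+    #   {"clusters": [{"name": "sno1", "status": {"sensorVersion": "..."}}]}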
+
+# step 1: we could have lots of fun (authentication in place)
+- block:
+
+  - name: init bundle check
+    uri:
+      method: GET
+      return_content: true
+      validate_certs: false
+      url: "https://{{ api_ep }}/v1/cluster-init/init-bundles"
+      headers:
+        Authorization: Bearer {{ api_token }}
+        Accept: application/json
+    register: init_bundle_response
+
+  - name: assume init bundle isn't there
+    set_fact:
+      init_bundle_present: false
+
+  - name: unless found
+    set_fact:
+      init_bundle_present: true
+    when:
+      - init_bundle_response.json['items'] | length > 0
+      - (init_bundle_response.json['items'] | items2dict(key_name='name', value_name='expiresAt'))[clusters[cluster].name] is defined
+
+  - name: generate init bundle
+    uri:
+      method: POST
+      return_content: true
+      validate_certs: false
+      url: "https://{{ api_ep }}/v1/cluster-init/init-bundles"
+      headers:
+        Authorization: Bearer {{ api_token }}
+        Accept: application/json
+        Content-Type: application/json
+      body: '{"name":"{{ clusters[cluster].name | string }}"}'
+    register: init_bundle_content
+    when:
+      - not init_bundle_present
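+  # (the POST response carries the bundle in several base64-encoded forms; the
+  # two consumed below are kubectlBundle, raw manifests for the operator
+  # method, and helmValuesBundle, a helm values file for the helm method)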
+
+  - name: store init bundle - operator
+    copy:
+      dest: "{{ ansible_facts['user_dir'] }}/{{ cluster }}-init-bundle.yaml"
+      content: "{{ init_bundle_content.json.kubectlBundle | b64decode }}"
+      owner: "{{ ansible_user }}"
+      group: "{{ ansible_user }}"
+      mode: '0600'
+    when:
+      - not init_bundle_present
+      - clusters[cluster].method == 'operator'
+
+  - name: store init bundle - helm
+    copy:
+      dest: "{{ ansible_facts['user_dir'] }}/{{ cluster }}-helm-bundle.yaml"
+      content: "{{ init_bundle_content.json.helmValuesBundle | b64decode }}"
+      owner: "{{ ansible_user }}"
+      group: "{{ ansible_user }}"
+      mode: '0600'
+    when:
+      - not init_bundle_present
+      - clusters[cluster].method == 'helm'
+
+  - name: make sure namespace is there
+    kubernetes.core.k8s:
+      kubeconfig: "{{ ansible_facts['user_dir'] }}/kubeconfig-{{ cluster }}"
+      validate_certs: no
+      api_version: v1
+      kind: Namespace
+      name: "{{ clusters[cluster].namespace }}"
+      namespace: ""
+      state: present
+
+  - name: create init bundle
+    kubernetes.core.k8s:
+      kubeconfig: "{{ ansible_facts['user_dir'] }}/kubeconfig-{{ cluster }}"
+      validate_certs: no
+      src: "{{ ansible_facts['user_dir'] }}/{{ cluster }}-init-bundle.yaml"
+      namespace: "{{ clusters[cluster].namespace }}"
+    when:
+      - clusters[cluster].method == 'operator'
+
+  when:
+    - clusters[cluster].method in ['operator', 'helm']
+    - not cluster_found
+
+  # no init bundles for method 'roxctl'
+
+# step 2: there's so much we can do (not really, just make sure artifacts are either present or created)
+
+# operator has its securedcluster resource
+- block:
+  - name: securedcluster cr check
+    kubernetes.core.k8s_info:
+      kubeconfig: "{{ ansible_facts['user_dir'] }}/kubeconfig-{{ cluster }}"
+      validate_certs: no
+      api_version: platform.stackrox.io/v1alpha1
+      kind: SecuredCluster
+      namespace: "{{ clusters[cluster].namespace }}"
+      name: "{{ clusters[cluster].name }}"
+    register: secured_cr
+
+  - name: create cr resource
+    kubernetes.core.k8s:
+      kubeconfig: "{{ ansible_facts['user_dir'] }}/kubeconfig-{{ cluster }}"
+      validate_certs: no
+      template: templates/securedcluster-cr.yml
+    when: secured_cr.resources | length == 0
+
+  when:
+    - clusters[cluster].method == 'operator'
+    - not cluster_found
+
+# roxctl doesn't really leave any specific signature, so check for sensor.sh
+- block:
+  - name: sensor.sh check
+    stat:
+      path: "{{ ansible_facts['user_dir'] }}/{{ cluster }}-secured/sensor.sh"
+    register: sensor_script_present
+
+  - name: also check for existing sensor deployments
+    kubernetes.core.k8s_info:
+      kubeconfig: "{{ ansible_facts['user_dir'] }}/kubeconfig-{{ cluster }}"
+      validate_certs: no
+      api_version: apps/v1
+      kind: Deployment
+      namespace: "{{ clusters[cluster].namespace }}"
+      label_selectors:
+        - app.kubernetes.io/instance=stackrox-secured-cluster-services
+    register: sensor_deployments_present
+
+  - name: create sensor.sh resources
+    ansible.builtin.command:
+      argv:
+        - /usr/local/bin/roxctl
+        - -e
+        - "{{ api_ep }}"
+        - --token-file={{ ansible_facts['user_dir'] }}/api-token
+        - sensor
+        - generate
+        - openshift
+        - --openshift-version=4
+        - --admission-controller-scan-inline=true
+        - --admission-controller-timeout=10
+        - --admission-controller-listen-on-events
+        - --admission-controller-listen-on-creates
+        - --admission-controller-listen-on-updates
+        - --central={{ api_ep }}
+        - --collection-method=kernel-module
+        - --slim-collector=true
+        - --name={{ clusters[cluster].name }}
+        - --output-dir=./{{ clusters[cluster].name }}-secured
+      chdir: "{{ ansible_facts['user_dir'] }}"
+    when: not sensor_script_present.stat.exists
+
+  - name: apply sensor.sh resources
+    ansible.builtin.command:
+      argv:
+        - /usr/bin/env
+        - REGISTRY_USERNAME={{ pull_user }}
+        - REGISTRY_PASSWORD={{ pull_pass }}
+        - KUBECONFIG={{ ansible_facts['user_dir'] }}/kubeconfig-{{ cluster }}
+        - "{{ ansible_facts['user_dir'] }}/{{ clusters[cluster].name }}-secured/sensor.sh"
+      chdir: "{{ ansible_facts['user_dir'] }}"
+    when: sensor_deployments_present.resources | length < 2
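+  # (sensor.sh reads REGISTRY_USERNAME and REGISTRY_PASSWORD to create the
+  # image pull secrets in the target namespace if they are missing)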
+
+  when:
+    - clusters[cluster].method == 'roxctl'
+    - not cluster_found
+
+# helm uses some undecipherable configmaps, so we need to use the binary
+- block:
+  - name: helm chart check
+    ansible.builtin.command:
+      argv:
+        - /usr/bin/env
+        - KUBECONFIG={{ ansible_facts['user_dir'] }}/kubeconfig-{{ cluster }}
+        - /usr/local/bin/helm
+        - -n
+        - stackrox
+        - list
+        - -o
+        - json
+        - --filter
+        - stackrox.*services
+      chdir: "{{ ansible_facts['user_dir'] }}"
+    register: helm_chart_status
+
+  - name: assume chart isn't there
+    set_fact:
+      helm_chart_present: false
+
+  - name: unless proven otherwise
+    set_fact:
+      helm_chart_present: true
+    when:
+      - helm_chart_status.stdout | from_json | list | length > 0
+      - (helm_chart_status.stdout | from_json | list)[0].status == "deployed"
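+  # helm list -o json prints a JSON array, e.g. (abridged):
+  #   [{"name": "stackrox-secured-cluster-services", "status": "deployed"}]
+  # an empty array therefore means the chart was never installed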
+
+  - name: create helm vars
+    template:
+      src: templates/helm-vars.yml
+      dest: "{{ ansible_facts['user_dir'] }}/{{ clusters[cluster].name }}-helm-vars.yaml"
+      mode: '0600'
+      owner: "{{ ansible_user }}"
+      group: "{{ ansible_user }}"
+    when:
+      - not helm_chart_present
+
+  - name: check the repo
+    ansible.builtin.command:
+      argv:
+        - /usr/bin/env
+        - KUBECONFIG={{ ansible_facts['user_dir'] }}/kubeconfig-{{ cluster }}
+        - /usr/local/bin/helm
+        - repo
+        - list
+        - -o
+        - json
+      chdir: "{{ ansible_facts['user_dir'] }}"
+    ignore_errors: yes
+    register: repo_is_there
+
+  - name: show repo query result
+    debug:
+      var: repo_is_there
+
+  - name: add the repo
+    ansible.builtin.command:
+      argv:
+        - /usr/bin/env
+        - KUBECONFIG={{ ansible_facts['user_dir'] }}/kubeconfig-{{ cluster }}
+        - /usr/local/bin/helm
+        - repo
+        - add
+        - rhacs
+        - https://mirror.openshift.com/pub/rhacs/charts/
+      chdir: "{{ ansible_facts['user_dir'] }}"
+    when: repo_is_there.failed or (repo_is_there.stdout | from_json | list | length) == 0
+
+  - name: apply helm chart
+    ansible.builtin.command:
+      argv:
+        - /usr/bin/env
+        - KUBECONFIG={{ ansible_facts['user_dir'] }}/kubeconfig-{{ cluster }}
+        - /usr/local/bin/helm
+        - install
+        - -n
+        - stackrox
+        - --create-namespace
+        - stackrox-secured-cluster-services
+        - rhacs/secured-cluster-services
+        - -f
+        - "{{ ansible_facts['user_dir'] }}/{{ clusters[cluster].name }}-helm-bundle.yaml"
+        - -f
+        - "{{ ansible_facts['user_dir'] }}/{{ clusters[cluster].name }}-helm-vars.yaml"
+      chdir: "{{ ansible_facts['user_dir'] }}"
+    when:
+      - not helm_chart_present
+
+  when:
+    - clusters[cluster].method == 'helm'
+    - not cluster_found
+
+# step 3: there is just you and me (any corrections needed? pending pods?)
+- name: any pending pods?
+  kubernetes.core.k8s_info:
+    kubeconfig: "{{ ansible_facts['user_dir'] }}/kubeconfig-{{ cluster }}"
+    validate_certs: no
+    api_version: v1
+    kind: Pod
+    namespace: "{{ clusters[cluster].namespace }}"
+    field_selectors:
+      - status.phase=Pending
+  register: pending_pods
+
+- name: fix pending sensor by decreasing requests
+  kubernetes.core.k8s_json_patch:
+    kubeconfig: "{{ ansible_facts['user_dir'] }}/kubeconfig-{{ cluster }}"
+    validate_certs: no
+    api_version: apps/v1
+    kind: Deployment
+    name: sensor
+    namespace: "{{ clusters[cluster].namespace }}"
+    patch:
+      - op: replace
+        path: /spec/template/spec/containers/0/resources/requests/cpu
+        value: 750m
+  when:
+    - (pending_pods.resources | length) > 0
+    - pending_pods.resources[0].metadata.labels.app == 'sensor'
+
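+# (on the resource-constrained lab clusters, collector pods can stay Pending
+# for lack of CPU; removing a few expendable operator deployments frees enough
+# requests for them to schedule - intent inferred from the fixed list below)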
+- name: fix pending collectors by deleting non-essential operators
+  kubernetes.core.k8s:
+    kubeconfig: "{{ ansible_facts['user_dir'] }}/kubeconfig-{{ cluster }}"
+    validate_certs: no
+    api_version: apps/v1
+    kind: Deployment
+    name: "{{ item.name }}"
+    namespace: "{{ item.namespace }}"
+    state: absent
+  loop:
+    - name: cluster-autoscaler-operator
+      namespace: openshift-machine-api
+    - name: cluster-baremetal-operator
+      namespace: openshift-machine-api
+    - name: csi-snapshot-controller-operator
+      namespace: openshift-cluster-storage-operator
+    - name: csi-snapshot-controller
+      namespace: openshift-cluster-storage-operator
+  when:
+    - (pending_pods.resources | length) > 0
+    - pending_pods.resources[0].metadata.labels.app == 'collector'
+
+# step 4: i can give you more (wait for the pods to pop up)
+- name: wait for sensor to show up
+  debug:
+    msg: waiting for sensor
+
+- name: wait for admission-control to show up
+  debug:
+    msg: waiting for admission-control
+
+- name: wait for collector to show up
+  debug:
+    msg: waiting for collector
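+# A first cut at the waits above (a sketch: the retries/delay values are
+# assumptions; admission-control would follow the same pattern, and collector
+# is a DaemonSet rather than a Deployment):
+- name: wait for the sensor deployment to become available
+  kubernetes.core.k8s_info:
+    kubeconfig: "{{ ansible_facts['user_dir'] }}/kubeconfig-{{ cluster }}"
+    validate_certs: no
+    api_version: apps/v1
+    kind: Deployment
+    name: sensor
+    namespace: "{{ clusters[cluster].namespace }}"
+  register: sensor_deployment
+  until: >-
+    sensor_deployment.resources | length > 0 and
+    (sensor_deployment.resources[0].status.availableReplicas | default(0)) > 0
+  retries: 30
+  delay: 10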
+
+
+# step 5: don't you know the time has arrived (just recheck the cluster in central - it should be healthy)
+
+...

+ 19 - 0
roles/federate-clusters/templates/helm-vars.yml

@@ -0,0 +1,19 @@
+---
+clusterName: {{ clusters[cluster].name }}
+centralEndpoint: wss://{{ api_ep }}
+collector:
+  collectionMethod: EBPF
+  slimMode: false
+admissionControl:
+  listenOnCreates: true
+  listenOnUpdates: true
+  listenOnEvents: true
+  dynamic:
+    scanInline: true
+    disableBypass: false
+    timeout: 10
+# This would have otherwise gone to "values-private.yaml"...
+imagePullSecrets:
+  username: '{{ pull_user }}'
+  password: '{{ pull_pass }}'
+...

+ 21 - 0
roles/federate-clusters/templates/securedcluster-cr.yml

@@ -0,0 +1,21 @@
+apiVersion: platform.stackrox.io/v1alpha1
+kind: SecuredCluster
+metadata:
+  name: {{ clusters[cluster].name }}
+  namespace: {{ clusters[cluster].namespace }}
+spec:
+  centralEndpoint: ""
+  clusterName: {{ clusters[cluster].name }}
+  admissionControl:
+    listenOnCreates: true
+    listenOnEvents: true
+    listenOnUpdates: true
+    bypass: BreakGlassAnnotation
+    contactImageScanners: ScanIfMissing
+    timeoutSeconds: 10
+  auditLogs:
+    collection: Auto
+  perNode:
+    collector:
+      collection: KernelModule
+      imageFlavor: Regular