From e1dc4989628829dc52e59853775785facf24c7c0 Mon Sep 17 00:00:00 2001
From: Laszlo Rafael <rlacko99@gmail.com>
Date: Sat, 10 Dec 2022 22:13:11 +0100
Subject: [PATCH] Update rke2 ansible stuff, rolling restart config

---
 ansible/k8s/tasks/rolling_restart.yaml      | 33 +++++++++++++--------
 ansible/k8s/templates/custom-config.yaml.j2 |  4 +++
 ansible/k8s/vars/main.yaml                  |  9 +++++-
 ansible/requirements.galaxy.yaml            |  2 +-
 4 files changed, 34 insertions(+), 14 deletions(-)

diff --git a/ansible/k8s/tasks/rolling_restart.yaml b/ansible/k8s/tasks/rolling_restart.yaml
index b6d86f9..e823174 100644
--- a/ansible/k8s/tasks/rolling_restart.yaml
+++ b/ansible/k8s/tasks/rolling_restart.yaml
@@ -6,7 +6,7 @@
     {{ rke2_data_path }}/bin/kubectl --kubeconfig /etc/rancher/rke2/rke2.yaml \
     cordon "{{ inventory_hostname }}" && \
     {{ rke2_data_path }}/bin/kubectl --kubeconfig /etc/rancher/rke2/rke2.yaml \
-    drain "{{ inventory_hostname }}" --ignore-daemonsets --delete-local-data
+    drain "{{ inventory_hostname }}" --ignore-daemonsets --delete-emptydir-data
   args:
     executable: /bin/bash
   register: drain
@@ -17,20 +17,12 @@
   changed_when: false
   delegate_to: "{{ active_server | default(groups[rke2_servers_group_name].0) }}"
   run_once: true
+  when: rke2_drain_node_during_upgrade
 
-- name: Stop RKE2 service on {{ inventory_hostname }}
+- name: Restart RKE2 service on {{ inventory_hostname }}
   ansible.builtin.service:
     name: "rke2-{{ rke2_type }}.service"
-    state: stopped
-
-- name: Reboot the server on kernel update
-  reboot:
-    msg: "Reboot initiated by Ansible for kernel updates"
-    connect_timeout: 5
-    reboot_timeout: 300
-    pre_reboot_delay: 0
-    post_reboot_delay: 30
-    test_command: uptime
+    state: restarted
 
 - name: Wait for all nodes to be ready again
   ansible.builtin.shell: |
@@ -58,3 +50,20 @@
   delegate_to: "{{ active_server | default(groups[rke2_servers_group_name].0) }}"
   run_once: true
   when: rke2_drain_node_during_upgrade
+
+- name: Wait for all pods to be ready again
+  ansible.builtin.shell: |
+    set -o pipefail
+    {{ rke2_data_path }}/bin/kubectl --kubeconfig /etc/rancher/rke2/rke2.yaml get pods -A --field-selector=metadata.namespace!=kube-system | grep -iE "crash|error|init|terminating" | wc -l
+  args:
+    executable: /bin/bash
+  failed_when: "all_pods_ready.rc not in [ 0, 1 ]"
+  changed_when: false
+  register: all_pods_ready
+  until:
+    'all_pods_ready.stdout | trim == "0"'
+  retries: 100
+  delay: 15
+  delegate_to: "{{ active_server | default(groups[rke2_servers_group_name].0) }}"
+  run_once: true
+  when: rke2_wait_for_all_pods_to_be_ready
diff --git a/ansible/k8s/templates/custom-config.yaml.j2 b/ansible/k8s/templates/custom-config.yaml.j2
index 6f2bc86..70aae17 100644
--- a/ansible/k8s/templates/custom-config.yaml.j2
+++ b/ansible/k8s/templates/custom-config.yaml.j2
@@ -2,6 +2,7 @@
 server: https://{{ rke2_api_ip }}:9345
 {% endif %}
 token: {{ rke2_token }}
+data-dir: {{ rke2_data_path }}
 {% if inventory_hostname in groups[rke2_servers_group_name] %}
 cni: {{ rke2_cni }}
 disable-kube-proxy: {{ rke2_disable_kube_proxy }}
@@ -32,6 +33,9 @@ node-label:
 {% endif %}
 snapshotter: {{ rke2_snapshooter }}
 node-name: {{ inventory_hostname }}
+{% if 'cis' in rke2_cis_profile %}
+profile: {{ rke2_cis_profile }}
+{% endif %}
 {% if rke2_server_options is defined and inventory_hostname in groups[rke2_servers_group_name] %}
 {% for option in rke2_server_options %}
 {{ option }}
diff --git a/ansible/k8s/vars/main.yaml b/ansible/k8s/vars/main.yaml
index bff8495..36ef6d1 100644
--- a/ansible/k8s/vars/main.yaml
+++ b/ansible/k8s/vars/main.yaml
@@ -56,7 +56,7 @@ rke2_server_taint: true
 # rke2_token: Set in main.yaml.secret
 
 # RKE2 version
-rke2_version: v1.24.3+rke2r1
+rke2_version: v1.25.3+rke2r1
 
 # URL to RKE2 repository
 rke2_channel_url: https://update.rke2.io/v1-release/channels
@@ -158,6 +158,10 @@ rke2_snapshooter: overlayfs
 rke2_cni: cilium
 rke2_disable_kube_proxy: true
 
+# Validate system configuration against the selected benchmark
+# (Supported value is "cis-1.23" or eventually "cis-1.6" if you are running RKE2 prior to 1.25)
+rke2_cis_profile: "cis-1.23"
+
 # Download Kubernetes config file to the Ansible controller
 rke2_download_kubeconf: false
 
@@ -205,3 +209,6 @@ rke2_agent_options:
 
 # Cordon, drain the node which is being upgraded. Uncordon the node once the RKE2 upgraded
 rke2_drain_node_during_upgrade: true
+
+# Wait for all pods to be ready after rke2-service restart during rolling restart.
+rke2_wait_for_all_pods_to_be_ready: true
diff --git a/ansible/requirements.galaxy.yaml b/ansible/requirements.galaxy.yaml
index bf92599..d9d966b 100644
--- a/ansible/requirements.galaxy.yaml
+++ b/ansible/requirements.galaxy.yaml
@@ -16,4 +16,4 @@ roles:
   - name: lablabs.rke2
     src: https://github.com/lablabs/ansible-role-rke2.git
     scm: git
-    version: 1.14.2
+    version: 1.18.1
-- 
GitLab