diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index ae6bf739410bcfa739f3723e2d965a4f9275930e..4746ac77fc3e9ec68a1588043e15b52066c973c0 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -87,3 +87,13 @@ configure_k8s:
     - cd $ANSIBLE_ROOT
     - ansible-galaxy install -r requirements.galaxy.yaml
     - ansible-playbook k8s/base.yaml
+
+upgrade_kernel_w_drain:
+  stage: ansible
+  extends: .ansible
+  needs: []
+  when: manual
+  script:
+    - cd $ANSIBLE_ROOT
+    - ansible-galaxy install -r requirements.galaxy.yaml
+    - ansible-playbook k8s/kernel_update.yaml
diff --git a/ansible/k8s/kernel_update.yaml b/ansible/k8s/kernel_update.yaml
new file mode 100644
--- /dev/null
+++ b/ansible/k8s/kernel_update.yaml
@@ -0,0 +1,13 @@
+---
+# Upgrade packages on the k8s nodes and, when a reboot is required,
+# drain and reboot them via tasks/upgrade_kernel.yaml.
+
+- hosts: [k8s_02_kube]
+  become: true
+  vars_files:
+    - "vars/main.yaml"
+    - "vars/main.yaml.secret"
+  tasks:
+    - name: Rolling update k8s kernel
+      # Path is relative to this playbook; the task file is tasks/upgrade_kernel.yaml.
+      ansible.builtin.include_tasks: tasks/upgrade_kernel.yaml
diff --git a/ansible/k8s/tasks/rolling_restart.yaml b/ansible/k8s/tasks/rolling_restart.yaml
new file mode 100644
--- /dev/null
+++ b/ansible/k8s/tasks/rolling_restart.yaml
@@ -0,0 +1,68 @@
+---
+# Restart one cluster node: cordon and drain it, stop RKE2, reboot, wait until
+# the cluster reports all nodes Ready, then uncordon. kubectl runs on
+# `active_server` or, if unset, the first host of the servers group.
+
+# NOTE(review): newer kubectl releases deprecate --delete-local-data in favour
+# of --delete-emptydir-data; confirm against the kubectl shipped with RKE2.
+- name: Cordon and Drain the node {{ inventory_hostname }}
+  ansible.builtin.shell: |
+    set -o pipefail
+    {{ rke2_data_path }}/bin/kubectl --kubeconfig /etc/rancher/rke2/rke2.yaml \
+    cordon "{{ inventory_hostname }}" && \
+    {{ rke2_data_path }}/bin/kubectl --kubeconfig /etc/rancher/rke2/rke2.yaml \
+    drain "{{ inventory_hostname }}" --ignore-daemonsets --delete-local-data
+  args:
+    executable: /bin/bash
+  register: drain
+  until:
+    - drain.stdout is search('drained')
+  retries: 100
+  delay: 15
+  changed_when: false
+  delegate_to: "{{ active_server | default(groups[rke2_servers_group_name].0) }}"
+  run_once: true
+
+- name: Stop RKE2 service on {{ inventory_hostname }}
+  ansible.builtin.service:
+    name: "rke2-{{ rke2_type }}.service"
+    state: stopped
+
+- name: Reboot the server on kernel update
+  ansible.builtin.reboot:
+    msg: "Reboot initiated by Ansible for kernel updates"
+    connect_timeout: 5
+    reboot_timeout: 300
+    pre_reboot_delay: 0
+    post_reboot_delay: 30
+    test_command: uptime
+
+# Counts the `kubectl get nodes` lines containing " Ready" and retries until
+# that count equals the number of hosts in the cluster group.
+- name: Wait for all nodes to be ready again
+  ansible.builtin.shell: |
+    set -o pipefail
+    {{ rke2_data_path }}/bin/kubectl --kubeconfig /etc/rancher/rke2/rke2.yaml get nodes | grep " Ready" | wc -l
+  args:
+    executable: /bin/bash
+  changed_when: false
+  register: all_ready_nodes
+  until:
+    - groups[rke2_cluster_group_name] | length == all_ready_nodes.stdout | int
+  retries: 100
+  delay: 15
+  delegate_to: "{{ active_server | default(groups[rke2_servers_group_name].0) }}"
+  run_once: true
+
+# Always uncordon: the cordon/drain above is unconditional, so gating this on
+# rke2_drain_node_during_upgrade could leave the node unschedulable.
+- name: Uncordon the node {{ inventory_hostname }}
+  ansible.builtin.shell: |
+    set -o pipefail
+    {{ rke2_data_path }}/bin/kubectl --kubeconfig /etc/rancher/rke2/rke2.yaml \
+    uncordon "{{ inventory_hostname }}"
+  args:
+    executable: /bin/bash
+  changed_when: false
+  delegate_to: "{{ active_server | default(groups[rke2_servers_group_name].0) }}"
+  run_once: true
diff --git a/ansible/k8s/tasks/upgrade_kernel.yaml b/ansible/k8s/tasks/upgrade_kernel.yaml
new file mode 100644
--- /dev/null
+++ b/ansible/k8s/tasks/upgrade_kernel.yaml
@@ -0,0 +1,31 @@
+---
+# Dist-upgrade every node and, where the upgrade asks for a reboot,
+# restart the affected nodes through rolling_restart.yaml.
+
+- name: Update apt repo and cache on all Debian/Ubuntu boxes
+  ansible.builtin.apt:
+    update_cache: true
+    force_apt_get: true
+    cache_valid_time: 3600
+
+- name: Upgrade all packages on servers
+  ansible.builtin.apt:
+    upgrade: dist  # dist-upgrade, not a plain `apt upgrade`
+    force_apt_get: true
+
+- name: Check if a reboot is needed on all servers
+  ansible.builtin.stat:
+    path: /var/run/reboot-required
+  register: reboot_required_file
+
+# Each host walks the whole node list but only includes the restart tasks on
+# the iteration naming itself; without that guard every host would stop RKE2
+# and reboot on every iteration, i.e. all nodes at once.
+- name: Rolling restart k8s if kernel updated
+  ansible.builtin.include_tasks: rolling_restart.yaml
+  loop: "{{ groups[rke2_cluster_group_name] }}"
+  loop_control:
+    loop_var: _host_item
+  when:
+    - reboot_required_file.stat.exists
+    - inventory_hostname == _host_item