From fec695086ad9edc4d9bf53d7c1d06b27945f6962 Mon Sep 17 00:00:00 2001 From: Karim Naufal Date: Wed, 16 Feb 2022 03:18:40 +0100 Subject: [PATCH] k3s-install ready for testing --- temp/agents.tf => agents.tf | 70 ++++++++++++++++++++++------------- locals.tf | 14 ++++++- master.tf | 66 +++++++++++++++++++++++---------- temp/output.tf => output.tf | 0 temp/servers.tf => servers.tf | 46 ++++++++++++++--------- templates/agent.conf.tpl | 3 -- templates/plans.yaml.tpl | 50 +++++++++++++++++++++++++ terraform.tfvars.example | 9 +++++ variables.tf | 13 +++++++ 9 files changed, 205 insertions(+), 66 deletions(-) rename temp/agents.tf => agents.tf (60%) rename temp/output.tf => output.tf (100%) rename temp/servers.tf => servers.tf (72%) delete mode 100644 templates/agent.conf.tpl create mode 100644 templates/plans.yaml.tpl diff --git a/temp/agents.tf b/agents.tf similarity index 60% rename from temp/agents.tf rename to agents.tf index 9b107d3..31a26c3 100644 --- a/temp/agents.tf +++ b/agents.tf @@ -33,15 +33,13 @@ resource "hcloud_server" "agents" { # Install MicroOS provisioner "remote-exec" { - inline = local.MicroOS_install_commands + inline = local.microOS_install_commands } - # Issue a reboot command + # Issue a reboot command and wait for the node to reboot provisioner "local-exec" { command = "ssh ${local.ssh_args} root@${self.ipv4_address} '(sleep 2; reboot)&'; sleep 3" } - - # Wait for MicroOS to reboot and be ready provisioner "local-exec" { command = <<-EOT until ssh ${local.ssh_args} -o ConnectTimeout=2 root@${self.ipv4_address} true 2> /dev/null @@ -52,16 +50,6 @@ resource "hcloud_server" "agents" { EOT } - - # Generating and uploading the agent.conf file - provisioner "file" { - content = templatefile("${path.module}/templates/agent.conf.tpl", { - server = "https://${local.first_control_plane_network_ip}:6443" - token = random_password.k3s_token.result - }) - destination = "/etc/rancher/k3s/agent.conf" - } - # Generating k3s agent config file provisioner 
"file" { content = yamlencode({ @@ -69,30 +57,60 @@ resource "hcloud_server" "agents" { kubelet-arg = "cloud-provider=external" flannel-iface = "eth1" node-ip = cidrhost(hcloud_network_subnet.k3s.ip_range, 257 + count.index) + node-label = var.automatically_upgrade_k3s ? ["k3s_upgrade=true"] : [] }) - destination = "/etc/rancher/k3s/config.yaml" + destination = "/tmp/config.yaml" } - # Run the agent + # Install k3s agent provisioner "remote-exec" { inline = [ - # set the hostname in a persistent fashion - "hostnamectl set-hostname ${self.name}", + "set -ex", # first we disable automatic reboot (after transactional updates), and configure the reboot method as kured "rebootmgrctl set-strategy off && echo 'REBOOT_METHOD=kured' > /etc/transactional-update.conf", - # then we start k3s agent and join the cluster - "systemctl enable k3s-agent", + # prepare the k3s config directory + "mkdir -p /etc/rancher/k3s", + # move the config file into place + "mv /tmp/config.yaml /etc/rancher/k3s/config.yaml", + # install k3s <<-EOT - until systemctl status k3s-agent > /dev/null - do - systemctl start k3s-agent - echo "Starting k3s-agent and joining the cluster..." - sleep 2 - done + curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_START=true \ + K3S_URL=${"https://${local.first_control_plane_network_ip}:6443"} \ + K3S_TOKEN=${random_password.k3s_token.result} \ + sh - EOT ] } + # Issue a reboot command and wait for the node to reboot + provisioner "local-exec" { + command = "ssh ${local.ssh_args} root@${self.ipv4_address} '(sleep 2; reboot)&'; sleep 3" + } + provisioner "local-exec" { + command = <<-EOT + until ssh ${local.ssh_args} -o ConnectTimeout=2 root@${self.ipv4_address} true 2> /dev/null + do + echo "Waiting for MicroOS to reboot and become available..." + sleep 2 + done + EOT + } + + # Upon reboot verify that k3s agent starts correctly + provisioner "remote-exec" { + inline = [ + <<-EOT + timeout 120 bash < /dev/null; do + echo "Waiting for the k3s agent to start..." 
+ sleep 1 + done + EOF + EOT + ] + } + + network { network_id = hcloud_network.k3s.id ip = cidrhost(hcloud_network_subnet.k3s.ip_range, 257 + count.index) diff --git a/locals.tf b/locals.tf index 479cb3d..68ac33c 100644 --- a/locals.tf +++ b/locals.tf @@ -18,7 +18,7 @@ locals { csi_version = var.hetzner_csi_version != null ? var.hetzner_csi_version : data.github_release.hetzner_csi.release_tag kured_version = data.github_release.kured.release_tag - MicroOS_install_commands = [ + microOS_install_commands = [ "set -ex", "apt-get install -y aria2", "aria2c --follow-metalink=mem https://download.opensuse.org/tumbleweed/appliances/openSUSE-MicroOS.x86_64-kvm-and-xen.qcow2.meta4", @@ -34,4 +34,16 @@ locals { "cp /root/config.ign /mnt/ignition/config.ign", "umount /mnt" ] + + install_k3s_server = [ + "set -ex", + # first we disable automatic reboot (after transactional updates), and configure the reboot method as kured + "rebootmgrctl set-strategy off && echo 'REBOOT_METHOD=kured' > /etc/transactional-update.conf", + # prepare the k3s config directory + "mkdir -p /etc/rancher/k3s", + # move the config file into place + "mv /tmp/config.yaml /etc/rancher/k3s/config.yaml", + # install k3s + "curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_START=true sh -", + ] } diff --git a/master.tf b/master.tf index 6d62863..6eb8991 100644 --- a/master.tf +++ b/master.tf @@ -31,15 +31,13 @@ resource "hcloud_server" "first_control_plane" { # Install MicroOS provisioner "remote-exec" { - inline = local.MicroOS_install_commands + inline = local.microOS_install_commands } - # Issue a reboot command + # Issue a reboot command and wait for the node to reboot provisioner "local-exec" { command = "ssh ${local.ssh_args} root@${self.ipv4_address} '(sleep 2; reboot)&'; sleep 3" } - - # Wait for MicroOS to reboot and be ready provisioner "local-exec" { command = <<-EOT until ssh ${local.ssh_args} -o ConnectTimeout=2 root@${self.ipv4_address} true 2> /dev/null @@ -63,27 +61,42 @@ resource 
"hcloud_server" "first_control_plane" { advertise-address = local.first_control_plane_network_ip token = random_password.k3s_token.result node-taint = var.allow_scheduling_on_control_plane ? [] : ["node-role.kubernetes.io/master:NoSchedule"] + node-label = var.automatically_upgrade_k3s ? ["k3s_upgrade=true"] : [] }) - destination = "/etc/rancher/k3s/config.yaml" + destination = "/tmp/config.yaml" } - /* # Run the first control plane + + + # Install k3s server + provisioner "remote-exec" { + inline = local.install_k3s_server + } + + # Issue a reboot command and wait for the node to reboot + provisioner "local-exec" { + command = "ssh ${local.ssh_args} root@${self.ipv4_address} '(sleep 2; reboot)&'; sleep 3" + } + provisioner "local-exec" { + command = <<-EOT + until ssh ${local.ssh_args} -o ConnectTimeout=2 root@${self.ipv4_address} true 2> /dev/null + do + echo "Waiting for MicroOS to reboot and become available..." + sleep 2 + done + EOT + } + + # Upon reboot verify that the k3s server starts, and wait for k3s to be ready to receive commands provisioner "remote-exec" { inline = [ - # set the hostname in a persistent fashion - "hostnamectl set-hostname ${self.name}", - # first we disable automatic reboot (after transactional updates), and configure the reboot method as kured - "rebootmgrctl set-strategy off && echo 'REBOOT_METHOD=kured' > /etc/transactional-update.conf", - # prepare a directory for our post-installation kustomizations + # prepare the post_install directory "mkdir -p /tmp/post_install", - # then we initiate the cluster - "systemctl enable k3s-server", - # wait for k3s to get ready + # wait for k3s to become ready <<-EOT timeout 120 bash < /dev/null; do - systemctl start k3s-server - echo "Initiating the cluster..." + until systemctl status k3s > /dev/null; do + echo "Waiting for the k3s server to start..." 
sleep 1 done until [ -e /etc/rancher/k3s/k3s.yaml ]; do @@ -108,7 +121,8 @@ resource "hcloud_server" "first_control_plane" { "https://github.com/hetznercloud/hcloud-cloud-controller-manager/releases/download/${local.ccm_version}/ccm-networks.yaml", "https://raw.githubusercontent.com/hetznercloud/csi-driver/${local.csi_version}/deploy/kubernetes/hcloud-csi.yml", "https://github.com/weaveworks/kured/releases/download/${local.kured_version}/kured-${local.kured_version}-dockerhub.yaml", - "./traefik.yaml" + "https://raw.githubusercontent.com/rancher/system-upgrade-controller/master/manifests/system-upgrade-controller.yaml", + "./traefik.yaml", ] patchesStrategicMerge = [ file("${path.module}/patches/kured.yaml"), @@ -132,9 +146,20 @@ resource "hcloud_server" "first_control_plane" { destination = "/tmp/post_install/traefik.yaml" } + # Upload the system upgrade controller plans config + provisioner "file" { + content = templatefile( + "${path.module}/templates/plans.yaml.tpl", + { + channel = var.k3s_upgrade_channel + }) + destination = "/tmp/post_install/plans.yaml" + } + # Deploy secrets, logging is automatically disabled due to sensitive variables provisioner "remote-exec" { inline = [ + "set -ex", "kubectl -n kube-system create secret generic hcloud --from-literal=token=${var.hcloud_token} --from-literal=network=${hcloud_network.k3s.name}", "kubectl -n kube-system create secret generic hcloud-csi --from-literal=token=${var.hcloud_token}", ] @@ -143,6 +168,7 @@ resource "hcloud_server" "first_control_plane" { # Deploy our post-installation kustomization provisioner "remote-exec" { inline = [ + "set -ex", # This ugly hack is here, because terraform serializes the # embedded yaml files with "- |2", when there is more than # one yamldocument in the embedded file. 
Kustomize does not understand @@ -153,8 +179,10 @@ resource "hcloud_server" "first_control_plane" { # manifests themselves "sed -i 's/^- |[0-9]\\+$/- |/g' /tmp/post_install/kustomization.yaml", "kubectl apply -k /tmp/post_install", + "echo 'Waiting for the system-upgrade-controller deployment to become available...' && kubectl -n system-upgrade wait --for=condition=available --timeout=300s deployment/system-upgrade-controller", + "kubectl apply -f /tmp/post_install/plans.yaml" ] - } */ + } network { network_id = hcloud_network.k3s.id diff --git a/temp/output.tf b/output.tf similarity index 100% rename from temp/output.tf rename to output.tf diff --git a/temp/servers.tf b/servers.tf similarity index 72% rename from temp/servers.tf rename to servers.tf index 5f1d82c..b3835e9 100644 --- a/temp/servers.tf +++ b/servers.tf @@ -32,15 +32,13 @@ resource "hcloud_server" "control_planes" { # Install MicroOS provisioner "remote-exec" { - inline = local.MicroOS_install_commands + inline = local.microOS_install_commands } - # Issue a reboot command + # Issue a reboot command and wait for the node to reboot provisioner "local-exec" { command = "ssh ${local.ssh_args} root@${self.ipv4_address} '(sleep 2; reboot)&'; sleep 3" } - - # Wait for MicroOS to reboot and be ready provisioner "local-exec" { command = <<-EOT until ssh ${local.ssh_args} -o ConnectTimeout=2 root@${self.ipv4_address} true 2> /dev/null @@ -66,26 +64,40 @@ resource "hcloud_server" "control_planes" { tls-san = cidrhost(hcloud_network_subnet.k3s.ip_range, 3 + count.index) token = random_password.k3s_token.result node-taint = var.allow_scheduling_on_control_plane ? [] : ["node-role.kubernetes.io/master:NoSchedule"] + node-label = var.automatically_upgrade_k3s ? 
["k3s_upgrade=true"] : [] }) - destination = "/etc/rancher/k3s/config.yaml" + destination = "/tmp/config.yaml" } - # Run an other control plane server + # Install k3s server + provisioner "remote-exec" { + inline = local.install_k3s_server + } + + # Issue a reboot command and wait for the node to reboot + provisioner "local-exec" { + command = "ssh ${local.ssh_args} root@${self.ipv4_address} '(sleep 2; reboot)&'; sleep 3" + } + provisioner "local-exec" { + command = <<-EOT + until ssh ${local.ssh_args} -o ConnectTimeout=2 root@${self.ipv4_address} true 2> /dev/null + do + echo "Waiting for MicroOS to reboot and become available..." + sleep 2 + done + EOT + } + + # Upon reboot verify that the k3s server starts correctly provisioner "remote-exec" { inline = [ - # set the hostname in a persistent fashion - "hostnamectl set-hostname ${self.name}", - # first we disable automatic reboot (after transactional updates), and configure the reboot method as kured - "rebootmgrctl set-strategy off && echo 'REBOOT_METHOD=kured' > /etc/transactional-update.conf", - # then then we start k3s in server mode and join the cluster - "systemctl enable k3s-server", <<-EOT - until systemctl status k3s-server > /dev/null - do - systemctl start k3s-server - echo "Waiting on other 'learning' control planes, patience is the mother of all virtues..." - sleep 2 + timeout 120 bash < /dev/null; do + echo "Waiting for the k3s server to start..." 
+ sleep 1 done + EOF EOT ] } diff --git a/templates/agent.conf.tpl b/templates/agent.conf.tpl deleted file mode 100644 index fad0449..0000000 --- a/templates/agent.conf.tpl +++ /dev/null @@ -1,3 +0,0 @@ -SERVER_URL="${server}" -NODE_TOKEN="${token}" -AGENT_OPTS="" diff --git a/templates/plans.yaml.tpl b/templates/plans.yaml.tpl new file mode 100644 index 0000000..337aa59 --- /dev/null +++ b/templates/plans.yaml.tpl @@ -0,0 +1,50 @@ +# Doc: https://rancher.com/docs/k3s/latest/en/upgrades/automated/ +# agent plan +apiVersion: upgrade.cattle.io/v1 +kind: Plan +metadata: + name: k3s-agent + namespace: system-upgrade + labels: + k3s_upgrade: agent +spec: + concurrency: 1 + channel: https://update.k3s.io/v1-release/channels/${channel} + nodeSelector: + matchExpressions: + - {key: k3s_upgrade, operator: Exists} + - {key: k3s_upgrade, operator: NotIn, values: ["disabled", "false"]} + - {key: node-role.kubernetes.io/master, operator: NotIn, values: ["true"]} + serviceAccountName: system-upgrade + prepare: + image: rancher/k3s-upgrade + args: ["prepare", "k3s-server"] + drain: + force: true + skipWaitForDeleteTimeout: 60 + upgrade: + image: rancher/k3s-upgrade +--- +# server plan +apiVersion: upgrade.cattle.io/v1 +kind: Plan +metadata: + name: k3s-server + namespace: system-upgrade + labels: + k3s_upgrade: server +spec: + concurrency: 1 + channel: https://update.k3s.io/v1-release/channels/${channel} + nodeSelector: + matchExpressions: + - {key: k3s_upgrade, operator: Exists} + - {key: k3s_upgrade, operator: NotIn, values: ["disabled", "false"]} + - {key: node-role.kubernetes.io/master, operator: In, values: ["true"]} + tolerations: + - {key: node-role.kubernetes.io/master, effect: NoSchedule, operator: Exists} + - {key: CriticalAddonsOnly, effect: NoExecute, operator: Exists} + serviceAccountName: system-upgrade + cordon: true + upgrade: + image: rancher/k3s-upgrade \ No newline at end of file diff --git a/terraform.tfvars.example b/terraform.tfvars.example index 
d0bd3a1..d2876d1 100644 --- a/terraform.tfvars.example +++ b/terraform.tfvars.example @@ -29,3 +29,12 @@ agents_num = 2 # If you want to allow non-control-plane workloads to run on the control-plane nodes set "true" below. The default is "false". # allow_scheduling_on_control_plane = true + +# If you want to disable automatic upgrade of k3s (stable channel), you can set this to false, default is "true". +# automatically_upgrade_k3s = false + +# If you would like to specify the k3s upgrade channel from the get go, you can do so, the default is "stable". +# For a list of available channels, see https://rancher.com/docs/k3s/latest/en/upgrades/basic/ and https://update.k3s.io/v1-release/channels +# k3s_upgrade_channel = "latest" + + diff --git a/variables.tf b/variables.tf index e8734df..b8ed1d1 100644 --- a/variables.tf +++ b/variables.tf @@ -84,3 +84,16 @@ variable "allow_scheduling_on_control_plane" { default = false description = "Whether to allow non-control-plane workloads to run on the control-plane nodes" } + +variable "k3s_upgrade_channel" { + type = string + default = "stable" + description = "Allows you to specify the k3s upgrade channel" +} + +variable "automatically_upgrade_k3s" { + type = bool + default = true + description = "Whether to automatically upgrade k3s based on the selected channel" +} +