diff --git a/.gitignore b/.gitignore index 16bc502..5f02e99 100644 --- a/.gitignore +++ b/.gitignore @@ -4,5 +4,6 @@ crash.log kubeconfig.yaml kubeconfig.yaml-e terraform.tfvars -plans.yaml -traefik_config.yaml +plans-custom.yaml +traefik-custom.yaml +kured-custom.yaml \ No newline at end of file diff --git a/README.md b/README.md index abc9f21..6d351dc 100644 --- a/README.md +++ b/README.md @@ -149,7 +149,6 @@ spec: tls: - hosts: - example.com - secretName: example-tls rules: - host: example.com http: @@ -166,6 +165,21 @@ spec: +
+ +single-node cluster + +Running a development cluster on a single node, without any high availability, is possible as well. +In this case, we don't deploy an external load balancer, but use the [k3s service load balancer](https://rancher.com/docs/k3s/latest/en/networking/#service-load-balancer) on the host itself and open up ports 80 and 443 in the firewall. + +``` terraform +control_plane_count = 1 +allow_scheduling_on_control_plane = true +agent_nodepools = {} +``` + +
+ ## Debugging First and foremost, it depends, but it's always good to have a quick look into Hetzner quickly without having to login to the UI. That is where the `hcloud` cli comes in. diff --git a/agents.tf b/agents.tf index da4c4f5..10912ec 100644 --- a/agents.tf +++ b/agents.tf @@ -44,11 +44,11 @@ resource "null_resource" "agents" { provisioner "file" { content = yamlencode({ node-name = module.agents[each.key].name - server = "https://${local.first_control_plane_network_ipv4}:6443" + server = "https://${module.control_planes[0].private_ipv4_address}:6443" token = random_password.k3s_token.result kubelet-arg = "cloud-provider=external" flannel-iface = "eth1" - node-ip = module.agents[each.key].ipv4_address + node-ip = module.agents[each.key].private_ipv4_address node-label = var.automatically_upgrade_k3s ? ["k3s_upgrade=true"] : [] }) destination = "/tmp/config.yaml" diff --git a/control_planes.tf b/control_planes.tf index 7c31d4b..34cf4bf 100644 --- a/control_planes.tf +++ b/control_planes.tf @@ -53,7 +53,6 @@ resource "null_resource" "control_planes" { kubelet-arg = "cloud-provider=external" node-ip = module.control_planes[count.index].private_ipv4_address advertise-address = module.control_planes[count.index].private_ipv4_address - tls-san = module.control_planes[count.index].private_ipv4_address node-taint = var.allow_scheduling_on_control_plane ? [] : ["node-role.kubernetes.io/master:NoSchedule"] node-label = var.automatically_upgrade_k3s ? 
["k3s_upgrade=true"] : [] }) diff --git a/examples/tls/ingress.yaml b/examples/tls/ingress.yaml index 9888094..3c2d2ab 100644 --- a/examples/tls/ingress.yaml +++ b/examples/tls/ingress.yaml @@ -9,7 +9,6 @@ spec: tls: - hosts: - example.com - secretName: example-tls rules: - host: example.com http: diff --git a/init.tf b/init.tf index a29de23..d900824 100644 --- a/init.tf +++ b/init.tf @@ -13,12 +13,11 @@ resource "null_resource" "first_control_plane" { token = random_password.k3s_token.result cluster-init = true disable-cloud-controller = true - disable = ["servicelb", "local-storage"] + disable = concat(["local-storage"], local.is_single_node_cluster ? [] : ["servicelb"]) flannel-iface = "eth1" kubelet-arg = "cloud-provider=external" node-ip = module.control_planes[0].private_ipv4_address advertise-address = module.control_planes[0].private_ipv4_address - tls-san = module.control_planes[0].private_ipv4_address node-taint = var.allow_scheduling_on_control_plane ? [] : ["node-role.kubernetes.io/master:NoSchedule"] node-label = var.automatically_upgrade_k3s ? 
["k3s_upgrade=true"] : [] }) @@ -30,7 +29,7 @@ resource "null_resource" "first_control_plane" { inline = local.install_k3s_server } - # Upon reboot verify that the k3s server is starts, and wait for k3s to be ready to receive commands + # Upon reboot start k3s and wait for it to be ready to receive commands provisioner "remote-exec" { inline = [ "systemctl start k3s", @@ -75,13 +74,12 @@ resource "null_resource" "kustomization" { content = yamlencode({ apiVersion = "kustomize.config.k8s.io/v1beta1" kind = "Kustomization" - resources = [ + resources = concat([ "https://github.com/hetznercloud/hcloud-cloud-controller-manager/releases/download/${local.ccm_version}/ccm-networks.yaml", "https://raw.githubusercontent.com/hetznercloud/csi-driver/${local.csi_version}/deploy/kubernetes/hcloud-csi.yml", "https://github.com/weaveworks/kured/releases/download/${local.kured_version}/kured-${local.kured_version}-dockerhub.yaml", "https://raw.githubusercontent.com/rancher/system-upgrade-controller/master/manifests/system-upgrade-controller.yaml", - "traefik.yaml", - ] + ], local.is_single_node_cluster ? [] : ["traefik.yaml"]), patchesStrategicMerge = [ file("${path.module}/kustomize/kured.yaml"), file("${path.module}/kustomize/ccm.yaml"), @@ -93,7 +91,7 @@ resource "null_resource" "kustomization" { # Upload traefik config provisioner "file" { - content = templatefile( + content = local.is_single_node_cluster ? 
"" : templatefile( "${path.module}/templates/traefik_config.yaml.tpl", { load_balancer_disable_ipv6 = var.load_balancer_disable_ipv6 @@ -127,7 +125,7 @@ resource "null_resource" "kustomization" { # Deploy our post-installation kustomization provisioner "remote-exec" { - inline = [ + inline = concat([ "set -ex", # This ugly hack is here, because terraform serializes the # embedded yaml files with "- |2", when there is more than @@ -141,8 +139,9 @@ resource "null_resource" "kustomization" { "kubectl apply -k /tmp/post_install", "echo 'Waiting for the system-upgrade-controller deployment to become available...'", "kubectl -n system-upgrade wait --for=condition=available --timeout=120s deployment/system-upgrade-controller", - "kubectl -n system-upgrade apply -f /tmp/post_install/plans.yaml", - <<-EOT + "kubectl -n system-upgrade apply -f /tmp/post_install/plans.yaml" + ], + local.is_single_node_cluster ? [] : [<<-EOT timeout 120 bash < /dev/null)" ]; do echo "Waiting for load-balancer to get an IP..." @@ -150,7 +149,7 @@ resource "null_resource" "kustomization" { done EOF EOT - ] + ]) } depends_on = [ diff --git a/locals.tf b/locals.tf index 0fe2a8c..595cf45 100644 --- a/locals.tf +++ b/locals.tf @@ -1,7 +1,7 @@ locals { - first_control_plane_network_ipv4 = module.control_planes[0].private_ipv4_address - - ssh_public_key = trimspace(file(var.public_key)) + # true when control_plane_count plus the number of agent nodepools (not agent nodes) equals 1, i.e. a single node cluster; in that case we use the default klipper lb instead of the Hetzner LB + is_single_node_cluster = var.control_plane_count + length(keys(var.agent_nodepools)) == 1 + ssh_public_key = trimspace(file(var.public_key)) # ssh_private_key is either the contents of var.private_key or null to use a ssh agent. ssh_private_key = var.private_key == null ? null : trimspace(file(var.private_key)) # ssh_identity is not set if the private key is passed directly, but if ssh agent is used, the public key tells ssh agent which private key to use. 
@@ -29,7 +29,7 @@ locals { "127.0.0.1/32", ] - base_firewall_rules = [ + base_firewall_rules = concat([ # Allowing internal cluster traffic and Hetzner metadata service and cloud API IPs { direction = "in" @@ -133,7 +133,26 @@ locals { "0.0.0.0/0" ] } - ] + ], !local.is_single_node_cluster ? [] : [ + # Allow incoming web traffic for single node clusters, because we are using k3s servicelb there, + # not an external load-balancer. + { + direction = "in" + protocol = "tcp" + port = "80" + source_ips = [ + "0.0.0.0/0" + ] + }, + { + direction = "in" + protocol = "tcp" + port = "443" + source_ips = [ + "0.0.0.0/0" + ] + } + ]) common_commands_install_k3s = [ "set -ex", @@ -145,9 +164,10 @@ locals { "[ -e /etc/rancher/k3s/k3s.yaml ] && exit 0", ] - install_k3s_server = concat(local.common_commands_install_k3s, ["curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_SELINUX_RPM=true INSTALL_K3S_SKIP_START=true INSTALL_K3S_CHANNEL=${var.initial_k3s_channel} INSTALL_K3S_EXEC=server sh -"]) + apply_k3s_selinux = ["/sbin/semodule -v -i /usr/share/selinux/packages/k3s.pp"] - install_k3s_agent = concat(local.common_commands_install_k3s, ["curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_SELINUX_RPM=true INSTALL_K3S_SKIP_START=true INSTALL_K3S_CHANNEL=${var.initial_k3s_channel} INSTALL_K3S_EXEC=agent sh -"]) + install_k3s_server = concat(local.common_commands_install_k3s, ["curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_START=true INSTALL_K3S_SKIP_SELINUX_RPM=true INSTALL_K3S_CHANNEL=${var.initial_k3s_channel} INSTALL_K3S_EXEC=server sh -"], local.apply_k3s_selinux) + install_k3s_agent = concat(local.common_commands_install_k3s, ["curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_START=true INSTALL_K3S_SKIP_SELINUX_RPM=true INSTALL_K3S_CHANNEL=${var.initial_k3s_channel} INSTALL_K3S_EXEC=agent sh -"], local.apply_k3s_selinux) agent_nodepools = merge([ for nodepool_name, nodepool_obj in var.agent_nodepools : { diff --git a/main.tf b/main.tf index c54204f..ae01da9 100644 --- a/main.tf +++ 
b/main.tf @@ -13,12 +13,22 @@ resource "hcloud_network" "k3s" { ip_range = var.network_ipv4_range } +# This is the default subnet to be used by the load balancer. NOTE(review): the range below is hard-coded while the network range comes from var.network_ipv4_range; presumably it must lie inside that range, so a customized network range may break it - confirm. +resource "hcloud_network_subnet" "default" { + network_id = hcloud_network.k3s.id + type = "cloud" + network_zone = var.network_region + ip_range = "10.0.0.0/16" +} + resource "hcloud_network_subnet" "subnet" { for_each = var.network_ipv4_subnets network_id = hcloud_network.k3s.id type = "cloud" network_zone = var.network_region ip_range = each.value + + depends_on = [hcloud_network_subnet.default] } resource "hcloud_firewall" "k3s" { @@ -46,7 +56,8 @@ resource "hcloud_placement_group" "k3s" { } data "hcloud_load_balancer" "traefik" { - name = "traefik" + count = local.is_single_node_cluster ? 0 : 1 + name = "traefik" depends_on = [null_resource.kustomization] } diff --git a/modules/host/locals.tf b/modules/host/locals.tf index 08306d6..1fcef4d 100644 --- a/modules/host/locals.tf +++ b/modules/host/locals.tf @@ -10,65 +10,4 @@ locals { ssh_identity_file = var.private_key == null ? var.public_key : var.private_key # shared flags for ssh to ignore host keys, to use our ssh identity file for all connections during provisioning. 
ssh_args = "-o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -i ${local.ssh_identity_file}" - - microOS_install_commands = [ - "set -ex", - "apt-get update", - "apt-get install -y aria2", - "aria2c --follow-metalink=mem https://download.opensuse.org/tumbleweed/appliances/openSUSE-MicroOS.x86_64-kvm-and-xen.qcow2.meta4", - "qemu-img convert -p -f qcow2 -O host_device $(ls -a | grep -ie '^opensuse.*microos.*qcow2$') /dev/sda", - "sgdisk -e /dev/sda", - "parted -s /dev/sda resizepart 4 99%", - "parted -s /dev/sda mkpart primary ext2 99% 100%", - "partprobe /dev/sda && udevadm settle && fdisk -l /dev/sda", - "mount /dev/sda4 /mnt/ && btrfs filesystem resize max /mnt && umount /mnt", - "mke2fs -L ignition /dev/sda5", - "mount /dev/sda5 /mnt", - "mkdir /mnt/ignition", - "cp /root/config.ign /mnt/ignition/config.ign", - "mkdir /mnt/combustion", - "cp /root/script /mnt/combustion/script", - "umount /mnt" - ] - - ignition_config = jsonencode({ - ignition = { - version = "3.0.0" - } - passwd = { - users = [{ - name = "root" - sshAuthorizedKeys = concat([local.ssh_public_key], var.additional_public_keys) - }] - } - storage = { - files = [ - { - path = "/etc/sysconfig/network/ifcfg-eth1" - mode = 420 - overwrite = true - contents = { "source" = "data:,BOOTPROTO%3D%27dhcp%27%0ASTARTMODE%3D%27auto%27" } - }, - { - path = "/etc/ssh/sshd_config.d/kube-hetzner.conf" - mode = 420 - overwrite = true - contents = { "source" = "data:,PasswordAuthentication%20no%0AX11Forwarding%20no%0AMaxAuthTries%202%0AAllowTcpForwarding%20no%0AAllowAgentForwarding%20no%0AAuthorizedKeysFile%20.ssh%2Fauthorized_keys" } - } - ] - } - }) - - combustion_script = < /dev/null do echo "Waiting for MicroOS to reboot and become available..." 
@@ -51,16 +52,25 @@ resource "hcloud_server" "server" { EOT } + # Install k3s-selinux (compatible version) provisioner "remote-exec" { inline = [ - # Disable automatic reboot (after transactional updates), and configure the reboot method as kured "set -ex", - "rebootmgrctl set-strategy off", - "echo 'REBOOT_METHOD=kured' > /etc/transactional-update.conf", - # set the hostname - "hostnamectl set-hostname ${self.name}" + "transactional-update pkg install -y k3s-selinux" ] } + + # Issue a reboot command and wait for MicroOS to reboot and be ready + provisioner "local-exec" { + command = <<-EOT + ssh ${local.ssh_args} root@${self.ipv4_address} '(sleep 2; reboot)&'; sleep 3 + until ssh ${local.ssh_args} -o ConnectTimeout=2 root@${self.ipv4_address} true 2> /dev/null + do + echo "Waiting for MicroOS to reboot and become available..." + sleep 3 + done + EOT + } } resource "hcloud_server_network" "server" { @@ -68,3 +78,21 @@ resource "hcloud_server_network" "server" { server_id = hcloud_server.server.id subnet_id = var.ipv4_subnet_id } + +data "template_cloudinit_config" "config" { + gzip = true + base64_encode = true + + # Main cloud-config configuration file. 
+ part { + filename = "init.cfg" + content_type = "text/cloud-config" + content = templatefile( + "${path.module}/templates/userdata.yaml.tpl", + { + hostname = var.name + sshAuthorizedKeys = concat([local.ssh_public_key], var.additional_public_keys) + } + ) + } +} diff --git a/modules/host/templates/userdata.yaml.tpl b/modules/host/templates/userdata.yaml.tpl new file mode 100644 index 0000000..51ff107 --- /dev/null +++ b/modules/host/templates/userdata.yaml.tpl @@ -0,0 +1,58 @@ +#cloud-config + +write_files: + +# Configure the private network interface +- content: | + BOOTPROTO='dhcp' + STARTMODE='auto' + path: /etc/sysconfig/network/ifcfg-eth1 + +# Disable ssh password authentication +- content: | + PasswordAuthentication no + X11Forwarding no + MaxAuthTries 2 + AllowTcpForwarding no + AllowAgentForwarding no + AuthorizedKeysFile .ssh/authorized_keys + path: /etc/ssh/sshd_config.d/kube-hetzner.conf + +# Set reboot method as "kured" +- content: | + REBOOT_METHOD=kured + path: /etc/transactional-update.conf + +# Add ssh authorized keys +ssh_authorized_keys: +%{ for key in sshAuthorizedKeys ~} + - ${key} +%{ endfor ~} + +# Resize /var, not /, as that's the last partition in MicroOS image. 
+growpart: + devices: ["/var"] + +# Make sure the hostname is set correctly +hostname: ${hostname} +preserve_hostname: true + +runcmd: + +# As above, make sure the hostname is not reset +- [sed, '-i', 's/NETCONFIG_NIS_SETDOMAINNAME="yes"/NETCONFIG_NIS_SETDOMAINNAME="no"/g', /etc/sysconfig/network/config] +- [sed, '-i', 's/DHCLIENT_SET_HOSTNAME="yes"/DHCLIENT_SET_HOSTNAME="no"/g', /etc/sysconfig/network/dhcp] + +# We set Cloudflare DNS servers, followed by Google as a backup +- [sed, '-i', 's/NETCONFIG_DNS_STATIC_SERVERS=""/NETCONFIG_DNS_STATIC_SERVERS="1.1.1.1 1.0.0.1 8.8.8.8"/g', /etc/sysconfig/network/config] + +# Bounds the amount of logs that can survive on the system +- [sed, '-i', 's/#SystemMaxUse=/SystemMaxUse=3G/g', /etc/systemd/journald.conf] +- [sed, '-i', 's/#MaxRetentionSec=/MaxRetentionSec=1week/g', /etc/systemd/journald.conf] + +# Reduces the default number of snapshots from 2-10 number limit, to 4 and from 4-10 number limit important, to 3 +- [sed, '-i', 's/NUMBER_LIMIT="2-10"/NUMBER_LIMIT="4"/g', /etc/snapper/configs/root] +- [sed, '-i', 's/NUMBER_LIMIT_IMPORTANT="4-10"/NUMBER_LIMIT_IMPORTANT="3"/g', /etc/snapper/configs/root] + +# Disables unneeded services +- [systemctl, disable, '--now', 'rebootmgr.service'] diff --git a/modules/host/versions.tf b/modules/host/versions.tf index fe79022..7c8da9d 100644 --- a/modules/host/versions.tf +++ b/modules/host/versions.tf @@ -12,5 +12,9 @@ terraform { source = "tenstad/remote" version = "~> 0.0.23" } + template = { + source = "hashicorp/template" + version = "~> 2.2.0" + } } } diff --git a/output.tf b/output.tf index 762290d..4d2033e 100644 --- a/output.tf +++ b/output.tf @@ -12,7 +12,7 @@ output "agents_public_ipv4" { output "load_balancer_public_ipv4" { description = "The public IPv4 address of the Hetzner load balancer" - value = data.hcloud_load_balancer.traefik.ipv4 + value = local.is_single_node_cluster ? 
module.control_planes[0].ipv4_address : data.hcloud_load_balancer.traefik[0].ipv4 } output "kubeconfig_file" { diff --git a/terraform.tfvars.example b/terraform.tfvars.example index 8c9c842..b68f576 100644 --- a/terraform.tfvars.example +++ b/terraform.tfvars.example @@ -1,7 +1,15 @@ -# You need to replace these +# Only the first values starting with a * are obligatory, the rest can remain with their default values, or you +# could adapt them to your needs. +# +# Note that some values, notably "location" and "public_key", have no effect after the initial cluster has been set up. +# This is in order to keep terraform from re-provisioning all nodes at once, which would lose data. If you want to update +# those, you should instead change the value here and then manually re-provision each node one-by-one. Grep for "lifecycle". + +# * Your Hetzner project API token hcloud_token = "xxxxxxxxxxxxxxxxxxYYYYYYYYYYYYYYYYYYYzzzzzzzzzzzzzzzzzzzzz" +# * Your public key public_key = "/home/username/.ssh/id_ed25519.pub" -# Must be "private_key = null" when you want to use ssh-agent, for a Yubikey like device auth or an SSH key-pair with passphrase +# * Your private key, must be "private_key = null" when you want to use ssh-agent, for a Yubikey like device auth or an SSH key-pair with passphrase private_key = "/home/username/.ssh/id_ed25519" # These can be customized, or left with the default values @@ -10,9 +18,6 @@ private_key = "/home/username/.ssh/id_ed25519" location = "fsn1" # change to `ash` for us-east Ashburn, Virginia location network_region = "eu-central" # change to `us-east` if location is ash -# It's best to leave the network range as is, unless you know what you are doing. The default is "10.0.0.0/8". -# network_ipv4_range = "10.0.0.0/8" - # You can have up to as many subnets as you want (preferably if the form of 10.X.0.0/16), # their primary use is to logically separate the nodes. # The control_plane network is mandatory. 
@@ -25,13 +30,14 @@ network_ipv4_subnets = { # At least 3 server nodes is recommended for HA, otherwise you need to turn off automatic upgrade (see ReadMe). # As per rancher docs, it must be always an odd number, never even! See https://rancher.com/docs/k3s/latest/en/installation/ha-embedded/ # For instance, 1 is ok (non-HA), 2 not ok, 3 is ok (becomes HA). -control_plane_count = 3 +control_plane_count = 3 # The type of control plane nodes, see https://www.hetzner.com/cloud, the minimum instance supported is cpx11 (just a few cents more than cx11) control_plane_server_type = "cpx11" # As for the agent nodepools, below is just an example, if you do not want nodepools, just use one, # and change the name to what you want, it need not be "agent-big" or "agent-small", also give them the subnet prefer. +# For single node clusters set this equal to {} agent_nodepools = { agent-big = { server_type = "cpx21", @@ -48,6 +54,11 @@ agent_nodepools = { # That will depend on how much load you want it to handle, see https://www.hetzner.com/cloud/load-balancer load_balancer_type = "lb11" +### The following values are fully optional + +# It's best to leave the network range as is, unless you know what you are doing. The default is "10.0.0.0/8". +# network_ipv4_range = "10.0.0.0/8" + # If you want to use a specific Hetzner CCM and CSI version, set them below, otherwise leave as is for the latest versions # hetzner_ccm_version = "" # hetzner_csi_version = "" @@ -57,6 +68,7 @@ load_balancer_type = "lb11" # traefik_acme_email = "mail@example.com" # If you want to allow non-control-plane workloads to run on the control-plane nodes set "true" below. The default is "false". +# Also good for single node clusters. # allow_scheduling_on_control_plane = true # If you want to disable automatic upgrade of k3s, you can set this to false, default is "true". 
diff --git a/variables.tf b/variables.tf index eb30850..b25f7e5 100644 --- a/variables.tf +++ b/variables.tf @@ -65,6 +65,7 @@ variable "load_balancer_disable_ipv6" { variable "agent_nodepools" { description = "Number of agent nodes." type = map(any) + default = {} } variable "hetzner_ccm_version" {