diff --git a/.gitignore b/.gitignore
index 16bc502..5f02e99 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,5 +4,6 @@ crash.log
kubeconfig.yaml
kubeconfig.yaml-e
terraform.tfvars
-plans.yaml
-traefik_config.yaml
+plans-custom.yaml
+traefik-custom.yaml
+kured-custom.yaml
\ No newline at end of file
diff --git a/README.md b/README.md
index abc9f21..6d351dc 100644
--- a/README.md
+++ b/README.md
@@ -149,7 +149,6 @@ spec:
tls:
- hosts:
- example.com
- secretName: example-tls
rules:
- host: example.com
http:
@@ -166,6 +165,21 @@ spec:
+
+
+single-node cluster
+
+Running a development cluster on a single node, without any high availability, is possible as well.
+In this case, we don't deploy an external load-balancer, but use the [k3s service load balancer](https://rancher.com/docs/k3s/latest/en/networking/#service-load-balancer) on the host itself and open up ports 80 and 443 in the firewall.
+
+``` terraform
+control_plane_count = 1
+allow_scheduling_on_control_plane = true
+agent_nodepools = {}
+```
+
+
+
## Debugging
First and foremost, it depends, but it's always good to have a quick look into Hetzner quickly without having to login to the UI. That is where the `hcloud` cli comes in.
diff --git a/agents.tf b/agents.tf
index da4c4f5..10912ec 100644
--- a/agents.tf
+++ b/agents.tf
@@ -44,11 +44,11 @@ resource "null_resource" "agents" {
provisioner "file" {
content = yamlencode({
node-name = module.agents[each.key].name
- server = "https://${local.first_control_plane_network_ipv4}:6443"
+ server = "https://${module.control_planes[0].private_ipv4_address}:6443"
token = random_password.k3s_token.result
kubelet-arg = "cloud-provider=external"
flannel-iface = "eth1"
- node-ip = module.agents[each.key].ipv4_address
+ node-ip = module.agents[each.key].private_ipv4_address
node-label = var.automatically_upgrade_k3s ? ["k3s_upgrade=true"] : []
})
destination = "/tmp/config.yaml"
diff --git a/control_planes.tf b/control_planes.tf
index 7c31d4b..34cf4bf 100644
--- a/control_planes.tf
+++ b/control_planes.tf
@@ -53,7 +53,6 @@ resource "null_resource" "control_planes" {
kubelet-arg = "cloud-provider=external"
node-ip = module.control_planes[count.index].private_ipv4_address
advertise-address = module.control_planes[count.index].private_ipv4_address
- tls-san = module.control_planes[count.index].private_ipv4_address
node-taint = var.allow_scheduling_on_control_plane ? [] : ["node-role.kubernetes.io/master:NoSchedule"]
node-label = var.automatically_upgrade_k3s ? ["k3s_upgrade=true"] : []
})
diff --git a/examples/tls/ingress.yaml b/examples/tls/ingress.yaml
index 9888094..3c2d2ab 100644
--- a/examples/tls/ingress.yaml
+++ b/examples/tls/ingress.yaml
@@ -9,7 +9,6 @@ spec:
tls:
- hosts:
- example.com
- secretName: example-tls
rules:
- host: example.com
http:
diff --git a/init.tf b/init.tf
index a29de23..d900824 100644
--- a/init.tf
+++ b/init.tf
@@ -13,12 +13,11 @@ resource "null_resource" "first_control_plane" {
token = random_password.k3s_token.result
cluster-init = true
disable-cloud-controller = true
- disable = ["servicelb", "local-storage"]
+ disable = concat(["local-storage"], local.is_single_node_cluster ? [] : ["servicelb"])
flannel-iface = "eth1"
kubelet-arg = "cloud-provider=external"
node-ip = module.control_planes[0].private_ipv4_address
advertise-address = module.control_planes[0].private_ipv4_address
- tls-san = module.control_planes[0].private_ipv4_address
node-taint = var.allow_scheduling_on_control_plane ? [] : ["node-role.kubernetes.io/master:NoSchedule"]
node-label = var.automatically_upgrade_k3s ? ["k3s_upgrade=true"] : []
})
@@ -30,7 +29,7 @@ resource "null_resource" "first_control_plane" {
inline = local.install_k3s_server
}
- # Upon reboot verify that the k3s server is starts, and wait for k3s to be ready to receive commands
+ # Upon reboot start k3s and wait for it to be ready to receive commands
provisioner "remote-exec" {
inline = [
"systemctl start k3s",
@@ -75,13 +74,12 @@ resource "null_resource" "kustomization" {
content = yamlencode({
apiVersion = "kustomize.config.k8s.io/v1beta1"
kind = "Kustomization"
- resources = [
+ resources = concat([
"https://github.com/hetznercloud/hcloud-cloud-controller-manager/releases/download/${local.ccm_version}/ccm-networks.yaml",
"https://raw.githubusercontent.com/hetznercloud/csi-driver/${local.csi_version}/deploy/kubernetes/hcloud-csi.yml",
"https://github.com/weaveworks/kured/releases/download/${local.kured_version}/kured-${local.kured_version}-dockerhub.yaml",
"https://raw.githubusercontent.com/rancher/system-upgrade-controller/master/manifests/system-upgrade-controller.yaml",
- "traefik.yaml",
- ]
+ ], local.is_single_node_cluster ? [] : ["traefik.yaml"]),
patchesStrategicMerge = [
file("${path.module}/kustomize/kured.yaml"),
file("${path.module}/kustomize/ccm.yaml"),
@@ -93,7 +91,7 @@ resource "null_resource" "kustomization" {
# Upload traefik config
provisioner "file" {
- content = templatefile(
+ content = local.is_single_node_cluster ? "" : templatefile(
"${path.module}/templates/traefik_config.yaml.tpl",
{
load_balancer_disable_ipv6 = var.load_balancer_disable_ipv6
@@ -127,7 +125,7 @@ resource "null_resource" "kustomization" {
# Deploy our post-installation kustomization
provisioner "remote-exec" {
- inline = [
+ inline = concat([
"set -ex",
# This ugly hack is here, because terraform serializes the
# embedded yaml files with "- |2", when there is more than
@@ -141,8 +139,9 @@ resource "null_resource" "kustomization" {
"kubectl apply -k /tmp/post_install",
"echo 'Waiting for the system-upgrade-controller deployment to become available...'",
"kubectl -n system-upgrade wait --for=condition=available --timeout=120s deployment/system-upgrade-controller",
- "kubectl -n system-upgrade apply -f /tmp/post_install/plans.yaml",
- <<-EOT
+ "kubectl -n system-upgrade apply -f /tmp/post_install/plans.yaml"
+ ],
+ local.is_single_node_cluster ? [] : [<<-EOT
timeout 120 bash < /dev/null)" ]; do
echo "Waiting for load-balancer to get an IP..."
@@ -150,7 +149,7 @@ resource "null_resource" "kustomization" {
done
EOF
EOT
- ]
+ ])
}
depends_on = [
diff --git a/locals.tf b/locals.tf
index 0fe2a8c..595cf45 100644
--- a/locals.tf
+++ b/locals.tf
@@ -1,7 +1,7 @@
locals {
- first_control_plane_network_ipv4 = module.control_planes[0].private_ipv4_address
-
- ssh_public_key = trimspace(file(var.public_key))
+ # if we are in a single-node cluster config, we use the default klipper LB (k3s servicelb) instead of the Hetzner LB
+ is_single_node_cluster = var.control_plane_count + length(keys(var.agent_nodepools)) == 1
+ ssh_public_key = trimspace(file(var.public_key))
# ssh_private_key is either the contents of var.private_key or null to use a ssh agent.
ssh_private_key = var.private_key == null ? null : trimspace(file(var.private_key))
# ssh_identity is not set if the private key is passed directly, but if ssh agent is used, the public key tells ssh agent which private key to use.
@@ -29,7 +29,7 @@ locals {
"127.0.0.1/32",
]
- base_firewall_rules = [
+ base_firewall_rules = concat([
# Allowing internal cluster traffic and Hetzner metadata service and cloud API IPs
{
direction = "in"
@@ -133,7 +133,26 @@ locals {
"0.0.0.0/0"
]
}
- ]
+ ], !local.is_single_node_cluster ? [] : [
+ # Allow incoming web traffic for single node clusters, because we are using k3s servicelb there,
+ # not an external load-balancer.
+ {
+ direction = "in"
+ protocol = "tcp"
+ port = "80"
+ source_ips = [
+ "0.0.0.0/0"
+ ]
+ },
+ {
+ direction = "in"
+ protocol = "tcp"
+ port = "443"
+ source_ips = [
+ "0.0.0.0/0"
+ ]
+ }
+ ])
common_commands_install_k3s = [
"set -ex",
@@ -145,9 +164,10 @@ locals {
"[ -e /etc/rancher/k3s/k3s.yaml ] && exit 0",
]
- install_k3s_server = concat(local.common_commands_install_k3s, ["curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_SELINUX_RPM=true INSTALL_K3S_SKIP_START=true INSTALL_K3S_CHANNEL=${var.initial_k3s_channel} INSTALL_K3S_EXEC=server sh -"])
+ apply_k3s_selinux = ["/sbin/semodule -v -i /usr/share/selinux/packages/k3s.pp"]
- install_k3s_agent = concat(local.common_commands_install_k3s, ["curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_SELINUX_RPM=true INSTALL_K3S_SKIP_START=true INSTALL_K3S_CHANNEL=${var.initial_k3s_channel} INSTALL_K3S_EXEC=agent sh -"])
+ install_k3s_server = concat(local.common_commands_install_k3s, ["curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_START=true INSTALL_K3S_SKIP_SELINUX_RPM=true INSTALL_K3S_CHANNEL=${var.initial_k3s_channel} INSTALL_K3S_EXEC=server sh -"], local.apply_k3s_selinux)
+ install_k3s_agent = concat(local.common_commands_install_k3s, ["curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_START=true INSTALL_K3S_SKIP_SELINUX_RPM=true INSTALL_K3S_CHANNEL=${var.initial_k3s_channel} INSTALL_K3S_EXEC=agent sh -"], local.apply_k3s_selinux)
agent_nodepools = merge([
for nodepool_name, nodepool_obj in var.agent_nodepools : {
diff --git a/main.tf b/main.tf
index c54204f..ae01da9 100644
--- a/main.tf
+++ b/main.tf
@@ -13,12 +13,22 @@ resource "hcloud_network" "k3s" {
ip_range = var.network_ipv4_range
}
+# This is the default subnet to be used by the load balancer.
+resource "hcloud_network_subnet" "default" {
+ network_id = hcloud_network.k3s.id
+ type = "cloud"
+ network_zone = var.network_region
+ ip_range = "10.0.0.0/16"
+}
+
resource "hcloud_network_subnet" "subnet" {
for_each = var.network_ipv4_subnets
network_id = hcloud_network.k3s.id
type = "cloud"
network_zone = var.network_region
ip_range = each.value
+
+ depends_on = [hcloud_network_subnet.default]
}
resource "hcloud_firewall" "k3s" {
@@ -46,7 +56,8 @@ resource "hcloud_placement_group" "k3s" {
}
data "hcloud_load_balancer" "traefik" {
- name = "traefik"
+ count = local.is_single_node_cluster ? 0 : 1
+ name = "traefik"
depends_on = [null_resource.kustomization]
}
diff --git a/modules/host/locals.tf b/modules/host/locals.tf
index 08306d6..1fcef4d 100644
--- a/modules/host/locals.tf
+++ b/modules/host/locals.tf
@@ -10,65 +10,4 @@ locals {
ssh_identity_file = var.private_key == null ? var.public_key : var.private_key
# shared flags for ssh to ignore host keys, to use our ssh identity file for all connections during provisioning.
ssh_args = "-o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -i ${local.ssh_identity_file}"
-
- microOS_install_commands = [
- "set -ex",
- "apt-get update",
- "apt-get install -y aria2",
- "aria2c --follow-metalink=mem https://download.opensuse.org/tumbleweed/appliances/openSUSE-MicroOS.x86_64-kvm-and-xen.qcow2.meta4",
- "qemu-img convert -p -f qcow2 -O host_device $(ls -a | grep -ie '^opensuse.*microos.*qcow2$') /dev/sda",
- "sgdisk -e /dev/sda",
- "parted -s /dev/sda resizepart 4 99%",
- "parted -s /dev/sda mkpart primary ext2 99% 100%",
- "partprobe /dev/sda && udevadm settle && fdisk -l /dev/sda",
- "mount /dev/sda4 /mnt/ && btrfs filesystem resize max /mnt && umount /mnt",
- "mke2fs -L ignition /dev/sda5",
- "mount /dev/sda5 /mnt",
- "mkdir /mnt/ignition",
- "cp /root/config.ign /mnt/ignition/config.ign",
- "mkdir /mnt/combustion",
- "cp /root/script /mnt/combustion/script",
- "umount /mnt"
- ]
-
- ignition_config = jsonencode({
- ignition = {
- version = "3.0.0"
- }
- passwd = {
- users = [{
- name = "root"
- sshAuthorizedKeys = concat([local.ssh_public_key], var.additional_public_keys)
- }]
- }
- storage = {
- files = [
- {
- path = "/etc/sysconfig/network/ifcfg-eth1"
- mode = 420
- overwrite = true
- contents = { "source" = "data:,BOOTPROTO%3D%27dhcp%27%0ASTARTMODE%3D%27auto%27" }
- },
- {
- path = "/etc/ssh/sshd_config.d/kube-hetzner.conf"
- mode = 420
- overwrite = true
- contents = { "source" = "data:,PasswordAuthentication%20no%0AX11Forwarding%20no%0AMaxAuthTries%202%0AAllowTcpForwarding%20no%0AAllowAgentForwarding%20no%0AAuthorizedKeysFile%20.ssh%2Fauthorized_keys" }
- }
- ]
- }
- })
-
- combustion_script = < /dev/null
do
echo "Waiting for MicroOS to reboot and become available..."
@@ -51,16 +52,25 @@ resource "hcloud_server" "server" {
EOT
}
+ # Install k3s-selinux (compatible version)
provisioner "remote-exec" {
inline = [
- # Disable automatic reboot (after transactional updates), and configure the reboot method as kured
"set -ex",
- "rebootmgrctl set-strategy off",
- "echo 'REBOOT_METHOD=kured' > /etc/transactional-update.conf",
- # set the hostname
- "hostnamectl set-hostname ${self.name}"
+ "transactional-update pkg install -y k3s-selinux"
]
}
+
+ # Issue a reboot command and wait for MicroOS to reboot and be ready
+ provisioner "local-exec" {
+ command = <<-EOT
+ ssh ${local.ssh_args} root@${self.ipv4_address} '(sleep 2; reboot)&'; sleep 3
+ until ssh ${local.ssh_args} -o ConnectTimeout=2 root@${self.ipv4_address} true 2> /dev/null
+ do
+ echo "Waiting for MicroOS to reboot and become available..."
+ sleep 3
+ done
+ EOT
+ }
}
resource "hcloud_server_network" "server" {
@@ -68,3 +78,21 @@ resource "hcloud_server_network" "server" {
server_id = hcloud_server.server.id
subnet_id = var.ipv4_subnet_id
}
+
+data "template_cloudinit_config" "config" {
+ gzip = true
+ base64_encode = true
+
+ # Main cloud-config configuration file.
+ part {
+ filename = "init.cfg"
+ content_type = "text/cloud-config"
+ content = templatefile(
+ "${path.module}/templates/userdata.yaml.tpl",
+ {
+ hostname = var.name
+ sshAuthorizedKeys = concat([local.ssh_public_key], var.additional_public_keys)
+ }
+ )
+ }
+}
diff --git a/modules/host/templates/userdata.yaml.tpl b/modules/host/templates/userdata.yaml.tpl
new file mode 100644
index 0000000..51ff107
--- /dev/null
+++ b/modules/host/templates/userdata.yaml.tpl
@@ -0,0 +1,58 @@
+#cloud-config
+
+write_files:
+
+# Configure the private network interface
+- content: |
+ BOOTPROTO='dhcp'
+ STARTMODE='auto'
+ path: /etc/sysconfig/network/ifcfg-eth1
+
+# Disable ssh password authentication
+- content: |
+ PasswordAuthentication no
+ X11Forwarding no
+ MaxAuthTries 2
+ AllowTcpForwarding no
+ AllowAgentForwarding no
+ AuthorizedKeysFile .ssh/authorized_keys
+ path: /etc/ssh/sshd_config.d/kube-hetzner.conf
+
+# Set reboot method as "kured"
+- content: |
+ REBOOT_METHOD=kured
+ path: /etc/transactional-update.conf
+
+# Add ssh authorized keys
+ssh_authorized_keys:
+%{ for key in sshAuthorizedKeys ~}
+ - ${key}
+%{ endfor ~}
+
+# Resize /var, not /, as that's the last partition in MicroOS image.
+growpart:
+ devices: ["/var"]
+
+# Make sure the hostname is set correctly
+hostname: ${hostname}
+preserve_hostname: true
+
+runcmd:
+
+# As above, make sure the hostname is not reset
+- [sed, '-i', 's/NETCONFIG_NIS_SETDOMAINNAME="yes"/NETCONFIG_NIS_SETDOMAINNAME="no"/g', /etc/sysconfig/network/config]
+- [sed, '-i', 's/DHCLIENT_SET_HOSTNAME="yes"/DHCLIENT_SET_HOSTNAME="no"/g', /etc/sysconfig/network/dhcp]
+
+# We set Cloudflare DNS servers, followed by Google as a backup
+- [sed, '-i', 's/NETCONFIG_DNS_STATIC_SERVERS=""/NETCONFIG_DNS_STATIC_SERVERS="1.1.1.1 1.0.0.1 8.8.8.8"/g', /etc/sysconfig/network/config]
+
+# Bounds the amount of logs that can survive on the system
+- [sed, '-i', 's/#SystemMaxUse=/SystemMaxUse=3G/g', /etc/systemd/journald.conf]
+- [sed, '-i', 's/#MaxRetentionSec=/MaxRetentionSec=1week/g', /etc/systemd/journald.conf]
+
+# Reduces the snapper snapshot limits: NUMBER_LIMIT from 2-10 to 4, and NUMBER_LIMIT_IMPORTANT from 4-10 to 3
+- [sed, '-i', 's/NUMBER_LIMIT="2-10"/NUMBER_LIMIT="4"/g', /etc/snapper/configs/root]
+- [sed, '-i', 's/NUMBER_LIMIT_IMPORTANT="4-10"/NUMBER_LIMIT_IMPORTANT="3"/g', /etc/snapper/configs/root]
+
+# Disables unneeded services
+- [systemctl, disable, '--now', 'rebootmgr.service']
diff --git a/modules/host/versions.tf b/modules/host/versions.tf
index fe79022..7c8da9d 100644
--- a/modules/host/versions.tf
+++ b/modules/host/versions.tf
@@ -12,5 +12,9 @@ terraform {
source = "tenstad/remote"
version = "~> 0.0.23"
}
+ template = {
+ source = "hashicorp/template"
+ version = "~> 2.2.0"
+ }
}
}
diff --git a/output.tf b/output.tf
index 762290d..4d2033e 100644
--- a/output.tf
+++ b/output.tf
@@ -12,7 +12,7 @@ output "agents_public_ipv4" {
output "load_balancer_public_ipv4" {
description = "The public IPv4 address of the Hetzner load balancer"
- value = data.hcloud_load_balancer.traefik.ipv4
+ value = local.is_single_node_cluster ? module.control_planes[0].ipv4_address : data.hcloud_load_balancer.traefik[0].ipv4
}
output "kubeconfig_file" {
diff --git a/terraform.tfvars.example b/terraform.tfvars.example
index 8c9c842..b68f576 100644
--- a/terraform.tfvars.example
+++ b/terraform.tfvars.example
@@ -1,7 +1,15 @@
-# You need to replace these
+# Only the first values starting with a * are obligatory, the rest can remain with their default values, or you
+# could adapt them to your needs.
+#
+# Note that some values, notably "location" and "public_key" have no effect after the initial cluster has been setup.
+# This is in order to keep terraform from re-provisioning all nodes at once, which would lose data. If you want to update
+# those, you should instead change the value here and then manually re-provision each node one-by-one. Grep for "lifecycle".
+
+# * Your Hetzner project API token
hcloud_token = "xxxxxxxxxxxxxxxxxxYYYYYYYYYYYYYYYYYYYzzzzzzzzzzzzzzzzzzzzz"
+# * Your public key
public_key = "/home/username/.ssh/id_ed25519.pub"
-# Must be "private_key = null" when you want to use ssh-agent, for a Yubikey like device auth or an SSH key-pair with passphrase
+# * Your private key. Must be "private_key = null" when you want to use ssh-agent, for Yubikey-like device auth or an SSH key pair with a passphrase
private_key = "/home/username/.ssh/id_ed25519"
# These can be customized, or left with the default values
@@ -10,9 +18,6 @@ private_key = "/home/username/.ssh/id_ed25519"
location = "fsn1" # change to `ash` for us-east Ashburn, Virginia location
network_region = "eu-central" # change to `us-east` if location is ash
-# It's best to leave the network range as is, unless you know what you are doing. The default is "10.0.0.0/8".
-# network_ipv4_range = "10.0.0.0/8"
-
# You can have up to as many subnets as you want (preferably if the form of 10.X.0.0/16),
# their primary use is to logically separate the nodes.
# The control_plane network is mandatory.
@@ -25,13 +30,14 @@ network_ipv4_subnets = {
# At least 3 server nodes is recommended for HA, otherwise you need to turn off automatic upgrade (see ReadMe).
# As per rancher docs, it must be always an odd number, never even! See https://rancher.com/docs/k3s/latest/en/installation/ha-embedded/
# For instance, 1 is ok (non-HA), 2 not ok, 3 is ok (becomes HA).
-control_plane_count = 3
+control_plane_count = 3
# The type of control plane nodes, see https://www.hetzner.com/cloud, the minimum instance supported is cpx11 (just a few cents more than cx11)
control_plane_server_type = "cpx11"
# As for the agent nodepools, below is just an example, if you do not want nodepools, just use one,
# and change the name to what you want, it need not be "agent-big" or "agent-small", also give them the subnet prefer.
+# For single node clusters set this equal to {}
agent_nodepools = {
agent-big = {
server_type = "cpx21",
@@ -48,6 +54,11 @@ agent_nodepools = {
# That will depend on how much load you want it to handle, see https://www.hetzner.com/cloud/load-balancer
load_balancer_type = "lb11"
+### The following values are fully optional
+
+# It's best to leave the network range as is, unless you know what you are doing. The default is "10.0.0.0/8".
+# network_ipv4_range = "10.0.0.0/8"
+
# If you want to use a specific Hetzner CCM and CSI version, set them below, otherwise leave as is for the latest versions
# hetzner_ccm_version = ""
# hetzner_csi_version = ""
@@ -57,6 +68,7 @@ load_balancer_type = "lb11"
# traefik_acme_email = "mail@example.com"
# If you want to allow non-control-plane workloads to run on the control-plane nodes set "true" below. The default is "false".
+# Also good for single node clusters.
# allow_scheduling_on_control_plane = true
# If you want to disable automatic upgrade of k3s, you can set this to false, default is "true".
diff --git a/variables.tf b/variables.tf
index eb30850..b25f7e5 100644
--- a/variables.tf
+++ b/variables.tf
@@ -65,6 +65,7 @@ variable "load_balancer_disable_ipv6" {
variable "agent_nodepools" {
description = "Number of agent nodes."
type = map(any)
+ default = {}
}
variable "hetzner_ccm_version" {