Using Kubespray to deploy a k8s cluster to Terraform-provisioned AWS EC2 instances

We did a cluster installation previously using kubeadm and a lot of manual work. I remember how much of a hassle that was, so let's do one with a bit less "hands on" work and with more suffering.

The plan, perhaps:
  • Provision the EC2 instances with Terraform
  • Use shell scripts to run Kubespray
  • Do some minor config changes
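Before the Terraform file itself, the provisioning step is the usual dance. A minimal sketch, assuming the .tf file below lives in its own subdirectory (the directory name here is made up; the post only shows a cd .. back out of it later):

cd terraform       # assumption: wherever the Terraform file below is kept
terraform init
terraform apply    # prompts for access_key / secret_key, since neither has a default
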
###  **************STANDARD AWS START******************  ###
terraform {
  required_providers {
    aws = {
      source  = "hashicorp/aws"
      version = "3.41.0"
    }
  }
}

variable "access_key" {
  description = "The access_key of our user defined in IAM"
  type        = string
  sensitive   = true
}

variable "secret_key" {
  description = "The secret_key of our user defined in IAM"
  type        = string
  sensitive   = true
}

provider "aws" {
  region     = "eu-central-1"
  access_key = var.access_key
  secret_key = var.secret_key
}

### **************STANDARD AWS END****************** ###
variable "datasource_ami_owner" {
default = "amazon"
type = string
}
variable "datasource_ami_name_filter" {
default = "amzn2-ami-hvm*"
type = string
}
data "aws_ami" "base_image" {
most_recent = true
owners = ["${var.datasource_ami_owner}"]
filter {
name = "name"
values = ["${var.datasource_ami_name_filter}"]
}
}
resource "aws_security_group" "sec_group_block" {
name = "custom_sec_group"
ingress {
from_port = 0
to_port = 0
protocol = "-1"
cidr_blocks = ["0.0.0.0/0"]
ipv6_cidr_blocks = ["::/0"]
}
egress {
from_port = 0
to_port = 0
protocol = "-1"
cidr_blocks = ["0.0.0.0/0"]
ipv6_cidr_blocks = ["::/0"]
}
tags = {
Name = "custom_sec_group"
}
}
resource "aws_instance" "test_cluster_m1" {
ami = data.aws_ami.base_image.id
instance_type = "t2.small"
key_name = "terraformec2"
vpc_security_group_ids = [aws_security_group.sec_group_block.id]
tags = {
Name = "test_cluster_m1"
}
}
resource "aws_instance" "test_cluster_w1" {
ami = data.aws_ami.base_image.id
instance_type = "t2.small"
key_name = "terraformec2"
vpc_security_group_ids = [aws_security_group.sec_group_block.id]
tags = {
Name = "test_cluster_w1"
}
}
resource "aws_instance" "test_cluster_w2" {
ami = data.aws_ami.base_image.id
instance_type = "t2.small"
key_name = "terraformec2"
vpc_security_group_ids = [aws_security_group.sec_group_block.id]
tags = {
Name = "test_cluster_w2"
}
}
## Elastic IPs for the nodes
resource "aws_eip" "eip_m1" {
  vpc = true
}

resource "aws_eip" "eip_w1" {
  vpc = true
}

resource "aws_eip" "eip_w2" {
  vpc = true
}

resource "aws_eip_association" "eip_assoc_m1" {
  instance_id   = aws_instance.test_cluster_m1.id
  allocation_id = aws_eip.eip_m1.id
}

resource "aws_eip_association" "eip_assoc_w1" {
  instance_id   = aws_instance.test_cluster_w1.id
  allocation_id = aws_eip.eip_w1.id
}

resource "aws_eip_association" "eip_assoc_w2" {
  instance_id   = aws_instance.test_cluster_w2.id
  allocation_id = aws_eip.eip_w2.id
}

###########
output "instance_test_cluster_w1_ip" {
  value = aws_instance.test_cluster_w1.public_ip
}

output "instance_test_cluster_w2_ip" {
  value = aws_instance.test_cluster_w2.public_ip
}

output "instance_test_cluster_m1_ip" {
  value = aws_instance.test_cluster_m1.public_ip
}

echo " "
echo "Run kubespray setup"
echo "**************************************"
m1_ip="$(terraform output instance_test_cluster_m1_ip)"
w1_ip="$(terraform output instance_test_cluster_w1_ip)"
w2_ip="$(terraform output instance_test_cluster_w2_ip)"
cd .. && echo "moving to $(pwd)"
sh ./spray_dem_kubes.sh "${m1_ip}" \
"${w1_ip}" \
"${w2_ip}"
vi inventory/${cluster_name}/group_vars/all/all.yml \
&& vi inventory/${cluster_name}/group_vars/k8s_cluster/k8s-cluster.yml \
&& vi inventory/${cluster_name}/inventory.ini \
&& vi inventory/${cluster_name}/hosts.yaml
ansible-playbook -i inventory/${cluster_name}/hosts.yaml  --private-key=terraformec2.pem --become --become-user=root --user=ec2-user cluster.yml
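
The post never shows spray_dem_kubes.sh itself, so here is a hypothetical sketch of what it probably does: install Kubespray's requirements, copy the sample inventory, and feed the three IPs to Kubespray's inventory builder. Everything below is an assumption except the script name and the argument order, which come from the wrapper above.

# spray_dem_kubes.sh -- hypothetical sketch, not the author's actual script
# usage: sh ./spray_dem_kubes.sh <master_ip> <worker1_ip> <worker2_ip>
# assumes it sits in (and is run from) the root of a kubespray checkout
set -e
cluster_name="test_cluster"          # assumed name; the wrapper's ${cluster_name} has to match it
pip3 install -r requirements.txt     # kubespray pins its ansible/jinja versions here
cp -rfp inventory/sample "inventory/${cluster_name}"
# kubespray's inventory builder turns a list of node IPs into hosts.yaml
CONFIG_FILE="inventory/${cluster_name}/hosts.yaml" \
  python3 contrib/inventory_builder/inventory.py "$1" "$2" "$3"

The inventory builder is also what produces the node1/node2/node3 naming that shows up in the journal output further down.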

Problems

etcd may occasionally cause problems if the nodes have different public and private IPs. Hop on in to the master node and check the error. We can do so with:

systemctl status etcd
# and then, if needed
journalctl -xe
Jun 06 13:37:41 node1 etcd[24785]: 2021-06-06 13:37:41.712220 C | etcdmain: --initial-advertise-peer-urls has https://18.195.248.104:2380 but missing from --initial-cluster=etcd1=https://172.31.39.183:2380,
Jun 06 13:37:41 node1 dockerd[7744]: time="2021-06-06T13:37:41.735905556Z" level=info msg="ignoring event" container=37eb7499bc5c1a26339ced02ecf9aed01ef4084f5b8b6b625e74ab856524c746 module=libcontainerd nam
Jun 06 13:37:41 node1 containerd[6640]: time="2021-06-06T13:37:41.737190264Z" level=info msg="shim disconnected" id=37eb7499bc5c1a26339ced02ecf9aed01ef4084f5b8b6b625e74ab856524c746
Jun 06 13:37:41 node1 containerd[6640]: time="2021-06-06T13:37:41.737541582Z" level=error msg="copy shim log" error="read /proc/self/fd/17: file already closed"
systemctl restart etcd
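
What the log is actually complaining about: etcd advertises its peer URL on the public IP (18.195.248.104) while the --initial-cluster list was rendered with the private IP (172.31.39.183), so the member can't find itself. With Kubespray the usual way out on AWS is to pin the addresses per host in hosts.yaml and re-run the playbook. A minimal sketch, reusing the two addresses from the log and assuming the standard Kubespray ip / access_ip inventory variables (check the exact keys against your Kubespray version):

all:
  hosts:
    node1:
      ansible_host: 18.195.248.104   # public IP, used by ansible to SSH in
      ip: 172.31.39.183              # private IP etcd binds to and advertises
      access_ip: 172.31.39.183       # address the other nodes use to reach node1
    # node2 / node3 follow the same pattern with their own addresses

If kubectl is supposed to reach the API server on the public address afterwards, that IP also needs to be listed in supplementary_addresses_in_ssl_keys in group_vars/k8s_cluster/k8s-cluster.yml, which is one of the files the vi chain above already opens.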

Just a software-everything, fighting battles mostly against myself and gaining small victories lately.