How To Install Graylog in a Kubernetes Cluster Using Helm Charts

The following walkthrough assumes that a Kubernetes cluster (version 1.20.10 at the time of writing) has been set up with the MetalLB load balancer. It should also work with Traefik or other load balancers.
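
A quick sanity check of those prerequisites (a sketch; the metallb-system namespace assumes a default MetalLB installation):

# Verify the cluster version and that MetalLB pods are running
kubectl version --short
kubectl get pods -n metallb-system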

# Create a separate namespace for this project
kubectl create namespace graylog

# Change into the graylog namespace
kubectl config set-context --current --namespace=graylog
kubectl config view --minify | grep namespace: # Validate it

# Optional: delete previous test instances of graylog that have been deployed via Helm
helm delete "graylog" --namespace graylog
kubectl delete pvc --namespace graylog --all

# How to switch execution context back to the 'default' namespace
kubectl config set-context --current --namespace=default

# Optional: install MongoDB prior to Graylog (assumes the bitnami repo has been added: helm repo add bitnami https://charts.bitnami.com/bitnami)
helm install "mongodb" bitnami/mongodb --namespace "graylog" \
  --set persistence.size=100Gi
# Sample output:
NAME: mongodb
LAST DEPLOYED: Thu Aug 29 00:07:36 2021
NAMESPACE: graylog
STATUS: deployed
REVISION: 1
TEST SUITE: None
NOTES:
** Please be patient while the chart is being deployed **
MongoDB® can be accessed on the following DNS name(s) and ports from within your cluster:
    mongodb.graylog.svc.cluster.local
To get the root password run:
    export MONGODB_ROOT_PASSWORD=$(kubectl get secret --namespace graylog mongodb -o jsonpath="{.data.mongodb-root-password}" | base64 --decode)
To connect to your database, create a MongoDB® client container:
    kubectl run --namespace graylog mongodb-client --rm --tty -i --restart='Never' --env="MONGODB_ROOT_PASSWORD=$MONGODB_ROOT_PASSWORD" --image docker.io/bitnami/mongodb:4.4.8-debian-10-r9 --command -- bash
Then, run the following command:
    mongo admin --host "mongodb" --authenticationDatabase admin -u root -p $MONGODB_ROOT_PASSWORD
To connect to your database from outside the cluster execute the following commands:
    kubectl port-forward --namespace graylog svc/mongodb 27017:27017 &
    mongo --host 127.0.0.1 --authenticationDatabase admin -p $MONGODB_ROOT_PASSWORD

# REQUIRED: pre-install Elasticsearch 7.10, the highest version supported by Graylog 4.1.3
# Source: https://artifacthub.io/packages/helm/elastic/elasticsearch/7.10.2
helm repo add elastic https://helm.elastic.co
helm repo update
helm install elasticsearch elastic/elasticsearch --namespace "graylog" \
  --set imageTag=7.10.2 \
  --set data.persistence.size=100Gi
# Sample output:
NAME: elasticsearch
LAST DEPLOYED: Sun Aug 29 04:35:30 2021
NAMESPACE: graylog
STATUS: deployed
REVISION: 1
NOTES:
1. Watch all cluster members come up.
  $ kubectl get pods --namespace=graylog -l app=elasticsearch-master -w
2. Test cluster health using Helm test.
  $ helm test elasticsearch

# Installation of Graylog with MongoDB bundled, while integrating with the pre-deployed Elasticsearch instance
#
# This install command assumes that the preferred protocol for transporting logs is TCP.
# The current Helm chart also does not allow mixing TCP and UDP behind a single LoadBalancer service; conveniently,
# that matches the business requirement that security data be recorded over a reliable TCP transport.
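# If the 'kongz' chart repo has not been added to Helm yet, register it first. The repo URL below is taken
# from the chart's GitHub page (https://github.com/KongZ/charts); verify it matches your environment.
helm repo add kongz https://charts.kongz.com
helm repo update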
helm install graylog kongz/graylog --namespace "graylog" \
  --set graylog.image.repository="graylog/graylog:4.1.3-1" \
  --set graylog.persistence.size=200Gi \
  --set graylog.service.type=LoadBalancer \
  --set graylog.service.port=80 \
  --set graylog.service.loadBalancerIP=10.10.100.88 \
  --set graylog.service.externalTrafficPolicy=Local \
  --set graylog.service.ports[0].name=gelf \
  --set graylog.service.ports[0].port=12201 \
  --set graylog.service.ports[1].name=syslog \
  --set graylog.service.ports[1].port=514 \
  --set graylog.rootPassword="SOMEPASSWORD" \
  --set tags.install-elasticsearch=false \
  --set graylog.elasticsearch.version=7 \
  --set graylog.elasticsearch.hosts=http://elasticsearch-master.graylog.svc.cluster.local:9200

# Optional: append these flags to the helm install command above if the mongodb component has been installed separately (see the optional step earlier)
  --set tags.install-mongodb=false \
  --set graylog.mongodb.uri=mongodb://mongodb-mongodb-replicaset-0.mongodb-mongodb-replicaset.graylog.svc.cluster.local:27017/graylog?replicaSet=rs0 \

# Moreover, the graylog chart version 1.8.4 doesn't seem to set externalTrafficPolicy as expected.
# Set externalTrafficPolicy = local to preserve source client IPs
kubectl patch svc graylog-web -n graylog -p '{"spec":{"externalTrafficPolicy":"Local"}}'

# Sometimes, the static EXTERNAL-IP would be assigned to graylog-master, while the graylog-web EXTERNAL-IP
# would remain in <pending> status indefinitely.
# Workaround: set services to share a single external IP
kubectl patch svc graylog-web -p '{"metadata":{"annotations":{"metallb.universe.tf/allow-shared-ip":"graylog"}}}'
kubectl patch svc graylog-master -p '{"metadata":{"annotations":{"metallb.universe.tf/allow-shared-ip":"graylog"}}}'
kubectl patch svc graylog-master -n graylog -p '{"spec": {"type": "LoadBalancer", "externalIPs":["10.10.100.88"]}}'
kubectl patch svc graylog-web -n graylog -p '{"spec": {"type": "LoadBalancer", "externalIPs":["10.10.100.88"]}}'

# Test sending logs to server via TCP (note: Bash variable names cannot contain hyphens)
graylogServer=graylog.kimconnect.com
echo -e '{"version": "1.1","host":"kimconnect.com","short_message":"Short message","full_message":"This is a\n\nlong message","level":9000,"_user_id":9000,"_ip_address":"1.1.1.1","_location":"LAX"}\0' | nc -w 1 $graylogServer 514

# Test via UDP
graylogServer=graylog.kimconnect.com
echo -e '{"version": "1.1","host":"kimconnect.com","short_message":"Short message","full_message":"This is a\n\nlong message","level":9000,"_user_id":9000,"_ip_address":"1.1.1.1","_location":"LAX"}\0' | nc -u -w 1 $graylogServer 514

# Optional: graylog Ingress
cat > graylog-ingress.yaml <<EOF
kind: Ingress
apiVersion: networking.k8s.io/v1
metadata:
  name: graylog-ingress
  namespace: graylog
  annotations:
    kubernetes.io/ingress.class: "nginx"
    # set these for SSL
    # ingress.kubernetes.io/rewrite-target: /
    # acme http01
    # acme.cert-manager.io/http01-edit-in-place: "true"
    # acme.cert-manager.io/http01-ingress-class: "true"
    # kubernetes.io/tls-acme: "true"  
spec:
  rules:
  - host: graylog.kimconnect.com
    http:
      paths:
      - path: /
        pathType: Prefix
        backend:
          service:
            name: graylog-web
            port:
              number: 80
      # Note: an HTTP ingress only proxies web traffic; the GELF (12201) and syslog (514) inputs
      # should remain exposed via the graylog-web LoadBalancer service rather than this ingress.
EOF
kubectl apply -f graylog-ingress.yaml

Troubleshooting Notes:

# Sample commands to patch graylog service components
kubectl patch svc graylog-web -p '{"spec":{"type":"LoadBalancer"}}' # Convert ClusterIP to LoadBalancer to gain ingress
kubectl patch svc graylog-web -p '{"spec":{"externalIPs":["10.10.100.88"]}}' # Add externalIPs
kubectl patch svc graylog-master -n graylog -p '{"spec":{"loadBalancerIP":""}}' # Remove loadBalancer IPs
kubectl patch svc graylog-master -n graylog -p '{"status":{"loadBalancer":{"ingress":[]}}}' # Purge ingress IPs
kubectl patch svc graylog-web -n graylog -p '{"status":{"loadBalancer":{"ingress":[{"ip":"10.10.100.88"}]}}}'
kubectl patch svc graylog-web -n graylog -p '{"status":{"loadBalancer":{"ingress":[]}}}'

# Alternative solution: mixing UDP with TCP
# The current chart version only allows this when service Type = ClusterIP (default)
helm upgrade graylog kongz/graylog --namespace "graylog" \
  --set graylog.image.repository="graylog/graylog:4.1.3-1" \
  --set graylog.persistence.size=200Gi \
  --set graylog.service.externalTrafficPolicy=Local \
  --set graylog.service.port=80 \
  --set graylog.service.ports[0].name=gelf \
  --set graylog.service.ports[0].port=12201 \
  --set graylog.service.ports[0].protocol=UDP \
  --set graylog.service.ports[1].name=syslog \
  --set graylog.service.ports[1].port=514 \
  --set graylog.service.ports[1].protocol=UDP \
  --set graylog.rootPassword="SOMEPASSWORD" \
  --set tags.install-elasticsearch=false \
  --set graylog.elasticsearch.version=7 \
  --set graylog.elasticsearch.hosts=http://elasticsearch-master.graylog.svc.cluster.local:9200

# Error message occurs when combining TCP with UDP; hence, service type ClusterIP must be used
Error: UPGRADE FAILED: cannot patch "graylog-web" with kind Service: Service "graylog-web" is invalid: spec.ports: Invalid value: []core.ServicePort{core.ServicePort{Name:"graylog", Protocol:"TCP", AppProtocol:(*string)(nil), Port:80, TargetPort:intstr.IntOrString{Type:0, IntVal:9000, StrVal:""}, NodePort:32518}, core.ServicePort{Name:"gelf", Protocol:"UDP", AppProtocol:(*string)(nil), Port:12201, TargetPort:intstr.IntOrString{Type:0, IntVal:12201, StrVal:""}, NodePort:0}, core.ServicePort{Name:"gelf2", Protocol:"TCP", AppProtocol:(*string)(nil), Port:12222, TargetPort:intstr.IntOrString{Type:0, IntVal:12222, StrVal:""}, NodePort:31523}, core.ServicePort{Name:"syslog", Protocol:"TCP", AppProtocol:(*string)(nil), Port:514, TargetPort:intstr.IntOrString{Type:0, IntVal:514, StrVal:""}, NodePort:31626}}: may not contain more than 1 protocol when type is 'LoadBalancer'

# Error when a string value is passed where an array type is expected
Error: UPGRADE FAILED: error validating "": error validating data: ValidationError(Service.spec.externalIPs): invalid type for io.k8s.api.core.v1.ServiceSpec.externalIPs: got "string", expected "array"
# Solution:
--set "array={a,b,c}" OR --set service[0].port=80

# Graylog would not start and this was the error:
com.github.joschi.jadconfig.ValidationException: Parent directory /usr/share/graylog/data/journal for Node ID file at /usr/share/graylog/data/journal/node-id is not writable

# Workaround
graylogData=/mnt/k8s/graylog-journal-graylog-0-pvc-04dd9c7f-a771-4041-b549-5b4664de7249/
chown -fR 1100:1100 $graylogData
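
# Since the PVC-backed directory name varies per deployment, it can be resolved rather than guessed.
# A sketch (the /mnt/k8s prefix is this environment's NFS export path and is an assumption):
volumeName=$(kubectl get pvc -n graylog journal-graylog-0 -o jsonpath='{.spec.volumeName}')
echo /mnt/k8s/graylog-journal-graylog-0-$volumeName # the nfs-subdir provisioner typically names folders <namespace>-<pvcName>-<pvName>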

NAME: graylog
LAST DEPLOYED: Thu Aug 29 03:26:00 2021
NAMESPACE: graylog
STATUS: deployed
REVISION: 1
TEST SUITE: None
NOTES:
To connect to your Graylog server:
1. Get the application URL by running these commands:
  Graylog Web Interface uses JavaScript to get detail of each node. The client JavaScript cannot communicate to node when service type is `ClusterIP`.
  If you want to access Graylog Web Interface, you need to enable Ingress.
    NOTE: Port Forward does not work with web interface.
2. The Graylog root users
  echo "User: admin"
  echo "Password: $(kubectl get secret --namespace graylog graylog -o "jsonpath={.data['graylog-password-secret']}" | base64 --decode)"
To send logs to graylog:
  NOTE: If `graylog.input` is empty, you cannot send logs from other services. Please make sure the value is not empty.
        See https://github.com/KongZ/charts/tree/main/charts/graylog#input for detail

k describe pod graylog-0
Events:
  Type     Reason            Age                   From               Message
  ----     ------            ----                  ----               -------
  Warning  FailedScheduling  11m                   default-scheduler  0/4 nodes are available: 4 pod has unbound immediate PersistentVolumeClaims.
  Warning  FailedScheduling  11m                   default-scheduler  0/4 nodes are available: 4 pod has unbound immediate PersistentVolumeClaims.
  Normal   Scheduled         11m                   default-scheduler  Successfully assigned graylog/graylog-0 to linux03
  Normal   Pulled            11m                   kubelet            Container image "alpine" already present on machine
  Normal   Created           11m                   kubelet            Created container setup
  Normal   Started           10m                   kubelet            Started container setup
  Normal   Started           4m7s (x5 over 10m)    kubelet            Started container graylog-server
  Warning  Unhealthy         3m4s (x4 over 9m14s)  kubelet            Readiness probe failed: Get "http://172.16.90.197:9000/api/system/lbstatus": dial tcp 172.16.90.197:9000: connect: connection refused
  Normal   Pulled            2m29s (x6 over 10m)   kubelet            Container image "graylog/graylog:4.1.3-1" already present on machine
  Normal   Created           2m19s (x6 over 10m)   kubelet            Created container graylog-server
  Warning  BackOff           83s (x3 over 2m54s)   kubelet            Back-off restarting failed container

Readiness probe failed: Get "http://172.16.90.197:9000/api/system/lbstatus": dial tcp 172.16.90.197:9000: connect: connection refused

# Set external IP
# This only works on LoadBalancer, not ClusterIP
# kubectl patch svc graylog-web -p '{"spec":{"externalIPs":["10.10.100.88"]}}'
# kubectl patch svc graylog-master -p '{"spec":{"externalIPs":[]}}'

kubectl patch service graylog-web --type='json' -p='[{"op": "add", "path": "/metadata/annotations/kubernetes.io~1ingress.class", "value":"nginx"}]'

# Set annotation to allow shared IPs between 2 different services
kubectl annotate service graylog-web metallb.universe.tf/allow-shared-ip=graylog
kubectl annotate service graylog-master metallb.universe.tf/allow-shared-ip=graylog

# Reference: service metadata annotations for MetalLB IP sharing
metadata:
  name: $serviceName-tcp
  annotations:
    metallb.universe.tf/address-pool: default
    metallb.universe.tf/allow-shared-ip: psk

# Ingress
appName=graylog
domain=graylog.kimconnect.com
deploymentName=graylog-web
containerPort=9000
cat <<EOF> $appName-ingress.yaml
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: $appName-ingress
  annotations:
    kubernetes.io/ingress.class: "nginx"
    # ingress.kubernetes.io/rewrite-target: /
    # acme http01
    # acme.cert-manager.io/http01-edit-in-place: "true"
    # acme.cert-manager.io/http01-ingress-class: "true"
    # kubernetes.io/tls-acme: "true"
spec:
  rules:
  - host: $domain
    http:
      paths:
      - backend:
          service:
            name: $deploymentName
            port:
              number: 9000
        path: /
        pathType: Prefix
EOF
kubectl apply -f $appName-ingress.yaml

# delete pvc's
namespace=graylog
kubectl delete pvc data-graylog-elasticsearch-data-0 -n $namespace
kubectl delete pvc data-graylog-elasticsearch-master-0 -n $namespace
kubectl delete pvc datadir-graylog-mongodb-0 -n $namespace
kubectl delete pvc journal-graylog-0 -n $namespace

# delete all pvc's in namespace the easier way
namespace=graylog
kubectl get pvc -n $namespace --no-headers | awk '{print $1}' | while read vol; do kubectl delete pvc/${vol} -n $namespace; done

2021-08-20 20:19:41,048 INFO    [cluster] - Exception in monitor thread while connecting to server mongodb-mongodb-replicaset-0.mongodb-mongodb-replicaset.graylog.svc.cluster.local:27017 - {}
com.mongodb.MongoSocketException: mongodb-mongodb-replicaset-0.mongodb-mongodb-replicaset.graylog.svc.cluster.local
        at com.mongodb.ServerAddress.getSocketAddresses(ServerAddress.java:211) ~[graylog.jar:?]
        at com.mongodb.internal.connection.SocketStream.initializeSocket(SocketStream.java:75) ~[graylog.jar:?]
        at com.mongodb.internal.connection.SocketStream.open(SocketStream.java:65) ~[graylog.jar:?]
        at com.mongodb.internal.connection.InternalStreamConnection.open(InternalStreamConnection.java:128) ~[graylog.jar:?]
        at com.mongodb.internal.connection.DefaultServerMonitor$ServerMonitorRunnable.run(DefaultServerMonitor.java:117) [graylog.jar:?]
        at java.lang.Thread.run(Thread.java:748) [?:1.8.0_302]
Caused by: java.net.UnknownHostException: mongodb-mongodb-replicaset-0.mongodb-mongodb-replicaset.graylog.svc.cluster.local
        at java.net.InetAddress.getAllByName0(InetAddress.java:1281) ~[?:1.8.0_302]
        at java.net.InetAddress.getAllByName(InetAddress.java:1193) ~[?:1.8.0_302]
        at java.net.InetAddress.getAllByName(InetAddress.java:1127) ~[?:1.8.0_302]
        at com.mongodb.ServerAddress.getSocketAddresses(ServerAddress.java:203) ~[graylog.jar:?]
        ... 5 more

2021-08-20 20:19:42,981 INFO    [cluster] - No server chosen by com.mongodb.client.internal.MongoClientDelegate$1@69419d59 from cluster description ClusterDescription{type=REPLICA_SET, connectionMode=MULTIPLE, serverDescriptions=[ServerDescription{address=mongodb-mongodb-replicaset-0.mongodb-mongodb-replicaset.graylog.svc.cluster.local:27017, type=UNKNOWN, state=CONNECTING, exception={com.mongodb.MongoSocketException: mongodb-mongodb-replicaset-0.mongodb-mongodb-replicaset.graylog.svc.cluster.local}, caused by {java.net.UnknownHostException: mongodb-mongodb-replicaset-0.mongodb-mongodb-replicaset.graylog.svc.cluster.local}}]}. Waiting for 30000 ms before timing out - {}
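
# The UnknownHostException above usually means the graylog.mongodb.uri hostname does not match any Service
# in the cluster. A quick sanity check (a sketch):
kubectl get svc,statefulsets -n graylog | grep -i mongo
# The URI hostname must resolve to one of the listed services (e.g. mongodb.graylog.svc.cluster.local for the Bitnami standalone chart)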

# Alternative version - that doesn't work
# helm repo add groundhog2k https://groundhog2k.github.io/helm-charts/
# helm install graylog groundhog2k/graylog --namespace "graylog" \
#   --set image.tag=4.1.3-1 \
#   --set settings.http.publishUri='http://127.0.0.1:9000/' \
#   --set service.type=LoadBalancer \
#   --set service.loadBalancerIP=192.168.100.88 \
#   --set elasticsearch.enabled=true \
#   --set mongodb.enabled=true

# helm upgrade graylog groundhog2k/graylog --namespace "graylog" \
#   --set image.tag=4.1.3-1 \
#   --set settings.http.publishUri=http://localhost:9000/ \
#   --set service.externalTrafficPolicy=Local \
#   --set service.type=LoadBalancer \
#   --set service.loadBalancerIP=192.168.100.88 \
#   --set elasticsearch.enabled=true \
#   --set mongodb.enabled=true \
#   --set storage.className=nfs-client \
#   --set storage.requestedSize=200Gi

# kim@linux01:~$ k logs graylog-0
# 2021-08-29 03:47:09,345 ERROR: org.graylog2.bootstrap.CmdLineTool - Invalid configuration
# com.github.joschi.jadconfig.ValidationException: Couldn't run validator method
#         at com.github.joschi.jadconfig.JadConfig.invokeValidatorMethods(JadConfig.java:227) ~[graylog.jar:?]
#         at com.github.joschi.jadconfig.JadConfig.process(JadConfig.java:100) ~[graylog.jar:?]
#         at org.graylog2.bootstrap.CmdLineTool.processConfiguration(CmdLineTool.java:420) [graylog.jar:?]
#         at org.graylog2.bootstrap.CmdLineTool.run(CmdLineTool.java:236) [graylog.jar:?]
#         at org.graylog2.bootstrap.Main.main(Main.java:45) [graylog.jar:?]
# Caused by: java.lang.reflect.InvocationTargetException
#         at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) ~[?:1.8.0_302]
#         at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) ~[?:1.8.0_302]
#         at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) ~[?:1.8.0_302]
#         at java.lang.reflect.Method.invoke(Method.java:498) ~[?:1.8.0_302]
#         at com.github.joschi.jadconfig.ReflectionUtils.invokeMethodsWithAnnotation(ReflectionUtils.java:53) ~[graylog.jar:?]
#         at com.github.joschi.jadconfig.JadConfig.invokeValidatorMethods(JadConfig.java:221) ~[graylog.jar:?]
#         ... 4 more
# Caused by: java.lang.IllegalArgumentException: URLDecoder: Illegal hex characters in escape (%) pattern - For input string: "!s"
#         at java.net.URLDecoder.decode(URLDecoder.java:194) ~[?:1.8.0_302]
#         at com.mongodb.ConnectionString.urldecode(ConnectionString.java:1035) ~[graylog.jar:?]
#         at com.mongodb.ConnectionString.urldecode(ConnectionString.java:1030) ~[graylog.jar:?]
#         at com.mongodb.ConnectionString.<init>(ConnectionString.java:336) ~[graylog.jar:?]
#         at com.mongodb.MongoClientURI.<init>(MongoClientURI.java:256) ~[graylog.jar:?]
#         at org.graylog2.configuration.MongoDbConfiguration.getMongoClientURI(MongoDbConfiguration.java:59) ~[graylog.jar:?]
#         at org.graylog2.configuration.MongoDbConfiguration.validate(MongoDbConfiguration.java:64) ~[graylog.jar:?]
#         at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) ~[?:1.8.0_302]
#         at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) ~[?:1.8.0_302]
#         at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) ~[?:1.8.0_302]
#         at java.lang.reflect.Method.invoke(Method.java:498) ~[?:1.8.0_302]
#         at com.github.joschi.jadconfig.ReflectionUtils.invokeMethodsWithAnnotation(ReflectionUtils.java:53) ~[graylog.jar:?]
#         at com.github.joschi.jadconfig.JadConfig.invokeValidatorMethods(JadConfig.java:221) ~[graylog.jar:?]

How To Configure Alternative Storage for a Kubernetes (K8s) Worker Node

The illustration below assumes that a local RAID mount is being added to a worker node because it lacks sufficient local storage to run kubelet and Docker containers.

# On K8s controller, remove worker node
kubectl drain linux03 --ignore-daemonsets
kubectl delete node linux03

# On the worker node uninstall docker & kubelet
sudo apt-get remove docker-ce docker-ce-cli containerd.io kubelet

# Check the health of its RAID mount /dev/md0
mdadm --detail /dev/md0

# Sample expected output:
           Version : 1.2
     Creation Time : Fri Aug 13 23:46:13 2021
        Raid Level : raid10
        Array Size : 1953257472 (1862.77 GiB 2000.14 GB)
     Used Dev Size : 976628736 (931.39 GiB 1000.07 GB)
      Raid Devices : 4
     Total Devices : 4
       Persistence : Superblock is persistent
     Intent Bitmap : Internal
       Update Time : Sat Aug 28 23:39:08 2021
             State : clean
    Active Devices : 4
   Working Devices : 4
    Failed Devices : 0
     Spare Devices : 0
            Layout : near=2
        Chunk Size : 512K
Consistency Policy : bitmap
              Name : linux03:0  (local to host linux03)
              UUID : 
            Events : 1750
    Number   Major   Minor   RaidDevice State
       0       8       97        0      active sync set-A   /dev/sdg1
       1       8       81        1      active sync set-B   /dev/sdf1
       2       8       17        2      active sync set-A   /dev/sdb1
       3       8        1        3      active sync set-B   /dev/sda1

# Check the logical mount
mount=/nfs-share
df -hT -P $mount

# Sample expected output:
root@linux03:/home/kimconnect# df -hT -P $mount
Filesystem     Type  Size  Used Avail Use% Mounted on
/dev/md0       ext4  1.8T   77M  1.7T   1% /nfs-share

# Prepare docker & kubelet redirected links
source1=/nfs-share/linux03/docker
source2=/nfs-share/linux03/kubelet
destinationdirectory=/var/lib/
sudo mkdir -p $source1
sudo mkdir -p $source2

# Optional: remove existing docker & kubelet directories
rm -rf /var/lib/kubelet
rm -rf /var/lib/docker

# Create links
sudo ln -sfn $source1 $destinationdirectory
sudo ln -sfn $source2 $destinationdirectory

# Verify
ls -la /var/lib

# Expected output:
root@linux03:/home/kim# ls /var/lib -la
total 180
drwxr-xr-x 45 root      root      4096 Aug 28 00:38 .
drwxr-xr-x 13 root      root      4096 Feb  1  2021 ..
drwxr-xr-x  4 root      root      4096 Feb  1  2021 AccountsService
drwxr-xr-x  5 root      root      4096 Aug 28 00:24 apt
drwxr-xr-x  2 root      root      4096 Sep 10  2020 boltd
drwxr-xr-x  2 root      root      4096 Aug 27 21:21 calico
drwxr-xr-x  8 root      root      4096 Aug 28 00:34 cloud
drwxr-xr-x  4 root      root      4096 Aug 27 23:52 cni
drwxr-xr-x  2 root      root      4096 Aug 27 19:38 command-not-found
drwx--x--x 11 root      root      4096 Aug 27 20:24 containerd
drwxr-xr-x  2 root      root      4096 Aug 27 19:57 dbus
drwxr-xr-x  2 root      root      4096 Apr 10  2020 dhcp
lrwxrwxrwx  1 root      root        25 Aug 27 23:24 docker -> /nfs-share/linux03/docker
drwxr-xr-x  3 root      root      4096 Aug 27 21:15 dockershim
drwxr-xr-x  7 root      root      4096 Aug 28 00:24 dpkg
drwxr-xr-x  3 root      root      4096 Feb  1  2021 fwupd
drwxr-xr-x  2 root      root      4096 Apr 20  2020 git
drwxr-xr-x  4 root      root      4096 Aug 27 19:39 grub
drwxr-xr-x  2 root      root      4096 Aug 27 19:51 initramfs-tools
lrwxrwxrwx  1 root      root        26 Aug 28 00:38 kubelet -> /nfs-share/linux03/kubelet
### truncated for brevity ###

# Reinstall docker & kubernetes
version=1.20.10-00
apt-get install -qy --allow-downgrades --allow-change-held-packages kubeadm=$version kubelet=$version kubectl=$version docker-ce docker-ce-cli containerd.io nfs-common
apt-mark hold kubeadm kubelet kubectl
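
# After reinstalling, the worker node presumably still needs to rejoin the cluster. A sketch (run the first
# command on the controller, then paste its output on the worker):
kubeadm token create --print-join-command   # on the K8s controller
# sudo kubeadm join <controller-ip>:6443 --token <token> --discovery-token-ca-cert-hash sha256:<hash>   # on the worker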

A similar illustration could be made for NFS-backed worker storage. The steps would be mostly the same, except that the worker node must be configured to mount the NFS share automatically on reboot (see the fstab sketch below); the symbolic-link commands would be identical.
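
For reference, a persistent NFS mount would normally be an /etc/fstab entry on the worker node; a sketch with example values (server address, export path, and mount point are assumptions):
# /etc/fstab
# 192.168.100.21:/volume1/k8s  /nfs-share  nfs  defaults,_netdev  0  0
sudo mount -a # apply the fstab entry without rebooting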

PowerShell: Quick Snippet to Purge All ‘Orphaned’ Records of Resources in VMM

The Commands:

# How to purge all erroneous records of resources from the VMM Library

# ISOs
Get-SCISO | where {$_.State -eq "missing"} | Remove-SCISO

# Custom Scripts
Get-SCScript | where {$_.State -eq "missing"} | Remove-SCScript

# Drivers
Get-SCDriverPackage | where {$_.State -eq "missing"} | Remove-SCDriverPackage

# Applications
Get-SCApplicationPackage | where {$_.State -eq "missing"} | Remove-SCApplicationPackage

# Custom Resources
Get-SCCustomResource | where {$_.State -eq "missing"} | Remove-SCCustomResource

# Virtual Disks
Get-SCVirtualHardDisk | where {$_.State -eq "missing"} | Remove-SCVirtualHardDisk

Sample Outputs:

PS C:\Windows\system32> Get-SCISO | where {$_.State -eq "missing"} | Remove-SCISO
Release               :
State                 : Missing
LibraryShareId        : 00000000-0000-0000-0000-000000000000
SharePath             : C:\Windows\system32\vmguest.iso
FileShare             :
Directory             : C:\Windows\system32
Size                  : 0
IsOrphaned            : False
FamilyName            :
Namespace             :
ReleaseTime           :
HostVolumeId          :
HostVolume            :
Classification        :
HostId                : 
HostType              : VMHost
HostName              : hv1.intranet.kimconnect.com
VMHost                : hv1.intranet.kimconnect.com
LibraryServer         :
CloudId               :
Cloud                 :
LibraryGroup          :
GrantedToList         : {}
UserRoleID            : 00000000-0000-0000-0000-000000000000
UserRole              :
Owner                 :
ObjectType            : ISO
Accessibility         : Public
Name                  : vmguest
IsViewOnly            : False
Description           :
AddedTime             : 7/22/1920 9:04:52 AM
ModifiedTime          : 7/22/1920 9:04:52 AM
Enabled               : True
MostRecentTask        :
ServerConnection      : Microsoft.SystemCenter.VirtualMachineManager.Remoting.ServerConnection
ID                    : 862c2f67-4c2c-4588-8a4f-16ed3c64366f
MarkedForDeletion     : True
IsFullyCached         : True
MostRecentTaskIfLocal :

### Truncated similar outputs ### 

# Checking custom resources
PS C:\Windows\system32> Get-SCCustomResource|select name
Name
----
SAV_x86_en-US_4.9.305.198.cr
WebDeploy_x86_en-US_3.1237.1764.cr
WebDeploy_x64_en-US_3.1237.1764.cr
SAV_x64_en-US_4.9.305.198.cr

Problem: NextCloud Would Not Start Due to Versioning Variance

This issue has occurred after NextCloud was upgraded post-deployment: the source Docker image may specify an older version than the data of the running instance. The discrepancy causes the pod to fail to recreate or start as a new container, as shown below:

# Pod scheduling status yields 'Error'
kimconnect@k8sController:~$ k get pod
NAME                                              READY   STATUS    RESTARTS   AGE
clamav-0                                          1/1     Running   0          6d23h
collabora-collabora-code-69d74c979f-jp4p2         1/1     Running   0          6d19h
nextcloud-6cf9c65d85-42dx7                        1/2     Error     1          6s
nextcloud-db-postgresql-0                         1/1     Running   0          7d1h

# Further examination of the problem...
kimconnect@k8sController:~$ k describe pod nextcloud-6cf9c65d85-l9b99
Name:         nextcloud-6cf9c65d85-l9b99
Namespace:    default
Priority:     0
Node:         workder05/10.10.100.95
Start Time:   Fri, 20 Aug 2021 23:48:23 +0000
Labels:       app.kubernetes.io/component=app
              app.kubernetes.io/instance=nextcloud
              app.kubernetes.io/name=nextcloud
              pod-template-hash=6cf9c65d85
Annotations:  cni.projectcalico.org/podIP: 172.16.90.126/32
              cni.projectcalico.org/podIPs: 172.16.90.126/32
Status:       Running
IP:           172.16.90.126
IPs:
  IP:           172.16.90.126
Controlled By:  ReplicaSet/nextcloud-6cf9c65d85
Containers:
  nextcloud:
    Container ID:   docker://4c202d2155dea39739db815feae271fb8f14438f44092049f3d55c70fbf819c0
    Image:          nextcloud:stable-fpm
    Image ID:       docker-pullable://nextcloud@sha256:641b1dc10b681e1245c6f5d6d366fa1cd7e018ff787cf690c1aa372ddc108671
    Port:           <none>
    Host Port:      <none>
    State:          Waiting
      Reason:       CrashLoopBackOff
    Last State:     Terminated
      Reason:       Error
      Exit Code:    1
      Started:      Fri, 20 Aug 2021 23:54:03 +0000
      Finished:     Fri, 20 Aug 2021 23:54:03 +0000
    Ready:          False
    Restart Count:  6
    Environment:
      POSTGRES_HOST:              nextcloud-db-postgresql.default.svc.cluster.local
      POSTGRES_DB:                nextcloud
      POSTGRES_USER:              <set to the key 'db-username' in secret 'nextcloud-db'>      Optional: false
      POSTGRES_PASSWORD:          <set to the key 'db-password' in secret 'nextcloud-db'>      Optional: false
      NEXTCLOUD_ADMIN_USER:       <set to the key 'nextcloud-username' in secret 'nextcloud'>  Optional: false
      NEXTCLOUD_ADMIN_PASSWORD:   <set to the key 'nextcloud-password' in secret 'nextcloud'>  Optional: false
      NEXTCLOUD_TRUSTED_DOMAINS:  kimconnect.com
      NEXTCLOUD_DATA_DIR:         /var/www/html/data
    Mounts:
      /usr/local/etc/php-fpm.d/memory_limit from nextcloud-phpconfig (rw,path="memory_limit")
      /usr/local/etc/php-fpm.d/post_max_size from nextcloud-phpconfig (rw,path="post_max_size")
      /usr/local/etc/php-fpm.d/upload_max_filesize from nextcloud-phpconfig (rw,path="upload_max_filesize")
      /usr/local/etc/php-fpm.d/upload_max_size from nextcloud-phpconfig (rw,path="upload_max_size")
      /var/run/secrets/kubernetes.io/serviceaccount from default-token-bdhxv (ro)
      /var/www/ from nextcloud-data (rw,path="root")
      /var/www/html from nextcloud-data (rw,path="html")
      /var/www/html/config from nextcloud-data (rw,path="config")
      /var/www/html/custom_apps from nextcloud-data (rw,path="custom_apps")
      /var/www/html/data from nextcloud-data (rw,path="data")
      /var/www/html/themes from nextcloud-data (rw,path="themes")
      /var/www/tmp from nextcloud-data (rw,path="tmp")
  nextcloud-nginx:
    Container ID:   docker://1fae573d1a0591058ad55f939b4762f01c7a5f6e7275d2348ff1bd287e077fe5
    Image:          nginx:alpine
    Image ID:       docker-pullable://nginx@sha256:e20c21e530f914fb6a95a755924b1cbf71f039372e94ac5ddcf8c3b386a44615
    Port:           80/TCP
    Host Port:      0/TCP
    State:          Running
      Started:      Fri, 20 Aug 2021 23:48:26 +0000
    Ready:          True
    Restart Count:  0
    Environment:    <none>
    Mounts:
      /etc/nginx/nginx.conf from nextcloud-nginx-config (rw,path="nginx.conf")
      /var/run/secrets/kubernetes.io/serviceaccount from default-token-bdhxv (ro)
      /var/www/ from nextcloud-data (rw,path="root")
      /var/www/html from nextcloud-data (rw,path="html")
      /var/www/html/config from nextcloud-data (rw,path="config")
      /var/www/html/custom_apps from nextcloud-data (rw,path="custom_apps")
      /var/www/html/data from nextcloud-data (rw,path="data")
      /var/www/html/themes from nextcloud-data (rw,path="themes")
      /var/www/tmp from nextcloud-data (rw,path="tmp")
Conditions:
  Type              Status
  Initialized       True
  Ready             False
  ContainersReady   False
  PodScheduled      True
Volumes:
  nextcloud-data:
    Type:       PersistentVolumeClaim (a reference to a PersistentVolumeClaim in the same namespace)
    ClaimName:  nextcloud-claim
    ReadOnly:   false
  nextcloud-phpconfig:
    Type:      ConfigMap (a volume populated by a ConfigMap)
    Name:      nextcloud-phpconfig
    Optional:  false
  nextcloud-nginx-config:
    Type:      ConfigMap (a volume populated by a ConfigMap)
    Name:      nextcloud-nginxconfig
    Optional:  false
  default-token-bdhxv:
    Type:        Secret (a volume populated by a Secret)
    SecretName:  default-token-bdhxv
    Optional:    false
QoS Class:       BestEffort
Node-Selectors:  <none>
Tolerations:     node.kubernetes.io/not-ready:NoExecute op=Exists for 300s
                 node.kubernetes.io/unreachable:NoExecute op=Exists for 300s
Events:
  Type     Reason     Age                  From               Message
  ----     ------     ----                 ----               -------
  Normal   Scheduled  10m                  default-scheduler  Successfully assigned default/nextcloud-6cf9c65d85-l9b99 to linux05
  Normal   Pulled     10m                  kubelet            Container image "nginx:alpine" already present on machine
  Normal   Created    10m                  kubelet            Created container nextcloud-nginx
  Normal   Started    10m                  kubelet            Started container nextcloud-nginx
  Normal   Created    9m47s (x4 over 10m)  kubelet            Created container nextcloud
  Normal   Started    9m46s (x4 over 10m)  kubelet            Started container nextcloud
  Normal   Pulled     8m55s (x5 over 10m)  kubelet            Container image "nextcloud:stable-fpm" already present on machine
  Warning  BackOff    18s (x51 over 10m)   kubelet            Back-off restarting failed container

# Checking the logs
kimconnect@k8sController:~$ k logs nextcloud-6cf9c65d85-l9b99 nextcloud
Can't start Nextcloud because the version of the data (21.0.4.1) is higher than the docker image version (20.0.8.1) and downgrading is not supported. Are you sure you have pulled the newest image version?

Solution:

# a. Create a backup copy of version.php (assuming the NextCloud data lives on an NFS share; $nfsServer must be set to the NAS address)
  sudo mount $nfsServer:/volume1/nextcloud /mnt/nextcloud
  cd /mnt/nextcloud/html
  cp version.php version.php.bak

# b. Edit the version.php file with this content
  vim version.php
########
# <?php
# $OC_Version = array(21,0,4,1); # change this value to array(20,0,8,1)
# $OC_VersionString = '21.0.4'; # change this value to '20.0.8'
# $OC_Edition = '';
# $OC_Channel = 'stable';
# $OC_VersionCanBeUpgradedFrom = array (
#   'nextcloud' =>
#   array (
#     '20.0' => true,
#     '21.0' => true,
#   ),
#   'owncloud' =>
#   array (
#     '10.5' => true,
#   ),
# );
# $OC_Build = '2021-08-03T15:44:43+00:00 c52fea0b16690b492f6c4175e1ae71d488936244';
# $vendor = 'nextcloud';
########
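
# Optionally, the same edit can be scripted; a sketch based on the version values shown above (adjust the
# version numbers to match your environment):
  sed -i \
    -e 's/array(21,0,4,1)/array(20,0,8,1)/' \
    -e "s/'21\.0\.4'/'20.0.8'/" version.php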

# c. Recreate the failed pod and verify that it's in 'Running' status

kimconnect@k8sController:~$ k delete pod nextcloud-6cf9c65d85-l9b99
pod "nextcloud-6cf9c65d85-l9b99" deleted
kimconnect@k8sController:~$ k get pod
NAME                                              READY   STATUS    RESTARTS   AGE
clamav-0                                          1/1     Running   0          6d23h
collabora-collabora-code-69d74c979f-jp4p2         1/1     Running   0          6d19h
nextcloud-6cf9c65d85-dmg2s                        2/2     Running   0          17s
nextcloud-db-postgresql-0                         1/1     Running   0          7d1h

# d. Revert changes to version.php

cd /mnt/nextcloud/html
mv version.php version.php.old
mv version.php.bak version.php

How To Move WordPress Site To Kubernetes Cluster

a. Create backups of source files and database

  - Log on to the current hosting provider to make backups
  - Files:
    - Assuming cPanel:
      - Login to cPanel
      - Click on 'File Manager'
      - Select public_html or the directory containing WordPress files
      - Select Compress from the top-right menu
      - Select 'Bzip2ed Tar Archive' (better compression than Gzip)
      - Click 'Compress File(s)' and wait for the process to finish
      - Right-click the newly generated public_html.tar.bz2 from cPanel File Manager > select Download
      - Find the file in a default download directory (e.g. /home/$(whoami)/Downloads/public_html.tar.bz2)
  - Database:
    - Assuming cPanel with phpMyAdmin
      - Click 'phpMyAdmin' from the 'DATABASES' control group
      - Click 'Export'
      - Set Export method = Quick and Format = SQL
      - Click Go
      - Find the *.sql file being downloaded into a default download directory (e.g. /home/$(whoami)/Downloads/localhost.sql)
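
If the source host also allows SSH access, the same backups can be taken from a shell instead of the cPanel UI; a sketch with placeholder database credentials (dbuser and dbname are assumptions):

  # Run on the source hosting server
  tar -cjf ~/public_html.tar.bz2 -C ~ public_html
  mysqldump -u dbuser -p dbname > ~/localhost.sql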

b. Install Bitnami WordPress in a Kubernetes Cluster

# Add helm chart if not already available
helm repo add bitnami https://charts.bitnami.com/bitnami

# Install WordPress with Dynamic NFS Provisioning
# Documentation: https://hub.kubeapps.com/charts/bitnami/wordpress/10.0.1
# Set variables
appName=kimconnectblog
domainName=blog.kimconnect.com
wordpressusername=kimconnect
wordpressPassword=SOMEPASSWORDHERE
rootPassword=SOMEPASSWORDHERE2
storageClass=nfs-client
# Install
helm install $appName bitnami/wordpress \
  --set persistence.accessMode=ReadWriteMany,persistence.storageClass=$storageClass \
  --set mariadb.primary.persistence.storageClass=$storageClass \
  --set wordpressUsername=$wordpressusername,wordpressPassword=$wordpressPassword \
  --set mariadb.auth.rootPassword=$rootPassword \
  --set mariadb.auth.password=$rootPassword \
  --set ingress.enabled=true,ingress.hostname=$domainName
# Patch the deployed ingress with an existing SSL cert
# Assuming the $appName-cert has already been generated
appName=kimconnectblog
domainName=blog.kimconnect.com
certName=$appName-cert
serviceName=$appName-wordpress
servicePort=80
cat <<EOF > $appName-patch.yaml
spec:
  tls:
  - hosts:
    - $domainName
    secretName: $certName
  rules:
  - host: $domainName
    http:
      paths:
      - path: /
        pathType: Prefix
        backend:
          service:
            name: $serviceName
            port:
              number: $servicePort         
EOF
kubectl patch ingress/$appName-wordpress -p "$(cat $appName-patch.yaml)"
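
# If the $certName secret does not exist yet, it could be created with cert-manager. A minimal sketch,
# assuming cert-manager is installed and a ClusterIssuer named 'letsencrypt-prod' exists (both are assumptions):
cat <<EOF | kubectl apply -f -
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  name: $certName
spec:
  secretName: $certName
  dnsNames:
  - $domainName
  issuerRef:
    name: letsencrypt-prod # assumed ClusterIssuer name
    kind: ClusterIssuer
EOF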

c. Import files and database onto new hosting server

  - Database:
    - Access DB server and import sql dump
      podName=kimconnectblog-mariadb-0
      kubectl exec --stdin --tty $podName -- /bin/bash
      rootPassword=SOMEPASSWORD
      echo "show databases;" | mysql -u root -p$rootPassword
      MariaDB [(none)]> show databases;exit;
        +--------------------+
        | Database           |
        +--------------------+
        | bitnami_wordpress  |
        | information_schema |
        | mysql              |
        | performance_schema |
        | test               |
        +--------------------+
        5 rows in set (0.009 sec)
      oldDb=kimconne_blog
      sqlDump=/bitnami/mariadb/data/kimconnect.sql
      mysql -uroot -p$rootPassword test < $sqlDump
      grantUser=bn_wordpress # this is the default Bitnami WordPress user
      echo "GRANT ALL PRIVILEGES ON $oldDb.* TO $grantUser;" | mysql -uroot -p$rootPassword
      #echo "create database $databaseName;" | mysql -uroot -p$rootPassword
      #mysql -uroot -p$rootPassword $oldDb -sNe 'show tables' | while read table; do mysql -uroot -p$rootPassword -sNe "RENAME TABLE $oldDb.$table TO $newDb.$table"; done
      #echo "create user kimconne_blog@localhost;grant all privileges on kimconne_blog.* to 'kimconne_blog';"| mysql -uroot -p$rootPassword
      #ALTER USER 'kimconne_blog'@'localhost' IDENTIFIED BY 'SOMEPASSWORDHERE';
  - Files:
    - Assuming nfs:
      nfsShare=k8s
      nfsServer=10.10.10.5
      sharePath=/volume1/$nfsShare
      mountPoint=/mnt/$nfsShare
      sudo mkdir $mountPoint
      sudo mount -t nfs $nfsServer:$sharePath $mountPoint # Test mounting
      sudo mount | grep $nfsShare # validate mount
      # Assuming Kubernetes NFS
      # sudo mv /home/$(whoami)/Downloads/localhost.sql $mountPoint/path_to_default-data-sitename-mariadb/data/localhost.sql
      # sudo mv /home/$(whoami)/Downloads/public_html.tar.bz2 $mountPoint/public_html.tar.bz2
      bz2File=/mnt/k8s/kimconnectblog/public_html.tar.bz2
      containerPath=/mnt/k8s/default-kimconnectblog-wordpress-pvc-9f1dd4bd-81f3-489f-9b76-bf70f4fd291c/wordpress/wp-content
      tar -xf $bz2File -C $containerPath
      cd $containerPath
      mv public_html/wp-content wp-content
      vim wp-config.php # edit wp config to match the imported database and its prefix

Dynamic NFS Provisioning in Kubernetes Cluster

Step 1: Creating NFS Server

A. Create NFS Share on File Server
There are many ways to perform this task. Here’s an illustration of a manual method of enabling a standard Ubuntu server to serve as an NFS server.

Here’s a related blog with updated instructions: https://kimconnect.com/how-to-install-nfs-server-on-ubuntu-21-04/

# Install prerequisites (on Ubuntu the NFS server package is nfs-kernel-server):
sudo apt install nfs-kernel-server

# Create nfs share:
shareName=/export/kubernetes
sudo mkdir $shareName
sudo chown -R nobody: $shareName
sudo systemctl enable nfs-server
sudo systemctl start nfs-server
vim /etc/exports
### Add this line
/export/kubernetes *(rw,sync,no_subtree_check,no_root_squash,no_all_squash,insecure)
###
sudo exportfs -rav
sudo exportfs -v

B. Testing access from a client
# Install prerequisite
sudo apt install nfs-common
# Mount, create/delete a file, and unmount
# Set variables
nfsShare=kubernetes # assuming that the 'kubernetes' share has already been created on the server
nfsServer=192.168.100.21 # assuming NAS servername is resolved to its correct IP
sharePath=/volume1/$nfsShare
mountPoint=/mnt/$nfsShare
sudo mkdir $mountPoint
sudo mount -t nfs $nfsServer:$sharePath $mountPoint # Test mounting
sudo mount | grep $nfsShare
touch $mountPoint/test.txt
ls $mountPoint
rm $mountPoint/test.txt
ls $mountPoint
sudo umount -f -l $mountPoint # or sudo umount $mountPoint

Step 2a: Install Dynamic NFS Provisioner Using Helm

# Check current helm repo
kim@linux01:~$ helm repo list
NAME                            URL
bitnami                         https://charts.bitnami.com/bitnami
ingress-nginx                   https://kubernetes.github.io/ingress-nginx
rancher-stable                  https://releases.rancher.com/server-charts/stable
jetstack                        https://charts.jetstack.io
k8s-at-home                     https://k8s-at-home.com/charts/
nextcloud                       https://nextcloud.github.io/helm/
chrisingenhaag                  https://chrisingenhaag.github.io/helm/
wiremind                        https://wiremind.github.io/wiremind-helm-charts

# Add repo
helm repo add nfs-subdir-external-provisioner https://kubernetes-sigs.github.io/nfs-subdir-external-provisioner/

# The easy way
nfsServer=192.168.100.21
nfsShare=/volume1/k8s
helm repo add nfs-subdir-external-provisioner https://kubernetes-sigs.github.io/nfs-subdir-external-provisioner/
helm install nfs-subdir-external-provisioner nfs-subdir-external-provisioner/nfs-subdir-external-provisioner \
  --set nfs.server=$nfsServer \
  --set nfs.path=$nfsShare

# Sample output
NAME: nfs-subdir-external-provisioner
LAST DEPLOYED: Sun Aug  1 21:16:05 2021
NAMESPACE: default
STATUS: deployed
REVISION: 1
TEST SUITE: None

# Possible error:
Error: chart requires kubeVersion: >=1.9.0-0 <1.20.0-0 which is incompatible with Kubernetes v1.20.2

# Workaround: downgrade Kubernetes - not recommended!
version=1.20.0-00
sudo apt install -qy kubeadm=$version kubectl=$version kubelet=$version kubernetes-cni=$version --allow-downgrades

# If everything works out, storage class 'nfs-client' will become available
kim@linux01:~$ k get storageclasses.storage.k8s.io
NAME         PROVISIONER                                     RECLAIMPOLICY   VOLUMEBINDINGMODE   ALLOWVOLUMEEXPANSION   AGE
nfs-class    kubernetes.io/nfs                               Retain          Immediate           true                   181d
nfs-client   cluster.local/nfs-subdir-external-provisioner   Delete          Immediate           true                   25m

# set default storage class
defaultStorageClassName=nfs-client
kubectl patch storageclass $defaultStorageClassName -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}'

# Check storage classes for the suffix '(default)'
kim@linux01:~$ kubectl get storageclass
NAME                   PROVISIONER                                     RECLAIMPOLICY   VOLUMEBINDINGMODE   ALLOWVOLUMEEXPANSION   AGE
nfs-class              kubernetes.io/nfs                               Retain          Immediate           true                   181d
nfs-client (default)   cluster.local/nfs-subdir-external-provisioner   Delete          Immediate           true                   42m

# Test creating nfs claim
cat > test-pvc.yaml <<EOF
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: pvc-nfs-pv1
spec:
  storageClassName: nfs-client # this variable must match the helm nfs-subdir-external-provisioner's default!
  accessModes:
     - ReadWriteMany
  resources:
    requests:
      storage: 500Mi
EOF
kubectl apply -f test-pvc.yaml

# Check result
kim@linux01:~$ k get pv
NAME                                       CAPACITY   ACCESS MODES   RECLAIM POLICY   STATUS   CLAIM                      STORAGECLASS   REASON   AGE
pvc-8ed4fc70-71c4-48c7-85a9-57175cfc21e7   500Mi      RWX            Delete           Bound    default/pvc-nfs-pv1        nfs-client              10s

kim@linux01:~$ k get pvc pvc-nfs-pv1
NAME          STATUS   VOLUME                                     CAPACITY   ACCESS MODES   STORAGECLASS   AGE
pvc-nfs-pv1   Bound    pvc-8ed4fc70-71c4-48c7-85a9-57175cfc21e7   500Mi      RWX            nfs-client     91s

kim@linux01:~$ k delete -f test-pvc.yaml
persistentvolumeclaim "pvc-nfs-pv1" deleted

Step 2b: Manual Installation of the Dynamic NFS Provisioner

# Pull the source code
workingDirectory=~/nfs-dynamic-provisioner
mkdir $workingDirectory && cd $workingDirectory
git clone https://github.com/kubernetes-sigs/nfs-subdir-external-provisioner
cd nfs-subdir-external-provisioner/deploy

# Deploying the service accounts, accepting defaults
k create -f rbac.yaml

# Editing storage class
vim class.yaml

##############################################
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: managed-nfs-ssd # set this value
provisioner: k8s-sigs.io/nfs-subdir-external-provisioner # or choose another name; must match the deployment's env PROVISIONER_NAME
parameters:
  archiveOnDelete: "true" # true means data is archived (retained) on the NFS share when a PVC is deleted
allowVolumeExpansion: true # boolean; this attribute is not present by default
##############################################

# Deploying storage class
k create -f class.yaml

# Sample output
stoic@masternode:~/nfs-dynamic-provisioner/nfs-subdir-external-provisioner/deploy$ k get storageclasses.storage.k8s.io
NAME                   PROVISIONER                                     RECLAIMPOLICY   VOLUMEBINDINGMODE   ALLOWVOLUMEEXPANSION   AGE
managed-nfs-ssd        k8s-sigs.io/nfs-subdir-external-provisioner     Delete          Immediate           false                  33s
nfs-class              kubernetes.io/nfs                               Retain          Immediate           true                   193d
nfs-client (default)   cluster.local/nfs-subdir-external-provisioner   Delete          Immediate           true                   12d

# Example of patching an applied object
kubectl patch storageclass managed-nfs-ssd -p '{"allowVolumeExpansion":true}'
kubectl patch storageclass managed-nfs-ssd -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}' # Set storage class as default

# Editing deployment of dynamic nfs provisioning service pod
vim deployment.yaml

##############################################
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nfs-client-provisioner
  labels:
    app: nfs-client-provisioner
  # replace with namespace where provisioner is deployed
  namespace: default
spec:
  replicas: 1
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: nfs-client-provisioner
  template:
    metadata:
      labels:
        app: nfs-client-provisioner
    spec:
      serviceAccountName: nfs-client-provisioner
      containers:
        - name: nfs-client-provisioner
          image: k8s.gcr.io/sig-storage/nfs-subdir-external-provisioner:v4.0.2
          volumeMounts:
            - name: nfs-client-root
              mountPath: /persistentvolumes
          env:
            - name: PROVISIONER_NAME
              value: k8s-sigs.io/nfs-subdir-external-provisioner
            - name: NFS_SERVER
              value: X.X.X.X # change this value
            - name: NFS_PATH
              value: /nfs-share # change this value
      volumes:
        - name: nfs-client-root
          nfs:
            server: 192.168.100.93 # change this value
            path: /nfs-share # change this value
##############################################

# Creating nfs provisioning service pod
k create -f deployment.yaml

# Troubleshooting: example where the deployment could not create its pod because the service account defined in rbac.yaml had not been applied yet
stoic@masternode: $ k describe deployments.apps nfs-client-provisioner
Name:               nfs-client-provisioner
Namespace:          default
CreationTimestamp:  Sat, 14 Aug 2021 00:09:24 +0000
Labels:             app=nfs-client-provisioner
Annotations:        deployment.kubernetes.io/revision: 1
Selector:           app=nfs-client-provisioner
Replicas:           1 desired | 0 updated | 0 total | 0 available | 1 unavailable
StrategyType:       Recreate
MinReadySeconds:    0
Pod Template:
  Labels:           app=nfs-client-provisioner
  Service Account:  nfs-client-provisioner
  Containers:
   nfs-client-provisioner:
    Image:      k8s.gcr.io/sig-storage/nfs-subdir-external-provisioner:v4.0.2
    Port:       <none>
    Host Port:  <none>
    Environment:
      PROVISIONER_NAME:  k8s-sigs.io/nfs-subdir-external-provisioner
      NFS_SERVER:        X.X.X.X
      NFS_PATH:          /nfs-share
    Mounts:
      /persistentvolumes from nfs-client-root (rw)
  Volumes:
   nfs-client-root:
    Type:      NFS (an NFS mount that lasts the lifetime of a pod)
    Server:    X.X.X.X
    Path:      /nfs-share
    ReadOnly:  false
Conditions:
  Type             Status  Reason
  ----             ------  ------
  Progressing      True    NewReplicaSetCreated
  Available        False   MinimumReplicasUnavailable
  ReplicaFailure   True    FailedCreate
OldReplicaSets:    <none>
NewReplicaSet:     nfs-client-provisioner-7768c6dfb4 (0/1 replicas created)
Events:
  Type    Reason             Age    From                   Message
  ----    ------             ----   ----                   -------
  Normal  ScalingReplicaSet  3m47s  deployment-controller  Scaled up replica set nfs-client-provisioner-7768c6dfb4 to 1

# Get the default nfs storage class
echo $(kubectl get sc -o=jsonpath='{range .items[?(@.metadata.annotations.storageclass\.kubernetes\.io/is-default-class=="true")]}{@.metadata.name}{"\n"}{end}')

##### OLD NOTES: Feel free to ignore the chicken scratch below #######

# The less-easy way: manually install the provisioner
git clone https://github.com/kubernetes-sigs/nfs-subdir-external-provisioner/
cd nfs-subdir-external-provisioner/deploy

NS=$(kubectl config get-contexts|grep -e "^\*" |awk '{print $5}')
NAMESPACE=${NS:-default}
sed -i'' "s/namespace:.*/namespace: $NAMESPACE/g" ./rbac.yaml ./deployment.yaml
kubectl apply -f ./rbac.yaml

vim deployment.yaml
###
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nfs-client-provisioner
  labels:
    app: nfs-client-provisioner
  # replace with namespace where provisioner is deployed
  namespace: default
spec:
  replicas: 1
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: nfs-client-provisioner
  template:
    metadata:
      labels:
        app: nfs-client-provisioner
    spec:
      serviceAccountName: nfs-client-provisioner
      containers:
        - name: nfs-client-provisioner
          image: quay.io/external_storage/nfs-client-provisioner:latest
          volumeMounts:
            - name: nfs-client-root
              mountPath: /persistentvolumes
          env:
            - name: PROVISIONER_NAME
              value: nfs-storage
            - name: NFS_SERVER
              value: 192.168.100.21
            - name: NFS_PATH
              value: /kubernetes
      volumes:
        - name: nfs-client-root
          nfs:
            server: 192.168.100.21
            path: /kubernetes

k apply -f deployment.yaml

vim class.yaml
######
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: managed-nfs-storage
provisioner: nfs-storage # or choose another name; must match the deployment's env PROVISIONER_NAME
parameters:
  pathPattern: "${.PVC.namespace}/${.PVC.annotations.nfs.io/storage-path}" # waits for nfs.io/storage-path annotation, if not specified will accept as empty string.
  onDelete: delete

# Create Persistent Volume Claim
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: test-claim
  annotations:
    nfs.io/storage-path: "test-path" # not required, depending on whether this annotation was shown in the storage class description
spec:
  storageClassName: managed-nfs-storage
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 1Gi

k apply -f class.yaml

kubectl create -f test-claim.yaml
kubectl create -f test-pod.yaml

How To Set Up ClamAV Antivirus Scanner in Kubernetes

Assumptions:

  • A Kubernetes cluster is already set up
  • Helm and the MetalLB load balancer are installed
  • A static IP has already been excluded from the external DHCP server's scope
  • The chosen IP falls within the address pool (IP range) defined in the metallb-system ConfigMap (see the check below)
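
# A quick check that the chosen address is inside MetalLB's pool (a sketch; assumes the ConfigMap-based
# MetalLB configuration in the metallb-system namespace):
kubectl get configmap config -n metallb-system -o jsonpath='{.data.config}'
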
# Installation
instanceName=clamav
helm repo add wiremind https://wiremind.github.io/wiremind-helm-charts
helm install $instanceName wiremind/clamav

# Set static IP for the service
appName=clamav
externalIPs=10.10.10.151
kubectl patch svc $appName -p '{"spec":{"externalIPs":["'$externalIPs'"]}}'

# Reverse static IP assignment
kubectl patch svc clamav -p '{"spec":{"externalIPs":[]}}'

# How to Uninstall
# helm uninstall clamav

# Application
# NextCloud's module can make use of this service: https://docs.nextcloud.com/server/latest/admin_manual/configuration_server/antivirus_configuration.html#configuring-clamav-on-nextcloud
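# A sketch of wiring NextCloud to this ClamAV daemon with occ (the files_antivirus app keys below are
# assumptions; verify them against the linked documentation before use):
# podName=<nextcloud-pod-name>
# kubectl exec -it $podName -c nextcloud -- su -s /bin/sh www-data -c \
#   "php /var/www/html/occ config:app:set files_antivirus av_mode --value=daemon && \
#    php /var/www/html/occ config:app:set files_antivirus av_host --value=clamav.default.svc.cluster.local && \
#    php /var/www/html/occ config:app:set files_antivirus av_port --value=3310"
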
Result:

rambo@masterbox:~$ helm install $instanceName wiremind/clamav
NAME: clamav
LAST DEPLOYED: Thu Jul 29 22:51:20 2021
NAMESPACE: default
STATUS: deployed
REVISION: 1
TEST SUITE: None
NOTES:
1. To connect to your ClamAV instance from outside the cluster execute the following commands:
  export POD_NAME=$(kubectl get pods --namespace default -l "app.kubernetes.io/name=clamav,app.kubernetes.io/instance=clamav" -o jsonpath="{.items[0].metadata.name}")
  echo 127.0.0.1:3310
  kubectl port-forward $POD_NAME 3310:3310

bruce@masterbox:~$ k describe statefulsets.apps clamav 
Name:               clamav
Namespace:          default
CreationTimestamp:  Thu, 29 Jul 2021 22:51:22 +0000
Selector:           app.kubernetes.io/instance=clamav,app.kubernetes.io/name=clamav
Labels:             app.kubernetes.io/instance=clamav
                    app.kubernetes.io/managed-by=Helm
                    app.kubernetes.io/name=clamav
                    app.kubernetes.io/version=1.8
                    helm.sh/chart=clamav-2.0.0
Annotations:        meta.helm.sh/release-name: clamav
                    meta.helm.sh/release-namespace: default
Replicas:           1 desired | 1 total
Update Strategy:    RollingUpdate
  Partition:        0
Pods Status:        1 Running / 0 Waiting / 0 Succeeded / 0 Failed
Pod Template:
  Labels:  app.kubernetes.io/instance=clamav
           app.kubernetes.io/name=clamav
  Containers:
   clamav:
    Image:        mailu/clamav:1.8
    Port:         3310/TCP
    Host Port:    0/TCP
    Liveness:     tcp-socket :clamavport delay=300s timeout=1s period=10s #success=1 #failure=3
    Readiness:    tcp-socket :clamavport delay=90s timeout=1s period=10s #success=1 #failure=3
    Environment:  <none>
    Mounts:
      /data from clamav-data (rw)
  Volumes:
   clamav-data:
    Type:       EmptyDir (a temporary directory that shares a pod's lifetime)
    Medium:     
    SizeLimit:  <unset>
Volume Claims:  <none>
Events:         <none>

kim@masterbox:~$ k describe service clamav 
Name:              clamav
Namespace:         default
Labels:            app.kubernetes.io/instance=clamav
                   app.kubernetes.io/managed-by=Helm
                   app.kubernetes.io/name=clamav
                   helm.sh/chart=clamav-2.0.0
Annotations:       meta.helm.sh/release-name: clamav
                   meta.helm.sh/release-namespace: default
Selector:          app.kubernetes.io/instance=clamav,app.kubernetes.io/name=clamav
Type:              ClusterIP
IP Families:       <none>
IP:                10.104.143.167
IPs:               10.104.143.167
Port:              clamavport  3310/TCP
TargetPort:        3310/TCP
Endpoints:         172.16.90.171:3310
Session Affinity:  None
Events:            <none>

PowerShell: Overcome Issues with Error 13932 in SCVMM When Refreshing Virtual Machines

Dealing with Clusters

# refreshCluster.ps1
# Function to refresh a cluster in VMM in anticipation of errors with unregistered SMB/CIFS shares

$clustername='cluster-101.kimconnect.com'
$runasAccount='domain\hyperv-admin'

function refreshCluster($clusterName,$runasAccount){    
  # Function to Register FileShare to a Cluster in SCVMM
  function registerFileShareToCluster{
    param(
      $clustername,
      $fileSharePath,
      $runasAccount
    )
    $ErrorActionPreference='Stop'
    try{
      Import-Module -Name "virtualmachinemanager"
      # Preempt this error
      # Error (26193)
      # No Run As account is associated with the host
      if($runasAccount){
        $runas = Get-SCRunAsAccount -Name $runasAccount
        $hostCluster = Get-SCVMHostCluster -Name $clustername
        Set-SCVmHostCluster -VMHostCluster $hostCluster -VMHostManagementCredential $runas
      }
      <# Got this error
      Set-SCVmHostCluster : A Hardware Management error has occurred trying to contact server
      :n:CannotProcessFilter :HRESULT 0x8033801a:No instance found with given property values. .
      WinRM: URL: [http://serverFQDN:5985], Verb: [INVOKE], Method: [AddToLocalAdminGroup], Resource:
      [http://schemas.microsoft.com/wbem/wsman/1/wmi/root/scvmm/AgentManagement]
      (Error ID: 2927, Detailed Error: Unknown error (0x8033801a))
  
      Check that WinRM is installed and running on server. For more information use the command
      "winrm helpmsg hresult" and http://support.microsoft.com/kb/2742275.
  
      To restart the job, run the following command:
      PS> Restart-Job -Job (Get-VMMServer localhost | Get-Job | where { $_.ID -eq })
      At line:1 char:1
      + Set-SCVmHostCluster -VMHostCluster $hostCluster -VMHostManagementCred ...
      + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          + CategoryInfo          : ReadError: (:) [Set-SCVMHostCluster], CarmineException
          + FullyQualifiedErrorId : 2927,Microsoft.SystemCenter.VirtualMachineManager.Cmdlets.SetHostClusterCmdlet
      #>
      <# preempt this error:
      Register-SCStorageFileShare : A parameter cannot be found that matches parameter name 'VMHostManagementCredential'.
      At line:1 char:87
      + ... ePath -VMHostCluster $hostCluster -VMHostManagementCredential $runasA ...
      +                                       ~~~~~~~~~~~~~~~~~~~~~~~~~~~
          + CategoryInfo          : InvalidArgument: (:) [Register-SCStorageFileShare], ParameterBindingException
          + FullyQualifiedErrorId : NamedParameterNotFound,Microsoft.SystemCenter.VirtualMachineManager.Cmdlets.RegisterSCSt
        orageFileShareCmdlet
      #>
      Register-SCStorageFileShare -FileSharePath $fileSharePath -VMHostCluster $hostCluster
      <# This error can safely be ignored
      Error (26233)
      Capacity/Free space cannot be calculated for \\NAS\CIFSSHARE. Failed to retrieve information with Win32 error code 64.
      #>  
  
      # This snippet is to register the SMB server onto VMM as a resource. It's optional
      # $servername='servername'
      # $shareName='test'
      # $addedShare = Get-SCStorageFileShare -Name "$servername\$sharename"
      # Register-SCStorageFileShare -StorageFileShare $addedShare -VMHostCluster $hostCluster
      # Set-SCVMHostCluster -RunAsynchronously -VMHostCluster $hostCluster -VMHostManagementCredential $runas
      return $true
    }catch{
      write-warning $_
      return $false
    }
  
  }
  # $clustername='CLUSTER-9999'
  # $fileSharePaths=@(
  #   '\\FILESERVER01\SHARE01',
  #   '\\FILESERVER01\SHARE02'
  # )
  # $runasAccount='domain\hyperv-admin'
  # foreach($fileSharePath in $fileSharePaths){  
  #   $success=registerFileShareToCluster $clusterName $fileSharePath $runasAccount
  #   if($success){
  #     write-host "$fileSharePath added successfully" -ForegroundColor Green
  #   }else{
  #     write-host "$fileSharePath was NOT added" -ForegroundColor Yellow
  #   }
  # }
  
  #Add-PSSnapin Microsoft.SystemCenter.VirtualMachineManager
  Import-Module -Name "virtualmachinemanager"
  #$guestVMs=Get-ClusterResource -cluster $clustername|?{$_.resourcetype.name -eq 'virtual machine'}
  $guestVMs=Get-ClusterGroup -Cluster $clustername|?{$_.GroupType -eq 'VirtualMachine'}
  foreach ($vm in $guestVMs){
      #[string]$vmName=$_.OwnerGroup.Name
      $vmName=$vm.Name
      $status=((Get-SCVirtualMachine -Name $vmName).StatusString|out-string).trim()
      if($status -notmatch 'Stopped|Running' -and !(!$status)){
          try{            
            try{
              Read-SCVirtualMachine $vmName -EA Stop
              write-host "$vmName refresh initiated." -ForegroundColor Green
            }catch [Microsoft.VirtualManager.Utils.CarmineException]{
              $errorMessage=$_
                $smbPath=[regex]::match($errorMessage,'\\\\(.*)\\').Value
                if($smbPath){
                    write-host "Add this SMB/CIFS path the cluster: $smbPath"
                    $smbRegistered=registerFileShareToCluster $clustername $smbPath $runasAccount
                    if($smbRegistered){
                      $null=Refresh-VM -VM $vmName -RunAsynchronously -Force;
                      write-host "$vmName refresh initiated." -ForegroundColor Yellow
                      #$null=Read-SCVirtualMachine -vm $vmName -Force -RunAsynchronously; # This statement missed 'stopped' VMs
                    }else{
                      write-warning "Unable to register $smbPath"
                    }
                }else{
                  write-host $errorMessage
                }
            }
          }catch{
            write-host $_
          }
      }else{
          write-host "$vmName status $(if($status){$status}else{'Unknown'})." -ForegroundColor Gray         
      }
    }
}
refreshCluster $clustername $runasAccount

Dealing with Individual Hyper-V Hosts

# refreshHost.ps1
$hostName='hyperv-2000.kimconnect.com'
$runasAccount='domain\hyperv-admin'

function refreshHost($hostname,$runasAccount){
  
  # Sub-routine to add Share Path to Hyper-V Host
  function addFileSharePathToHost($hostName,$sharePath,$runasAccount){
    try{
      $vmHost = Get-SCVMHost -ComputerName $hostName -EA Stop
      Register-SCStorageFileShare -FileSharePath $sharePath -VMHost $vmHost -EA Stop
      return $true
    }catch [Microsoft.VirtualManager.Utils.CarmineException]{
      $errorMessage=$_
      if($errorMessage -like "*Error ID: 26193*"){
        $runas = Get-SCRunAsAccount -Name $runasAccount
        Set-SCVmHost -VMHost $vmHost -VMHostManagementCredential $runas
        Register-SCStorageFileShare -FileSharePath $sharePath -VMHost $vmHost
        return $true
      }else{
        write-warning "$errorMessage"
        return $false
      }
    }catch{
      write-warning $_
      return $false
    }
    #Set-SCVMHost -VMHost $vmHost -RunAsynchronously -BaseDiskPaths $sharePath #-VMPaths "C:\ProgramData\Microsoft\Windows\Hyper-V"
  }
  
  $unsupportedSharedFiles=Get-SCVMHost $hostname | Get-SCVirtualMachine | ? {$_.Status -eq 'UnsupportedSharedFiles'} | Select Name,State,VMHost
  foreach($vmName in $unsupportedSharedFiles.Name){
    try{
      Read-SCVirtualMachine $vmName -EA Stop
      write-host "$vmName refresh initiated." -ForegroundColor Green
    }catch [Microsoft.VirtualManager.Utils.CarmineException]{
      $errorMessage=$_
        $smbPath=[regex]::match($errorMessage,'\\\\(.*)\\').Value
        if($smbPath){
            write-host "Add this SMB/CIFS path the cluster: $smbPath"
            $smbRegistered=addFileSharePathToHost $hostname $smbPath $runasAccount
            if($smbRegistered){
              $null=Refresh-VM -VM $vmName -RunAsynchronously -Force;
              write-host "$vmName refresh initiated." -ForegroundColor Yellow
              #$null=Read-SCVirtualMachine -vm $vmName -Force -RunAsynchronously; # This statement missed 'stopped' VMs
            }else{
              write-warning "Unable to register $smbPath"
            }
        }else{
          write-host $errorMessage
        }
    }
  }  
}
refreshHost $hostName $runasAccount

Kubernetes – Pausing Applications by Scaling Deployments or Stateful Sets

# Pause application
kubectl scale deploy nextcloud --replicas=0
kubectl scale statefulsets nextcloud-db-postgresql --replicas=0
kubectl scale deploy pihole --replicas=0

# Resume application
kubectl scale deploy nextcloud --replicas=1
kubectl scale statefulsets nextcloud-db-postgresql --replicas=1
kubectl scale deploy pihole --replicas=1

# Alternate for deployments scaling
kubectl scale deploy -n default --replicas=0 --all
kubectl scale deploy -n default --replicas=1 --all
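
To confirm that the scale operations took effect (a quick check, assuming the workloads above live in the default namespace), review the replica counts:
# READY should report 0/0 when paused and 1/1 when resumed
kubectl get deployments,statefulsets -n default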

Installing VMWare Tools on Linux Guest Virtual Machines

Installation Process

# Installing VMWare Tools
mkdir /mnt/cdrom
mount /dev/cdrom /mnt/cdrom
cp /mnt/cdrom/VMwareTools-*.tar.gz /tmp/
cd /tmp
tar -zxvf VMwareTools-*.tar.gz
cd vmware-tools-distrib
./vmware-install.pl

Sample Output

admin@testlinux:/tmp/vmware-tools-distrib# ./vmware-install.pl 
open-vm-tools packages are available from the OS vendor and VMware recommends 
using open-vm-tools packages. See http://kb.vmware.com/kb/2073803 for more 
information.
Do you still want to proceed with this installation? [no] yes

INPUT: [yes]

Creating a new VMware Tools installer database using the tar4 format.

Installing VMware Tools.

In which directory do you want to install the binary files? 
[/usr/bin] 

INPUT: [/usr/bin]  default

What is the directory that contains the init directories (rc0.d/ to rc6.d/)? 
[/etc] 

INPUT: [/etc]  default

What is the directory that contains the init scripts? 
[/etc/init.d] 

INPUT: [/etc/init.d]  default

In which directory do you want to install the daemon files? 
[/usr/sbin] 

INPUT: [/usr/sbin]  default

In which directory do you want to install the library files? 
[/usr/lib/vmware-tools] 

INPUT: [/usr/lib/vmware-tools]  default

The path "/usr/lib/vmware-tools" does not exist currently. This program is 
going to create it, including needed parent directories. Is this what you want?
[yes] 

INPUT: [yes]  default

In which directory do you want to install the common agent library files? 
[/usr/lib] 

INPUT: [/usr/lib]  default

In which directory do you want to install the common agent transient files? 
[/var/lib] 

INPUT: [/var/lib]  default

In which directory do you want to install the documentation files? 
[/usr/share/doc/vmware-tools] 

INPUT: [/usr/share/doc/vmware-tools]  default

The path "/usr/share/doc/vmware-tools" does not exist currently. This program 
is going to create it, including needed parent directories. Is this what you 
want? [yes] 

INPUT: [yes]  default

The installation of VMware Tools 10.3.22 build-15902021 for Linux completed 
successfully. You can decide to remove this software from your system at any 
time by invoking the following command: "/usr/bin/vmware-uninstall-tools.pl".

Before running VMware Tools for the first time, you need to configure it by 
invoking the following command: "/usr/bin/vmware-config-tools.pl". Do you want 
this program to invoke the command for you now? [yes] 

INPUT: [yes]  default

Initializing...

Segmentation fault

Making sure services for VMware Tools are stopped.

Stopping VMware Tools services in the virtual machine:
   Guest operating system daemon:                                      done
   VMware User Agent (vmware-user):                                    done
   Unmounting HGFS shares:                                             done
   Guest filesystem driver:                                            done


The installation status of vmsync could not be determined. Skipping installation.

The installation status of vmci could not be determined. Skipping installation.

The installation status of vsock could not be determined. Skipping installation.

The installation status of vmxnet3 could not be determined. Skipping installation.

The installation status of pvscsi could not be determined. Skipping installation.

The installation status of vmmemctl could not be determined. Skipping installation.

The VMware Host-Guest Filesystem allows for shared folders between the host OS 
and the guest OS in a Fusion or Workstation virtual environment.  Do you wish 
to enable this feature? [no] 

INPUT: [no]  default

The vmxnet driver is no longer supported on kernels 3.3 and greater. Please 
upgrade to a newer virtual NIC. (e.g., vmxnet3 or e1000e)

The vmblock enables dragging or copying files between host and guest in a 
Fusion or Workstation virtual environment.  Do you wish to enable this feature?
[no] 

INPUT: [no]  default


Skipping configuring automatic kernel modules as no drivers were installed by 
this installer.

Do you want to enable Guest Authentication (vgauth)? Enabling vgauth is needed 
if you want to enable Common Agent (caf). [yes] 

INPUT: [yes]  default

Do you want to enable Common Agent (caf)? [no] 

INPUT: [no]  default

No X install found.


Skipping rebuilding initrd boot image for kernel as no drivers to be included 
in boot image were installed by this installer.

Generating the key and certificate files.
Successfully generated the key and certificate files.
The configuration of VMware Tools 10.3.22 build-15902021 for Linux for this 
running kernel completed successfully.

You must restart your X session before any mouse or graphics changes take 
effect.

To enable advanced X features (e.g., guest resolution fit, drag and drop, and 
file and text copy/paste), you will need to do one (or more) of the following:
1. Manually start /usr/bin/vmware-user
2. Log out and log back into your desktop session
3. Restart your X session.

Found VMware Tools CDROM mounted at /mnt/cdrom. Ejecting device /dev/sr0 ...
No eject (or equivilant) command could be located.
Eject Failed:  If possible manually eject the Tools installer from the guest 
cdrom mounted at /mnt/cdrom before canceling tools install on the host.
Enjoy,

--the VMware team

Uninstallation Process

# Uninstalling VMWare Tools
cd
rm /tmp/VMwareTools-*.tar.gz
rm -rf /tmp/vmware-tools-distrib
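
Since the installer output above recommends the OS vendor's packages, a simpler alternative on Debian/Ubuntu guests (a sketch, assuming apt-based package management) is to use open-vm-tools instead of the bundled installer:
# Install the open-source VMware tools from the distribution repositories
sudo apt update && sudo apt -y install open-vm-tools
# For guests running a desktop environment, consider open-vm-tools-desktop as well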

Kubernetes: How to Set Node Affinity

Kubernetes pod placement can be controlled by labeling nodes and setting node affinity, which is useful for steering workloads onto nodes with particular hardware (for example, SSD-backed storage). Here are some quick commands to perform this task:

# List nodes
kubectl get nodes

# Assign label to node
ssdNode=linux03
kubectl label nodes $ssdNode disktype=ssd

# Check node labels
kubectl get nodes --show-labels

# Add this value to a helm deployment
helm upgrade nextcloud-db bitnami/postgresql \
  --set primary.nodeSelector.disktype=ssd,postgresqlPassword=PASSWORDHERE,persistence.existingClaim=nextcloud-claim,persistence.size=100Gi,persistence.subPath='postgres',resources.requests.memory=8Gi,resources.requests.cpu=3500m

# Alternatively, this can be added to the deployment spec
apiVersion: v1
kind: Pod
metadata:
  name: nginx
  labels:
    env: test
spec:
  containers:
  - name: nginx
    image: nginx
    imagePullPolicy: IfNotPresent
  nodeSelector:
    disktype: ssd

More examples:

# Add node label
nodename=worker02
label=disktype
value=ssd
kubectl label nodes $nodename $label=$value

# Remove node label
nodename=worker02
label=disktype
kubectl label nodes $nodename $label-

# Show current labels
kubectl get nodes --show-labels

# How to use label using helm
appName=nextcloud
appRepo=nextcloud/nextcloud
helm upgrade $appName $appRepo \
  --set nodeSelector.disktype=ssd

# How to use label in pod yaml
apiVersion: v1
kind: Pod
metadata:
  name: nginx
  labels:
    env: test
spec:
  containers:
  - name: nginx
    image: nginx
    imagePullPolicy: IfNotPresent
  nodeSelector:
    disktype: ssd

# How to use label in deployment plans
apiVersion: apps/v1
kind: Deployment
metadata:
  name: streamer-v4
  labels:
    app: streamer-v4
spec:
  replicas: 2
  selector:
    matchLabels:
      app: streamer-v4
  template:
    metadata:
      labels:
        app: streamer-v4
    spec:
      containers:
      - name: streamer-v4
        image: nginx
        ports:
        - containerPort: 8880
      nodeSelector:
        disktype: ssd

# Generally, add this fragment to yaml files
spec:
  affinity:
    nodeAffinity:
      requiredDuringSchedulingIgnoredDuringExecution:
        nodeSelectorTerms:
        - matchExpressions:
          - key: disktype
            operator: In
            values:
            - ssd
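
To confirm that the label and affinity rules are having the intended effect (a quick check using the example label above), verify the node labels and where the pods actually landed:
# Show the disktype label as a column for each node
kubectl get nodes -L disktype
# Show which node each pod was scheduled onto
kubectl get pods -o wide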

Resolving a Corrupted Data Store in ESXi

Scenario:

A USB disk has been mounted as a data store and subsequently removed. Errors would then surface in the UI when interacting with that data store.

SSH session into the console of ESXi host would yield some errors such as this:

Rescan complete, however some dead paths were not removed because they were in use by the system. Please use the 'storage core device world list' command to see the VMkernel worlds still using these paths.

A fix would be to ensure that guest VMs are no longer attached to the ‘dead’ data store. Once that is done, the physically detached USB drive would be automatically unmounted and removed as a listed data store on a rescan. These two commands would also be necessary:

/etc/init.d/hostd restart
/etc/init.d/vpxa restart
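
After restarting those services, the storage state can be re-checked from the same SSH session (a quick verification using commands referenced elsewhere in this article):
# List the file systems / data stores still mounted on the host
esxcli storage filesystem list
# List the disk devices still visible to the host
ls /dev/disks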

Although the data store has been removed, its stale pointer toward a physically removed USB device would still show via the ls /dev/disks command. Thus, rebooting the ESXi host would be the last step to effectively remove a ‘dead’ USB-mounted data store from the machine.

 

How To Install Rancher Onto a Kubernetes Cluster

Step 1: Add Rancher Repo into Helm

Source: https://rancher.com/docs/rancher/v2.x/en/installation/install-rancher-on-k8s/

# Add Rancher repo
helm repo add rancher-stable https://releases.rancher.com/server-charts/stable

# Create a namespace for Rancher
kubectl create namespace cattle-system

Step 2: Install CertManager

# Install the CustomResourceDefinition resources separately
kubectl apply --validate=false -f https://github.com/jetstack/cert-manager/releases/download/v1.0.4/cert-manager.crds.yaml

# Create the namespace for cert-manager
kubectl create namespace cert-manager

# Add the Jetstack Helm repository
helm repo add jetstack https://charts.jetstack.io

# Update your local Helm chart repository cache
helm repo update

# Install the cert-manager Helm chart
helm install \
  cert-manager jetstack/cert-manager \
  --namespace cert-manager \
  --version v1.0.4

Step 3: Install Rancher

hostname=rancher.kimconnect.com
helm install rancher rancher-stable/rancher \
  --namespace cattle-system \
  --set hostname=$hostname
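
While the chart deploys, the rollout can be watched until Rancher reports ready (a generic check, assuming the chart creates a deployment named 'rancher' in the cattle-system namespace):
# Wait for the Rancher deployment to finish rolling out
kubectl -n cattle-system rollout status deploy/rancher
# Review the resulting pods
kubectl -n cattle-system get pods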

# Ran into this problem
Error: chart requires kubeVersion: < 1.20.0-0 which is incompatible with Kubernetes v1.20.2

# Workaround: Install K3s instead of K8s
curl -sfL https://get.k3s.io | sh -
# OR
curl https://get.k3s.io | INSTALL_K3S_VERSION=v1.19.7+k3s1 sh - # Install a specific version

How to Deploy PiHole in a Kubernetes Cluster

Step 0: Create NFS Share

Ensure that the NFS share has been created with these settings:
– NFS share name: pihole
– Client access: world accessible (*) or allowed ingress from the Kubernetes nodes’ IPs and/or subnets (e.g. x.x.x.x/netmask)
– Privilege: Read/Write (rw)
– Other options: sync,no_subtree_check,no_root_squash,no_all_squash,insecure
– Shared directory ACL: 755 with ‘nobody:www-data’ as the owner of the pihole share & files. As long as the NFS options are set correctly, it’s unnecessary to manually configure ACLs

Step 1: Validate that NFS is accessible

# Include nfs client
sudo apt -y install nfs-common

# Set variables
nfsShare=pihole # assuming that the 'pihole' share has already been created on the server
nfsServer=192.168.1.21 # assuming NAS servername is resolved to its correct IP
mountPoint=/mnt/$nfsShare

# Test Mount, create/delete a file, and unmount
sudo mkdir $mountPoint
sudo mount -t nfs $nfsServer:/$nfsShare $mountPoint # Test mounting
sudo touch $mountPoint/test.txt
ls $mountPoint
sudo rm $mountPoint/test.txt
ls $mountPoint
sudo umount -f -l $mountPoint # Unmounting

Step 2: Create Storage Class ONLY if It Does Not Already Exist

# Create custom storage class - if it doesn't exist
storageClassName=nfs-class
nfsStorageClassExists=$(kubectl get storageclasses $storageClassName 2>/dev/null)
if [ -z "$nfsStorageClassExists" ]
then
cat > $storageClassName.yaml <<EOF
kind: StorageClass
apiVersion: storage.k8s.io/v1
metadata:
  name: $storageClassName
provisioner: kubernetes.io/nfs
reclaimPolicy: Retain
allowVolumeExpansion: true
EOF
kubectl apply -f $storageClassName.yaml
else
    echo "Storage class $storageClassName already exists."
fi

Step 3: Create a Persistent Volume

# Set variables
appName=pihole
pv1=$appName-pv
pv1Label=$appName
pv1Size=1.5Gi
storageClassName=nfs-class
nfsServer=192.168.1.21
nfs1=$appName

# Create pv
cat > $pv1.yaml << EOF
apiVersion: v1
kind: PersistentVolume
metadata:
  name: $pv1
  labels:
    directory: $pv1Label
spec:
  storageClassName: $storageClassName
  nfs: 
    path: /$nfs1 
    server: $nfsServer
  persistentVolumeReclaimPolicy: Retain
  capacity:
    storage: $pv1Size 
  accessModes:
  - ReadWriteMany 
EOF
kubectl apply -f $pv1.yaml

Step 4: Create a Persistent Volume Claim

appName=pihole
pvc1Label=$appName
pvc1=$appName-claim
pvc1Size=1.5Gi
storageClassName=nfs-class

# Create pvc
cat > $pvc1.yaml << EOF
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: $pvc1
spec:
  storageClassName: $storageClassName
  accessModes:
  - ReadWriteMany
  resources:
    requests:
      storage: $pvc1Size
  selector:
    matchLabels:
      directory: $pvc1Label      
EOF
kubectl apply -f $pvc1.yaml
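
Before moving on, it's worth confirming that the claim has bound to the volume (a quick check using the names generated from the variables above):
# Both objects should report a 'Bound' status
kubectl get pv pihole-pv
kubectl get pvc pihole-claim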

Step 5: Implement MetalLB Load Balancer – If It Does Not Already Exist

# Set strictARP, ipvs mode
kubectl get configmap kube-proxy -n kube-system -o yaml | \
sed -e "s/strictARP: false/strictARP: true/" | sed -e "s/mode: \"\"/mode: \"ipvs\"/" | \
kubectl apply -f - -n kube-system
 
# Apply the manifests provided by the author, David Anderson (https://www.dave.tf/) - an awesome dude
kubectl apply -f https://raw.githubusercontent.com/metallb/metallb/v0.9.5/manifests/namespace.yaml
kubectl apply -f https://raw.githubusercontent.com/metallb/metallb/v0.9.5/manifests/metallb.yaml
# On first install only
kubectl create secret generic -n metallb-system memberlist --from-literal=secretkey="$(openssl rand -base64 128)"
 
# Customize for this system
ipRange=192.168.1.2-192.168.1.99
loadBalancerFile=metallb-config.yaml
cat > $loadBalancerFile << EOF
apiVersion: v1
kind: ConfigMap
metadata:
  namespace: metallb-system
  name: config
data:
  config: |
    address-pools:
    - name: default
      protocol: layer2
      addresses:
      - $ipRange
EOF
kubectl apply -f $loadBalancerFile

Step 6: Define Services

Services defined in this section depend on the MetalLB load balancer plugin. Moreover, the sticky sessions feature is important for HTTP services. There are two methods of setting sticky sessions: (1) ‘service.spec.sessionAffinity: ClientIP’ and (2) Ingress session affinity based on a cookie, with this sequence:

Create NGINX controller deployment
Create NGINX service
Create Ingress
Redirect hostname to the NGINX service external IP

For simplicity’s sake, we will use option (1) as the preferred method to keep each client’s sessions consistently pinned to the same backend.

# Set variables
appName=pihole
serviceName=$appName-service
externalIp=192.168.1.50

# Generate tcp & udp services for pihole
cat > pihole-svc-udp.yaml << EOF
apiVersion: v1
kind: Service
metadata:
  name: $appName-svc-udp
  annotations:
    metallb.universe.tf/address-pool: default
    metallb.universe.tf/allow-shared-ip: psk
spec:
  type: LoadBalancer
  loadBalancerIP: $externalIp
  # sessionAffinity: ClientIP
  # externalTrafficPolicy: Local # This is to preserve the client source IP
  ports:
    - port: 53
      protocol: UDP
      targetPort: dns-udp
      name: dns-udp
  selector:
    app: $appName
EOF
cat > pihole-svc-tcp.yaml << EOF
apiVersion: v1
kind: Service
metadata:
  name: $appName-svc-tcp
  annotations:
    metallb.universe.tf/address-pool: default
    metallb.universe.tf/allow-shared-ip: psk
spec:
  type: LoadBalancer
  loadBalancerIP: $externalIp
  sessionAffinity: ClientIP # This is necessary for multi-replica deployments
  # externalTrafficPolicy: Local # This is to preserve the client source IP
  ports:
    - port: 80
      targetPort: http
      protocol: TCP
      name: http
    - port: 53
      targetPort: dns-tcp
      protocol: TCP
      name: dns-tcp
  selector:
    app: $appName
EOF
kubectl apply -f pihole-svc-tcp.yaml
kubectl apply -f pihole-svc-udp.yaml
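
Once applied, both services should be granted the same external IP (a quick check; sample output of a similar query appears later in this article):
# Both pihole-svc-tcp and pihole-svc-udp should show EXTERNAL-IP 192.168.1.50
kubectl get service | grep pihole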

Step 7: Pihole Config Map

cat > piholeConfigMap.yml <<EOF
apiVersion: v1
kind: ConfigMap
metadata:
  name: pihole-env
  namespace: default
data:
  02-lan: |
    addn-hosts=/etc/pihole/lan.list
  adlist : |
    https://raw.githubusercontent.com/StevenBlack/hosts/master/hosts 
    https://mirror1.malwaredomains.com/files/justdomains 
    http://sysctl.org/cameleon/hosts 
    https://zeustracker.abuse.ch/blocklist.php?download=domainblocklist 
    https://s3.amazonaws.com/lists.disconnect.me/simple_tracking.txt 
    https://s3.amazonaws.com/lists.disconnect.me/simple_ad.txt 
    https://hosts-file.net/ad_servers.txt 
    https://raw.githubusercontent.com/CHEF-KOCH/Audio-fingerprint-pages/master/AudioFp.txt 
    https://raw.githubusercontent.com/CHEF-KOCH/BarbBlock-filter-list/master/HOSTS.txt 
    https://raw.githubusercontent.com/CHEF-KOCH/Canvas-fingerprinting-pages/master/Canvas.txt 
    https://raw.githubusercontent.com/CHEF-KOCH/Canvas-Font-Fingerprinting-pages/master/Canvas.txt 
    https://raw.githubusercontent.com/CHEF-KOCH/WebRTC-tracking/master/WebRTC.txt 
    https://raw.githubusercontent.com/CHEF-KOCH/CKs-FilterList/master/HOSTS/Ads-tracker.txt 
    https://raw.githubusercontent.com/CHEF-KOCH/CKs-FilterList/master/HOSTS/coinminer.txt 
    https://raw.githubusercontent.com/CHEF-KOCH/CKs-FilterList/master/HOSTS/Malware.txt 
    https://raw.githubusercontent.com/CHEF-KOCH/CKs-FilterList/master/filters/nsablocklist.txt 
    https://raw.githubusercontent.com/CHEF-KOCH/CKs-FilterList/master/uMatrix/CK's-uMatrix-FilterList.txt 
    http://phishing.mailscanner.info/phishing.bad.sites.conf 
    https://ransomwaretracker.abuse.ch/downloads/RW_DOMBL.txt 
    https://ransomwaretracker.abuse.ch/downloads/CW_C2_DOMBL.txt 
    https://ransomwaretracker.abuse.ch/downloads/LY_C2_DOMBL.txt 
    https://ransomwaretracker.abuse.ch/downloads/TC_C2_DOMBL.txt 
    https://ransomwaretracker.abuse.ch/downloads/TL_C2_DOMBL.txt 
    https://gitlab.com/quidsup/notrack-blocklists/raw/master/notrack-blocklist.txt 
    https://gitlab.com/quidsup/notrack-blocklists/raw/master/notrack-malware.txt 
    https://zerodot1.gitlab.io/CoinBlockerLists/list.txt 
    https://zerodot1.gitlab.io/CoinBlockerLists/list_browser.txt 
    https://zerodot1.gitlab.io/CoinBlockerLists/list_optional.txt 
    https://raw.githubusercontent.com/crazy-max/WindowsSpyBlocker/master/data/hosts/spy.txt 
    https://raw.githubusercontent.com/w13d/adblockListABP-PiHole/master/Spotify.txt 
    https://smokingwheels.github.io/Pi-hole/allhosts
  reglist: |
    ^(.+[-.])??adse?rv(er?|ice)?s?[0-9][-.]
    ^analytics?[-.]
    ^banners?[-.]
    ^count(ers?)?[0-9][-.]
    ^pixels?[-.]
    ^beacons?[0-9][-.]
    ^stat(s|istics)?[0-9][-.]
    ^telemetry[-.]
    ^track(ers?|ing)?[0-9]*[-.]
    ^traff(ic)?[-.]
    ^adim(age|g)s?[0-9][-.]
    ^adtrack(er|ing)?[0-9][-.]
    ^advert(s|is(ing|ements?))?[0-9][-_.]
    ^aff(iliat(es?|ion))?[-.]
    ^(.+[-.])??m?ad[sxv]?[0-9][-.]
    (^r[[:digit:]]+(.|-+)[[:alnum:]]+-+[[:alnum:]]+-+[[:alnum:]]+.)(googlevideo|gvt1).com$
EOF
kubectl apply -f piholeConfigMap.yml
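
The rendered config map can be reviewed before it is wired into the deployment (a quick check of its keys and data):
kubectl describe configmap pihole-env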

Step 8: Deployment Plan

# Set variables
appName=pihole
replicas=1
imageSource=pihole/pihole:latest
pv1=$appName-volume
pvc1=$appName-claim
dns1=8.8.8.8
dns2=192.168.1.1
setIp=192.168.1.50
hostAlias1=pihole.kimconnect.com
hostAlias2=pihole
timeZone=America/Los_Angeles
adminPassword=nopassword # this only applies to first time deployment

# Create deployment file
cat > $appName.yaml << EOF
kind: Deployment
apiVersion: apps/v1
metadata:
  name: $appName
  labels:
    app: $appName
spec:
  replicas: $replicas
  strategy:
    type: Recreate
  selector: 
    matchLabels:
      app: $appName # This must be identical to the pod name (template label)
  template:    
    metadata:
      labels:
        app: $appName
        # name: $appName
    spec:
      # securityContext:
      #   runAsUser: 0
      #   runAsGroup: 0
      #   fsGroup: 0
      # hostNetwork: true
      affinity:
        podAntiAffinity: # this is an important constraint to ensure that each pod is scheduled on a different node to avoid problems with 'ports in use'
          requiredDuringSchedulingIgnoredDuringExecution:
          - labelSelector:
              matchExpressions:
                - key: "app"
                  operator: In
                  values:
                  - $appName
            topologyKey: "kubernetes.io/hostname"
      restartPolicy: Always
      # hostAliases:
      #   - ip: $setIp
      #     hostnames:
      #     - "$hostAlias1"
      #   - ip: 127.0.0.1
      #     hostnames:
      #     - "$appName"
      # hostname: $appName
      containers:
      - name: $appName
        image: $imageSource
        securityContext:
          allowPrivilegeEscalation: true
          privileged: true
          capabilities:
            add:
              - NET_ADMIN
              - CHOWN
        imagePullPolicy: Always
        ports:
          - containerPort: 80
            name: http
          - containerPort: 53
            protocol: TCP
            name: dns-tcp
          - containerPort: 53
            protocol: UDP
            name: dns-udp
        lifecycle:
          postStart:
            exec:
              command: ["/bin/sh", "-c", "sleep 30 && chown pihole:www-data /etc/pihole/gravity.db"]
        env:
        - name: 'DNS1'
          value: '$dns1'
        - name: 'DNS2'
          value: '$dns2'  
        - name: TZ
          value: "$timeZone"
        - name: WEBPASSWORD
          value: "$adminPassword"
        volumeMounts:
        - name: $pv1
          mountPath: "/etc/pihole"
          subPath: "pihole"
        - name: $pv1
          mountPath: "/etc/dnsmasq.d"
          subPath: "dnsmasq"
        # - name: adlist
        #   mountPath: "/etc/pihole/adlists.list"
        #   subPath: "adlists.list"
        # - name: reglist
        #   mountPath: "/etc/pihole/regex.list"
        #   subPath: "regex.list"
        # - name: 02-lan
        #   mountPath: "/etc/dnsmasq.d/02-lan.conf"
        #   subPath: "02-lan.conf"
      volumes:
      - name: $pv1
        persistentVolumeClaim:
          claimName: $pvc1
      # - name: reglist
      #   configMap:
      #     name: pihole-env
      #     items:
      #     - key: reglist
      #       path: regex.list
      # - name: adlist
      #   configMap:
      #     name: pihole-env
      #     items:
      #     - key: adlist
      #       path: adlists.list
      # - name: 02-lan
      #   configMap:
      #     name: pihole-env
      #     items:
      #     - key: 02-lan
      #       path: 02-lan.conf
EOF
kubectl apply -f $appName.yaml
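
After applying the plan, verify that the pod schedules and reaches a Running state (a quick check using the deployment name and label defined above):
# Watch the rollout of the pihole deployment
kubectl rollout status deployment pihole
# Confirm the pod is running and note which node it landed on
kubectl get pods -l app=pihole -o wide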

How to Change Pihole Password in Kubernetes

brucelee@controller:~$ k exec --stdin --tty pihole-75684d64cb-mzmq2 -- /bin/bash
root@pihole-75684d64cb-mzmq2:/# pihole -a -p
Enter New Password (Blank for no password): 
Confirm Password: 
  [✓] New password set

How to Scale Up or Down Replicas

# Note: pihole database file currently isn't meant for multi-access; thus, it's not advisable to set replicas higher than 1
brucelee@controller:~$ kubectl scale deployment pihole --replicas=1
deployment.apps/pihole scaled
brucelee@controller:~$ k get pod -o wide
NAME                           READY   STATUS        RESTARTS   AGE   IP              NODE      NOMINATED NODE   READINESS GATES
pihole-75684d64cb-mzmq2        1/1     Running       0          34h   172.16.90.221   linux03   <none>           <none>
pihole-75684d64cb-tnv74        0/1     Terminating   0          34h   <none>          linux02   <none>           <none>

Troubleshooting

# This setting has caused error: 'dnsmasq: failed to create listening socket for port 53: Address already in use' - it's not recommended for K8s clusters with the metallb load balancer.
deployment.spec.template.spec.hostNetwork: true
# How to view logs of a container in a pod: kubectl logs {podname} -c {containername}
# How to view logs of a previously terminated container in a pod: kubectl logs {podname} -c {containername} --previous
dragoncoin@controller:~$ kubectl logs pihole-7d96dc7986-jc4tj -c pihole --previous
[s6-init] making user provided files available at /var/run/s6/etc...exited 0.
[s6-init] ensuring user provided files have correct perms...exited 0.
[fix-attrs.d] applying ownership & permissions fixes...
[fix-attrs.d] 01-resolver-resolv: applying... 
[fix-attrs.d] 01-resolver-resolv: exited 0.
[fix-attrs.d] done.
[cont-init.d] executing container initialization scripts...
[cont-init.d] 20-start.sh: executing... 
 ::: Starting docker specific checks & setup for docker pihole/pihole
Assigning random password: 7y_qSVCx

  [i] Installing configs from /etc/.pihole...
  [i] Existing dnsmasq.conf found... it is not a Pi-hole file, leaving alone!
  [✓] Copying 01-pihole.conf to /etc/dnsmasq.d/01-pihole.conf
chown: changing ownership of '/etc/pihole/pihole-FTL.conf': Operation not permitted
chown: cannot access '': No such file or directory
chmod: cannot access '': No such file or directory
chown: changing ownership of '/etc/pihole': Operation not permitted
chown: cannot access '/etc/pihole/dhcp.leases': No such file or directory
Converting DNS1 to PIHOLE_DNS_
Converting DNS2 to PIHOLE_DNS_
Setting DNS servers based on PIHOLE_DNS_ variable
::: Pre existing WEBPASSWORD found
DNSMasq binding to default interface: eth0
Added ENV to php:
			"PHP_ERROR_LOG" => "/var/log/lighttpd/error.log",
			"ServerIP" => "0.0.0.0",
			"VIRTUAL_HOST" => "0.0.0.0",
Using IPv4 and IPv6
::: Preexisting ad list /etc/pihole/adlists.list detected ((exiting setup_blocklists early))
https://raw.githubusercontent.com/StevenBlack/hosts/master/hosts

dnsmasq: failed to create listening socket for port 53: Address already in use
::: Testing pihole-FTL DNS: [cont-init.d] 20-start.sh: exited 1.
[cont-finish.d] executing container finish scripts...
[cont-finish.d] done.
[s6-finish] waiting for services.
[s6-finish] sending all processes the TERM signal.
[s6-finish] sending all processes the KILL signal and exiting.

# Resolutions of errors above:
1. dnsmasq: failed to create listening socket for port 53: Address already in use => remove 'hostNetwork: true' in deployment
2. chown: changing ownership of '/etc/pihole/pihole-FTL.conf': Operation not permitted => chmod 777 for all files on the NFS directory, recursively
# Error when pihole cannot own gravity.db
Error, something went wrong!
While executing INSERT OT IGNORE: attempt to write a readonly database
Added 0 out of 1 domains

# View the ACL of files on the NAS
root@ovm:/export# stat -c "%U:%G %a %n" pihole/pihole/*
nobody:nogroup 600 pihole/pihole/custom.list
nobody:nogroup 644 pihole/pihole/dns-servers.conf
nobody:nogroup 644 pihole/pihole/GitHubVersions
nobody:nogroup 664 pihole/pihole/gravity.db
nobody:nogroup 644 pihole/pihole/list.1.raw.githubusercontent.com.domains
nobody:nogroup 644 pihole/pihole/localbranches
nobody:nogroup 644 pihole/pihole/local.list
nobody:nogroup 644 pihole/pihole/localversions
nobody:nogroup 777 pihole/pihole/migration_backup
nobody:nogroup 644 pihole/pihole/pihole-FTL.conf
nobody:nogroup 644 pihole/pihole/pihole-FTL.db
nobody:nogroup 666 pihole/pihole/setupVars.conf
nobody:nogroup 666 pihole/pihole/setupVars.conf.update.bak

# Check the logs again on the K8s controller
dragoncoin@controller:~$ k logs pihole-7d584d94b8-bnzcv -c pihole
chown: changing ownership of '/etc/pihole/pihole-FTL.conf': Operation not permitted
chown: cannot access '': No such file or directory
chmod: cannot access '': No such file or directory
chown: changing ownership of '/etc/pihole': Operation not permitted
chown: cannot access '/etc/pihole/dhcp.leases': No such file or directory
chown: changing ownership of '/etc/pihole/gravity.db': Operation not permitted

# Solution:
The proper fix is to set correct permissions on pihole files as shown below:
root    root      644  /etc/pihole/adlists.list
root    root      644  /etc/pihole/adlists.list.old
root    root      644  /etc/pihole/black.list
root    root      644  /etc/pihole/blacklist.txt
pihole  pihole    644  /etc/pihole/dhcp.leases
root    root      777  /etc/pihole/dnsmasq.d
root    root      644  /etc/pihole/dns-servers.conf
root    root      644  /etc/pihole/GitHubVersions
root    root      644  /etc/pihole/gravity.list
root    root      644  /etc/pihole/install.log
root    root      600  /etc/pihole/list.0.raw.githubusercontent.com.domains
root    root      600  /etc/pihole/list.1.mirror1.malwaredomains.com.domains
root    root      600  /etc/pihole/list.2.sysctl.org.domains
root    root      600  /etc/pihole/list.3.zeustracker.abuse.ch.domains
root    root      600  /etc/pihole/list.4.s3.amazonaws.com.domains
root    root      600  /etc/pihole/list.5.s3.amazonaws.com.domains
root    root      600  /etc/pihole/list.6.hosts-file.net.domains
root    root      600  /etc/pihole/list.7.dehakkelaar.nl.domains
root    root      600  /etc/pihole/list.8.gitlab.com.domains
root    root      644  /etc/pihole/list.preEventHorizon
root    root      644  /etc/pihole/localbranches
root    root      644  /etc/pihole/local.list
root    root      644  /etc/pihole/localversions
root    root      644  /etc/pihole/logrotate
root    root      644  /etc/pihole/macvendor.db
pihole  pihole    664  /etc/pihole/pihole-FTL.conf
pihole  pihole    644  /etc/pihole/pihole-FTL.db
root    root      644  /etc/pihole/pihole-FTL.db.bak
pihole  www-data  664  /etc/pihole/regex.list
root    root      644  /etc/pihole/setupVars.conf
root    root      644  /etc/pihole/setupVars.conf.update.bak
root    root      644  /etc/pihole/whitelist.txt

Unfortunately, username 'pihole' with id 999 may not exist on an NFS server - it could also be associated with another username. Also, by default, shares are owned by root upon container instantiation. In the case of the pihole container, root automatically chmods /etc/pihole/gravity.db to 644.

# workaround option (a) - Manual
# Thus, this is the previously improvised workaround, which is a manual process:
piholeShare=/export/pihole # OpenMediaVault share named 'pihole' would be mounted here
chmod 777 $piholeShare/pihole/gravity.db
# chmod 777 -R $piholeShare # Recursively set all files/folders with read/write permissions for everyone
# chown nobody:www-data -R $piholeShare # Set object owner as 'nobody' as it's the account used to masquerade the NFS service

# workaround option (b) - Manual
# Enter the running container
containerName=pihole-5b68f98875-p7wgl
kubectl exec --stdin --tty $containerName -- /bin/bash

root@pihole:/# ls -la /etc/pihole/gravity.db
-rwxrwxrwx 1 pihole pihole 164777984 Feb  6 14:30 /etc/pihole/gravity.db
root@pihole:/# id pihole
uid=999(pihole) gid=999(pihole) groups=999(pihole)
root@pihole:/# chmod 777 /etc/pihole/gravity.db

# workaround option (c) - Automatic
Update: a better fix for this issue is to add a lifecycle hook to the pod deployment plan so that a command is executed after the pod has been created. Note that the sleep timer delays execution of this command so that it runs after the processes specified by the container 'entrypoint'. This is a workaround for the asynchronous execution of entrypoint and lifecycle processes.
      containers:
        lifecycle:
          postStart:
            exec:
              command: ["/bin/sh", "-c", "sleep 30 && chown pihole:www-data /etc/pihole/gravity.db"]
# How to test name resolution toward the new Pihole DNS server
user1@workstation:~$ dig @192.168.1.50 google.com

; <<>> DiG 9.16.1-Ubuntu <<>> @192.168.1.50 google.com
; (1 server found)
;; global options: +cmd
;; Got answer:
;; ->>HEADER<<- opcode: QUERY, status: NOERROR, id: 12494
;; flags: qr rd ra; QUERY: 1, ANSWER: 1, AUTHORITY: 0, ADDITIONAL: 1

;; OPT PSEUDOSECTION:
; EDNS: version: 0, flags:; udp: 4096
;; QUESTION SECTION:
;google.com.			IN	A

;; ANSWER SECTION:
google.com.		246	IN	A	142.250.68.110

;; Query time: 4 msec
;; SERVER: 192.168.1.50#53(192.168.1.50)
;; WHEN: Mon Feb 01 23:49:40 PST 2021
;; MSG SIZE  rcvd: 55

Issue: a DNS record pointing to the Pihole IP address would show as 'Manually Blacklisted by Wildcard'

Resolution:
- A Pihole A-record (e.g. pihole.kimconnect.com) pointing directly to the IP address of Pihole isn't allowed; however, http://ip.address would display a message prompting users to go to http://ip.address/admin. Hence, users could use a direct URL toward the admin panel such as http://pihole.kimconnect.com/admin.
- Alternatively, a hard-coded edit to /etc/lighttpd/lighttpd.conf with this content would suffice:
  url.redirect = ("^/$" => "/admin")

Alternative Pihole Configuration – Retain Client Source IPs

Currently, MetalLB cannot combine TCP and UDP into the same service. Hence, we’ve had to create 2 services as detailed previously. Those 2 services share the same loadBalancerIP by invoking these properties: metallb.universe.tf/allow-shared-ip: $appName and externalTrafficPolicy: Local. Note that the $appName must match between the 2 services as well as the deployment app selector for this to work. Otherwise, one of the services will not be granted an external (shared) IP.

appName=pihole
piholeWebIp=192.168.1.51
piholeDnsIp=192.168.1.50

cat > $appName-svc-udp.yaml << EOF
apiVersion: v1
kind: Service
metadata:
  name: $appName-svc-udp
  annotations:
    #metallb.universe.tf/address-pool: default
    metallb.universe.tf/allow-shared-ip: $appName
  labels:
    app: $appName
  managedFields:
  - apiVersion: v1
    fieldsType: FieldsV1
    fieldsV1:
      f:status:
        f:loadBalancer:
          f:ingress: {}
    manager: controller
    operation: Update
  - apiVersion: v1
    fieldsType: FieldsV1
    fieldsV1:
      f:metadata:
        f:labels:
          .: {}
          f:run: {}
      f:spec:
        f:ports:
          .: {}
          k:{"port":53,"protocol":"UDP"}:
            .: {}
            f:port: {}
            f:protocol: {}
            f:targetPort: {}
        f:selector:
          .: {}
          f:run: {}
        f:sessionAffinity: {}
        f:type: {}
    manager: kubectl-expose
    operation: Update
  - apiVersion: v1
    fieldsType: FieldsV1
    fieldsV1:
      f:spec:
        f:externalTrafficPolicy: {}
    manager: kubectl-patch
    operation: Update
  name: $appName-svc-udp
  namespace: default
spec:
  type: LoadBalancer
  loadBalancerIP: $piholeDnsIp
  clusterIP: 10.109.175.203 # this is an arbitrary address for the cluster, a required item
  clusterIPs:
  - 10.109.175.203
  externalTrafficPolicy: Local
  healthCheckNodePort: 32153
  ports:
  - nodePort: 31293
    port: 53
    protocol: UDP
    targetPort: dns-udp
    name: dns-udp
  selector:
    app: $appName
  sessionAffinity: None
status:
  loadBalancer:
    ingress:
    - ip: $piholeDnsIp
EOF
kubectl apply -f $appName-svc-udp.yaml

cat > $appName-svc-tcp.yaml << EOF
apiVersion: v1
kind: Service
metadata:
  name: $appName-svc-tcp
  annotations:
    #metallb.universe.tf/address-pool: default
    metallb.universe.tf/allow-shared-ip: $appName
spec:
  type: LoadBalancer
  loadBalancerIP: $piholeDnsIp
  sessionAffinity: ClientIP # This is necessary for multi-replica deployments
  externalTrafficPolicy: Local
  ports:
    - port: 80
      targetPort: http
      protocol: TCP
      name: http
    - port: 53 
      # Transmission packets over 512 bytes (zone transfers, DNSSEC, and TXT records) would be switched over to TCP as there's no transfer limit with the TCP protocol. Hence, this 53/TCP port is required
      targetPort: dns-tcp
      protocol: TCP
      name: dns-tcp      
  selector:
    app: $appName
EOF
kubectl apply -f $appName-svc-tcp.yaml
# Sample Result:
brucelee@controller:~$ kubectl get service
NAME             TYPE           CLUSTER-IP       EXTERNAL-IP      PORT(S)                     AGE
kubernetes       ClusterIP      10.96.0.1        <none>           443/TCP                     19d
pihole-svc-tcp   LoadBalancer   10.109.87.95     192.168.1.50     80:32607/TCP,53:32411/TCP   16h
pihole-svc-udp   LoadBalancer   10.109.175.203   192.168.1.50     53:31293/UDP                16h

Kubernetes Container Deployment with NFS Persistent Volumes

Introduction:

Update: we have provided a practical application of the knowledge conveyed in this article with a new document on How to Deploy Pihole in Kubernetes here.

Now continuing with the contents of this blog…

One of the first questions for container storage provisioning is ‘why NFS?’ The answer to such inquiries is almost always ‘it depends.’ Here’s an overview of common storage protocols to determine the appropriate type for a Kubernetes system:

SMB/CIFS
– Abbreviation: SMB/CIFS stands for Server Message Block / Common Internet File System
– Ports: 137/UDP 138/UDP 139/TCP 445/TCP
– Very chatty protocol – ideal for Windows environments
– More secure than NFS; provides some security features
– Less scalable, ‘normal’ speed, and complex to set up (in Linux environments)

NFS
– Abbreviation: Network File System
– Ports: 111/TCP 111/UDP 2049/TCP 2049/UDP 1110/TCP 1110/UDP 4045/TCP 4045/UDP
– Less chatty – ideal for Linux environments
– Not a secured protocol – IP filtering is required as an access barrier as there are no username/password authentications
– Very scalable, fast, easy to setup

iSCSI
– Abbreviation: Internet Small Computer System Interface
– Ports: 860/TCP 860/UDP 3260/TCP 3260/UDP
– Less chatty – ideal for dedicated subnets for storage communications
– Secured protocol – IP filtering as well as CHAP authentication (username/password)
– Less scalable, fast, more complex to setup (with networking knowledge necessary)

Step 0: Prepare Network File System (NFS) Share

There are many vendors of network storage appliances available on the market, many of which would support iSCSI, CIFS/SMB, NFS, FTP, SFTP, and even Rsync. Instructions on how to create a new NFS share would vary on each of those appliances.

In this example, we’re using OpenMediaVault (OMV), which is comparable to FreeNAS but is built on Debian Linux rather than FreeBSD. The NFS share used in this lab was created from within the OMV web interface.

The important note here is that such a share would be set at the OS layer with an Access Control List (ACL) of 750 (with the nfs daemon as the owner) or 777 (world access) to enable read/write access. Moreover, NFS permissions would be RW,subtree_check,insecure with client access allowed from the subnet where the external IP of the Kubernetes cluster would ingress/egress. That would be the same network as the worker nodes (e.g. 192.168.80.0/24, an arbitrary private subnet to be configured in this lab for K8s).
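
To confirm that the share is exported with the expected client access before involving Kubernetes, the export list can be queried from any node (a quick check, assuming nfs-common is installed and using the NFS server address of 192.168.80.80 referenced later in this article):
# List the exports advertised by the NFS server along with their allowed clients
showmount -e 192.168.80.80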

Step 1: Test the NFS Share Access On Each Node

# Install prerequisite on the Master node
sudo apt-get install nfs-common
 
# Set variables
nfsShare=test # assuming that the 'test' share has already been created on the server
nfsServer=NasServerNameOrIP
mountPoint=/mnt/test
sudo mkdir $mountPoint
sudo mount -t nfs $nfsServer:/$nfsShare $mountPoint

# Example of success
# Nothing, no feedback output from CLI

# Example of failure which will require fixing the file share on the NFS server
brucelee@controller:$ sudo mount -t nfs $nfsServer:/$nfsShare $mountPoint
mount.nfs: access denied by server while mounting FILESERVER:/test
# Create an index file to be used by NGINX - Do this only once
echo "NFS Persistent Volume Test in Kubernetes is Successful!" >> $mountPoint/index.html
cat $mountPoint/index.html

# Example of success
brucelee@controller:/mnt$ cat $mountPoint/index.html
NFS Persistent Volume Test in Kubernetes is Successful!

Step 2: Create Storage Class Named ‘nfs-class’

# Check storage classes - default installation of K8 will have no custom storage classes
kim@linux01:~$ kubectl get storageclasses
No resources found

# Create custom storage class - this will fail if nfs-class has already been manually created prior, which is a desired outcome.
storageClassName=nfs-class
cat > $storageClassName.yaml <<EOF
kind: StorageClass
apiVersion: storage.k8s.io/v1
metadata:
  name: $storageClassName
provisioner: kubernetes.io/nfs
reclaimPolicy: Retain
allowVolumeExpansion: true
EOF
kubectl apply -f $storageClassName.yaml

Step 3: Create a Persistent Volume

# Set variables
pvName=test-nfs-volume
storageClassName=nfs-class
storageSize=100Gi
nfsServer=192.168.80.80
nfsShare=test

# Create yaml file
cat > $pvName.yaml << EOF
apiVersion: v1
kind: PersistentVolume
metadata:
  name: $pvName
spec:
  storageClassName: $storageClassName
  capacity:
    storage: $storageSize 
  accessModes:
  - ReadWriteMany 
  nfs: 
    path: /$nfsShare 
    server: $nfsServer
  persistentVolumeReclaimPolicy: Retain # Other options: Recycle (equivalent to rm -rf /test/*) and Delete (removes the backing volume)
EOF

# Apply the thing
kubectl apply -f $pvName.yaml

Step 4: Create a Persistent Volume Claim

pvClaimName=test-nfs-claim
storageClassName=nfs-class
claimSize=100Gi
cat > $pvClaimName.yaml << EOF
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: $pvClaimName
spec:
  storageClassName: $storageClassName
  accessModes:
  - ReadWriteMany
  resources:
    requests:
      storage: $claimSize
EOF
kubectl apply -f $pvClaimName.yaml

Step 5: Create Deployment Plan

# Set variables
deploymentName=test-nfs-deployment
replicas=2
appName=test
imageSource=nginx:alpine
containerPort=80
containerMountPath=/usr/share/nginx/html
pvName=test-nfs-volume
pvClaimName=test-nfs-claim

# Create deployment file
cat > $deploymentName.yaml << EOF
kind: Deployment
apiVersion: apps/v1
metadata:
  name: $deploymentName
spec:
  replicas: $replicas
  selector: 
    matchLabels:
      app: $appName # This must be identical to the pod name (template label)
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxSurge: 1
      maxUnavailable: 1    
  template:    
    metadata:
      labels:
        app: $appName
    spec:
      hostNetwork: true # This allows a direct ingress to any node. When this value is set, the container must not be binding to ports that are in use by the worker nodes (e.g. 53/tcp 53/udp for dns)
      containers:
      - name: $appName
        image: $imageSource
        ports:
          - containerPort: $containerPort
            name: $appName
        volumeMounts:
          - mountPath: $containerMountPath
            name: $pvName # this must match the volume name below
      volumes:
        - name: $pvName
          persistentVolumeClaim:
            claimName: $pvClaimName
EOF

# Apply deployment plan
kubectl apply -f $deploymentName.yaml

Step 6: Implement MetalLB Load Balancer

# Set strictARP, ipvs mode
kubectl get configmap kube-proxy -n kube-system -o yaml | \
sed -e "s/strictARP: false/strictARP: true/" | sed -e "s/mode: \"\"/mode: \"ipvs\"/" | \
kubectl apply -f - -n kube-system
 
# Apply the manifests provided by the author, David Anderson (https://www.dave.tf/) - an awesome dude
kubectl apply -f https://raw.githubusercontent.com/metallb/metallb/v0.9.5/manifests/namespace.yaml
kubectl apply -f https://raw.githubusercontent.com/metallb/metallb/v0.9.5/manifests/metallb.yaml
# On first install only
kubectl create secret generic -n metallb-system memberlist --from-literal=secretkey="$(openssl rand -base64 128)"
 
# Sample output:
brucelee@controller:~$ kubectl apply -f https://raw.githubusercontent.com/metallb/metallb/v0.9.5/manifests/namespace.yaml
namespace/metallb-system created
brucelee@controller:~$ kubectl apply -f https://raw.githubusercontent.com/metallb/metallb/v0.9.5/manifests/metallb.yaml
podsecuritypolicy.policy/controller created
podsecuritypolicy.policy/speaker created
serviceaccount/controller created
serviceaccount/speaker created
clusterrole.rbac.authorization.k8s.io/metallb-system:controller created
clusterrole.rbac.authorization.k8s.io/metallb-system:speaker created
role.rbac.authorization.k8s.io/config-watcher created
role.rbac.authorization.k8s.io/pod-lister created
clusterrolebinding.rbac.authorization.k8s.io/metallb-system:controller created
clusterrolebinding.rbac.authorization.k8s.io/metallb-system:speaker created
rolebinding.rbac.authorization.k8s.io/config-watcher created
rolebinding.rbac.authorization.k8s.io/pod-lister created
daemonset.apps/speaker created
deployment.apps/controller created
brucelee@controller:~$ kubectl create secret generic -n metallb-system memberlist --from-literal=secretkey="$(openssl rand -base64 128)"
secret/memberlist created
 
# Customize for this system
ipRange=192.168.1.80-192.168.1.89
loadBalancerFile=metallb-config.yaml
cat > $loadBalancerFile << EOF
apiVersion: v1
kind: ConfigMap
metadata:
  namespace: metallb-system
  name: config
data:
  config: |
    address-pools:
    - name: default
      protocol: layer2
      addresses:
      - $ipRange
EOF
kubectl apply -f $loadBalancerFile
 
# Sample output
brucelee@controller:~$ kubectl apply -f $fileName
configmap/config created

Step 7: Create a Service Cluster

serviceName=test-service
appName=test
nodePort=30000
containerPort=80
servicePort=80
cat > test-service.yaml << EOF
apiVersion: v1
kind: Service
metadata:
  name: $serviceName
spec:
  type: LoadBalancer # Other options: ClusterIP, LoadBalancer
  selector:
    app: $appName # This name must match the template.metadata.labels.app value
  ports:
  - protocol: TCP
    port: $servicePort
    targetPort: $containerPort
    # nodePort: $nodePort # optional field: by default, Kubernetes control plane will allocate a port from 30000-32767 range
EOF
kubectl apply -f test-service.yaml
clusterIP=$(kubectl get service test-service --output yaml|grep 'clusterIP: '|awk '{print $2}')
echo "clusterIP: $clusterIP"
curl $clusterIP
kubectl get service test-service

Troubleshooting

A) Pod stuck in ContainerCreating status

brucelee@controller:~$ k get pod
NAME                                   READY   STATUS              RESTARTS   AGE
test-nfs-deployment-54b78bc4c6-4pdz8   0/1     ContainerCreating   0          86s
test-nfs-deployment-54b78bc4c6-sgbw8   0/1     ContainerCreating   0          86s

brucelee@controller:~$ kubectl describe pods
--- Truncated for brevity ---
Events:
  Type     Reason       Age                From               Message
  ----     ------       ----               ----               -------
  Normal   Scheduled    4m9s               default-scheduler  Successfully assigned default/test-nfs-deployment-54b78bc4c6-sgbw8 to linux03
  Warning  FailedMount  2m6s               kubelet            Unable to attach or mount volumes: unmounted volumes=[test-nfs-volume], unattached volumes=[test-nfs-volume default-token-bdhxv]: timed out waiting for the condition
  Warning  FailedMount  2m (x9 over 4m8s)  kubelet            MountVolume.SetUp failed for volume "test-nfs-volume" : mount failed: exit status 32
Mounting command: mount
Mounting arguments: -t nfs 192.168.100.21:/test /var/lib/kubelet/pods/8aa113c6-1b1e-4329-ad37-f9f04fd72e78/volumes/kubernetes.io~nfs/test-nfs-volume
Output: mount: /var/lib/kubelet/pods/8aa113c6-1b1e-4329-ad37-f9f04fd72e78/volumes/kubernetes.io~nfs/test-nfs-volume: bad option; for several filesystems (e.g. nfs, cifs) you might need a /sbin/mount.<type> helper program.

Resolution:
- Check on the NFS server share to ensure that it's been set with RW,insecure and that the shared folder has been set with at least 750 permissions (777 preferred)
- Check the Pod Deployment template:spec:hostNetwork: true has been set
- Run this on each node: sudo apt-get install nfs-common

B) Error when the name of spec:containers:volumeMounts.name doesn't match spec:volumes:name

The Deployment is invalid: spec.template.spec.containers[0].volumeMounts[0].name: Not found:

C) Error when the storage class 'nfs-class' has not been defined

brucelee@controller:~$ k describe persistentvolumeclaims
Name:          test-nfs-claim
Namespace:     default
StorageClass:  nfs
Status:        Pending
Volume:        
Labels:        <none>
Annotations:   <none>
Finalizers:    [kubernetes.io/pvc-protection]
Capacity:      
Access Modes:  
VolumeMode:    Filesystem
Used By:       test-nfs-deployment-6d4bff899f-5t2m2
               test-nfs-deployment-6d4bff899f-kds6l
Events:
  Type     Reason              Age                      From                         Message
  ----     ------              ----                     ----                         -------
  Warning  ProvisioningFailed  4m21s (x763 over 3h14m)  persistentvolume-controller  storageclass.storage.k8s.io "nfs" not found
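
Resolution:
One way to clear this error in a statically provisioned lab such as this one is to declare the missing StorageClass with a name matching the PVC's storageClassName (here, 'nfs' per the event above) and no dynamic provisioner. A minimal sketch (the file name is arbitrary):

cat > nfs-storageclass.yaml << EOF
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: nfs # must match the storageClassName referenced by the PV and PVC
provisioner: kubernetes.io/no-provisioner # static provisioning only; no dynamic volumes
volumeBindingMode: WaitForFirstConsumer
EOF
kubectl apply -f nfs-storageclass.yaml

Alternatively, correct the storageClassName on the PersistentVolume and PersistentVolumeClaim so that they reference a class that already exists.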

Step 8: Cleanup

# Cleanup: must be in the correct sequence!
kubectl delete services test-service
kubectl delete deployment test-nfs-deployment
kubectl delete persistentvolumeclaims test-nfs-claim
kubectl delete pv test-nfs-volume
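
# Quick confirmation that the cleanup removed everything it targeted
kubectl get deployments,services,persistentvolumeclaims,persistentvolumes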

How to Run OpenMediaVault with Pass-through Disks in VMWare ESXi

Assumptions:

1. The ESXi host in this lab is available with three (3) 1TB hard drives intended to be provisioned toward a NAS guest VM
2. There’s no data on any of the three disks referenced above – or any data on those devices has been approved for permanent deletion

Step 1: Install OpenMediaVault as a Guest VM

  • Download the latest version of OpenMediaVault from this list: https://sourceforge.net/projects/openmediavault/files/
  • Access the ESXi UI to add a new VM with Debian AMD64 as the base OS type, then install the OS/App using the wizard
  • Update the OS: sudo apt update -y && sudo apt upgrade -y
  • Note that the default credential is: admin / openmediavault
  • Change OpenMediaVault password with this command: omv-firstaid

Step 2: Remove Existing Disks (If necessary)

The following instruction is meant to work on any storage controller, regardless of whether the host hardware supports “SCSI Inquiry Vital Data Product (VPD) page code 0x83” (source: https://kb.vmware.com/s/article/1017530). As long as ESXi has recognized the disks, the ESXi OS will handle the hardware abstraction layer (HAL) to provide direct pointer access to the attached disks from a guest virtual machine (VM).

# Check the list of mounted devices
[root@esx2:~] esxcli storage vmfs extent list
Volume Name  VMFS UUID                            Extent Number  Device Name                                                               Partition
-----------  -----------------------------------  -------------  ------------------------------------------------------------------------  ---------
ESX2-1TB     6009f013-14225b60-137f-309c2369d30b              0  t10.ATA_____Samsung_SSD_860_EVO_1TB_________________S3Z8NB0KB46308J_____          1
ESX2-500GB   6009f07e-d6fb7a24-a1e8-309c2369d30b              0  t10.ATA_____Samsung_SSD_850_PRO_512GB_______________S250NX0H835192J_____          1

# Remove a device
deviceId=t10.ATA_____Samsung_SSD_860_EVO_1TB_________________S3Z8NB0KB46308J_____
esxcli storage core device detached remove -d $deviceId

[root@esx2:~] esxcli storage filesystem list
Mount Point                                        Volume Name  UUID                                 Mounted  Type            Size          Free
-------------------------------------------------  -----------  -----------------------------------  -------  ------  ------------  ------------
/vmfs/volumes/6009f013-14225b60-137f-309c2369d30b  ESX2-1TB     6009f013-14225b60-137f-309c2369d30b     true  VMFS-6  999922073600  998307266560
/vmfs/volumes/6009f07e-d6fb7a24-a1e8-309c2369d30b  ESX2-500GB   6009f07e-d6fb7a24-a1e8-309c2369d30b     true  VMFS-6  511906414592  288623689728
/vmfs/volumes/bc795aee-3daf7d3d-ae7e-941f7427eeef               bc795aee-3daf7d3d-ae7e-941f7427eeef     true  vfat       261853184     105029632
/vmfs/volumes/262e2ce4-b8bb291a-491d-247b065bf27f               262e2ce4-b8bb291a-491d-247b065bf27f     true  vfat       261853184     261849088
/vmfs/volumes/601254aa-7b7f2998-78ed-309c2369d30b               601254aa-7b7f2998-78ed-309c2369d30b     true  vfat       299712512      92643328

# Unmount a volume
volumeName=ESX2-1TB
esxcli storage filesystem unmount -l $volumeName

[root@esx2:~] esxcli storage filesystem unmount -l $volumeName
Volume 'ESX2-1TB' cannot be unmounted. Reason: Busy

# Change the scratch location
# vim-cmd hostsvc/advopt/update ScratchConfig.ConfiguredScratchLocation string /tmp # use this option to place swap onto the same device as ESXi OS
newSwapDirectory=/vmfs/volumes/6009f07e-d6fb7a24-a1e8-309c2369d30b/swap
mkdir $newSwapDirectory
vim-cmd hostsvc/advopt/update ScratchConfig.ConfiguredScratchLocation string $newSwapDirectory
# Restart from cli
reboot

# Sample output
[root@esx2:~] vim-cmd hostsvc/advopt/view ScratchConfig.ConfiguredScratchLocation
(vim.option.OptionValue) [
   (vim.option.OptionValue) {
      key = "ScratchConfig.ConfiguredScratchLocation", 
      value = "/vmfs/volumes/6009f07e-d6fb7a24-a1e8-309c2369d30b/.locker"
   }
]
[root@esx2:~] vim-cmd hostsvc/advopt/update ScratchConfig.ConfiguredScratchLocation string /tmp
[root@esx2:~] vim-cmd hostsvc/advopt/view ScratchConfig.ConfiguredScratchLocation
(vim.option.OptionValue) [
   (vim.option.OptionValue) {
      key = "ScratchConfig.ConfiguredScratchLocation", 
      value = "/tmp"
   }
]

# List directories and total sizes
[root@esx2:~] df -h
Filesystem   Size   Used Available Use% Mounted on
VMFS-6     476.8G 209.0G    267.7G  44% /vmfs/volumes/ESX2-500GB
VMFS-6       0.0B   0.0B      0.0B   0% /vmfs/volumes/ESX2-1TB
vfat       249.7M 149.6M    100.2M  60% /vmfs/volumes/bc795aee-3daf7d3d-ae7e-941f7427eeef
vfat       249.7M   4.0K    249.7M   0% /vmfs/volumes/262e2ce4-b8bb291a-491d-247b065bf27f
vfat       285.8M 197.5M     88.4M  69% /vmfs/volumes/601254aa-7b7f2998-78ed-309c2369d30b
[root@esx2:~] du -sh *
4.0K	altbootbank
127.0M	bin
4.0K	bootbank
336.0K	bootpart.gz
224.0K	bootpart4kn.gz
6.0T	dev
14.0M	etc
40.4M	lib
173.5M	lib64
12.0K	local.tgz
4.0K	locker
116.0K	mbr
136.0K	opt
2.3M	proc
4.0K	productLocker
4.0K	sbin
4.0K	scratch
4.0K	store
627.2M	tardisks
4.0K	tardisks.noauto
1.7M	tmp
260.7M	usr
16.4M	var
209.4G	vmfs
12.0K	vmimages
4.0K	vmupgrade

# Unmount the same volume after reconnection
[root@esx2:~] volumeName=ESX2-1TB
[root@esx2:~] esxcli storage filesystem unmount -l $volumeName
# no errors and no outputs

# Remove device
deviceId=t10.ATA_____Samsung_SSD_860_EVO_1TB_________________S3Z8NB0KB46308J_____
esxcli storage core device set --state=off -d $deviceId

# Verify that the specified Device ID is now missing from the list
[root@esx2:~] esxcli storage filesystem list
Mount Point                                        Volume Name  UUID                                 Mounted  Type            Size          Free
-------------------------------------------------  -----------  -----------------------------------  -------  ------  ------------  ------------
/vmfs/volumes/6009f07e-d6fb7a24-a1e8-309c2369d30b  ESX2-500GB   6009f07e-d6fb7a24-a1e8-309c2369d30b     true  VMFS-6  511906414592  287459770368
/vmfs/volumes/bc795aee-3daf7d3d-ae7e-941f7427eeef               bc795aee-3daf7d3d-ae7e-941f7427eeef     true  vfat       261853184     105025536
/vmfs/volumes/262e2ce4-b8bb291a-491d-247b065bf27f               262e2ce4-b8bb291a-491d-247b065bf27f     true  vfat       261853184     261849088
/vmfs/volumes/601254aa-7b7f2998-78ed-309c2369d30b               601254aa-7b7f2998-78ed-309c2369d30b     true  vfat       299712512      92643328

# Rescan all disks
esxcli storage core adapter rescan --all

# Reattach disk to storage core
deviceId=t10.ATA_____Samsung_SSD_860_EVO_1TB_________________S3Z8NB0KB46308J_____
esxcli storage core device set --state=on -d $deviceId
esxcli storage core adapter rescan --all
esxcli storage filesystem list

Step 3: Map Physical Disks to Guest VM

# Map four physical disks as vmdk files

# Check for available disks
ls -l /vmfs/devices/disks

# List existing volumes
ls -l /vmfs/volumes



# Configure the devices as RDMs and generate the RDM pointer files at user-defined locations
# Map a device (physical disk) as a virtual disk
devicePath0=/vmfs/devices/disks/t10.ATA_____Samsung_SSD_840_EVO_1TB_________________S1D9NEAD705175N_____
targetVmdk0=/vmfs/volumes/604ef4d8-b2bcec6c-36bd-309c2369d30b/OpenMediaVault/nas_disk4.vmdk
vmkfstools -z $devicePath0 $targetVmdk0

targetVmdk1=/vmfs/volumes/604ef4d8-b2bcec6c-36bd-309c2369d30b/OpenMediaVault/nas_disk1.vmdk
deviceId1=t10.ATA_____Samsung_SSD_860_EVO_1TB_________________S3Z8NB0KB46308J_____
vmkfstools -z /vmfs/devices/disks/$deviceId1 $targetVmdk1
 
targetVmdk2=/vmfs/volumes/604ef4d8-b2bcec6c-36bd-309c2369d30b/OpenMediaVault/nas_disk2.vmdk
deviceId2=t10.ATA_____Samsung_SSD_870_QVO_1TB_________________S5VSNG0NA05357H_____
vmkfstools -z /vmfs/devices/disks/$deviceId2 $targetVmdk2
 
targetVmdk3=/vmfs/volumes/604ef4d8-b2bcec6c-36bd-309c2369d30b/OpenMediaVault/nas_disk3.vmdk
deviceId3=t10.ATA_____Samsung_SSD_870_QVO_1TB_________________S5VSNJ0NC00894D_____
vmkfstools -z /vmfs/devices/disks/$deviceId3 $targetVmdk3

[root@esx2:~] vim-cmd vmsvc/getallvms
Vmid           Name                                      File                                  Guest OS       Version   Annotation
1      OpenMediaVault         [ESX2-500GB] OpenMediaVault/OpenMediaVault.vmx               debian10_64Guest   vmx-15              
2      linux03                [ESX2-500GB] linux03/linux03.vmx                             ubuntu64Guest      vmx-15              
3      UbuntuServerTemplate   [ESX2-500GB] UbuntuServerTemplate/UbuntuServerTemplate.vmx   ubuntu64Guest      vmx-15

# Attach disk to guest VM
vmid=1
disk1=/vmfs/volumes/6009f07e-d6fb7a24-a1e8-309c2369d30b/OpenMediaVault/nas_disk1.vmdk
disk2=/vmfs/volumes/6009f07e-d6fb7a24-a1e8-309c2369d30b/OpenMediaVault/nas_disk2.vmdk
disk3=/vmfs/volumes/6009f07e-d6fb7a24-a1e8-309c2369d30b/OpenMediaVault/nas_disk3.vmdk
scsiControllerNumber=0
availableScsiSlot=1
vim-cmd vmsvc/device.diskaddexisting $vmid $disk1 0 1
vim-cmd vmsvc/device.diskaddexisting $vmid $disk1 $scsiControllerNumber $availableScsiSlot
vim-cmd vmsvc/device.diskaddexisting $vmid $disk2 $scsiControllerNumber $(($availableScsiSlot+1))
vim-cmd vmsvc/device.diskaddexisting $vmid $disk3 $scsiControllerNumber $(($availableScsiSlot+2))

# The above CLI commands don't work
[root@esx2:~] vim-cmd vmsvc/device.diskaddexisting $vmid $disk1 $scsiControllerNumber $availableScsiSlot
Reconfigure failed

# Use the GUI method to attach existing disks
Right click the virtual machine > Edit Settings > Add > Hard Disk > Use an existing virtual disk > Browse to the directory you saved the RDM pointer > select the RDM pointer file > Next > Repeat for additional pointers > click Finish to save changes

Configure Networking

# Check for network interfaces
root@MediaServer:/home/rambo# ip link
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN mode DEFAULT group default qlen 1000
    link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
2: ens192: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq state UP mode DEFAULT group default qlen 1000
    link/ether 00:0c:29:20:47:32 brd ff:ff:ff:ff:ff:ff
 
# Configure networking
cat > /etc/netplan/ens192.yaml <<EOF
### Sample content ###
network:
  version: 2
  renderer: networkd
  ethernets:
    ens192:
      dhcp4: false
      addresses: [10.10.10.###/24]
      gateway4: 10.10.10.1
      nameservers:
        addresses: [8.8.8.8,1.1.1.1]
#######################
EOF
 
# Apply new network configuration
sudo netplan apply
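
To confirm the static address took effect (the interface name ens192 is carried over from above):

ip -4 addr show ens192
ip route show default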

Step by Step Kubernetes Application Hosting: Persistent Storage, Pod Creation, Deployment Plan, Service Clustering, and Ingress Routing

Overview

Update: we have a new article which goes directly into a production-style setup of Kubernetes containers using a Network File System (NFS) Persistent Volume. Using K8s with local storage, as illustrated in this posting, is reserved for edge cases, since such a deployment sacrifices the versatility and robustness of a K8s system. Network storage is definitely recommended over local mounts.

This walk-through is intended as a practical demonstration of an application (App) deployment using Kubernetes. To convert this guide into Production-grade practice, it’s necessary to add Secret (tls.crt, tls.key), external storage, and production-grade load balancer components. Explaining all the intricacies of the underlying technologies would require a much longer article; thus, we will just dive straight into the code with sporadic commentary.

Assumptions for lab environment:

a. There already exists a Kubernetes cluster.
b. The cluster consists of these nodes: linux01 & linux02
c. Each node is running Ubuntu 20.04 LTS with setup instructions given here.
d. A local mount named /data has been set on the worker node, linux02. A how-to is written here.
e. These are the roles and IPs of nodes:
linux01: controller, 192.168.100.91
linux02: worker, 192.168.100.92
f. The controller node has Internet connectivity to download Yaml files for configurations and updates.
g. The ingress-nginx controller has been added to the cluster as detailed in the article link above (part 2: optional components).
h. This instruction shall include a ‘bare-metal’ load balancer installation. Although, a separate load balancer outside of the Kubernetes cluster is recommended for production.
i. A client computer with an Internet browser or BASH

Here are the steps to host an App in this Kubernetes cluster lab:

1. Setup a Persistent Volume
2. Create a Persistent Volume Claim
3. Provision a Pod
4. Apply a Deployment Plan
5. Implement MetalLB Load Balancer
6. Create a Service Cluster
7. Generate an Ingress Route for the Service
8. Add the SSL Secret component 
9. Miscellaneous Discovery & Troubleshooting

The general workflow is divided into part (a) generate a shell script on the worker node linux02, and part (b) paste the resulting code into the master node, linux01. This sequence is repeated for each subsequent step until completion.

View of a Finished Product

kim@linux01:~$ k get all -o wide
NAME                                   READY   STATUS    RESTARTS   AGE    IP               NODE      NOMINATED NODE   READINESS GATES
pod/test-deployment-7d4cc6df47-4z2mr   1/1     Running   0          146m   192.168.100.92   linux02   <none>           <none>

NAME                      TYPE           CLUSTER-IP       EXTERNAL-IP      PORT(S)        AGE     SELECTOR
service/kubernetes        ClusterIP      10.96.0.1        <none>           443/TCP        6d6h    <none>
service/test-deployment   ClusterIP      10.96.244.250    <none>           80/TCP         2d23h   app=test
service/test-service      LoadBalancer   10.111.131.202   192.168.100.80   80:30000/TCP   120m    app=test

NAME                              READY   UP-TO-DATE   AVAILABLE   AGE    CONTAINERS   IMAGES         SELECTOR
deployment.apps/test-deployment   1/1     1            1           146m   test         nginx:alpine   app=test

NAME                                         DESIRED   CURRENT   READY   AGE    CONTAINERS   IMAGES         SELECTOR
replicaset.apps/test-deployment-7d4cc6df47   1         1         1       146m   test         nginx:alpine   app=test,pod-template-hash=7d4cc6df47
Step 1: Setup a Persistent Volume

There are many storage classes available for integration with Kubernetes (https://kubernetes.io/docs/concepts/storage/storage-classes/). For purposes of this demo, we’re using a local mount on the node named linux02. Assuming that the mount has been set as /data, we would run this script on linux02 to generate a yaml file containing instructions for the creation of a Persistent Volume in the cluster. After the script has been generated at the shell console output of linux02, it is to be copied and pasted onto the terminal console of the Kubernetes controller, linux01.

1a: generate script on worker node

# Set variables
mountPoint=/data

# Set permissions
# sudo chcon -Rt svirt_sandbox_file_t $mountPoint # SELinux: enable Kubernetes virtual volumes - only on systems with SELinux enabled
# setsebool -P virt_use_nfs 1 # SELinux: enable nfs mounts
sudo chmod 777 $mountPoint # grant open permissions on the mount point (lab only)

# This would occur if SELinux isn't running
# kim@linux02:~$ sudo chcon -Rt svirt_sandbox_file_t $mountPoint
# chcon: can't apply partial context to unlabeled file 'lost+found'
# chcon: can't apply partial context to unlabeled file '/data'

# create a dummy index.html
echo "linux02 Persistent Volume Has Successfully Set In Kubernetes!" >> $mountPoint/index.html

# Generate the yaml file creation script
mountSource=$(findmnt --mountpoint $mountPoint | tail -1 | awk '{print $2}')
availableSpace=$(df $mountPoint --output=avail | tail -1 | sed 's/[[:blank:]]//g')
availableInGib=`expr $availableSpace / 1024 / 1024`
echo "Mount point $mountPoint has free disk space of $availableSpace KB or $availableInGib GiB"
hostname=$(cat /proc/sys/kernel/hostname)
pvName=$hostname-local-pv
storageClassName=$hostname-local-volume
pvFileName=$hostname-persistentVolume.yaml
echo "Please paste the below contents on the master node"
echo "# Step 1: Setup a Persistent Volume
cat > $pvFileName << EOF
apiVersion: v1
kind: PersistentVolume
metadata:
  name: $pvName
spec:
  capacity:
    # expression can be in the form of an integer or a human readable string (1M=1000KB vs 1Mi=1024KB)
    storage: $availableSpace
  accessModes:
  - ReadWriteOnce
  persistentVolumeReclaimPolicy: Retain
  storageClassName: $storageClassName
  local:
    path: $mountPoint
  nodeAffinity:
    required:
      nodeSelectorTerms:
      - matchExpressions:
        - key: kubernetes.io/hostname
          operator: In
          values:
          - $hostname
EOF
kubectl create -f $pvFileName"

1b: Resulting Script to be executed on the Master Node

# Step 1: Setup a Persistent Volume
cat > linux02-persistentVolume.yaml << EOF
apiVersion: v1
kind: PersistentVolume
metadata:
  name: linux02-local-pv
spec:
  capacity:
    # expression can be in the form of an integer or a human readable string (1M=1000KB vs 1Mi=1024KB)
    storage: 243792504
  accessModes:
  - ReadWriteOnce
  persistentVolumeReclaimPolicy: Retain
  storageClassName: linux02-local-volume
  local:
    path: /data
  nodeAffinity:
    required:
      nodeSelectorTerms:
      - matchExpressions:
        - key: kubernetes.io/hostname
          operator: In
          values:
          - linux02
EOF
kubectl create -f linux02-persistentVolume.yaml

1c: Expected Results

kim@linux01:~$ kubectl create -f linux02-persistentVolume.yaml
persistentvolume/linux02-local-pv created
Step 2: Create a Persistent Volume Claim

2a: generate script on worker node

# Generate the Persistent Volume Claim Script
pvcClaimScriptFile=$hostname-persistentVolumeClaim.yaml
pvClaimName=$hostname-claim
echo "# Step 2. Create a Persistent Volume Claim
cat > $pvcClaimScriptFile << EOF
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: $pvClaimName
spec:
  accessModes:
  - ReadWriteOnce
  storageClassName: $storageClassName
  resources:
    requests:
      storage: $availableSpace
EOF
kubectl create -f $pvcClaimScriptFile
kubectl get pv"

2b: Resulting Script to be executed on the Master Node

# Step 2: Create a Persistent Volume Claim
cat > linux02-persistentVolumeClaim.yaml << EOF
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: linux02-claim
spec:
  accessModes:
  - ReadWriteOnce
  storageClassName: linux02-local-volume
  resources:
    requests:
      storage: 243792504
EOF
kubectl create -f linux02-persistentVolumeClaim.yaml
kubectl get pv

2c: Expected Results

kim@linux01:~$ kubectl create -f linux02-persistentVolumeClaim.yaml
persistentvolumeclaim/linux02-claim created

kim@linux01:~$ kubectl get pv
NAME               CAPACITY    ACCESS MODES   RECLAIM POLICY   STATUS   CLAIM                   STORAGECLASS           REASON   AGE
linux02-local-pv   243792504   RWO            Retain           Bound    default/linux02-claim   linux02-local-volume            8m26s
Step 3: Provision a Pod (to be deleted in favor of Deployment Plan later)

3a: generate script on worker node

# Optional: Generate the Test Pod Script
podName=testpod # must be in lower case
image=nginx:alpine
exposePort=80
echo "$hostname Persistent Volume Has Successfully Set In Kubernetes!" > $mountPoint/index.html
echo "# Step 3. Provision a Pod
cat > $hostname-$podName.yaml << EOF
apiVersion: v1
kind: Pod
metadata:
  name: $podName
  labels:
    name: $podName
spec:
  containers:
  - name: $podName
    image: $image
    ports:
      - containerPort: $exposePort
        name: $podName
    volumeMounts:
      - name: $pvName
        mountPath: /usr/share/nginx/html
  volumes:
    - name: $pvName
      persistentVolumeClaim:
        claimName: $pvClaimName
EOF
kubectl create -f $hostname-$podName.yaml
# Wait a few moments for pods to generate and check results
kubectl get pods -o wide
#
#
#
# Remove pods in preparation for deployment plan
kubectl delete pods $podName"

3b: Resulting Script to be executed on the Master Node

# Step 3. Provision a Pod
cat > linux02-testpod.yaml << EOF
apiVersion: v1
kind: Pod
metadata:
  name: testpod
  labels:
    name: testpod
spec:
  containers:
  - name: testpod
    image: nginx:alpine
    ports:
      - containerPort: 80
        name: testpod
    volumeMounts:
      - name: linux02-local-pv
        mountPath: /usr/share/nginx/html
  volumes:
    - name: linux02-local-pv
      persistentVolumeClaim:
        claimName: linux02-claim
EOF
kubectl create -f linux02-testpod.yaml
# Wait a few moments for pods to generate and check results
kubectl get pods -o wide
#
#
#
# Remove pods in preparation for deployment plan
kubectl delete pods testpod

3c: Expected Results

kim@linux01:~$ kubectl create -f linux02-testpod.yaml
pod/testpod created

kim@linux01:~$ kubectl get pods -o wide
NAME                               READY   STATUS    RESTARTS   AGE   IP              NODE      NOMINATED NODE   READINESS GATES
test-deployment-7dc8569756-2tl46   1/1     Running   0          26h   172.16.90.130   linux02   <none>           <none>
test-deployment-7dc8569756-mhch7   1/1     Running   0          26h   172.16.90.131   linux02   <none>           <none>
Step 4. Apply a Deployment Plan

4a: generate script on worker node

# Generate the Test Deployment Script
appName=test # app name value must be in lower case, '-' (dashes) are ok
image=nginx:alpine
exposePort=80
mountPath=/usr/share/nginx/html
replicas=1
# create a sample file in the persistent volume to be mounted by a test-pod
echo "$hostname Persistent Volume Has Successfully Set In Kubernetes!" > $mountPoint/index.html
# output the script to be run on the Master node
deploymentFile=$hostname-$appName-deployment.yaml
echo "# Step 4. Apply a Deployment Plan
cat > $deploymentFile << EOF
kind: Deployment
apiVersion: apps/v1
metadata:
  name: $appName-deployment
spec:
  replicas: $replicas
  selector: # select pods to be managed by this deployment   
    matchLabels:
      app: $appName # This must be identical to the pod name
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxSurge: 1
      maxUnavailable: 1    
  template:
    metadata:
      labels:
        app: $appName
    spec:
      containers:
      - name: $appName
        image: $image
        ports:
          - containerPort: $exposePort
            name: $appName
        volumeMounts:
          - name: $pvName
            mountPath: $mountPath
      volumes:
        - name: $pvName
          persistentVolumeClaim:
            claimName: $pvClaimName
EOF
kubectl create -f $deploymentFile
kubectl get deployments # list deployments
kubectl get pods -o wide  # get pod info to include IPs
kubectl get rs # check replica sets"

4b: Resulting Script to be executed on the Master Node

# Step 4. Apply a Deployment Plan
cat > linux02-test-deployment.yaml << EOF
kind: Deployment
apiVersion: apps/v1
metadata:
  name: test-deployment
spec:
  replicas: 2
  selector: # select pods to be managed by this deployment   
    matchLabels:
      app: test # This must be identical to the pod name
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxSurge: 1
      maxUnavailable: 1    
  template:
    metadata:
      labels:
        app: test
    spec:
      containers:
      - name: test
        image: nginx:alpine
        ports:
          - containerPort: 80
            name: test
        volumeMounts:
          - name: linux02-local-pv
            mountPath: /usr/share/nginx/html
      volumes:
        - name: linux02-local-pv
          persistentVolumeClaim:
            claimName: linux02-claim
EOF
kubectl create -f linux02-test-deployment.yaml
kubectl get deployments # list deployments
kubectl get pods -o wide  # get pod info to include IPs
kubectl get rs # check replica sets

4c: Expected Results

kim@linux01:~$ kubectl create -f linux02-test-deployment.yaml
deployment.apps/test-deployment created

kim@linux01:~$ kubectl get pods -o wide
NAME                               READY   STATUS    RESTARTS   AGE     IP              NODE      NOMINATED NODE   READINESS GATES
test-deployment-7dc8569756-2tl46   1/1     Running   0          4m41s   172.16.90.130   linux02   <none>           <none>
test-deployment-7dc8569756-mhch7   1/1     Running   0          4m41s   172.16.90.131   linux02   <none>           <none>

kim@linux01:~$ kubectl get rs
NAME                         DESIRED   CURRENT   READY   AGE
test-deployment-7dc8569756   2         2         2       25h
Step 5. Implement MetalLB Load Balancer

Source: https://metallb.universe.tf/installation/

# Set strictARP & ipvs mode
kubectl get configmap kube-proxy -n kube-system -o yaml | \
sed -e "s/strictARP: false/strictARP: true/" | sed -e "s/mode: \"\"/mode: \"ipvs\"/" | \
kubectl apply -f - -n kube-system

# Apply the manifests provided by the author, David Anderson (https://www.dave.tf/) - an awesome dude
kubectl apply -f https://raw.githubusercontent.com/metallb/metallb/v0.9.5/manifests/namespace.yaml
kubectl apply -f https://raw.githubusercontent.com/metallb/metallb/v0.9.5/manifests/metallb.yaml
# On first install only
kubectl create secret generic -n metallb-system memberlist --from-literal=secretkey="$(openssl rand -base64 128)"

# Sample output:
kim@linux01:~$ kubectl apply -f https://raw.githubusercontent.com/metallb/metallb/v0.9.5/manifests/namespace.yaml
namespace/metallb-system created
kim@linux01:~$ kubectl apply -f https://raw.githubusercontent.com/metallb/metallb/v0.9.5/manifests/metallb.yaml
podsecuritypolicy.policy/controller created
podsecuritypolicy.policy/speaker created
serviceaccount/controller created
serviceaccount/speaker created
clusterrole.rbac.authorization.k8s.io/metallb-system:controller created
clusterrole.rbac.authorization.k8s.io/metallb-system:speaker created
role.rbac.authorization.k8s.io/config-watcher created
role.rbac.authorization.k8s.io/pod-lister created
clusterrolebinding.rbac.authorization.k8s.io/metallb-system:controller created
clusterrolebinding.rbac.authorization.k8s.io/metallb-system:speaker created
rolebinding.rbac.authorization.k8s.io/config-watcher created
rolebinding.rbac.authorization.k8s.io/pod-lister created
daemonset.apps/speaker created
deployment.apps/controller created
kim@linux01:~$ kubectl create secret generic -n metallb-system memberlist --from-literal=secretkey="$(openssl rand -base64 128)"
secret/memberlist created

# Customize for this system
ipRange=192.168.100.80-192.168.100.90
fileName=metallb-config.yaml
cat > $fileName << EOF
apiVersion: v1
kind: ConfigMap
metadata:
  namespace: metallb-system
  name: config
data:
  config: |
    address-pools:
    - name: default
      protocol: layer2
      addresses:
      - $ipRange
EOF
kubectl apply -f $fileName

# Sample output
kim@linux01:~$ kubectl apply -f $fileName
configmap/config created
Step 6. Create a Service Cluster

6a: generate script on worker node

A Kubernetes Service is a virtual entity that routes and load balances incoming requests toward the intended pods, forwarding traffic to the hosts of the matching pods while masking a significant amount of networking complexity. There are five types of Services:
1. ClusterIP – the default type; internally load balances traffic to live pods behind a single entry point
2. NodePort – a ClusterIP service with the added ability to bind to a static port (30000-32767 by default) on every node
3. LoadBalancer – typically dependent on an external load balancer (AWS, Azure, Google, etc.). However, our lab has already included the MetalLB internal component as detailed prior; hence, we will be utilizing this type to enable ingress into the cluster using a virtual external IP.
4. ExternalName – maps the Service to an external DNS name by returning a CNAME record instead of proxying traffic
5. Headless – for purely internal use cases where clients need to reach individual pods directly rather than a single cluster IP

In this example, we’re preparing a Service to be consumed by an Ingress entity, to be further detailed in the next section. NodePort service type is necessary for this coupling between Service and Ingress entities.

# Generate script for Service of type NodePort
appName=test
protocol=TCP
publicPort=80
appPort=80
nodePort=30000
serviceName=$appName-service
serviceFile=$serviceName.yaml
echo "# Step 5. Create a Service Cluster
cat > $serviceFile << EOF
apiVersion: v1
kind: Service
metadata:
  name: $serviceName
spec:
  type: LoadBalancer # Other options: ClusterIP, NodePort, LoadBalancer
  # NodePort is a ClusterIP service with an additional capability.
  # It is reachable via ingress of any node in the cluster via the assigned 'ephemeral' port
  selector:
    app: $appName # This name must match the template.metadata.labels.app value
  ports:
  - protocol: $protocol
    port: $publicPort
    targetPort: $appPort
    nodePort: $nodePort # by default, Kubernetes control plane will allocate a port from 30000-32767 range
EOF
kubectl apply -f $serviceFile
clusterIP=\$(kubectl get service $serviceName --output yaml|grep 'clusterIP: '|awk '{print \$2}')
echo \"clusterIP: \$clusterIP\"
curl \$clusterIP
kubectl get service $serviceName"

6b: Resulting Script to be executed on the Master Node

# Step 5. Create a Service Cluster
cat > test-service.yaml << EOF
apiVersion: v1
kind: Service
metadata:
  name: test-service
spec:
  type: LoadBalancer # Other options: ClusterIP, NodePort
  selector:
    app: test # This name must match the template.metadata.labels.app value
  ports:
  - protocol: TCP
    port: 80
    targetPort: 80
    nodePort: 30000 # optional field: by default, Kubernetes control plane will allocate a port from 30000-32767 range
EOF
kubectl apply -f test-service.yaml
clusterIP=$(kubectl get service test-service --output yaml|grep 'clusterIP: '|awk '{print $2}')
echo "clusterIP: $clusterIP"
curl $clusterIP
kubectl get service test-service

6c: Expected Results

kim@linux01:~$ kubectl apply -f test-service.yaml
service/test-service created

kim@linux01:~$ clusterIP=$(kubectl get service test-service --output yaml|grep 'clusterIP: '|awk '{print $2}')
kim@linux01:~$ echo "clusterIP: $clusterIP"
clusterIP: 10.108.54.11
kim@linux01:~$ curl $clusterIP
linux02 Persistent Volume Has Successfully Set In Kubernetes!

kim@linux01:~$ kubectl get service test-service
NAME           TYPE           CLUSTER-IP       EXTERNAL-IP      PORT(S)        AGE
test-service   LoadBalancer   10.111.131.202   192.168.100.80   80:30000/TCP   135m

6d: Check for ingress from an external client machine

kim@kim-linux:~$ curl -D- http://192.168.100.80 -H 'Host: test.kimconnect.com'
HTTP/1.1 200 OK
Server: nginx/1.19.6
Date: Wed, 27 Jan 2021 04:48:29 GMT
Content-Type: text/html
Content-Length: 62
Last-Modified: Sun, 24 Jan 2021 03:40:16 GMT
Connection: keep-alive
ETag: "600cec20-3e"
Accept-Ranges: bytes

linux02 Persistent Volume Has Successfully Set In Kubernetes!
Step 7. Generate an Ingress Route for the Service (optional)

7a: generate script on worker node

# Generate the Ingress yaml Script
virtualHost=test.kimconnect.com
serviceName=test-service
nodePort=30001
ingressName=virtual-host-ingress
ingressFile=$ingressName.yaml
echo "# Step 6. Generate an Ingress Route for the Service
cat > $ingressFile << EOF
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: virtual-host-ingress
  annotations:
    kubernetes.io/ingress.class: nginx # use the shared ingress-nginx
spec:
  rules:
  - host: $virtualHost
    http:
      paths:
      - pathType: Prefix
        path: /
        backend:
          service:
            name: $serviceName
            port:
              number: 80
EOF
kubectl apply -f $ingressFile
kubectl describe ingress $ingressName
# run this command on client PC with hosts record of $virtualHost manually set to the IP of any node of this Kubernetes cluster
curl $virtualHost:$nodePort
# Remove the test ingress
kubectl delete ingress $ingressName"

7b: Resulting Script to be executed on the Master Node

# Step 6. Generate an Ingress Route for the Service
cat > virtual-host-ingress.yaml << EOF
# apiVersion: networking.k8s.io/v1
apiVersion: extensions/v1beta1
kind: Ingress
metadata:
  name: virtual-host-ingress
  annotations:
    kubernetes.io/ingress.class: nginx # use the shared ingress-nginx
spec:
  rules:
  - host: test.kimconnect.com
    http:
      paths:
      - pathType: Prefix
        path: /
        backend:
          serviceName: test-service
          servicePort: 80
          # For apiVersion networking.k8s.io/v1, the backend would instead be:
          # service:
          #   name: test-service
          #   port:
          #     number: 80
EOF
kubectl apply -f virtual-host-ingress.yaml
kubectl describe ingress virtual-host-ingress
# run this command on client PC with hosts record of $virtualHost manually set to the IP of any node of this Kubernetes cluster
curl test.kimconnect.com:30001
kubectl delete ingress test-ingress

7c: Expected Results

kim@linux01:~$ kubectl apply -f virtual-host-ingress.yaml
ingress.networking.k8s.io/virtual-host-ingress created

kim@linux01:~$ cat /etc/hosts
127.0.0.1 localhost
127.0.1.1 linux01
127.0.1.2 test.kimconnect.com

# The following lines are desirable for IPv6 capable hosts
::1     ip6-localhost ip6-loopback
fe00::0 ip6-localnet
ff00::0 ip6-mcastprefix
ff02::1 ip6-allnodes
ff02::2 ip6-allrouters

kim@linux01:~$ curl test.kimconnect.com:30001
linux02 Persistent Volume Has Successfully Set In Kubernetes!

kim@linux01:~$ k delete ingress test-ingress
ingress.networking.k8s.io "test-ingress" deleted
Step 8. Add SSL Secret Component
apiVersion: v1
kind: Secret
metadata:
  name: test-secret-tls
  namespace: default # must be in the same namespace as the test-app
data:
  tls.crt: CERTCONTENTHERE
  tls.key: KEYVALUEHERE
type: kubernetes.io/tls
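
The CERTCONTENTHERE and KEYVALUEHERE placeholders expect base64-encoded PEM data. Assuming the certificate and key are saved locally as tls.crt and tls.key, either generate those strings manually or let kubectl assemble an equivalent Secret:

# Option 1: produce the base64 strings for the manifest above
base64 -w0 tls.crt # paste the output in place of CERTCONTENTHERE
base64 -w0 tls.key # paste the output in place of KEYVALUEHERE

# Option 2: create the same Secret directly
kubectl create secret tls test-secret-tls --cert=tls.crt --key=tls.key --namespace=default

The Ingress from Step 7 can then reference this Secret by adding a spec.tls section with secretName: test-secret-tls.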
Step 9. Miscellaneous Discovery and Troubleshooting
# How to find the IP address of a pod and assign to a variable
podName=test-deployment-7dc8569756-2tl46
podIp=$(kubectl get pod $podName -o wide | tail -1 | awk '{print $6}')
echo "Pod IP: $podIp"

kim@linux01:~$ k get services
NAME         TYPE        CLUSTER-IP   EXTERNAL-IP   PORT(S)   AGE
kubernetes   ClusterIP   10.96.0.1    <none>        443/TCP   3d2h

kim@linux01:~$ kubectl describe service test-service
Name:              test-service
Namespace:         default
Labels:            <none>
Annotations:       <none>
Selector:          app=test
Type:              ClusterIP
IP Families:       <none>
IP:                10.108.54.11
IPs:               10.108.54.11
Port:              <unset>  80/TCP
TargetPort:        80/TCP
Endpoints:         172.16.90.130:80,172.16.90.131:80
Session Affinity:  None
Events:
  Type    Reason  Age   From                Message
  ----    ------  ----  ----                -------
  Normal  Type    43m   service-controller  NodePort -> LoadBalancer
  Normal  Type    38m   service-controller  LoadBalancer -> NodePort

# Clear any presets of a host on common services
unset {http,ftp,socks,https}_proxy
env | grep -i proxy
curl $clusterIP

# How to expose a deployment
# kubectl expose deployment test-deployment --type=ClusterIP # if ClusterIP is specified in the service plan
kubectl expose deployment test-deployment --port=80 --target-port=80
kubectl expose deployment/test-deployment --type="NodePort" --port 80

kim@linux01:~$ kubectl expose deployment test-deployment --type=ClusterIP
Error from server (AlreadyExists): services "test-deployment" already exists
kim@linux01:~$ kubectl expose deployment/test-deployment --type="NodePort" --port 80
Error from server (AlreadyExists): services "test-deployment" already exists

# Create a name space - not included in the examples of this article
# kubectl create ns persistentvolume1 # namespace names must be lowercase
Step 10. Cleanup
# Cleanup: must be in the correct sequence!
kubectl delete ingress virtual-host-ingress
kubectl delete services test-service
kubectl delete deployment test-deployment
kubectl delete persistentvolumeclaims linux02-claim
kubectl delete pv linux02-local-pv

How To Install Kubernetes on Ubuntu 20.04 Server

Overview

Docker is available in two versions, Community Edition (CE) and Enterprise Edition (EE). The former is freely available, while the latter is fee-based with additional features and support. As Kubernetes overlaps some of the features of EE, the CE variant is sufficient for the purpose of being integrated with a container orchestration software. It is important to distinguish between type 1 hypervisors such as VMWare ESXi, AWS Instances, Azure VMs, and Windows Hyper-V versus OS-level virtualization with shared namespaces and control groups such as Docker, rkt (‘rocket’), LXD (‘lexdi’), vServer, and Windows Containers. Hypervisors virtualize and isolate the base OS kernel between virtual machines, while OS-level virtualization containerizes instances of machines using the host OS kernel. That is why a Linux container cannot be instantiated on a Windows host (without invoking WSL/WSL2) using Docker, and vice versa.

Kubernetes is an abstraction layer on top of a Linux or Windows kernel. The scope of this article is limited to Linux. Although there are many flavors of Linux distributions, freely licensed options such as Ubuntu Server are favored, whereas CentOS no longer has a long-term horizon as freeware after version 8. Therefore, Ubuntu is currently being chosen as the underlying OS in the following scripts.

The general layout of Kubernetes consists of a Master Node and several Worker Nodes. The Master Node runs the docker containers of the control plane (etcd, API server, scheduler & Controller Manager), KubeDNS, and networking. The Worker Nodes join or integrate with the controller to serve as hosts for Pods, which are groups of containers deployed by Kubernetes as a single cohesive entity. For instance, there would be a pod for mysql (database) and a pod for apache (web server). Each of those pods can be replicated into a specified number of identically sized instances serving the same purpose. Thus, the mysql pod would be connected to the apache pod to host a web application.

There are several other important concepts related to Kubernetes that will be discussed in the next article, namely Helm and Persistent Storage. Production-ready machines would require full implementation of these entities. A link to a future article shall be provided once it’s available.

Part 1: Install Docker on Masternode

# Include prerequisites
sudo apt-get update -y
sudo apt-get -y install \
    apt-transport-https \
    ca-certificates \
    curl \
    gnupg-agent \
    software-properties-common

# Add docker key
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -

# Add docker official repository
sudo add-apt-repository \
   "deb [arch=amd64] https://download.docker.com/linux/ubuntu \
   $(lsb_release -cs) \
   stable"

# Install docker
sudo apt-get update -y && sudo apt-get install docker-ce docker-ce-cli containerd.io -y

Part 2: Install Kubernetes on Masternode

# Source: https://kubernetes.io/docs/tasks/tools/install-kubectl/

# Prepare to install
# runas root
sudo su
# include prerequisites
sudo apt-get update && sudo apt-get install -y apt-transport-https gnupg2 curl nfs-common

# Install kubernetes controller modules
curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add -
echo "deb https://apt.kubernetes.io/ kubernetes-xenial main" | sudo tee -a /etc/apt/sources.list.d/kubernetes.list
sudo apt-get -y update
sudo apt-get install -y kubectl kubelet kubeadm
apt-mark hold kubeadm kubelet kubectl

# Verify
kubectl cluster-info

# Sample output: failed
root@linux01:/home/admin# kubectl cluster-info
To further debug and diagnose cluster problems, use 'kubectl cluster-info dump'.
The connection to the server localhost:8080 was refused - did you specify the right host or port?

# Sample output: succeeded
brucelee@linux01:~$ k cluster-info
Kubernetes control plane is running at https://192.168.1.91:6443
KubeDNS is running at https://192.168.1.91:6443/api/v1/namespaces/kube-system/services/kube-dns:dns/proxy
To further debug and diagnose cluster problems, use 'kubectl cluster-info dump'.

# Install bash-completion
sudo apt -y install bash-completion

# Enable kubectl autocompletion
source /usr/share/bash-completion/bash_completion
echo 'source <(kubectl completion bash)' >>~/.bashrc
kubectl completion bash >/etc/bash_completion.d/kubectl

# create an alias for kubectl
echo 'alias k=kubectl' >>~/.bashrc
echo 'complete -F __start_kubectl k' >>~/.bashrc
kPath=$(which kubectl)
alias k=$kPath

# Alternative autocompletion commands
# source /usr/share/bash-completion/bash_completion
# source <(kubectl completion bash)
# alias k=kubectl
# complete -o default -F __start_kubectl k

# Open firewall ports

# Required on Master Node
sudo ufw allow ssh
sudo ufw allow 80/tcp
sudo ufw allow 8080/tcp # localhost connections
sudo ufw allow 443/tcp # worker nodes, API requests, and GUI
sudo ufw allow 6443/tcp # Kubernetes API server
sudo ufw allow 8443/tcp 
sudo ufw allow 2379:2380/tcp  # etcd server client api
sudo ufw allow 10250:10252/tcp # Kubelet API, kube-scheduler, kube-controller-manager
sudo ufw allow 10255/tcp # Kubelet to serve with no authentication/authorization
sudo ufw allow 30000:32767/tcp # Kubelet API

# Master & worker communication
sudo ufw allow from x.x.x.x/24 # Change this to match the Kubernetes subnet
sudo ufw allow to x.x.x.x/24

# Other plugins as required
sudo ufw allow 179/tcp # Calico BGP network
sudo ufw allow 6783/tcp # weave
sudo ufw allow 6783/udp # weave
sudo ufw allow 6784/tcp # weave
sudo ufw allow 6784/udp # weave
sudo ufw allow 8285/udp # flannel udp backend
sudo ufw allow 8472/udp # flannel vxlan backend
sudo ufw allow 8090/udp # flannel vxlan backend

# Required for kube-proxy and Kubernetes internal routing 
sudo ufw allow out on weave to 10.32.0.0/12
sudo ufw allow in on weave from 10.32.0.0/12

sudo ufw reload
sudo ufw status numbered

# How to remove a rule
# ufw delete RULENUMBER 

# Sample firewall output:
root@linux01:/home/admin# sudo ufw status numbered
Status: active

     To                         Action      From
     --                         ------      ----
[ 1] 6443/tcp                   ALLOW IN    Anywhere                  
[ 2] 10250/tcp                  ALLOW IN    Anywhere                  
[ 3] 10251/tcp                  ALLOW IN    Anywhere                  
[ 4] 10252/tcp                  ALLOW IN    Anywhere                  
[ 5] 10255/tcp                  ALLOW IN    Anywhere                  
[ 6] 6443/tcp (v6)              ALLOW IN    Anywhere (v6)             
[ 7] 10250/tcp (v6)             ALLOW IN    Anywhere (v6)             
[ 8] 10251/tcp (v6)             ALLOW IN    Anywhere (v6)             
[ 9] 10252/tcp (v6)             ALLOW IN    Anywhere (v6)             
[10] 10255/tcp (v6)             ALLOW IN    Anywhere (v6)

Optional Components:

# Install Helm, a Kubernetes package manager
curl https://baltocdn.com/helm/signing.asc | sudo apt-key add -
sudo apt-get install apt-transport-https --yes
echo "deb https://baltocdn.com/helm/stable/debian/ all main" | sudo tee /etc/apt/sources.list.d/helm-stable-debian.list
sudo apt-get update -y
sudo apt-get install helm

# Deploy Ingress-Nginx - a prerequisite for bare-metal Load Balancer deployments
# Source: https://kubernetes.github.io/ingress-nginx/deploy/
kim@linux01:~$ kubectl apply -f https://raw.githubusercontent.com/kubernetes/ingress-nginx/controller-v0.43.0/deploy/static/provider/baremetal/deploy.yaml
namespace/ingress-nginx created
serviceaccount/ingress-nginx created
configmap/ingress-nginx-controller created
clusterrole.rbac.authorization.k8s.io/ingress-nginx created
clusterrolebinding.rbac.authorization.k8s.io/ingress-nginx created
role.rbac.authorization.k8s.io/ingress-nginx created
rolebinding.rbac.authorization.k8s.io/ingress-nginx created
service/ingress-nginx-controller-admission created
service/ingress-nginx-controller created
deployment.apps/ingress-nginx-controller created
validatingwebhookconfiguration.admissionregistration.k8s.io/ingress-nginx-admission created
serviceaccount/ingress-nginx-admission created
clusterrole.rbac.authorization.k8s.io/ingress-nginx-admission created
clusterrolebinding.rbac.authorization.k8s.io/ingress-nginx-admission created
role.rbac.authorization.k8s.io/ingress-nginx-admission created
rolebinding.rbac.authorization.k8s.io/ingress-nginx-admission created
job.batch/ingress-nginx-admission-create created
job.batch/ingress-nginx-admission-patch created

# As of 01-26-2021, the ingress-nginx repo in helm is broken
# This method of install is currently NOT recommended.
kim@linux01:~$ helm install ingress-nginx ingress-nginx/ingress-nginx
NAME: ingress-nginx
LAST DEPLOYED: Wed Jan 27 02:53:47 2021
NAMESPACE: default
STATUS: deployed
REVISION: 1
TEST SUITE: None
NOTES:
The ingress-nginx controller has been installed.
It may take a few minutes for the LoadBalancer IP to be available.
You can watch the status by running 'kubectl --namespace default get services -o wide -w ingress-nginx-controller'

An example Ingress that makes use of the controller:

  apiVersion: networking.k8s.io/v1beta1
  kind: Ingress
  metadata:
    annotations:
      kubernetes.io/ingress.class: nginx
    name: example
    namespace: foo
  spec:
    rules:
      - host: www.example.com
        http:
          paths:
            - backend:
                serviceName: exampleService
                servicePort: 80
              path: /
    # This section is only required if TLS is to be enabled for the Ingress
    tls:
        - hosts:
            - www.example.com
          secretName: example-tls

If TLS is enabled for the Ingress, a Secret containing the certificate and key must also be provided:

  apiVersion: v1
  kind: Secret
  metadata:
    name: example-tls
    namespace: foo
  data:
    tls.crt: <base64 encoded cert>
    tls.key: <base64 encoded key>
  type: kubernetes.io/tls
 

Part 3: Initialize the Cluster

A) Master Node

# Install net-tools
sudo apt install net-tools -y

# Disable swap as Kubernetes cannot work with it
swapoff -a # turn off swap
sed '/^#/! {/swap/ s/^/#/}' -i /etc/fstab # set swapoff permanent

# Generate networking variables
defaultInterface=$(route | grep '^default' | grep -o '[^ ]*$')
thisIp=$(ifconfig $defaultInterface | sed -En 's/127.0.0.1//;s/.*inet (addr:)?(([0-9]*\.){3}[0-9]*).*/\2/p')

# Set private network for Kubernetes
k8network='172.16.90.0/24'

# Initialize the master node with the given variables
kubeadm init --apiserver-advertise-address=$thisIp --pod-network-cidr=$k8network

# Sample output of a successful setup:
# Your Kubernetes control-plane has initialized successfully!
# To start using your cluster, you need to run the following as a regular user:
#   mkdir -p $HOME/.kube
#   sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
#   sudo chown $(id -u):$(id -g) $HOME/.kube/config
# Alternatively, if you are the root user, you can run:
#   export KUBECONFIG=/etc/kubernetes/admin.conf
# You should now deploy a pod network to the cluster.
# Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
#   https://kubernetes.io/docs/concepts/cluster-administration/addons/
# Then you can join any number of worker nodes by running the following on each as root:
# kubeadm join 10.10.100.91:6443 --token pnqq2p.cvr4z0ub0ils5498 \
#     --discovery-token-ca-cert-hash sha256:HASHSTRINGHERE

# Error:
# root@linux01:/home/admin# kubeadm init --apiserver-advertise-address=$thisIp --pod-network-cidr=$k8network
# W0120 22:33:22.034382   50486 kubelet.go:200] cannot automatically set CgroupDriver when starting the Kubelet: cannot execute 'docker info -f {{.CgroupDriver}}': executable file not found in $PATH
# [init] Using Kubernetes version: v1.20.2
# [preflight] Running pre-flight checks
# [preflight] WARNING: Couldn't create the interface used for talking to the container runtime: docker is required for container runtime: exec: "docker": executable file not found in $PATH
# error execution phase preflight: [preflight] Some fatal errors occurred:
# 	[ERROR FileContent--proc-sys-net-bridge-bridge-nf-call-iptables]: /proc/sys/net/bridge/bridge-nf-call-iptables does not exist
# 	[ERROR FileContent--proc-sys-net-ipv4-ip_forward]: /proc/sys/net/ipv4/ip_forward contents are not set to 1
# 	[ERROR Swap]: running with swap on is not supported. Please disable swap
# [preflight] If you know what you are doing, you can make a check non-fatal with `--ignore-preflight-errors=...`
# To see the stack trace of this error execute with --v=5 or higher
#
# Resolution:
# swapoff -a # turn off swap
# sed '/^#/! {/swap/ s/^/#/}' -i /etc/fstab # set swapoff permanent
# 
# Sub-issue:
# Docker doesn't start
# root@linux01:/home/admin# sudo apt install docker -y
# root@linux01:/home/admin# service docker start
# Failed to start docker.service: Unit docker.service not found.
# sub-issue resolution
# Source: https://docs.docker.com/engine/install/ubuntu/
# sudo apt-get remove docker docker-engine docker.io containerd runc -y
# Re-install docker as shown in 'Part 2'

# OPTIONAL: How to reset or uninstall k8
# [root@localhost ~]# kubeadm reset
# [reset] Reading configuration from the cluster...
# [reset] FYI: You can look at this config file with 'kubectl -n kube-system get cm kubeadm-config -oyaml'
# [reset] WARNING: Changes made to this host by 'kubeadm init' or 'kubeadm join' will be reverted.
# [reset] Are you sure you want to proceed? [y/N]: y

# Return to regular user
# root@linux01:/home/admin# exit
exit
# admin@linux01:~$

# Grant current user admin privileges on Kubernetes
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config

# Check status of pods - BEFORE installing a network plugin
admin@linux01:~$ kubectl get nodes
NAME      STATUS     ROLES                  AGE   VERSION
linux01   NotReady   control-plane,master   22m   v1.20.2
 
# Recommended: install Calico network plugin
kubectl apply -f https://docs.projectcalico.org/manifests/calico.yaml
 
# Validate the nodes are now 'ready' - AFTER network plugin has been added
admin@linux01:~$ kubectl get nodes
NAME      STATUS   ROLES                  AGE   VERSION
linux01   Ready    control-plane,master   38m   v1.20.2
 
# Monitor the statuses of all pods in real time
watch kubectl get pods --all-namespaces

B) Worker Nodes

# runas root
sudo su
# Install prerequisites
sudo apt-get update -y
sudo apt-get -y install apt-transport-https ca-certificates curl gnupg-agent software-properties-common gnupg2
# Add docker & K8
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
sudo add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add -
echo "deb https://apt.kubernetes.io/ kubernetes-xenial main" | sudo tee -a /etc/apt/sources.list.d/kubernetes.list
sudo apt-get update -y

# Install docker & kubernetes
sudo apt-get install docker-ce docker-ce-cli containerd.io kubectl kubeadm kubelet nfs-common -y
apt-mark hold kubeadm kubelet kubectl

# Alternative install of docker & kubernetes 
version=1.20.10-00
apt-get install -qy --allow-downgrades --allow-change-held-packages kubeadm=$version kubelet=$version kubectl=$version docker-ce docker-ce-cli containerd.io nfs-common
apt-mark hold kubeadm kubelet kubectl

# Optional: re-installing a compatible version to match an existing cluster
sudo su # enter sudo context
version=1.20.10-00
apt-mark unhold kubeadm kubelet kubectl && apt-get update
apt-get install -qy --allow-downgrades --allow-change-held-packages kubeadm=$version kubelet=$version kubectl=$version
apt-mark hold kubeadm kubelet kubectl

# Ports required on worker nodes
sudo ufw allow ssh
sudo ufw allow 6443/tcp # Kubernetes API server
sudo ufw allow 10250:10255/tcp # Kubelet API, worker node kubelet healthcheck
sudo ufw allow 30000:32767/tcp # Kubelet API

# Required for kube-proxy and Kubernetes internal routing 
sudo ufw allow out on weave to 10.32.0.0/12
sudo ufw allow in on weave from 10.32.0.0/12

# Master & worker communication
sudo ufw allow from 192.168.100.0/24
sudo ufw allow to 192.168.100.0/24

# Calico BGP network
sudo ufw allow 179/tcp

# Weave and flannel
sudo ufw allow 6783/tcp # weave
sudo ufw allow 6783/udp # weave
sudo ufw allow 6784/tcp # weave
sudo ufw allow 6784/udp # weave
sudo ufw allow 8285/udp # flannel udp backend
sudo ufw allow 8472/udp # flannel vxlan backend

# Enable firewall
sudo ufw enable
sudo ufw reload
sudo ufw status numbered

# Disable swapping
swapoff -a
sed '/^#/! {/swap/ s/^/#/}' -i /etc/fstab

# Join the cluster
masternodeIp=10.10.100.91
token=pnqq2p.cvr4z0ub0ils5498
hash=sha256:HASHSTRINGHERE
kubeadm join $masternodeIp:6443 --token $token --discovery-token-ca-cert-hash $hash
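
# Confirm on the master node that the worker has joined (it may take a minute to become Ready)
kubectl get nodes -o wide
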
# Linux Mint 20.04 commands variation

# Install prerequisites
sudo apt-get update -y
sudo apt-get -y install apt-transport-https ca-certificates curl gnupg-agent software-properties-common gnupg2
# Add docker & K8
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -

# install docker e docker-compose
sudo add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(. /etc/os-release; echo "$UBUNTU_CODENAME") stable"

# update repos
sudo apt update -y

# Install docker
sudo apt-get install docker-ce docker-ce-cli containerd.io kubectl kubeadm -y

Part 4: Manage Cluster

A) How To Gracefully Remove Worker Nodes

kim@linux01:~$ k get nodes
NAME      STATUS   ROLES                  AGE    VERSION
linux01   Ready    control-plane,master   2d3h   v1.20.2
linux02   Ready    <none>                 2d1h   v1.20.2
linux03   Ready    <none>                 2d1h   v1.20.2

# Try to drain node
kim@linux01:~$ k drain linux03
node/linux03 cordoned
error: unable to drain node "linux03", aborting command...

There are pending nodes to be drained:
 linux03
error: cannot delete DaemonSet-managed Pods (use --ignore-daemonsets to ignore): kube-system/calico-node-nc47f, kube-system/kube-proxy-f469g

# Drain node with additional arguments
kim@linux01:~$ k drain linux03 --ignore-daemonsets --delete-local-data
Flag --delete-local-data has been deprecated, This option is deprecated and will be deleted. Use --delete-emptydir-data.
node/linux03 already cordoned
WARNING: ignoring DaemonSet-managed Pods: kube-system/calico-node-nc47f, kube-system/kube-proxy-f469g
node/linux03 drained

# Check nodes
kim@linux01:~$ k get nodes
NAME      STATUS                     ROLES                  AGE    VERSION
linux01   Ready                      control-plane,master   2d3h   v1.20.2
linux02   Ready                      <none>                 2d1h   v1.20.2
linux03   Ready,SchedulingDisabled   <none>                 2d1h   v1.20.2

# Delete the node
kim@linux01:~$ kubectl delete node linux03
node "linux03" deleted

# Verify
kim@linux01:~$ k get nodes
NAME      STATUS   ROLES                  AGE    VERSION
linux01   Ready    control-plane,master   2d3h   v1.20.2
linux02   Ready    <none>                 2d1h   v1.20.2

# Quick commands: On the Master Node
nodeName=linux05
kubectl drain $nodeName --ignore-daemonsets --delete-emptydir-data
kubectl delete node $nodeName

# On the worker node
kubeadm reset
# Sample output
clusteradmin@controller:~$ nodeName=linux05
clusteradmin@controller:~$ kubectl drain $nodeName --ignore-daemonsets --delete-emptydir-data
node/linux05 cordoned
WARNING: ignoring DaemonSet-managed Pods: ingress-nginx/ingress-nginx-controller-4jghn, kube-system/calico-node-jcj6s, kube-system/kube-proxy-m2jsd, metallb-system/speaker-qt4kv
node/linux05 drained
clusteradmin@controller:~$ kubectl delete node $nodeName
node "linux05" deleted

B) How To Retrieve the Join Token Hash (in case you’ve forgotten to document it)

rambo@k8-controller:~$ sudo kubeadm token create --print-join-command
kubeadm join 500.500.100.91:6443 --token :-).cm7echvpguzw01rj --discovery-token-ca-cert-hash sha256:SOMEHASHSTRINGHERE

C) Check K8 Context

# Null context would result if kubectl is triggered under root

root@linux01:/home/k8admin# kubectl config view
apiVersion: v1
clusters: null
contexts: null
current-context: ""
kind: Config
preferences: {}
users: null

root@linux01:/home/k8admin# k get nodes
The connection to the server localhost:8080 was refused - did you specify the right host or port?

# Option 1: exit the root shell to return to the context of an authorized Kubernetes admin

root@linux01:/home/k8admin# exit
exit

k8admin@linux01:~$ kubectl config view
apiVersion: v1
clusters:
- cluster:
    certificate-authority-data: DATA+OMITTED
    server: https://10.10.100.91:6443
  name: kubernetes
contexts:
- context:
    cluster: kubernetes
    user: kubernetes-admin
  name: kubernetes-admin@kubernetes
current-context: kubernetes-admin@kubernetes
kind: Config
preferences: {}
users:
- name: kubernetes-admin
  user:
    client-certificate-data: REDACTED
    client-key-data: REDACTED

k8admin@linux01:~$ k get nodes
NAME      STATUS   ROLES                  AGE    VERSION
linux01   Ready    control-plane,master   2d6h   v1.20.2
linux02   Ready    <none>                 2d5h   v1.20.2
linux03   Ready    <none>                 17m    v1.20.2

# Option 2: grant the current user (including root) admin access by copying the admin kubeconfig

mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
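
# Alternative for a one-off command as root: point kubectl at the admin kubeconfig without copying it
export KUBECONFIG=/etc/kubernetes/admin.conf
kubectl get nodes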

Troubleshooting the Installation Process

# Problem while installing an apt package:
Unpacking kubeadm (1.22.1-00) ...
dpkg: error processing archive /tmp/apt-dpkg-install-E8nExm/13-kubeadm_1.22.1-00_amd64.deb (--unpack):
 unable to sync file '/usr/bin/kubeadm.dpkg-new': Input/output error
sh: 1: /bin/dmesg: Input/output error
sh: 1: /bin/df: Input/output error
dpkg: unrecoverable fatal error, aborting:
 unable to fsync updated status of 'kubeadm': Input/output error
touch: cannot touch '/var/lib/update-notifier/dpkg-run-stamp': Read-only file system
E: Sub-process /usr/bin/dpkg returned an error code (2)

# Unable to install any package
root@linux03:/home/kim# sudo apt-get install docker-ce docker-ce-cli containerd.io kubectl kubeadm -y
W: Not using locking for read only lock file /var/lib/dpkg/lock-frontend
W: Not using locking for read only lock file /var/lib/dpkg/lock
E: dpkg was interrupted, you must manually run 'sudo dpkg --configure -a' to correct the problem.
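
# Both errors above indicate that the root filesystem has been remounted read-only, typically after
# disk I/O errors. Before retrying any dpkg repair, it may help to confirm the mount state and check
# the kernel log; a sketch (the remount only makes sense if the underlying disk is healthy):
mount | grep ' / '
sudo dmesg | grep -iE 'i/o error|read-only'
sudo mount -o remount,rw /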

# Trying to fix the problem
### Script ###
sudo rm /var/lib/dpkg/available 
sudo touch /var/lib/dpkg/available  
sudo sh -c 'for i in /var/lib/apt/lists/*_Packages; do dpkg --merge-avail "$i"; done'
### Result ###
root@linux03:/home/kim# sudo rm /var/lib/dpkg/available
rm: cannot remove '/var/lib/dpkg/available': Read-only file system
root@linux03:/home/kim# sudo touch /var/lib/dpkg/available
touch: cannot touch '/var/lib/dpkg/available': Read-only file system
### Retry after REBOOT ###
root@linux03:/home/kim# sudo sh -c 'for i in /var/lib/apt/lists/*_Packages; do dpkg --merge-avail "$i"; done'
Updating available packages info, using /var/lib/apt/lists/apt.kubernetes.io_dists_kubernetes-xenial_main_binary-amd64_Packages.
Information about 716 packages was updated.
Updating available packages info, using /var/lib/apt/lists/download.docker.com_linux_ubuntu_dists_focal_stable_binary-amd64_Packages.
Information about 50 packages was updated.
Updating available packages info, using /var/lib/apt/lists/us.archive.ubuntu.com_ubuntu_dists_focal-backports_main_binary-amd64_Packages.
Information about 8 packages was updated.
Updating available packages info, using /var/lib/apt/lists/us.archive.ubuntu.com_ubuntu_dists_focal-backports_universe_binary-amd64_Packages.
Information about 20 packages was updated.
Updating available packages info, using /var/lib/apt/lists/us.archive.ubuntu.com_ubuntu_dists_focal-security_main_binary-amd64_Packages.
Information about 4344 packages was updated.
Updating available packages info, using /var/lib/apt/lists/us.archive.ubuntu.com_ubuntu_dists_focal-security_multiverse_binary-amd64_Packages.
Information about 85 packages was updated.
Updating available packages info, using /var/lib/apt/lists/us.archive.ubuntu.com_ubuntu_dists_focal-security_restricted_binary-amd64_Packages.
Information about 2040 packages was updated.
Updating available packages info, using /var/lib/apt/lists/us.archive.ubuntu.com_ubuntu_dists_focal-security_universe_binary-amd64_Packages.
Information about 3234 packages was updated.
Updating available packages info, using /var/lib/apt/lists/us.archive.ubuntu.com_ubuntu_dists_focal-updates_main_binary-amd64_Packages.
Information about 5661 packages was updated.
Updating available packages info, using /var/lib/apt/lists/us.archive.ubuntu.com_ubuntu_dists_focal-updates_multiverse_binary-amd64_Packages.
Information about 96 packages was updated.
Updating available packages info, using /var/lib/apt/lists/us.archive.ubuntu.com_ubuntu_dists_focal-updates_restricted_binary-amd64_Packages.
Information about 2239 packages was updated.
Updating available packages info, using /var/lib/apt/lists/us.archive.ubuntu.com_ubuntu_dists_focal-updates_universe_binary-amd64_Packages.
Information about 3859 packages was updated.
Updating available packages info, using /var/lib/apt/lists/us.archive.ubuntu.com_ubuntu_dists_focal_main_binary-amd64_Packages.
Information about 3569 packages was updated.
Updating available packages info, using /var/lib/apt/lists/us.archive.ubuntu.com_ubuntu_dists_focal_multiverse_binary-amd64_Packages.
Information about 778 packages was updated.
Updating available packages info, using /var/lib/apt/lists/us.archive.ubuntu.com_ubuntu_dists_focal_restricted_binary-amd64_Packages.
Information about 30 packages was updated.
Updating available packages info, using /var/lib/apt/lists/us.archive.ubuntu.com_ubuntu_dists_focal_universe_binary-amd64_Packages.
Information about 49496 packages was updated.

# Last resort solution
### Script ###
sudo dpkg --configure -a
sudo apt-get -f install -y
sudo apt-get clean
sudo apt-get update -y && sudo apt-get upgrade -y
### Initial result ###
root@linux03:/home/kim# sudo dpkg --configure -a
dpkg: error: unable to access the dpkg database directory /var/lib/dpkg: Read-only file system
### Retry after REBOOT ###
root@linux03:/home/kim# apt upgrade -y
E: dpkg was interrupted, you must manually run 'sudo dpkg --configure -a' to correct the problem.
root@linux03:/home/kim# sudo dpkg --configure -a
Setting up docker-scan-plugin (0.8.0~ubuntu-focal) ...
Setting up conntrack (1:1.4.5-2) ...
Setting up kubectl (1.22.1-00) ...
Setting up ebtables (2.0.11-3build1) ...
Setting up socat (1.7.3.3-2) ...
Setting up containerd.io (1.4.9-1) ...
Created symlink /etc/systemd/system/multi-user.target.wants/containerd.service → /lib/systemd/system/containerd.service.
Setting up docker-ce-cli (5:20.10.8~3-0~ubuntu-focal) ...
Setting up pigz (2.4-1) ...
Setting up cri-tools (1.13.0-01) ...
Setting up docker-ce-rootless-extras (5:20.10.8~3-0~ubuntu-focal) ...
Setting up kubernetes-cni (0.8.7-00) ...
Setting up docker-ce (5:20.10.8~3-0~ubuntu-focal) ...
Created symlink /etc/systemd/system/multi-user.target.wants/docker.service → /lib/systemd/system/docker.service.
Created symlink /etc/systemd/system/sockets.target.wants/docker.socket → /lib/systemd/system/docker.socket.
Setting up kubelet (1.22.1-00) ...
Created symlink /etc/systemd/system/multi-user.target.wants/kubelet.service → /lib/systemd/system/kubelet.service.
Processing triggers for man-db (2.9.1-1) ...
Processing triggers for systemd (245.4-4ubuntu3.11) ...
root@linux03:/home/kim# sudo apt-get -f install
Reading package lists... Done
Building dependency tree
Reading state information... Done
The following additional packages will be installed:
  kubeadm
The following packages will be upgraded:
  kubeadm
1 upgraded, 0 newly installed, 0 to remove and 0 not upgraded.
1 not fully installed or removed.
Need to get 8,717 kB of archives.
After this operation, 45.9 MB of additional disk space will be used.
Do you want to continue? [Y/n] y
Abort.
root@linux03:/home/kim#
root@linux03:/home/kim# sudo apt-get -f install -y
Reading package lists... Done
Building dependency tree
Reading state information... Done
The following additional packages will be installed:
  kubeadm
The following packages will be upgraded:
  kubeadm
1 upgraded, 0 newly installed, 0 to remove and 0 not upgraded.
1 not fully installed or removed.
Need to get 8,717 kB of archives.
After this operation, 45.9 MB of additional disk space will be used.
Get:1 https://packages.cloud.google.com/apt kubernetes-xenial/main amd64 kubeadm amd64 1.22.1-00 [8,717 kB]
Fetched 8,717 kB in 2s (3,883 kB/s)
(Reading database ... 108028 files and directories currently installed.)
Preparing to unpack .../kubeadm_1.22.1-00_amd64.deb ...
Unpacking kubeadm (1.22.1-00) over (1.22.1-00) ...
Setting up kubeadm (1.22.1-00) ...

# Problem: unable to join cluster
root@worker3:/home/kimconnect# kubeadm join 10.10.10.10:6443 --token somecode.morecode     --discovery-token-ca-cert-hash sha256:somehash
[preflight] Running pre-flight checks
        [WARNING IsDockerSystemdCheck]: detected "cgroupfs" as the Docker cgroup driver. The recommended driver is "systemd". Please follow the guide at https://kubernetes.io/docs/setup/cri/
        [WARNING SystemVerification]: this Docker version is not on the list of validated versions: 20.10.8. Latest validated version: 19.03
error execution phase preflight: couldn't validate the identity of the API Server: could not find a JWS signature in the cluster-info ConfigMap for token ID "qisk11"
To see the stack trace of this error execute with --v=5 or higher

# Resolution: this error means the bootstrap token has expired or was revoked (tokens expire after 24 hours by default)
# ON MASTER NODE: run this command to generate a fresh join command
sudo kubeadm token create --print-join-command
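
# The 'cgroupfs' preflight warning can also be addressed by switching Docker to the systemd cgroup
# driver, as recommended in the Kubernetes documentation; a sketch of the commonly used daemon.json
# change (this overwrites any existing /etc/docker/daemon.json, and restarting Docker restarts its containers):
cat <<'EOF' | sudo tee /etc/docker/daemon.json
{
  "exec-opts": ["native.cgroupdriver=systemd"]
}
EOF
sudo systemctl restart docker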

How To Change a VMware ESXi Hypervisor’s IP Address and Host Name

# List hypervisor's network interfaces
esxcli network ip interface ipv4 get

# Sample output
[root@esx76:~] esxcli network ip interface ipv4 get
Name  IPv4 Address     IPv4 Netmask   IPv4 Broadcast   Address Type  Gateway        DHCP DNS
----  ---------------  -------------  ---------------  ------------  -------------  --------
vmk0  10.10.46.101      255.255.255.0  10.10.46.255     STATIC        10.10.46.1     false

# set new ip
interface=vmk0
ipaddress=10.10.100.11
netmask=255.255.255.0
esxcli network ip interface ipv4 set -i $interface -I $ipaddress -N $netmask -t static

# Error returned when a non-existent interface is specified:
# Error changing IPv4 configuration for the interface vmk1. Error was : Unable to complete Sysinfo operation.  Please see the VMkernel log file for more details.: Sysinfo error: Not foundSee VMkernel log for details.
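
# Note: changing the management IP does not update the default gateway. When moving to a new subnet,
# the gateway typically needs to be set as well; a sketch using esxcli (the gateway address below is
# only an example - adjust to the new subnet, and verify the syntax on your ESXi build):
gateway=10.10.100.1
esxcli network ip route ipv4 add --gateway $gateway --network default
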
# Set ESXi host name via CLI
hostname=ESX1
domain=kimconnect.com
fqdn=$hostname.$domain
esxcli system hostname set --host=$hostname
esxcli system hostname set --fqdn=$fqdn
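
# Verify the new host name, domain, and FQDN
esxcli system hostname get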