"""
Kubernetes cluster task, deployed via kubeadm
"""
import argparse
import contextlib
import ipaddress
import json
import logging
import random

import yaml

from io import BytesIO

from teuthology import misc as teuthology
from teuthology import contextutil
from teuthology.config import config as teuth_config
from teuthology.orchestra import run

log = logging.getLogger(__name__)

def _kubectl(ctx, config, args, **kwargs):
    cluster_name = config['cluster']
    ctx.kubeadm[cluster_name].bootstrap_remote.run(
        args=['kubectl'] + args,
        **kwargs,
    )

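# Note that extra kwargs are forwarded verbatim to Remote.run; this is how
# callers below pipe rendered manifests into `kubectl ... -f -`, e.g.
# (illustrative):
#
#   _kubectl(ctx, config, ['apply', '-f', '-'], stdin=yaml.dump(manifest))
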
def kubectl(ctx, config):
    if isinstance(config, str):
        config = [config]
    assert isinstance(config, list)
    for c in config:
        if isinstance(c, str):
            _kubectl(ctx, config, c.split(' '))
        else:
            _kubectl(ctx, config, c)

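# Example suite fragment (hedged; the command strings are illustrative) for
# invoking the kubectl task with either a single command string or a list:
#
#   tasks:
#   - kubectl:
#     - get nodes
#     - create -f /path/to/manifest.yaml
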
@contextlib.contextmanager
def preflight(ctx, config):
    run.wait(
        ctx.cluster.run(
            args=[
                'sudo', 'modprobe', 'br_netfilter',
                run.Raw('&&'),
                'sudo', 'sysctl', 'net.bridge.bridge-nf-call-ip6tables=1',
                run.Raw('&&'),
                'sudo', 'sysctl', 'net.bridge.bridge-nf-call-iptables=1',
                run.Raw('&&'),
                'sudo', 'sysctl', 'net.ipv4.ip_forward=1',
                run.Raw('&&'),
                'sudo', 'swapoff', '-a',
            ],
            wait=False,
        )
    )

    # set docker cgroup driver = systemd
    # see https://kubernetes.io/docs/setup/production-environment/container-runtimes/#docker
    # see https://github.com/kubernetes/kubeadm/issues/2066
    for remote in ctx.cluster.remotes.keys():
        try:
            orig = remote.read_file('/etc/docker/daemon.json', sudo=True)
            j = json.loads(orig)
        except Exception as e:
            log.info(f'Failed to pull old daemon.json: {e}')
            j = {}
        j["exec-opts"] = ["native.cgroupdriver=systemd"]
        j["log-driver"] = "json-file"
        j["log-opts"] = {"max-size": "100m"}
        j["storage-driver"] = "overlay2"
        remote.write_file('/etc/docker/daemon.json', json.dumps(j), sudo=True)

    # restart docker so the new daemon.json takes effect; tolerate failure on
    # nodes where docker isn't present
    run.wait(
        ctx.cluster.run(
            args=[
                'sudo', 'systemctl', 'restart', 'docker',
                run.Raw('||'),
                'true',
            ],
            wait=False,
        )
    )
    yield

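# For reference, the daemon.json written above ends up looking like this
# (merged over whatever was already present on the node):
#
#   {
#     "exec-opts": ["native.cgroupdriver=systemd"],
#     "log-driver": "json-file",
#     "log-opts": {"max-size": "100m"},
#     "storage-driver": "overlay2"
#   }
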
@contextlib.contextmanager
def kubeadm_install(ctx, config):
    version = config.get('version', '1.21')

    os_type = teuthology.get_distro(ctx)
    os_version = teuthology.get_distro_version(ctx)

    try:
        if os_type in ['centos', 'rhel']:
            os = f"CentOS_{os_version.split('.')[0]}"
            log.info('Installing cri-o')
            run.wait(
                ctx.cluster.run(
                    args=[
                        'sudo', 'curl', '-L', '-o',
                        '/etc/yum.repos.d/devel:kubic:libcontainers:stable.repo',
                        f'https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/{os}/devel:kubic:libcontainers:stable.repo',
                        run.Raw('&&'),
                        'sudo', 'curl', '-L', '-o',
                        f'/etc/yum.repos.d/devel:kubic:libcontainers:stable:cri-o:{version}.repo',
                        f'https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable:/cri-o:/{version}/{os}/devel:kubic:libcontainers:stable:cri-o:{version}.repo',
                        run.Raw('&&'),
                        'sudo', 'dnf', 'install', '-y', 'cri-o',
                    ],
                    wait=False,
                )
            )

            log.info('Installing kube{adm,ctl,let}')
            repo = """[kubernetes]
name=Kubernetes
baseurl=https://packages.cloud.google.com/yum/repos/kubernetes-el7-$basearch
enabled=1
gpgcheck=1
repo_gpgcheck=1
gpgkey=https://packages.cloud.google.com/yum/doc/yum-key.gpg https://packages.cloud.google.com/yum/doc/rpm-package-key.gpg
"""
            for remote in ctx.cluster.remotes.keys():
                remote.write_file(
                    '/etc/yum.repos.d/kubernetes.repo',
                    repo,
                    sudo=True,
                )
            run.wait(
                ctx.cluster.run(
                    args=[
                        'sudo', 'dnf', 'install', '-y',
                        'kubelet', 'kubeadm', 'kubectl',
                        'iproute-tc', 'bridge-utils',
                    ],
                    wait=False,
                )
            )

            # fix cni config
            for remote in ctx.cluster.remotes.keys():
                conf = """# from https://github.com/cri-o/cri-o/blob/master/tutorials/kubernetes.md#flannel-network
{
    "name": "crio",
    "type": "flannel"
}
"""
                remote.write_file('/etc/cni/net.d/10-crio-flannel.conf', conf,
                                  sudo=True)
                remote.run(args=[
                    'sudo', 'rm', '-f',
                    '/etc/cni/net.d/87-podman-bridge.conflist',
                    '/etc/cni/net.d/100-crio-bridge.conf',
                ])

            # start crio
            run.wait(
                ctx.cluster.run(
                    args=[
                        'sudo', 'systemctl', 'daemon-reload',
                        run.Raw('&&'),
                        'sudo', 'systemctl', 'enable', 'crio', '--now',
                    ],
                    wait=False,
                )
            )

        elif os_type == 'ubuntu':
            os = f"xUbuntu_{os_version}"
            log.info('Installing kube{adm,ctl,let}')
            run.wait(
                ctx.cluster.run(
                    args=[
                        'sudo', 'apt', 'update',
                        run.Raw('&&'),
                        'sudo', 'apt', 'install', '-y',
                        'apt-transport-https', 'ca-certificates', 'curl',
                        run.Raw('&&'),
                        'sudo', 'curl', '-fsSLo',
                        '/usr/share/keyrings/kubernetes-archive-keyring.gpg',
                        'https://packages.cloud.google.com/apt/doc/apt-key.gpg',
                        run.Raw('&&'),
                        'echo', 'deb [signed-by=/usr/share/keyrings/kubernetes-archive-keyring.gpg] https://apt.kubernetes.io/ kubernetes-xenial main',
                        run.Raw('|'),
                        'sudo', 'tee', '/etc/apt/sources.list.d/kubernetes.list',
                        run.Raw('&&'),
                        'sudo', 'apt', 'update',
                        run.Raw('&&'),
                        'sudo', 'apt', 'install', '-y',
                        'kubelet', 'kubeadm', 'kubectl',
                    ],
                    wait=False,
                )
            )

        else:
            raise RuntimeError(f'unsupported distro {os_type} for cri-o')

        run.wait(
            ctx.cluster.run(
                args=[
                    'sudo', 'systemctl', 'enable', '--now', 'kubelet',
                    run.Raw('&&'),
                    'sudo', 'kubeadm', 'config', 'images', 'pull',
                ],
                wait=False,
            )
        )
        yield

    finally:
        if config.get('uninstall', True):
            log.info('Uninstalling kube{adm,let,ctl}')
            if os_type in ['centos', 'rhel']:
                run.wait(
                    ctx.cluster.run(
                        args=[
                            'sudo', 'rm', '-f',
                            '/etc/yum.repos.d/kubernetes.repo',
                            run.Raw('&&'),
                            'sudo', 'dnf', 'remove', '-y',
                            'kubeadm', 'kubelet', 'kubectl', 'cri-o',
                        ],
                        wait=False,
                    )
                )
            elif os_type == 'ubuntu' and False:
                # intentionally disabled ('and False')
                run.wait(
                    ctx.cluster.run(
                        args=[
                            'sudo', 'rm', '-f',
                            '/etc/apt/sources.list.d/devel:kubic:libcontainers:stable.list',
                            f'/etc/apt/sources.list.d/devel:kubic:libcontainers:stable:cri-o:{version}.list',
                            '/etc/apt/trusted.gpg.d/libcontainers-cri-o.gpg',
                            run.Raw('&&'),
                            'sudo', 'apt', 'remove', '-y',
                            'kubeadm', 'kubelet', 'kubectl', 'cri-o', 'cri-o-runc',
                        ],
                        wait=False,
                    )
                )

@contextlib.contextmanager
def kubeadm_init_join(ctx, config):
    cluster_name = config['cluster']

    bootstrap_remote = None
    remotes = {}  # remote -> ip
    for remote, roles in ctx.cluster.remotes.items():
        for role in roles:
            if role.startswith('host.'):
                if not bootstrap_remote:
                    bootstrap_remote = remote
                if remote not in remotes:
                    remotes[remote] = remote.ssh.get_transport().getpeername()[0]
    if not bootstrap_remote:
        raise RuntimeError('must define at least one host.something role')
    ctx.kubeadm[cluster_name].bootstrap_remote = bootstrap_remote
    ctx.kubeadm[cluster_name].remotes = remotes
    ctx.kubeadm[cluster_name].token = 'abcdef.' + ''.join([
        random.choice('0123456789abcdefghijklmnopqrstuvwxyz') for _ in range(16)
    ])
    log.info(f'Token: {ctx.kubeadm[cluster_name].token}')
    log.info(f'Remotes: {ctx.kubeadm[cluster_name].remotes}')

    try:
        # init
        cmd = [
            'sudo', 'kubeadm', 'init',
            '--node-name', ctx.kubeadm[cluster_name].bootstrap_remote.shortname,
            '--token', ctx.kubeadm[cluster_name].token,
            '--pod-network-cidr', str(ctx.kubeadm[cluster_name].pod_subnet),
        ]
        bootstrap_remote.run(args=cmd)

        # join additional nodes
        joins = []
        for remote, ip in ctx.kubeadm[cluster_name].remotes.items():
            if remote == bootstrap_remote:
                continue
            cmd = [
                'sudo', 'kubeadm', 'join',
                ctx.kubeadm[cluster_name].remotes[ctx.kubeadm[cluster_name].bootstrap_remote] + ':6443',
                '--node-name', remote.shortname,
                '--token', ctx.kubeadm[cluster_name].token,
                '--discovery-token-unsafe-skip-ca-verification',
            ]
            joins.append(remote.run(args=cmd, wait=False))
        run.wait(joins)
        yield

    except Exception as e:
        log.exception(e)
        raise

    finally:
        log.info('Cleaning up node')
        run.wait(
            ctx.cluster.run(
                args=['sudo', 'kubeadm', 'reset', 'cleanup-node', '-f'],
                wait=False,
            )
        )

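# The commands issued above take roughly this shape (hostnames, token, and
# CIDR are hypothetical examples):
#
#   sudo kubeadm init --node-name smithi001 \
#       --token abcdef.0123456789abcdef --pod-network-cidr 10.0.9.0/24
#   sudo kubeadm join 172.21.0.10:6443 --node-name smithi002 \
#       --token abcdef.0123456789abcdef \
#       --discovery-token-unsafe-skip-ca-verification
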
@contextlib.contextmanager
def kubectl_config(ctx, config):
    cluster_name = config['cluster']
    bootstrap_remote = ctx.kubeadm[cluster_name].bootstrap_remote

    ctx.kubeadm[cluster_name].admin_conf = \
        bootstrap_remote.read_file('/etc/kubernetes/admin.conf', sudo=True)

    log.info('Setting up kubectl')
    try:
        ctx.cluster.run(args=[
            'mkdir', '-p', '.kube',
            run.Raw('&&'),
            'sudo', 'mkdir', '-p', '/root/.kube',
        ])
        for remote in ctx.kubeadm[cluster_name].remotes.keys():
            remote.write_file('.kube/config',
                              ctx.kubeadm[cluster_name].admin_conf)
            remote.sudo_write_file('/root/.kube/config',
                                   ctx.kubeadm[cluster_name].admin_conf)
        yield

    except Exception as e:
        log.exception(e)
        raise

    finally:
        log.info('Deconfiguring kubectl')
        ctx.cluster.run(args=[
            'rm', '-rf', '.kube',
            run.Raw('&&'),
            'sudo', 'rm', '-rf', '/root/.kube',
        ])

def map_vnet(mip):
    for mapping in teuth_config.get('vnet', []):
        mnet = ipaddress.ip_network(mapping['machine_subnet'])
        vnet = ipaddress.ip_network(mapping['virtual_subnet'])
        if vnet.prefixlen >= mnet.prefixlen:
            log.error(f"virtual_subnet {vnet} prefix >= machine_subnet {mnet} prefix")
            return None
        if mip in mnet:
            pos = list(mnet.hosts()).index(mip)
            log.info(f"{mip} is in {mnet} at pos {pos}")
            sub = list(vnet.subnets(32 - mnet.prefixlen))[pos]
            return sub
    return None

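# A worked example of the mapping arithmetic above, with a hypothetical
# teuthology.yaml entry:
#
#   vnet:
#   - machine_subnet: 192.168.0.0/24
#     virtual_subnet: 10.0.0.0/16
#
# A machine at 192.168.0.10 is host #9 (zero-based) of 192.168.0.0/24, so it
# gets the 9th /24 carved out of 10.0.0.0/16, i.e. 10.0.9.0/24:
#
#   >>> list(ipaddress.ip_network('192.168.0.0/24').hosts()).index(
#   ...     ipaddress.ip_address('192.168.0.10'))
#   9
#   >>> list(ipaddress.ip_network('10.0.0.0/16').subnets(32 - 24))[9]
#   IPv4Network('10.0.9.0/24')
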
@contextlib.contextmanager
def allocate_pod_subnet(ctx, config):
    """
    Allocate a private subnet that will not collide with other test machines/clusters
    """
    cluster_name = config['cluster']
    assert cluster_name == 'kubeadm', 'multiple subnets not yet implemented'

    log.info('Identifying pod subnet')
    remote = list(ctx.cluster.remotes.keys())[0]
    ip = remote.ssh.get_transport().getpeername()[0]
    mip = ipaddress.ip_address(ip)
    vnet = map_vnet(mip)
    assert vnet
    log.info(f'Pod subnet: {vnet}')
    ctx.kubeadm[cluster_name].pod_subnet = vnet
    yield

@contextlib.contextmanager
def pod_network(ctx, config):
    cluster_name = config['cluster']
    pnet = config.get('pod_network', 'calico')
    if pnet == 'flannel':
        r = ctx.kubeadm[cluster_name].bootstrap_remote.run(
            args=[
                'curl',
                'https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml',
            ],
            stdout=BytesIO(),
        )
        assert r.exitstatus == 0
        flannel = list(yaml.load_all(r.stdout.getvalue(), Loader=yaml.FullLoader))
        for o in flannel:
            if o.get('data', {}).get('net-conf.json'):
                log.info(f'Updating {o}')
                o['data']['net-conf.json'] = o['data']['net-conf.json'].replace(
                    '10.244.0.0/16',
                    str(ctx.kubeadm[cluster_name].pod_subnet)
                )
        flannel_yaml = yaml.dump_all(flannel)
        log.debug(f'Flannel:\n{flannel_yaml}')
        _kubectl(ctx, config, ['apply', '-f', '-'], stdin=flannel_yaml)

    elif pnet == 'calico':
        _kubectl(ctx, config, [
            'create', '-f',
            'https://docs.projectcalico.org/manifests/tigera-operator.yaml'
        ])
        cr = {
            'apiVersion': 'operator.tigera.io/v1',
            'kind': 'Installation',
            'metadata': {'name': 'default'},
            'spec': {
                'calicoNetwork': {
                    'ipPools': [
                        {
                            'blockSize': 26,
                            'cidr': str(ctx.kubeadm[cluster_name].pod_subnet),
                            'encapsulation': 'IPIPCrossSubnet',
                            'natOutgoing': 'Enabled',
                            'nodeSelector': 'all()',
                        }
                    ]
                }
            }
        }
        _kubectl(ctx, config, ['create', '-f', '-'], stdin=yaml.dump(cr))

    else:
        raise RuntimeError(f'unrecognized pod_network {pnet}')

    try:
        yield

    finally:
        if pnet == 'flannel':
            _kubectl(ctx, config, [
                'delete', '-f',
                'https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml',
            ])

        elif pnet == 'calico':
            _kubectl(ctx, config, ['delete', 'installation', 'default'])
            _kubectl(ctx, config, [
                'delete', '-f',
                'https://docs.projectcalico.org/manifests/tigera-operator.yaml'
            ])

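# The Calico Installation CR built above serializes to roughly this YAML
# (the cidr value is an example):
#
#   apiVersion: operator.tigera.io/v1
#   kind: Installation
#   metadata:
#     name: default
#   spec:
#     calicoNetwork:
#       ipPools:
#       - blockSize: 26
#         cidr: 10.0.9.0/24
#         encapsulation: IPIPCrossSubnet
#         natOutgoing: Enabled
#         nodeSelector: all()
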
@contextlib.contextmanager
def setup_pvs(ctx, config):
    """
    Create PVs for all scratch LVs and set up a trivial provisioner
    """
    log.info('Scanning for scratch devices')
    crs = []
    for remote in ctx.cluster.remotes.keys():
        ls = remote.read_file('/scratch_devs').decode('utf-8').strip().splitlines()
        log.info(f'Scratch devices on {remote.shortname}: {ls}')
        for dev in ls:
            devname = dev.split('/')[-1].replace("_", "-")
            crs.append({
                'apiVersion': 'v1',
                'kind': 'PersistentVolume',
                'metadata': {'name': f'{remote.shortname}-{devname}'},
                'spec': {
                    'volumeMode': 'Block',
                    'accessModes': ['ReadWriteOnce'],
                    'capacity': {'storage': '100Gi'},  # doesn't matter?
                    'persistentVolumeReclaimPolicy': 'Retain',
                    'storageClassName': 'scratch',
                    'local': {'path': dev},
                    'nodeAffinity': {
                        'required': {
                            'nodeSelectorTerms': [
                                {
                                    'matchExpressions': [
                                        {
                                            'key': 'kubernetes.io/hostname',
                                            'operator': 'In',
                                            'values': [remote.shortname]
                                        }
                                    ]
                                }
                            ]
                        }
                    }
                }
            })
            # overwriting first few MB is enough to make k8s happy
            remote.run(args=[
                'sudo', 'dd', 'if=/dev/zero', f'of={dev}', 'bs=1M', 'count=10'
            ])
    crs.append({
        'kind': 'StorageClass',
        'apiVersion': 'storage.k8s.io/v1',
        'metadata': {'name': 'scratch'},
        'provisioner': 'kubernetes.io/no-provisioner',
        'volumeBindingMode': 'WaitForFirstConsumer',
    })
    y = yaml.dump_all(crs)
    log.info('Creating PVs + StorageClass')
    _kubectl(ctx, config, ['create', '-f', '-'], stdin=y)
    yield

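# A pod can claim one of these block PVs with a PVC like the following
# (illustrative sketch, not part of this task):
#
#   apiVersion: v1
#   kind: PersistentVolumeClaim
#   metadata:
#     name: scratch-claim
#   spec:
#     storageClassName: scratch
#     volumeMode: Block
#     accessModes: [ReadWriteOnce]
#     resources:
#       requests:
#         storage: 100Gi
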
@contextlib.contextmanager
def final(ctx, config):
    cluster_name = config['cluster']

    # remove master node taint so pods can schedule on the bootstrap node
    _kubectl(ctx, config, [
        'taint', 'node',
        ctx.kubeadm[cluster_name].bootstrap_remote.shortname,
        'node-role.kubernetes.io/master-',
    ])

    yield

@contextlib.contextmanager
def task(ctx, config):
    if not config:
        config = {}
    assert isinstance(config, dict), \
        "task only supports a dictionary for configuration"

    log.info('Kubeadm start')

    overrides = ctx.config.get('overrides', {})
    teuthology.deep_merge(config, overrides.get('kubeadm', {}))
    log.info('Config: ' + str(config))

    # set up cluster context
    if not hasattr(ctx, 'kubeadm'):
        ctx.kubeadm = {}
    if 'cluster' not in config:
        config['cluster'] = 'kubeadm'
    cluster_name = config['cluster']
    if cluster_name not in ctx.kubeadm:
        ctx.kubeadm[cluster_name] = argparse.Namespace()

    with contextutil.nested(
            lambda: preflight(ctx, config),
            lambda: allocate_pod_subnet(ctx, config),
            lambda: kubeadm_install(ctx, config),
            lambda: kubeadm_init_join(ctx, config),
            lambda: kubectl_config(ctx, config),
            lambda: pod_network(ctx, config),
            lambda: setup_pvs(ctx, config),
            lambda: final(ctx, config),
    ):
        try:
            log.info('Kubeadm complete, yielding')
            yield

        finally:
            log.info('Tearing down kubeadm')
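
# Example teuthology job fragment (hedged; role names and options are
# illustrative) that brings up a one-node cluster and runs a kubectl command:
#
#   roles:
#   - [host.a]
#   tasks:
#   - kubeadm:
#       pod_network: calico
#   - kubectl:
#     - get nodes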