"""
Kubernetes cluster task, deployed via kubeadm
"""
import argparse
import contextlib
import ipaddress
import logging
import random
import yaml

from io import BytesIO

from teuthology import misc as teuthology
from teuthology import contextutil
from teuthology.config import config as teuth_config
from teuthology.orchestra import run

log = logging.getLogger(__name__)

def _kubectl(ctx, config, args, **kwargs):
    # default to the 'kubeadm' cluster when invoked with a bare command
    # list (as the kubectl task below does) rather than a task config dict
    cluster_name = config['cluster'] if isinstance(config, dict) else 'kubeadm'
    ctx.kubeadm[cluster_name].bootstrap_remote.run(
        args=['kubectl'] + args,
        **kwargs,
    )

def kubectl(ctx, config):
    if isinstance(config, str):
        config = [config]
    assert isinstance(config, list)
    for c in config:
        if isinstance(c, str):
            _kubectl(ctx, config, c.split(' '))
        else:
            _kubectl(ctx, config, c)

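# Illustrative usage (hypothetical job fragment, not from an actual suite):
# the kubectl task accepts whole command strings or pre-split argument
# lists; each entry is run on the bootstrap node:
#
#   tasks:
#   - kubeadm:
#   - kubectl:
#     - get nodes
#     - [get, pods, -A]
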
@contextlib.contextmanager
def preflight(ctx, config):
    run.wait(
        ctx.cluster.run(
            args=[
                'sudo', 'modprobe', 'br_netfilter',
                run.Raw('&&'),
                'sudo', 'sysctl', 'net.bridge.bridge-nf-call-ip6tables=1',
                run.Raw('&&'),
                'sudo', 'sysctl', 'net.bridge.bridge-nf-call-iptables=1',
                run.Raw('&&'),
                'sudo', 'sysctl', 'net.ipv4.ip_forward=1',
                run.Raw('&&'),
                'sudo', 'swapoff', '-a',
            ],
            wait=False,
        )
    )

    # set docker cgroup driver = systemd
    # see https://kubernetes.io/docs/setup/production-environment/container-runtimes/#docker
    # see https://github.com/kubernetes/kubeadm/issues/2066
    daemon_json = """
{
    "exec-opts": ["native.cgroupdriver=systemd"],
    "log-driver": "json-file",
    "log-opts": {
        "max-size": "100m"
    },
    "storage-driver": "overlay2"
}
"""
    for remote in ctx.cluster.remotes.keys():
        remote.write_file('/etc/docker/daemon.json', daemon_json, sudo=True)
    run.wait(
        ctx.cluster.run(
            args=[
                'sudo', 'systemctl', 'restart', 'docker',
                run.Raw('||'),
                'true',
            ],
            wait=False,
        )
    )
    yield

@contextlib.contextmanager
def kubeadm_install(ctx, config):
    version = config.get('version', '1.21')

    os_type = teuthology.get_distro(ctx)
    os_version = teuthology.get_distro_version(ctx)

    if os_type in ['centos', 'rhel']:
        os = f"CentOS_{os_version.split('.')[0]}"
        log.info('Installing cri-o')
        run.wait(
            ctx.cluster.run(
                args=[
                    'sudo', 'curl', '-L', '-o',
                    '/etc/yum.repos.d/devel:kubic:libcontainers:stable.repo',
                    f'https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/{os}/devel:kubic:libcontainers:stable.repo',
                    run.Raw('&&'),
                    'sudo', 'curl', '-L', '-o',
                    f'/etc/yum.repos.d/devel:kubic:libcontainers:stable:cri-o:{version}.repo',
                    f'https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable:/cri-o:/{version}/{os}/devel:kubic:libcontainers:stable:cri-o:{version}.repo',
                    run.Raw('&&'),
                    'sudo', 'dnf', 'install', '-y', 'cri-o',
                ],
                wait=False,
            )
        )

        log.info('Installing kube{adm,ctl,let}')
        repo = """[kubernetes]
name=Kubernetes
baseurl=https://packages.cloud.google.com/yum/repos/kubernetes-el7-$basearch
enabled=1
gpgcheck=1
repo_gpgcheck=1
gpgkey=https://packages.cloud.google.com/yum/doc/yum-key.gpg https://packages.cloud.google.com/yum/doc/rpm-package-key.gpg
"""
        for remote in ctx.cluster.remotes.keys():
            remote.write_file(
                '/etc/yum.repos.d/kubernetes.repo',
                repo,
                sudo=True,
            )
        run.wait(
            ctx.cluster.run(
                args=[
                    'sudo', 'dnf', 'install', '-y',
                    'kubelet', 'kubeadm', 'kubectl',
                    'iproute-tc', 'bridge-utils',
                ],
                wait=False,
            )
        )

        # fix cni config so flannel can manage the pod network
        for remote in ctx.cluster.remotes.keys():
            conf = """# from https://github.com/cri-o/cri-o/blob/master/tutorials/kubernetes.md#flannel-network
{
    "name": "crio",
    "type": "flannel"
}
"""
            remote.write_file('/etc/cni/net.d/10-crio-flannel.conf', conf, sudo=True)
            remote.run(args=[
                'sudo', 'rm', '-f',
                '/etc/cni/net.d/87-podman-bridge.conflist',
                '/etc/cni/net.d/100-crio-bridge.conf',
            ])

        # start crio
        run.wait(
            ctx.cluster.run(
                args=[
                    'sudo', 'systemctl', 'daemon-reload',
                    run.Raw('&&'),
                    'sudo', 'systemctl', 'enable', 'crio', '--now',
                ],
                wait=False,
            )
        )

    elif os_type == 'ubuntu':
        os = f"xUbuntu_{os_version}"
        log.info('Installing kube{adm,ctl,let}')
        run.wait(
            ctx.cluster.run(
                args=[
                    'sudo', 'apt', 'update',
                    run.Raw('&&'),
                    'sudo', 'apt', 'install', '-y',
                    'apt-transport-https', 'ca-certificates', 'curl',
                    run.Raw('&&'),
                    'sudo', 'curl', '-fsSLo',
                    '/usr/share/keyrings/kubernetes-archive-keyring.gpg',
                    'https://packages.cloud.google.com/apt/doc/apt-key.gpg',
                    run.Raw('&&'),
                    'echo', 'deb [signed-by=/usr/share/keyrings/kubernetes-archive-keyring.gpg] https://apt.kubernetes.io/ kubernetes-xenial main',
                    run.Raw('|'),
                    'sudo', 'tee', '/etc/apt/sources.list.d/kubernetes.list',
                    run.Raw('&&'),
                    'sudo', 'apt', 'update',
                    run.Raw('&&'),
                    'sudo', 'apt', 'install', '-y',
                    'kubelet', 'kubeadm', 'kubectl',
                ],
                wait=False,
            )
        )

    else:
        raise RuntimeError(f'unsupported distro {os_type} for cri-o')

    run.wait(
        ctx.cluster.run(
            args=[
                'sudo', 'systemctl', 'enable', '--now', 'kubelet',
                run.Raw('&&'),
                'sudo', 'kubeadm', 'config', 'images', 'pull',
            ],
            wait=False,
        )
    )

    yield

    if config.get('uninstall', True):
        log.info('Uninstalling kube{adm,let,ctl}')
        if os_type in ['centos', 'rhel']:
            run.wait(
                ctx.cluster.run(
                    args=[
                        'sudo', 'rm', '-f',
                        '/etc/yum.repos.d/kubernetes.repo',
                        run.Raw('&&'),
                        'sudo', 'dnf', 'remove', '-y',
                        'kubeadm', 'kubelet', 'kubectl', 'cri-o',
                    ],
                    wait=False,
                )
            )
        elif os_type == 'ubuntu' and False:
            # intentionally disabled: package removal on ubuntu is skipped
            run.wait(
                ctx.cluster.run(
                    args=[
                        'sudo', 'rm', '-f',
                        '/etc/apt/sources.list.d/devel:kubic:libcontainers:stable.list',
                        f'/etc/apt/sources.list.d/devel:kubic:libcontainers:stable:cri-o:{version}.list',
                        '/etc/apt/trusted.gpg.d/libcontainers-cri-o.gpg',
                        run.Raw('&&'),
                        'sudo', 'apt', 'remove', '-y',
                        'kubeadm', 'kubelet', 'kubectl', 'cri-o', 'cri-o-runc',
                    ],
                    wait=False,
                )
            )

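# Hypothetical overrides showing the two knobs kubeadm_install reads
# ('version' selects the cri-o repo; 'uninstall: false' keeps packages
# installed after the run):
#
#   tasks:
#   - kubeadm:
#       version: "1.21"
#       uninstall: false
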
@contextlib.contextmanager
def kubeadm_init_join(ctx, config):
    cluster_name = config['cluster']

    bootstrap_remote = None
    remotes = {}      # remote -> ip
    for remote, roles in ctx.cluster.remotes.items():
        for role in roles:
            if role.startswith('host.'):
                if not bootstrap_remote:
                    bootstrap_remote = remote
                if remote not in remotes:
                    remotes[remote] = remote.ssh.get_transport().getpeername()[0]
    if not bootstrap_remote:
        raise RuntimeError('must define at least one host.something role')
    ctx.kubeadm[cluster_name].bootstrap_remote = bootstrap_remote
    ctx.kubeadm[cluster_name].remotes = remotes
    ctx.kubeadm[cluster_name].token = 'abcdef.' + ''.join([
        random.choice('0123456789abcdefghijklmnopqrstuvwxyz') for _ in range(16)
    ])
    log.info(f'Token: {ctx.kubeadm[cluster_name].token}')
    log.info(f'Remotes: {ctx.kubeadm[cluster_name].remotes}')

    try:
        # init
        cmd = [
            'sudo', 'kubeadm', 'init',
            '--node-name', ctx.kubeadm[cluster_name].bootstrap_remote.shortname,
            '--token', ctx.kubeadm[cluster_name].token,
            '--pod-network-cidr', str(ctx.kubeadm[cluster_name].pod_subnet),
        ]
        bootstrap_remote.run(args=cmd)

        # join additional nodes
        joins = []
        for remote, ip in ctx.kubeadm[cluster_name].remotes.items():
            if remote == bootstrap_remote:
                continue
            cmd = [
                'sudo', 'kubeadm', 'join',
                ctx.kubeadm[cluster_name].remotes[ctx.kubeadm[cluster_name].bootstrap_remote] + ':6443',
                '--node-name', remote.shortname,
                '--token', ctx.kubeadm[cluster_name].token,
                '--discovery-token-unsafe-skip-ca-verification',
            ]
            joins.append(remote.run(args=cmd, wait=False))
        run.wait(joins)
        yield

    except Exception as e:
        log.exception(e)
        raise

    finally:
        log.info('Cleaning up node')
        run.wait(
            ctx.cluster.run(
                args=['sudo', 'kubeadm', 'reset', 'cleanup-node', '-f'],
                wait=False,
            )
        )

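# Note: kubeadm requires bootstrap tokens to match [a-z0-9]{6}.[a-z0-9]{16};
# the fixed 'abcdef.' prefix plus 16 random characters above satisfies that,
# e.g. a (made-up) value would look like 'abcdef.0123456789abcdef'.
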
@contextlib.contextmanager
def kubectl_config(ctx, config):
    cluster_name = config['cluster']
    bootstrap_remote = ctx.kubeadm[cluster_name].bootstrap_remote

    ctx.kubeadm[cluster_name].admin_conf = \
        bootstrap_remote.read_file('/etc/kubernetes/admin.conf', sudo=True)

    log.info('Setting up kubectl')
    try:
        ctx.cluster.run(args=[
            'mkdir', '-p', '.kube',
            run.Raw('&&'),
            'sudo', 'mkdir', '-p', '/root/.kube',
        ])
        for remote in ctx.kubeadm[cluster_name].remotes.keys():
            remote.write_file('.kube/config', ctx.kubeadm[cluster_name].admin_conf)
            remote.sudo_write_file('/root/.kube/config',
                                   ctx.kubeadm[cluster_name].admin_conf)
        yield

    except Exception as e:
        log.exception(e)
        raise

    finally:
        log.info('Deconfiguring kubectl')
        ctx.cluster.run(args=[
            'rm', '-rf', '.kube',
            run.Raw('&&'),
            'sudo', 'rm', '-rf', '/root/.kube',
        ])

def map_vnet(mip):
    for mapping in teuth_config.get('vnet', []):
        mnet = ipaddress.ip_network(mapping['machine_subnet'])
        vnet = ipaddress.ip_network(mapping['virtual_subnet'])
        if vnet.prefixlen >= mnet.prefixlen:
            log.error(f"virtual_subnet {vnet} prefix >= machine_subnet {mnet} prefix")
            return None
        if mip in mnet:
            pos = list(mnet.hosts()).index(mip)
            log.info(f"{mip} is in {mnet} at pos {pos}")
            sub = list(vnet.subnets(32 - mnet.prefixlen))[pos]
            return sub
    return None

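# Worked example with made-up teuthology.yaml values:
#
#   vnet:
#   - machine_subnet: 172.21.0.0/20
#     virtual_subnet: 10.0.0.0/8
#
# A machine at 172.21.0.5 sits at pos 4 of mnet.hosts() (hosts begin at
# 172.21.0.1), and vnet.subnets(32 - 20) carves 10.0.0.0/8 into 4096 /20
# blocks, so map_vnet returns 10.0.64.0/20.  Every test machine thus maps
# to its own non-overlapping pod subnet.
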
@contextlib.contextmanager
def allocate_pod_subnet(ctx, config):
    """
    Allocate a private subnet that will not collide with other test machines/clusters
    """
    cluster_name = config['cluster']
    assert cluster_name == 'kubeadm', 'multiple subnets not yet implemented'

    log.info('Identifying pod subnet')
    remote = list(ctx.cluster.remotes.keys())[0]
    ip = remote.ssh.get_transport().getpeername()[0]
    mip = ipaddress.ip_address(ip)
    vnet = map_vnet(mip)
    assert vnet
    log.info(f'Pod subnet: {vnet}')
    ctx.kubeadm[cluster_name].pod_subnet = vnet
    yield

@contextlib.contextmanager
def pod_network(ctx, config):
    cluster_name = config['cluster']
    pnet = config.get('pod_network', 'calico')
    if pnet == 'flannel':
        r = ctx.kubeadm[cluster_name].bootstrap_remote.run(
            args=[
                'curl',
                'https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml',
            ],
            stdout=BytesIO(),
        )
        assert r.exitstatus == 0
        flannel = list(yaml.load_all(r.stdout.getvalue(), Loader=yaml.FullLoader))
        for o in flannel:
            if o.get('data', {}).get('net-conf.json'):
                log.info(f'Updating {o}')
                # swap flannel's default pod CIDR for our allocated subnet
                o['data']['net-conf.json'] = o['data']['net-conf.json'].replace(
                    '10.244.0.0/16',
                    str(ctx.kubeadm[cluster_name].pod_subnet)
                )
        flannel_yaml = yaml.dump_all(flannel)
        log.debug(f'Flannel:\n{flannel_yaml}')
        _kubectl(ctx, config, ['apply', '-f', '-'], stdin=flannel_yaml)

    elif pnet == 'calico':
        _kubectl(ctx, config, [
            'create', '-f',
            'https://docs.projectcalico.org/manifests/tigera-operator.yaml'
        ])
        cr = {
            'apiVersion': 'operator.tigera.io/v1',
            'kind': 'Installation',
            'metadata': {'name': 'default'},
            'spec': {
                'calicoNetwork': {
                    'ipPools': [
                        {
                            'blockSize': 26,
                            'cidr': str(ctx.kubeadm[cluster_name].pod_subnet),
                            'encapsulation': 'VXLANCrossSubnet',
                            'natOutgoing': 'Enabled',
                            'nodeSelector': 'all()',
                        }
                    ]
                }
            }
        }
        _kubectl(ctx, config, ['create', '-f', '-'], stdin=yaml.dump(cr))

    else:
        raise RuntimeError(f'unrecognized pod_network {pnet}')

    try:
        yield

    finally:
        if pnet == 'flannel':
            _kubectl(ctx, config, [
                'delete', '-f',
                'https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml',
            ])
        elif pnet == 'calico':
            _kubectl(ctx, config, ['delete', 'installation', 'default'])
            _kubectl(ctx, config, [
                'delete', '-f',
                'https://docs.projectcalico.org/manifests/tigera-operator.yaml'
            ])

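# The CNI plugin is chosen per job; e.g. (illustrative fragment):
#
#   tasks:
#   - kubeadm:
#       pod_network: flannel    # calico is the default
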
@contextlib.contextmanager
def setup_pvs(ctx, config):
    """
    Create PVs for all scratch LVs and set up a trivial provisioner
    """
    log.info('Scanning for scratch devices')
    crs = []
    for remote in ctx.cluster.remotes.keys():
        ls = remote.read_file('/scratch_devs').decode('utf-8').strip().splitlines()
        log.info(f'Scratch devices on {remote.shortname}: {ls}')
        for dev in ls:
            devname = dev.split('/')[-1].replace("_", "-")
            crs.append({
                'apiVersion': 'v1',
                'kind': 'PersistentVolume',
                'metadata': {'name': f'{remote.shortname}-{devname}'},
                'spec': {
                    'volumeMode': 'Block',
                    'accessModes': ['ReadWriteOnce'],
                    'capacity': {'storage': '100Gi'},  # doesn't matter?
                    'persistentVolumeReclaimPolicy': 'Recycle',
                    'storageClassName': 'scratch',
                    'local': {'path': dev},
                    'nodeAffinity': {
                        'required': {
                            'nodeSelectorTerms': [
                                {
                                    'matchExpressions': [
                                        {
                                            'key': 'kubernetes.io/hostname',
                                            'operator': 'In',
                                            'values': [remote.shortname]
                                        }
                                    ]
                                }
                            ]
                        }
                    }
                }
            })
            # overwriting first few MB is enough to make k8s happy
            remote.run(args=[
                'sudo', 'dd', 'if=/dev/zero', f'of={dev}', 'bs=1M', 'count=10'
            ])
    crs.append({
        'kind': 'StorageClass',
        'apiVersion': 'storage.k8s.io/v1',
        'metadata': {'name': 'scratch'},
        'provisioner': 'kubernetes.io/no-provisioner',
        'volumeBindingMode': 'WaitForFirstConsumer',
    })
    y = yaml.dump_all(crs)
    log.info('Creating PVs + StorageClass')
    _kubectl(ctx, config, ['create', '-f', '-'], stdin=y)
    yield

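# For reference, a (hypothetical) claim that would bind one of these PVs;
# with WaitForFirstConsumer, binding is deferred until a pod uses the claim:
#
#   apiVersion: v1
#   kind: PersistentVolumeClaim
#   metadata:
#     name: scratch-claim
#   spec:
#     accessModes: [ReadWriteOnce]
#     volumeMode: Block
#     storageClassName: scratch
#     resources:
#       requests:
#         storage: 100Gi
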
@contextlib.contextmanager
def final(ctx, config):
    cluster_name = config['cluster']

    # remove master node taint
    _kubectl(ctx, config, [
        'taint', 'node',
        ctx.kubeadm[cluster_name].bootstrap_remote.shortname,
        'node-role.kubernetes.io/master-',
    ])

    yield

@contextlib.contextmanager
def task(ctx, config):
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        "task only supports a dictionary for configuration"

    log.info('Kubeadm start')

    overrides = ctx.config.get('overrides', {})
    teuthology.deep_merge(config, overrides.get('kubeadm', {}))
    log.info('Config: ' + str(config))

    # set up cluster context
    if not hasattr(ctx, 'kubeadm'):
        ctx.kubeadm = {}
    if 'cluster' not in config:
        config['cluster'] = 'kubeadm'
    cluster_name = config['cluster']
    if cluster_name not in ctx.kubeadm:
        ctx.kubeadm[cluster_name] = argparse.Namespace()

    with contextutil.nested(
            lambda: preflight(ctx, config),
            lambda: allocate_pod_subnet(ctx, config),
            lambda: kubeadm_install(ctx, config),
            lambda: kubeadm_init_join(ctx, config),
            lambda: kubectl_config(ctx, config),
            lambda: pod_network(ctx, config),
            lambda: setup_pvs(ctx, config),
            lambda: final(ctx, config),
    ):
        try:
            log.info('Kubeadm complete, yielding')
            yield

        finally:
            log.info('Tearing down kubeadm')
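
# A minimal (hypothetical) job exercising this task end to end; at least
# one host.* role is required by kubeadm_init_join:
#
#   roles:
#   - [host.a]
#   - [host.b]
#   tasks:
#   - kubeadm:
#   - kubectl:
#     - get nodes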