update ceph source to reef 18.1.2
ceph/src/cephadm/box/box.py
index 6c4dd7fd782957e711963d2be4192f03e9b84949..db2f24233512a96c689ed81f6acb5be59b71e3e7 100755
@@ -2,11 +2,13 @@
 import argparse
 import os
 import stat
+import json
 import sys
-
 import host
 import osd
+from multiprocessing import Process, Pool
 from util import (
+    BoxType,
     Config,
     Target,
     ensure_inside_container,
@@ -14,17 +16,35 @@ from util import (
     get_boxes_container_info,
     run_cephadm_shell_command,
     run_dc_shell_command,
+    run_dc_shell_commands,
+    get_container_engine,
     run_shell_command,
+    run_shell_commands,
+    ContainerEngine,
+    DockerEngine,
+    PodmanEngine,
+    colored,
+    engine,
+    engine_compose,
+    Colors,
+    get_seed_name
 )
 
 CEPH_IMAGE = 'quay.ceph.io/ceph-ci/ceph:main'
 BOX_IMAGE = 'cephadm-box:latest'
 
# NOTE: this image tar is a trickeroo so cephadm won't pull the image every time
-# we deploy a cluster. Keep in mind that you'll be responsible of pulling the
-# image yourself with `box cluster setup`
+# we deploy a cluster. Keep in mind that you'll be responsible for pulling the
+# image yourself with `./box.py -v cluster setup`
 CEPH_IMAGE_TAR = 'docker/ceph/image/quay.ceph.image.tar'
+CEPH_ROOT = '../../../'
+DASHBOARD_PATH = '../../../src/pybind/mgr/dashboard/frontend/'
 
+root_error_msg = """
+WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
+Running this script with sudo can kill your computer; try again
+without sudo if you value your time.
+"""
 
 def remove_ceph_image_tar():
     if os.path.exists(CEPH_IMAGE_TAR):
@@ -32,7 +52,7 @@ def remove_ceph_image_tar():
 
 
 def cleanup_box() -> None:
-    osd.cleanup()
+    osd.cleanup_osds()
     remove_ceph_image_tar()
 
 
@@ -40,7 +60,8 @@ def image_exists(image_name: str):
     # extract the tag from the image name
     assert image_name.find(':') != -1
     image_name, tag = image_name.split(':')
-    images = run_shell_command('docker image ls').split('\n')
+    engine = get_container_engine()
+    images = engine.run('image ls').split('\n')
     IMAGE_NAME = 0
     TAG = 1
     for image in images:
@@ -54,27 +75,50 @@ def image_exists(image_name: str):
 
 def get_ceph_image():
     print('Getting ceph image')
-    run_shell_command(f'docker pull {CEPH_IMAGE}')
+    engine = get_container_engine()
+    engine.run(f'pull {CEPH_IMAGE}')
     # update
-    run_shell_command(f'docker build -t {CEPH_IMAGE} docker/ceph')
+    engine.run(f'build -t {CEPH_IMAGE} docker/ceph')
     if not os.path.exists('docker/ceph/image'):
         os.mkdir('docker/ceph/image')
 
     remove_ceph_image_tar()
 
-    run_shell_command(f'docker save {CEPH_IMAGE} -o {CEPH_IMAGE_TAR}')
+    engine.run(f'save {CEPH_IMAGE} -o {CEPH_IMAGE_TAR}')
+    run_shell_command(f'chmod 777 {CEPH_IMAGE_TAR}')
     print('Ceph image added')
 
 
 def get_box_image():
     print('Getting box image')
-    run_shell_command('docker build -t cephadm-box -f Dockerfile .')
+    engine = get_container_engine()
+    engine.run(f'build -t cephadm-box -f {engine.dockerfile} .')
     print('Box image added')
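Every image operation above now goes through get_container_engine() instead of a hard-coded `docker` invocation. The engine classes live in util.py and are not part of this diff; the sketch below is a hypothetical reconstruction of the interface, inferred purely from the call sites in box.py (run, up, get_containers, get_seed, restart, command, seed_name, dockerfile), so the real util.py signatures may differ:

    # Illustrative sketch only; util.py is the source of truth.
    from typing import List
    from util import run_shell_command

    class ContainerEngine:
        command = 'docker'          # CLI binary; PodmanEngine would use 'podman'
        seed_name = 'seed'          # assumed name of the seed container
        dockerfile = 'Dockerfile'   # engine-specific Dockerfile used by get_box_image()

        def run(self, args: str) -> str:
            # call sites pass only the subcommand, e.g. engine.run('image ls')
            return run_shell_command(f'{self.command} {args}')

        def up(self, hosts: int) -> None:
            raise NotImplementedError   # start the seed + host containers

        def get_containers(self) -> List[str]:
            raise NotImplementedError   # names of the running box containers

        def get_seed(self) -> str:
            raise NotImplementedError   # handle to the seed container

        def restart(self) -> None:
            raise NotImplementedError   # DockerEngine only: restart the daemon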
 
+def check_dashboard():
+    if not os.path.exists(os.path.join(CEPH_ROOT, 'dist')):
+        print(colored('Missing build in dashboard', Colors.WARNING))
+
+def check_cgroups():
+    if not os.path.exists('/sys/fs/cgroup/cgroup.controllers'):
+        print(colored('cgroups v1 is not supported', Colors.FAIL))
+        print('Enable cgroups v2 please')
+        sys.exit(666)
+
+def check_selinux():
+    selinux = run_shell_command('getenforce')
+    if 'Disabled' not in selinux:
+        print(colored('SELinux should be disabled; please disable it if you '
+                       'don\'t want unexpected behaviour.', Colors.WARNING))
+
+def dashboard_setup():
+    command = f'cd {DASHBOARD_PATH} && npm install'
+    run_shell_command(command)
+    command = f'cd {DASHBOARD_PATH} && npm run build'
+    run_shell_command(command)
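Note that check_selinux() shells out to getenforce unconditionally, which fails outright on machines with no SELinux tooling installed. A more defensive variant (illustrative only; check_selinux_safe is not part of this diff) could be:

    import shutil
    import subprocess

    def check_selinux_safe():
        # hypothetical variant: skip the check when SELinux tooling is absent
        if shutil.which('getenforce') is None:
            return
        status = subprocess.run(['getenforce'], capture_output=True,
                                text=True).stdout.strip()
        if status != 'Disabled':
            print('WARNING: SELinux is enabled; disable it to avoid '
                  'unexpected behaviour.')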
 
 class Cluster(Target):
     _help = 'Manage docker cephadm boxes'
-    actions = ['bootstrap', 'start', 'down', 'list', 'sh', 'setup', 'cleanup']
+    actions = ['bootstrap', 'start', 'down', 'list', 'bash', 'setup', 'cleanup']
 
     def set_args(self):
         self.parser.add_argument(
@@ -82,17 +126,37 @@ class Cluster(Target):
         )
         self.parser.add_argument('--osds', type=int, default=3, help='Number of osds')
 
-        self.parser.add_argument('--hosts', type=int, default=2, help='Number of hosts')
+        self.parser.add_argument('--hosts', type=int, default=1, help='Number of hosts')
         self.parser.add_argument('--skip-deploy-osds', action='store_true', help='skip deploy osd')
         self.parser.add_argument('--skip-create-loop', action='store_true', help='skip create loopback device')
         self.parser.add_argument('--skip-monitoring-stack', action='store_true', help='skip monitoring stack')
         self.parser.add_argument('--skip-dashboard', action='store_true', help='skip dashboard')
         self.parser.add_argument('--expanded', action='store_true', help='deploy 3 hosts and 3 osds')
+        self.parser.add_argument('--jobs', type=int, help='Number of jobs scheduled in parallel')
 
     @ensure_outside_container
     def setup(self):
-        get_ceph_image()
-        get_box_image()
+        check_cgroups()
+        check_selinux()
+
+        targets = [
+                get_ceph_image,
+                get_box_image,
+                dashboard_setup
+        ]
+        results = []
+        jobs = Config.get('jobs')
+        jobs = int(jobs) if jobs else None
+        pool = Pool(jobs)
+        for target in targets:
+            results.append(pool.apply_async(target))
+
+        for result in results:
+            result.wait()
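setup() builds the ceph image, the box image, and the dashboard concurrently via multiprocessing.Pool. One subtlety: AsyncResult.wait() only blocks until the job ends and does not surface worker exceptions, so a failed build passes silently here, whereas AsyncResult.get() would re-raise it. A standalone illustration of the difference:

    from multiprocessing import Pool

    def failing_build():
        raise RuntimeError('build failed')

    if __name__ == '__main__':
        with Pool(2) as pool:
            result = pool.apply_async(failing_build)
            result.wait()   # returns quietly even though the worker raised
            result.get()    # re-raises RuntimeError('build failed') here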
+
 
     @ensure_outside_container
     def cleanup(self):
@@ -101,16 +165,15 @@ class Cluster(Target):
     @ensure_inside_container
     def bootstrap(self):
         print('Running bootstrap on seed')
-        cephadm_path = os.environ.get('CEPHADM_PATH')
-        os.symlink('/cephadm/cephadm', cephadm_path)
-        run_shell_command(
-            'systemctl restart docker'
-        )  # restart to ensure docker is using daemon.json
+        cephadm_path = str(os.environ.get('CEPHADM_PATH'))
 
+        engine = get_container_engine()
+        if isinstance(engine, DockerEngine):
+            engine.restart()
         st = os.stat(cephadm_path)
         os.chmod(cephadm_path, st.st_mode | stat.S_IEXEC)
 
-        run_shell_command('docker load < /cephadm/box/docker/ceph/image/quay.ceph.image.tar')
+        engine.run('load < /cephadm/box/docker/ceph/image/quay.ceph.image.tar')
         # cephadm guid error because it sometimes tries to use quay.ceph.io/ceph-ci/ceph:<none>
         # instead of main branch's tag
         run_shell_command('export CEPH_SOURCE_FOLDER=/ceph')
@@ -134,9 +197,9 @@ class Cluster(Target):
         skip_dashboard = '--skip-dashboard' if Config.get('skip-dashboard') else ''
 
         fsid = Config.get('fsid')
-        config_folder = Config.get('config_folder')
-        config = Config.get('config')
-        keyring = Config.get('keyring')
+        config_folder = str(Config.get('config_folder'))
+        config = str(Config.get('config'))
+        keyring = str(Config.get('keyring'))
         if not os.path.exists(config_folder):
             os.mkdir(config_folder)
 
@@ -159,26 +222,34 @@ class Cluster(Target):
         )
 
         print('Running cephadm bootstrap...')
-        run_shell_command(cephadm_bootstrap_command)
+        run_shell_command(cephadm_bootstrap_command, expect_exit_code=120)
         print('Cephadm bootstrap complete')
 
         run_shell_command('sudo vgchange --refresh')
         run_shell_command('cephadm ls')
         run_shell_command('ln -s /ceph/src/cephadm/box/box.py /usr/bin/box')
 
-        # NOTE: sometimes cephadm in the box takes a while to update the containers
-        # running in the cluster and it cannot deploy the osds. In this case
-        # run: box -v osd deploy --vg vg1 to deploy osds again.
         run_cephadm_shell_command('ceph -s')
+
         print('Bootstrap completed!')
 
     @ensure_outside_container
     def start(self):
-        osds = Config.get('osds')
-        hosts = Config.get('hosts')
+        check_cgroups()
+        check_selinux()
+        osds = int(Config.get('osds'))
+        hosts = int(Config.get('hosts'))
+        engine = get_container_engine()
 
         # ensure boxes don't exist
-        run_shell_command('docker-compose down')
+        self.down()
+
+        # podman runs without sudo
+        if isinstance(engine, PodmanEngine):
+            I_am = run_shell_command('whoami')
+            if 'root' in I_am:
+                print(root_error_msg)
+                sys.exit(1)
 
         print('Checking docker images')
         if not image_exists(CEPH_IMAGE):
@@ -186,24 +257,42 @@ class Cluster(Target):
         if not image_exists(BOX_IMAGE):
             get_box_image()
 
+        used_loop = ""
         if not Config.get('skip_create_loop'):
-            print('Adding logical volumes (block devices) in loopback device...')
-            osd.create_loopback_devices(osds)
+            print('Creating OSD devices...')
+            used_loop = osd.create_loopback_devices(osds)
             print(f'Added {osds} logical volumes in a loopback device')
 
         print('Starting containers')
 
-        dcflags = '-f docker-compose.yml'
-        if not os.path.exists('/sys/fs/cgroup/cgroup.controllers'):
-            dcflags += ' -f docker-compose.cgroup1.yml'
-        run_shell_command(f'docker-compose {dcflags} up --scale hosts={hosts} -d')
+        engine.up(hosts)
+
+        containers = engine.get_containers()
+        seed = engine.get_seed()
+        # Unmounting somehow brings back the contents of the host /sys/dev/block.
+        # On startup /sys/dev/block is empty; after umount the symlinks reappear,
+        # so lsblk is able to run as expected.
+        run_dc_shell_command('umount /sys/dev/block', seed)
 
         run_shell_command('sudo sysctl net.ipv4.conf.all.forwarding=1')
         run_shell_command('sudo iptables -P FORWARD ACCEPT')
 
+        # set up chronyd on every box, passing -x so it never steps the system clock
+        chronyd_setup = """
+        sed 's/$OPTIONS/-x/g' /usr/lib/systemd/system/chronyd.service -i
+        systemctl daemon-reload
+        systemctl start chronyd
+        systemctl status --no-pager chronyd
+        """
+        for container in containers:
+            print(colored('Got container:', Colors.OKCYAN), str(container))
+        for container in containers:
+            run_dc_shell_commands(chronyd_setup, container)
+
         print('Setting up host ssh servers')
-        for h in range(hosts):
-            host._setup_ssh(h + 1)
+        for container in containers:
+            print(colored('Setting up ssh server for:', Colors.OKCYAN), str(container))
+            host._setup_ssh(container)
 
         verbose = '-v' if Config.get('verbose') else ''
         skip_deploy = '--skip-deploy-osds' if Config.get('skip-deploy-osds') else ''
@@ -212,36 +301,59 @@ class Cluster(Target):
         )
         skip_dashboard = '--skip-dashboard' if Config.get('skip-dashboard') else ''
         box_bootstrap_command = (
-            f'/cephadm/box/box.py {verbose} cluster bootstrap '
+            f'/cephadm/box/box.py {verbose} --engine {engine.command} cluster bootstrap '
             f'--osds {osds} '
             f'--hosts {hosts} '
             f'{skip_deploy} '
             f'{skip_dashboard} '
             f'{skip_monitoring_stack} '
         )
-        run_dc_shell_command(box_bootstrap_command, 1, 'seed')
-
-        info = get_boxes_container_info()
-        ips = info['ips']
-        hostnames = info['hostnames']
-        print(ips)
-        host._copy_cluster_ssh_key(ips)
+        print(box_bootstrap_command)
+        run_dc_shell_command(box_bootstrap_command, seed)
 
         expanded = Config.get('expanded')
         if expanded:
-            host._add_hosts(ips, hostnames)
-
-        if expanded and not Config.get('skip-deploy-osds'):
-            print('Deploying osds... This could take up to minutes')
-            osd.deploy_osds_in_vg('vg1')
-            print('Osds deployed')
+            info = get_boxes_container_info()
+            ips = info['ips']
+            hostnames = info['hostnames']
+            print(ips)
+            if hosts > 0:
+                host._copy_cluster_ssh_key(ips)
+                host._add_hosts(ips, hostnames)
+            if not Config.get('skip-deploy-osds'):
+                print('Deploying osds... This could take a few minutes')
+                osd.deploy_osds(osds)
+                print('Osds deployed')
+
+        dashboard_ip = 'localhost'
+        info = get_boxes_container_info(with_seed=True)
+        if isinstance(engine, DockerEngine):
+            for i in range(info['size']):
+                if get_seed_name() in info['container_names'][i]:
+                    dashboard_ip = info["ips"][i]
+        print(colored(f'dashboard available at https://{dashboard_ip}:8443', Colors.OKGREEN))
 
         print('Bootstrap finished successfully')
 
     @ensure_outside_container
     def down(self):
-        run_shell_command('docker-compose down')
-        cleanup_box()
+        engine = get_container_engine()
+        if isinstance(engine, PodmanEngine):
+            containers = json.loads(engine.run('container ls --format json'))
+            for container in containers:
+                for name in container['Names']:
+                    if name.startswith('box_hosts_'):
+                        engine.run(f'container kill {name}')
+                        engine.run(f'container rm {name}')
+            pods = json.loads(engine.run('pod ls --format json'))
+            for pod in pods:
+                if 'Name' in pod and pod['Name'].startswith('box_pod_host'):
+                    name = pod['Name']
+                    engine.run(f'pod kill {name}')
+                    engine.run(f'pod rm {name}')
+        else:
+            run_shell_command(f'{engine_compose()} -f {Config.get("docker_yaml")} down')
         print('Successfully killed all boxes')
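Note on the podman branch above: without a compose-managed stack, down() has to enumerate and remove the box_hosts_* containers and box_pod_host* pods itself, while the docker branch simply delegates to the compose file via engine_compose(). Either way, teardown is a single invocation (podman is the default engine per the argparse flag added below):

    ./box.py -v cluster down
    ./box.py -v --engine docker cluster down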
 
     @ensure_outside_container
@@ -254,11 +366,12 @@ class Cluster(Target):
             print(f'{name} \t{ip} \t{hostname}')
 
     @ensure_outside_container
-    def sh(self):
+    def bash(self):
         # we need verbose to see the prompt after running shell command
         Config.set('verbose', True)
         print('Seed bash')
-        run_shell_command('docker-compose exec seed bash')
+        engine = get_container_engine()
+        engine.run(f'exec -it {engine.seed_name} bash')
 
 
 targets = {
@@ -273,6 +386,10 @@ def main():
     parser.add_argument(
         '-v', action='store_true', dest='verbose', help='be more verbose'
     )
+    parser.add_argument(
+        '--engine', type=str, default='podman',
+        dest='engine', help='container engine to use: "docker" or "podman"'
+    )
 
     subparsers = parser.add_subparsers()
     target_instances = {}
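For reference, the end-to-end flow this diff expects, matching the updated comment next to CEPH_IMAGE_TAR and the argparse defaults (--osds 3, --hosts 1, engine podman), is roughly:

    ./box.py -v cluster setup    # build the ceph and box images, build the dashboard
    ./box.py -v cluster start    # create loop devices, start containers, bootstrap
    ./box.py -v cluster list     # show container name / IP / hostname of each box
    ./box.py -v cluster bash     # open a shell in the seed container
    ./box.py -v cluster down     # kill and remove all boxes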