import ceph pacific 16.2.5

[ceph.git] / ceph / src / cephadm / cephadm
diff --git a/ceph/src/cephadm/cephadm b/ceph/src/cephadm/cephadm

index fef1ac1fa14bcb87214a0346b5433754334a44e0..92bb8d617823751002bfa69a66de8852c3708b7e 100755 (executable)
--- a/ceph/src/cephadm/cephadm
+++ b/ceph/src/cephadm/cephadm
@@ -61,6 +61,7 @@ DATA_DIR = '/var/lib/ceph'
  LOG_DIR = '/var/log/ceph'
  LOCK_DIR = '/run/cephadm'
  LOGROTATE_DIR = '/etc/logrotate.d'
+SYSCTL_DIR = '/usr/lib/sysctl.d'
  UNIT_DIR = '/etc/systemd/system'
  LOG_DIR_MODE = 0o770
  DATA_DIR_MODE = 0o700
@@ -109,6 +110,7 @@ class BaseConfig:
          self.data_dir: str = DATA_DIR
          self.log_dir: str = LOG_DIR
          self.logrotate_dir: str = LOGROTATE_DIR
+        self.sysctl_dir: str = SYSCTL_DIR
          self.unit_dir: str = UNIT_DIR
          self.verbose: bool = False
          self.timeout: Optional[int] = DEFAULT_TIMEOUT
@@ -210,11 +212,9 @@ logging_config = {
          },
          'log_file': {
              'level': 'DEBUG',
-            'class': 'logging.handlers.RotatingFileHandler',
+            'class': 'logging.handlers.WatchedFileHandler',
              'formatter': 'cephadm',
              'filename': '%s/cephadm.log' % LOG_DIR,
-            'maxBytes': 1024000,
-            'backupCount': 1,
          }
      },
      'loggers': {
@@ -249,6 +249,18 @@ class Ceph(object):
  ##################################
  
  
+class OSD(object):
+    @staticmethod
+    def get_sysctl_settings() -> List[str]:
+        return [
+            '# allow a large number of OSDs',
+            'fs.aio-max-nr = 1048576',
+            'kernel.pid_max = 4194304',
+        ]
+
+##################################
+
+
  class Monitoring(object):
      """Define the configs for the monitoring containers"""
  
@@ -267,7 +279,6 @@ class Monitoring(object):
              'args': [
                  '--config.file=/etc/prometheus/prometheus.yml',
                  '--storage.tsdb.path=/prometheus',
-                '--web.listen-address=:{}'.format(port_map['prometheus'][0]),
              ],
              'config-json-files': [
                  'prometheus.yml',
@@ -298,7 +309,6 @@ class Monitoring(object):
              'cpus': '2',
              'memory': '2GB',
              'args': [
-                '--web.listen-address=:{}'.format(port_map['alertmanager'][0]),
                  '--cluster.listen-address=:{}'.format(port_map['alertmanager'][1]),
              ],
              'config-json-files': [
@@ -349,7 +359,7 @@ def populate_files(config_dir, config_files, uid, gid):
          config_file = os.path.join(config_dir, fname)
          config_content = dict_get_join(config_files, fname)
          logger.info('Write file: %s' % (config_file))
-        with open(config_file, 'w') as f:
+        with open(config_file, 'w', encoding='utf-8') as f:
              os.fchown(f.fileno(), uid, gid)
              os.fchmod(f.fileno(), 0o600)
              f.write(config_content)
@@ -491,36 +501,6 @@ class NFSGanesha(object):
                  os.fchown(f.fileno(), uid, gid)
                  f.write(self.rgw.get('keyring', ''))
  
-    def get_rados_grace_container(self, action):
-        # type: (str) -> CephContainer
-        """Container for a ganesha action on the grace db"""
-        entrypoint = '/usr/bin/ganesha-rados-grace'
-
-        assert self.pool
-        args = ['--pool', self.pool]
-        if self.namespace:
-            args += ['--ns', self.namespace]
-        if self.userid:
-            args += ['--userid', self.userid]
-        args += [action, self.get_daemon_name()]
-
-        data_dir = get_data_dir(self.fsid, self.ctx.data_dir,
-                                self.daemon_type, self.daemon_id)
-        volume_mounts = self.get_container_mounts(data_dir)
-        envs = self.get_container_envs()
-
-        logger.info('Creating RADOS grace for action: %s' % action)
-        c = CephContainer(
-            self.ctx,
-            image=self.image,
-            entrypoint=entrypoint,
-            args=args,
-            volume_mounts=volume_mounts,
-            cname=self.get_container_name(desc='grace-%s' % action),
-            envs=envs
-        )
-        return c
-
  ##################################
  
  
@@ -732,6 +712,13 @@ class HAproxy(object):
          mounts[os.path.join(data_dir, 'haproxy')] = '/var/lib/haproxy'
          return mounts
  
+    @staticmethod
+    def get_sysctl_settings() -> List[str]:
+        return [
+            '# IP forwarding',
+            'net.ipv4.ip_forward = 1',
+        ]
+
  ##################################
  
  
@@ -811,12 +798,12 @@ class Keepalived(object):
          return envs
  
      @staticmethod
-    def get_prestart():
-        return (
-            '# keepalived needs IP forwarding and non-local bind\n'
-            'sysctl net.ipv4.ip_forward=1\n'
-            'sysctl net.ipv4.ip_nonlocal_bind=1\n'
-        )
+    def get_sysctl_settings() -> List[str]:
+        return [
+            '# IP forwarding and non-local bind',
+            'net.ipv4.ip_forward = 1',
+            'net.ipv4.ip_nonlocal_bind = 1',
+        ]
  
      def extract_uid_gid_keepalived(self):
          # better directory for this?
@@ -1020,13 +1007,15 @@ def attempt_bind(ctx, s, address, port):
      try:
          s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
          s.bind((address, port))
-    except (socket.error, OSError) as e:  # py2 and py3
+    except OSError as e:
          if e.errno == errno.EADDRINUSE:
              msg = 'Cannot bind to IP %s port %d: %s' % (address, port, e)
              logger.warning(msg)
              raise PortOccupiedError(msg)
          else:
-            raise e
+            raise Error(e)
+    except Exception as e:
+        raise Error(e)
      finally:
          s.close()
  
@@ -1190,7 +1179,7 @@ class FileLock(object):
                          lock_id, lock_filename, poll_intervall
                      )
                      time.sleep(poll_intervall)
-        except:  # noqa
+        except Exception:
              # Something did go wrong, so decrement the counter.
              self._lock_counter = max(0, self._lock_counter - 1)
  
@@ -1484,6 +1473,16 @@ def call_timeout(ctx, command, timeout):
  ##################################
  
  
+def json_loads_retry(cli_func):
+    for sleep_secs in [1, 4, 4]:
+        try:
+            return json.loads(cli_func())
+        except json.JSONDecodeError:
+            logger.debug('Invalid JSON. Retrying in %s seconds...' % sleep_secs)
+            time.sleep(sleep_secs)
+    return json.loads(cli_func())
+
+
  def is_available(ctx, what, func):
      # type: (CephadmContext, str, Callable[[], bool]) -> None
      """
@@ -2061,10 +2060,11 @@ def check_units(ctx, units, enabler=None):
  
  
  def is_container_running(ctx: CephadmContext, name: str) -> bool:
-    out, err, ret = call_throws(ctx, [
-        ctx.container_engine.path, 'ps',
-        '--format', '{{.Names}}'])
-    return name in out
+    out, err, ret = call(ctx, [
+        ctx.container_engine.path, 'container', 'inspect',
+        '--format', '{{.State.Status}}', name
+    ])
+    return out == 'running'
  
  
  def get_legacy_config_fsid(cluster, legacy_dir=None):
@@ -2121,6 +2121,17 @@ def get_daemon_args(ctx, fsid, daemon_type, daemon_id):
      elif daemon_type in Monitoring.components:
          metadata = Monitoring.components[daemon_type]
          r += metadata.get('args', list())
+        # set ip and port to bind to for nodeexporter,alertmanager,prometheus
+        if daemon_type != 'grafana':
+            ip = ''
+            port = Monitoring.port_map[daemon_type][0]
+            if 'meta_json' in ctx and ctx.meta_json:
+                meta = json.loads(ctx.meta_json) or {}
+                if 'ip' in meta and meta['ip']:
+                    ip = meta['ip']
+                if 'ports' in meta and meta['ports']:
+                    port = meta['ports'][0]
+            r += [f'--web.listen-address={ip}:{port}']
          if daemon_type == 'alertmanager':
              config = get_parm(ctx.config_json)
              peers = config.get('peers', list())  # type: ignore
@@ -2163,7 +2174,6 @@ def create_daemon_dirs(ctx, fsid, daemon_type, daemon_id, uid, gid,
  
      if daemon_type in Monitoring.components.keys():
          config_json: Dict[str, Any] = get_parm(ctx.config_json)
-        required_files = Monitoring.components[daemon_type].get('config-json-files', list())
  
          # Set up directories specific to the monitoring component
          config_dir = ''
@@ -2192,10 +2202,14 @@ def create_daemon_dirs(ctx, fsid, daemon_type, daemon_id, uid, gid,
              makedirs(os.path.join(data_dir_root, config_dir, 'data'), uid, gid, 0o755)
  
          # populate the config directory for the component from the config-json
-        for fname in required_files:
-            if 'files' in config_json:  # type: ignore
+        if 'files' in config_json:
+            for fname in config_json['files']:
                  content = dict_get_join(config_json['files'], fname)
-                with open(os.path.join(data_dir_root, config_dir, fname), 'w') as f:
+                if os.path.isabs(fname):
+                    fpath = os.path.join(data_dir_root, fname.lstrip(os.path.sep))
+                else:
+                    fpath = os.path.join(data_dir_root, config_dir, fname)
+                with open(fpath, 'w', encoding='utf-8') as f:
                      os.fchown(f.fileno(), uid, gid)
                      os.fchmod(f.fileno(), 0o600)
                      f.write(content)
@@ -2267,8 +2281,8 @@ def get_config_and_keyring(ctx):
          try:
              with open(ctx.config, 'r') as f:
                  config = f.read()
-        except FileNotFoundError:
-            raise Error('config file: %s does not exist' % ctx.config)
+        except FileNotFoundError as e:
+            raise Error(e)
  
      if 'key' in ctx and ctx.key:
          keyring = '[%s]\n\tkey = %s\n' % (ctx.name, ctx.key)
@@ -2276,8 +2290,8 @@ def get_config_and_keyring(ctx):
          try:
              with open(ctx.keyring, 'r') as f:
                  keyring = f.read()
-        except FileNotFoundError:
-            raise Error('keyring file: %s does not exist' % ctx.keyring)
+        except FileNotFoundError as e:
+            raise Error(e)
  
      return config, keyring
  
@@ -2327,19 +2341,20 @@ def get_container_mounts(ctx, fsid, daemon_type, daemon_id,
              # these do not search for their keyrings in a data directory
              mounts[data_dir + '/keyring'] = '/etc/ceph/ceph.client.%s.%s.keyring' % (daemon_type, daemon_id)
  
-    if daemon_type in ['mon', 'osd']:
+    if daemon_type in ['mon', 'osd', 'clusterless-ceph-volume']:
          mounts['/dev'] = '/dev'  # FIXME: narrow this down?
          mounts['/run/udev'] = '/run/udev'
-    if daemon_type == 'osd':
+    if daemon_type in ['osd', 'clusterless-ceph-volume']:
          mounts['/sys'] = '/sys'  # for numa.cc, pick_address, cgroups, ...
+        mounts['/run/lvm'] = '/run/lvm'
+        mounts['/run/lock/lvm'] = '/run/lock/lvm'
+    if daemon_type == 'osd':
          # selinux-policy in the container may not match the host.
          if HostFacts(ctx).selinux_enabled:
              selinux_folder = '/var/lib/ceph/%s/selinux' % fsid
              if not os.path.exists(selinux_folder):
                  os.makedirs(selinux_folder, mode=0o755)
              mounts[selinux_folder] = '/sys/fs/selinux:ro'
-        mounts['/run/lvm'] = '/run/lvm'
-        mounts['/run/lock/lvm'] = '/run/lock/lvm'
  
      try:
          if ctx.shared_ceph_folder:  # make easy manager modules/ceph-volume development
@@ -2413,7 +2428,9 @@ def get_container(ctx: CephadmContext,
      entrypoint: str = ''
      name: str = ''
      ceph_args: List[str] = []
-    envs: List[str] = []
+    envs: List[str] = [
+        'TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES=134217728',
+    ]
      host_network: bool = True
  
      if container_args is None:
@@ -2540,7 +2557,14 @@ def deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid,
  
      ports = ports or []
      if any([port_in_use(ctx, port) for port in ports]):
-        raise Error("TCP Port(s) '{}' required for {} already in use".format(','.join(map(str, ports)), daemon_type))
+        if daemon_type == 'mgr':
+            # non-fatal for mgr when we are in mgr_standby_modules=false, but we can't
+            # tell whether that is the case here.
+            logger.warning(
+                f"ceph-mgr TCP port(s) {','.join(map(str, ports))} already in use"
+            )
+        else:
+            raise Error("TCP Port(s) '{}' required for {} already in use".format(','.join(map(str, ports)), daemon_type))
  
      data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
      if reconfig and not os.path.exists(data_dir):
@@ -2674,7 +2698,7 @@ def deploy_daemon_units(
      # cmd
      data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
      with open(data_dir + '/unit.run.new', 'w') as f, \
-         open(data_dir + '/unit.meta.new', 'w') as metaf:
+            open(data_dir + '/unit.meta.new', 'w') as metaf:
          f.write('set -e\n')
  
          if daemon_type in Ceph.daemons:
@@ -2710,18 +2734,11 @@ def deploy_daemon_units(
                      memory_limit=ctx.memory_limit,
                  )
                  _write_container_cmd_to_bash(ctx, f, prestart, 'LVM OSDs use ceph-volume lvm activate')
-        elif daemon_type == NFSGanesha.daemon_type:
-            # add nfs to the rados grace db
-            nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id)
-            prestart = nfs_ganesha.get_rados_grace_container('add')
-            _write_container_cmd_to_bash(ctx, f, prestart, 'add daemon to rados grace')
          elif daemon_type == CephIscsi.daemon_type:
              f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=True)) + '\n')
              ceph_iscsi = CephIscsi.init(ctx, fsid, daemon_id)
              tcmu_container = ceph_iscsi.get_tcmu_runner_container()
              _write_container_cmd_to_bash(ctx, f, tcmu_container, 'iscsi tcmu-runnter container', background=True)
-        elif daemon_type == Keepalived.daemon_type:
-            f.write(Keepalived.get_prestart())
  
          _write_container_cmd_to_bash(ctx, f, c, '%s.%s' % (daemon_type, str(daemon_id)))
  
@@ -2763,11 +2780,6 @@ def deploy_daemon_units(
                                                      daemon_id),
              )
              _write_container_cmd_to_bash(ctx, f, poststop, 'deactivate osd')
-        elif daemon_type == NFSGanesha.daemon_type:
-            # remove nfs from the rados grace db
-            nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id)
-            poststop = nfs_ganesha.get_rados_grace_container('remove')
-            _write_container_cmd_to_bash(ctx, f, poststop, 'remove daemon from rados grace')
          elif daemon_type == CephIscsi.daemon_type:
              # make sure we also stop the tcmu container
              ceph_iscsi = CephIscsi.init(ctx, fsid, daemon_id)
@@ -2785,6 +2797,9 @@ def deploy_daemon_units(
              os.rename(data_dir + '/unit.image.new',
                        data_dir + '/unit.image')
  
+    # sysctl
+    install_sysctl(ctx, fsid, daemon_type)
+
      # systemd
      install_base_units(ctx, fsid)
      unit = get_unit_file(ctx, fsid)
@@ -2915,16 +2930,38 @@ class Firewalld(object):
  def update_firewalld(ctx, daemon_type):
      # type: (CephadmContext, str) -> None
      firewall = Firewalld(ctx)
-
      firewall.enable_service_for(daemon_type)
+    firewall.apply_rules()
  
-    fw_ports = []
  
-    if daemon_type in Monitoring.port_map.keys():
-        fw_ports.extend(Monitoring.port_map[daemon_type])  # prometheus etc
+def install_sysctl(ctx: CephadmContext, fsid: str, daemon_type: str) -> None:
+    """
+    Set up sysctl settings
+    """
+    def _write(conf: Path, lines: List[str]) -> None:
+        lines = [
+            '# created by cephadm',
+            '',
+            *lines,
+            '',
+        ]
+        with open(conf, 'w') as f:
+            f.write('\n'.join(lines))
  
-    firewall.open_ports(fw_ports)
-    firewall.apply_rules()
+    conf = Path(ctx.sysctl_dir).joinpath(f'90-ceph-{fsid}-{daemon_type}.conf')
+    lines: Optional[List] = None
+
+    if daemon_type == 'osd':
+        lines = OSD.get_sysctl_settings()
+    elif daemon_type == 'haproxy':
+        lines = HAproxy.get_sysctl_settings()
+    elif daemon_type == 'keepalived':
+        lines = Keepalived.get_sysctl_settings()
+
+    # apply the sysctl settings
+    if lines:
+        _write(conf, lines)
+        call_throws(ctx, ['sysctl', '--system'])
  
  
  def install_base_units(ctx, fsid):
@@ -3094,13 +3131,13 @@ class CephContainer:
              'run',
              '--rm',
              '--ipc=host',
+            # some containers (ahem, haproxy) override this, but we want a fast
+            # shutdown always (and, more importantly, a successful exit even if we
+            # fall back to SIGKILL).
+            '--stop-signal=SIGTERM',
          ]
  
          if isinstance(self.ctx.container_engine, Podman):
-            # podman adds the container *name* to /etc/hosts (for 127.0.1.1)
-            # by default, which makes python's socket.getfqdn() return that
-            # instead of a valid hostname.
-            cmd_args.append('--no-hosts')
              if os.path.exists('/etc/ceph/podman-auth.json'):
                  cmd_args.append('--authfile=/etc/ceph/podman-auth.json')
  
@@ -3167,6 +3204,8 @@ class CephContainer:
  
          if self.host_network:
              cmd_args.append('--net=host')
+        if self.ctx.no_hosts:
+            cmd_args.append('--no-hosts')
          if self.privileged:
              cmd_args.extend([
                  '--privileged',
@@ -3306,7 +3345,7 @@ def normalize_image_digest(digest):
      #   quay.ceph.io/ceph/ceph -> ceph
      #   docker.io/ubuntu -> no change
      bits = digest.split('/')
-    if '.' not in bits[0] or len(bits) < 3:
+    if '.' not in bits[0] and len(bits) < 3:
          digest = DEFAULT_REGISTRY + '/' + digest
      return digest
  
@@ -3438,19 +3477,21 @@ def prepare_mon_addresses(
      if not ctx.skip_mon_network:
          # make sure IP is configured locally, and then figure out the
          # CIDR network
+        errmsg = f'Cannot infer CIDR network for mon IP `{base_ip}`'
          for net, ifaces in list_networks(ctx).items():
              ips: List[str] = []
              for iface, ls in ifaces.items():
                  ips.extend(ls)
-            if ipaddress.ip_address(unwrap_ipv6(base_ip)) in \
-                    [ipaddress.ip_address(ip) for ip in ips]:
-                mon_network = net
-                logger.info('Mon IP %s is in CIDR network %s' % (base_ip,
-                                                                 mon_network))
-                break
+            try:
+                if ipaddress.ip_address(unwrap_ipv6(base_ip)) in \
+                        [ipaddress.ip_address(ip) for ip in ips]:
+                    mon_network = net
+                    logger.info(f'Mon IP `{base_ip}` is in CIDR network `{mon_network}`')
+                    break
+            except ValueError as e:
+                logger.warning(f'{errmsg}: {e}')
          if not mon_network:
-            raise Error('Failed to infer CIDR network for mon ip %s; pass '
-                        '--skip-mon-network to configure it later' % base_ip)
+            raise Error(f'{errmsg}: pass --skip-mon-network to configure it later')
  
      return (addr_arg, ipv6, mon_network)
  
@@ -3832,6 +3873,7 @@ def prepare_bootstrap_config(
      cp.set('global', 'fsid', fsid)
      cp.set('global', 'mon_host', mon_addr)
      cp.set('global', 'container_image', image)
+
      if not cp.has_section('mon'):
          cp.add_section('mon')
      if (
@@ -3839,6 +3881,30 @@ def prepare_bootstrap_config(
              and not cp.has_option('mon', 'auth allow insecure global id reclaim')
      ):
          cp.set('mon', 'auth_allow_insecure_global_id_reclaim', 'false')
+
+    if ctx.single_host_defaults:
+        logger.info('Adjusting default settings to suit single-host cluster...')
+        # replicate across osds, not hosts
+        if (
+                not cp.has_option('global', 'osd_crush_choose_leaf_type')
+                and not cp.has_option('global', 'osd crush choose leaf type')
+        ):
+            cp.set('global', 'osd_crush_choose_leaf_type', '0')
+        # replica 2x
+        if (
+                not cp.has_option('global', 'osd_pool_default_size')
+                and not cp.has_option('global', 'osd pool default size')
+        ):
+            cp.set('global', 'osd_pool_default_size', '2')
+        # disable mgr standby modules (so we can colocate multiple mgrs on one host)
+        if not cp.has_section('mgr'):
+            cp.add_section('mgr')
+        if (
+                not cp.has_option('mgr', 'mgr_standby_modules')
+                and not cp.has_option('mgr', 'mgr standby modules')
+        ):
+            cp.set('mgr', 'mgr_standby_modules', 'false')
+
      cpf = StringIO()
      cp.write(cpf)
      config = cpf.getvalue()
@@ -3931,6 +3997,8 @@ def command_bootstrap(ctx):
              except PermissionError:
                  raise Error(f'Unable to create {dirname} due to permissions failure. Retry with root, or sudo or preallocate the directory.')
  
+    (user_conf, _) = get_config_and_keyring(ctx)
+
      if not ctx.skip_prepare_host:
          command_prepare_host(ctx)
      else:
@@ -3938,12 +4006,15 @@ def command_bootstrap(ctx):
  
      # initial vars
      fsid = ctx.fsid or make_fsid()
+    if not is_fsid(fsid):
+        raise Error('not an fsid: %s' % fsid)
+    logger.info('Cluster fsid: %s' % fsid)
+
      hostname = get_hostname()
      if '.' in hostname and not ctx.allow_fqdn_hostname:
          raise Error('hostname is a fully qualified domain name (%s); either fix (e.g., "sudo hostname %s" or similar) or pass --allow-fqdn-hostname' % (hostname, hostname.split('.')[0]))
      mon_id = ctx.mon_id or hostname
      mgr_id = ctx.mgr_id or generate_service_id()
-    logger.info('Cluster fsid: %s' % fsid)
  
      lock = FileLock(ctx, fsid)
      lock.acquire()
@@ -3958,14 +4029,15 @@ def command_bootstrap(ctx):
  
      image_ver = CephContainer(ctx, ctx.image, 'ceph', ['--version']).run().strip()
      logger.info(f'Ceph version: {image_ver}')
-    image_release = image_ver.split()[4]
-    if (
-        not ctx.allow_mismatched_release
-        and image_release not in [DEFAULT_IMAGE_RELEASE, LATEST_STABLE_RELEASE]
-    ):
-        raise Error(
-            f'Container release {image_release} != cephadm release {DEFAULT_IMAGE_RELEASE}; please use matching version of cephadm (pass --allow-mismatched-release to continue anyway)'
-        )
+
+    if not ctx.allow_mismatched_release:
+        image_release = image_ver.split()[4]
+        if image_release not in \
+                [DEFAULT_IMAGE_RELEASE, LATEST_STABLE_RELEASE]:
+            raise Error(
+                f'Container release {image_release} != cephadm release {DEFAULT_IMAGE_RELEASE};'
+                ' please use matching version of cephadm (pass --allow-mismatched-release to continue anyway)'
+            )
  
      logger.info('Extracting ceph user uid/gid from container image...')
      (uid, gid) = extract_uid_gid(ctx)
@@ -4025,14 +4097,18 @@ def command_bootstrap(ctx):
      # create mgr
      create_mgr(ctx, uid, gid, fsid, mgr_id, mgr_key, config, cli)
  
-    def json_loads_retry(cli_func):
-        for sleep_secs in [1, 4, 4]:
-            try:
-                return json.loads(cli_func())
-            except json.JSONDecodeError:
-                logger.debug('Invalid JSON. Retrying in %s seconds...' % sleep_secs)
-                time.sleep(sleep_secs)
-        return json.loads(cli_func())
+    if user_conf:
+        # user given config settings were already assimilated earlier
+        # but if the given settings contained any attributes in
+        # the mgr (e.g. mgr/cephadm/container_image_prometheus)
+        # they don't seem to be stored if there isn't a mgr yet.
+        # Since re-assimilating the same conf settings should be
+        # idempotent we can just do it again here.
+        with tempfile.NamedTemporaryFile(buffering=0) as tmp:
+            tmp.write(user_conf.encode('utf-8'))
+            cli(['config', 'assimilate-conf',
+                 '-i', '/var/lib/ceph/user.conf'],
+                {tmp.name: '/var/lib/ceph/user.conf:z'})
  
      # wait for mgr to restart (after enabling a module)
      def wait_for_mgr_restart():
@@ -4096,6 +4172,14 @@ def command_bootstrap(ctx):
      if not ctx.skip_dashboard:
          prepare_dashboard(ctx, uid, gid, cli, wait_for_mgr_restart)
  
+    if ctx.output_config == '/etc/ceph/ceph.conf' and not ctx.skip_admin_label:
+        logger.info('Enabling client.admin keyring and conf on hosts with "admin" label')
+        try:
+            cli(['orch', 'client-keyring', 'set', 'client.admin', 'label:_admin'])
+            cli(['orch', 'host', 'label', 'add', get_hostname(), '_admin'])
+        except Exception:
+            logger.info('Unable to set up "admin" label; assuming older version of Ceph')
+
      if ctx.apply_spec:
          logger.info('Applying %s to cluster' % ctx.apply_spec)
  
@@ -4244,9 +4328,6 @@ def command_deploy(ctx):
      elif daemon_type in Monitoring.components:
          # monitoring daemon - prometheus, grafana, alertmanager, node-exporter
          # Default Checks
-        if not ctx.reconfig and not redeploy:
-            daemon_ports.extend(Monitoring.port_map[daemon_type])
-
          # make sure provided config-json is sufficient
          config = get_parm(ctx.config_json)  # type: ignore
          required_files = Monitoring.components[daemon_type].get('config-json-files', list())
@@ -4267,8 +4348,8 @@ def command_deploy(ctx):
                        ports=daemon_ports)
  
      elif daemon_type == NFSGanesha.daemon_type:
-        if not ctx.reconfig and not redeploy:
-            daemon_ports.extend(NFSGanesha.port_map.values())
+        if not ctx.reconfig and not redeploy and not daemon_ports:
+            daemon_ports = list(NFSGanesha.port_map.values())
  
          config, keyring = get_config_and_keyring(ctx)
          # TODO: extract ganesha uid/gid (997, 994) ?
@@ -4427,6 +4508,10 @@ def command_shell(ctx):
                                              os.path.join(home, f))
              mounts[home] = '/root'
  
+    for i in ctx.volume:
+        a, b = i.split(':', 1)
+        mounts[a] = b
+
      c = CephContainer(
          ctx,
          image=ctx.image,
@@ -4510,8 +4595,8 @@ def command_ceph_volume(ctx):
          privileged=True,
          volume_mounts=mounts,
      )
-    verbosity = CallVerbosity.VERBOSE if ctx.log_output else CallVerbosity.VERBOSE_ON_FAILURE
-    out, err, code = call_throws(ctx, c.run_cmd(), verbosity=verbosity)
+
+    out, err, code = call_throws(ctx, c.run_cmd())
      if not code:
          print(out)
  
@@ -5342,6 +5427,68 @@ def command_rm_daemon(ctx):
  ##################################
  
  
+def _zap(ctx, what):
+    mounts = get_container_mounts(ctx, ctx.fsid, 'clusterless-ceph-volume', None)
+    c = CephContainer(
+        ctx,
+        image=ctx.image,
+        entrypoint='/usr/sbin/ceph-volume',
+        envs=ctx.env,
+        args=['lvm', 'zap', '--destroy', what],
+        privileged=True,
+        volume_mounts=mounts,
+    )
+    logger.info(f'Zapping {what}...')
+    out, err, code = call_throws(ctx, c.run_cmd())
+
+
+@infer_image
+def _zap_osds(ctx):
+    # assume fsid lock already held
+
+    # list
+    mounts = get_container_mounts(ctx, ctx.fsid, 'clusterless-ceph-volume', None)
+    c = CephContainer(
+        ctx,
+        image=ctx.image,
+        entrypoint='/usr/sbin/ceph-volume',
+        envs=ctx.env,
+        args=['inventory', '--format', 'json'],
+        privileged=True,
+        volume_mounts=mounts,
+    )
+    out, err, code = call_throws(ctx, c.run_cmd())
+    if code:
+        raise Error('failed to list osd inventory')
+    try:
+        ls = json.loads(out)
+    except ValueError as e:
+        raise Error(f'Invalid JSON in ceph-volume inventory: {e}')
+
+    for i in ls:
+        matches = [lv.get('cluster_fsid') == ctx.fsid for lv in i.get('lvs', [])]
+        if any(matches) and all(matches):
+            _zap(ctx, i.get('path'))
+        elif any(matches):
+            lv_names = [lv['name'] for lv in i.get('lvs', [])]
+            # TODO: we need to map the lv_names back to device paths (the vg
+            # id isn't part of the output here!)
+            logger.warning(f'Not zapping LVs (not implemented): {lv_names}')
+
+
+def command_zap_osds(ctx):
+    if not ctx.force:
+        raise Error('must pass --force to proceed: '
+                    'this command may destroy precious data!')
+
+    lock = FileLock(ctx, ctx.fsid)
+    lock.acquire()
+
+    _zap_osds(ctx)
+
+##################################
+
+
  def command_rm_cluster(ctx):
      # type: (CephadmContext) -> None
      if not ctx.force:
@@ -5378,11 +5525,15 @@ def command_rm_cluster(ctx):
      call(ctx, ['systemctl', 'stop', slice_name],
           verbosity=CallVerbosity.DEBUG)
  
+    # osds?
+    if ctx.zap_osds:
+        _zap_osds(ctx)
+
      # rm units
-    call_throws(ctx, ['rm', '-f', ctx.unit_dir +  # noqa: W504
-                      '/ceph-%s@.service' % ctx.fsid])
-    call_throws(ctx, ['rm', '-f', ctx.unit_dir +  # noqa: W504
-                      '/ceph-%s.target' % ctx.fsid])
+    call_throws(ctx, ['rm', '-f', ctx.unit_dir
+                      + '/ceph-%s@.service' % ctx.fsid])
+    call_throws(ctx, ['rm', '-f', ctx.unit_dir
+                      + '/ceph-%s.target' % ctx.fsid])
      call_throws(ctx, ['rm', '-rf',
                        ctx.unit_dir + '/ceph-%s.target.wants' % ctx.fsid])
      # rm data
@@ -5391,12 +5542,21 @@ def command_rm_cluster(ctx):
      if not ctx.keep_logs:
          # rm logs
          call_throws(ctx, ['rm', '-rf', ctx.log_dir + '/' + ctx.fsid])
-        call_throws(ctx, ['rm', '-rf', ctx.log_dir +  # noqa: W504
-                          '/*.wants/ceph-%s@*' % ctx.fsid])
+        call_throws(ctx, ['rm', '-rf', ctx.log_dir
+                          + '/*.wants/ceph-%s@*' % ctx.fsid])
  
      # rm logrotate config
      call_throws(ctx, ['rm', '-f', ctx.logrotate_dir + '/ceph-%s' % ctx.fsid])
  
+    # rm cephadm logrotate config if last cluster on host
+    if not os.listdir(ctx.data_dir):
+        call_throws(ctx, ['rm', '-f', ctx.logrotate_dir + '/cephadm'])
+
+    # rm sysctl settings
+    sysctl_dir = Path(ctx.sysctl_dir)
+    for p in sysctl_dir.glob(f'90-ceph-{ctx.fsid}-*.conf'):
+        p.unlink()
+
      # clean up config, keyring, and pub key files
      files = ['/etc/ceph/ceph.conf', '/etc/ceph/ceph.pub', '/etc/ceph/ceph.client.admin.keyring']
  
@@ -5410,6 +5570,7 @@ def command_rm_cluster(ctx):
                  if os.path.exists(files[n]):
                      os.remove(files[n])
  
+
  ##################################
  
  
@@ -5608,9 +5769,9 @@ class Packager(object):
          if self.ctx.gpg_url:
              return self.ctx.gpg_url
          if self.stable or self.version:
-            return 'https://download.ceph.com/keys/release.asc', 'release'
+            return 'https://download.ceph.com/keys/release.gpg', 'release'
          else:
-            return 'https://download.ceph.com/keys/autobuild.asc', 'autobuild'
+            return 'https://download.ceph.com/keys/autobuild.gpg', 'autobuild'
  
      def enable_service(self, service):
          """
@@ -5648,8 +5809,8 @@ class Apt(Packager):
              logger.error('failed to fetch GPG repo key from %s: %s' % (
                  url, err))
              raise Error('failed to fetch GPG key')
-        key = response.read().decode('utf-8')
-        with open('/etc/apt/trusted.gpg.d/ceph.%s.gpg' % name, 'w') as f:
+        key = response.read()
+        with open('/etc/apt/trusted.gpg.d/ceph.%s.gpg' % name, 'wb') as f:
              f.write(key)
  
          if self.version:
@@ -5666,6 +5827,8 @@ class Apt(Packager):
          with open(self.repo_path(), 'w') as f:
              f.write(content)
  
+        self.update()
+
      def rm_repo(self):
          for name in ['autobuild', 'release']:
              p = '/etc/apt/trusted.gpg.d/ceph.%s.gpg' % name
@@ -5683,11 +5846,15 @@ class Apt(Packager):
          logger.info('Installing packages %s...' % ls)
          call_throws(self.ctx, ['apt-get', 'install', '-y'] + ls)
  
+    def update(self):
+        logger.info('Updating package list...')
+        call_throws(self.ctx, ['apt-get', 'update'])
+
      def install_podman(self):
          if self.distro == 'ubuntu':
              logger.info('Setting up repo for podman...')
              self.add_kubic_repo()
-            call_throws(self.ctx, ['apt-get', 'update'])
+            self.update()
  
          logger.info('Attempting podman install...')
          try:
@@ -5745,6 +5912,7 @@ class YumDnf(Packager):
          'centos': ('centos', 'el'),
          'rhel': ('centos', 'el'),
          'scientific': ('centos', 'el'),
+        'rocky': ('centos', 'el'),
          'fedora': ('fedora', 'fc'),
      }
  
@@ -5832,6 +6000,13 @@ class YumDnf(Packager):
                                       self.distro_code)
  
      def add_repo(self):
+        if self.distro_code.startswith('fc'):
+            raise Error('Ceph team does not build Fedora specific packages and therefore cannot add repos for this distro')
+        if self.distro_code == 'el7':
+            if self.stable and self.stable >= 'pacific':
+                raise Error('Ceph does not support pacific or later for this version of this linux distro and therefore cannot add a repo for it')
+            if self.version and self.version.split('.')[0] >= '16':
+                raise Error('Ceph does not support 16.y.z or later for this version of this linux distro and therefore cannot add a repo for it')
          if self.stable or self.version:
              content = ''
              for n, t in {
@@ -5998,12 +6173,16 @@ def command_add_repo(ctx: CephadmContext):
              (x, y, z) = ctx.version.split('.')
          except Exception:
              raise Error('version must be in the form x.y.z (e.g., 15.2.0)')
+    if ctx.release:
+        # 'Pacific' != 'pacific' in this case; convert to lowercase to avoid confusion
+        ctx.release = ctx.release.lower()
  
      pkg = create_packager(ctx, stable=ctx.release,
                            version=ctx.version,
                            branch=ctx.dev,
                            commit=ctx.dev_commit)
      pkg.add_repo()
+    logger.info('Completed adding repo.')
  
  
  def command_rm_repo(ctx: CephadmContext):
@@ -6501,6 +6680,8 @@ class HostFacts():
                      security['description'] = 'AppArmor: Enabled'
                      try:
                          profiles = read_file(['/sys/kernel/security/apparmor/profiles'])
+                        if len(profiles) == 0:
+                            return {}
                      except OSError:
                          pass
                      else:
@@ -6580,16 +6761,6 @@ def command_gather_facts(ctx: CephadmContext):
      host = HostFacts(ctx)
      print(host.dump())
  
-##################################
-
-
-def command_verify_prereqs(ctx: CephadmContext):
-    if ctx.service_type == 'haproxy' or ctx.service_type == 'keepalived':
-        out, err, code = call(
-            ctx, ['sysctl', '-n', 'net.ipv4.ip_nonlocal_bind']
-        )
-        if out.strip() != '1':
-            raise Error('net.ipv4.ip_nonlocal_bind not set to 1')
  
  ##################################
  
@@ -6671,12 +6842,14 @@ class CephadmDaemonHandler(BaseHTTPRequestHandler):
              The token is installed at deployment time and must be provided to
              ensure we only respond to callers who know our token i.e. mgr
              """
+
              def wrapper(self, *args, **kwargs):
                  auth = self.headers.get('Authorization', None)
                  if auth != 'Bearer ' + self.server.token:
                      self.send_error(401)
                      return
                  f(self, *args, **kwargs)
+
              return wrapper
  
      def _help_page(self):
@@ -6950,7 +7123,6 @@ class CephadmDaemon():
          # expects to use
          self.ctx.command = 'inventory --format=json'.split()
          self.ctx.fsid = self.fsid
-        self.ctx.log_output = False
  
          ctr = 0
          exception_encountered = False
@@ -7352,6 +7524,10 @@ def _get_parser():
          '--logrotate-dir',
          default=LOGROTATE_DIR,
          help='location of logrotate configuration files')
+    parser.add_argument(
+        '--sysctl-dir',
+        default=SYSCTL_DIR,
+        help='location of sysctl configuration files')
      parser.add_argument(
          '--unit-dir',
          default=UNIT_DIR,
@@ -7487,6 +7663,10 @@ def _get_parser():
          '--keep-logs',
          action='store_true',
          help='do not remove log files')
+    parser_rm_cluster.add_argument(
+        '--zap-osds',
+        action='store_true',
+        help='zap OSD devices for this cluster')
  
      parser_run = subparsers.add_parser(
          'run', help='run a ceph daemon, in a container, in the foreground')
@@ -7527,9 +7707,18 @@ def _get_parser():
          action='append',
          default=[],
          help='set environment variable')
+    parser_shell.add_argument(
+        '--volume', '-v',
+        action='append',
+        default=[],
+        help='mount a volume')
      parser_shell.add_argument(
          'command', nargs=argparse.REMAINDER,
          help='command (optional)')
+    parser_shell.add_argument(
+        '--no-hosts',
+        action='store_true',
+        help='dont pass /etc/hosts through to the container')
  
      parser_enter = subparsers.add_parser(
          'enter', help='run an interactive shell inside a running daemon container')
@@ -7560,15 +7749,22 @@ def _get_parser():
      parser_ceph_volume.add_argument(
          '--keyring', '-k',
          help='ceph.keyring to pass through to the container')
-    parser_ceph_volume.add_argument(
-        '--log-output',
-        action='store_true',
-        default=True,
-        help='suppress ceph volume output from the log')
      parser_ceph_volume.add_argument(
          'command', nargs=argparse.REMAINDER,
          help='command')
  
+    parser_zap_osds = subparsers.add_parser(
+        'zap-osds', help='zap all OSDs associated with a particular fsid')
+    parser_zap_osds.set_defaults(func=command_zap_osds)
+    parser_zap_osds.add_argument(
+        '--fsid',
+        required=True,
+        help='cluster FSID')
+    parser_zap_osds.add_argument(
+        '--force',
+        action='store_true',
+        help='proceed, even though this may destroy valuable data')
+
      parser_unit = subparsers.add_parser(
          'unit', help="operate on the daemon's systemd unit")
      parser_unit.set_defaults(func=command_unit)
@@ -7633,6 +7829,10 @@ def _get_parser():
      parser_bootstrap.add_argument(
          '--output-pub-ssh-key',
          help="location to write the cluster's public SSH key")
+    parser_bootstrap.add_argument(
+        '--skip-admin-label',
+        action='store_true',
+        help='do not create admin label for ceph.conf and client.admin keyring distribution')
      parser_bootstrap.add_argument(
          '--skip-ssh',
          action='store_true',
@@ -7764,6 +7964,10 @@ def _get_parser():
      parser_bootstrap.add_argument(
          '--cluster-network',
          help='subnet to use for cluster replication, recovery and heartbeats (in CIDR notation network/mask)')
+    parser_bootstrap.add_argument(
+        '--single-host-defaults',
+        action='store_true',
+        help='adjust configuration defaults to suit a single-host cluster')
  
      parser_deploy = subparsers.add_parser(
          'deploy', help='deploy a daemon')
@@ -7929,15 +8133,6 @@ def _get_parser():
          help='Maintenance action - enter maintenance, or exit maintenance')
      parser_maintenance.set_defaults(func=command_maintenance)
  
-    parser_verify_prereqs = subparsers.add_parser(
-        'verify-prereqs',
-        help='verify system prerequisites for a given service are met on this host')
-    parser_verify_prereqs.set_defaults(func=command_verify_prereqs)
-    parser_verify_prereqs.add_argument(
-        '--daemon-type',
-        required=True,
-        help='service type of service to whose prereqs will be checked')
-
      return parser
  
  
@@ -7962,18 +8157,15 @@ def _parse_args(av):
      return args
  
  
-def cephadm_init_ctx(args: List[str]) -> Optional[CephadmContext]:
-
+def cephadm_init_ctx(args: List[str]) -> CephadmContext:
      ctx = CephadmContext()
      ctx.set_args(_parse_args(args))
      return ctx
  
  
-def cephadm_init(args: List[str]) -> Optional[CephadmContext]:
-
+def cephadm_init(args: List[str]) -> CephadmContext:
      global logger
      ctx = cephadm_init_ctx(args)
-    assert ctx is not None
  
      # Logger configuration
      if not os.path.exists(LOG_DIR):
@@ -7981,15 +8173,23 @@ def cephadm_init(args: List[str]) -> Optional[CephadmContext]:
      dictConfig(logging_config)
      logger = logging.getLogger()
  
+    if not os.path.exists(ctx.logrotate_dir + '/cephadm'):
+        with open(ctx.logrotate_dir + '/cephadm', 'w') as f:
+            f.write("""# created by cephadm
+/var/log/ceph/cephadm.log {
+    rotate 7
+    daily
+    compress
+    missingok
+    notifempty
+}
+""")
+
      if ctx.verbose:
          for handler in logger.handlers:
              if handler.name == 'console':
                  handler.setLevel(logging.DEBUG)
  
-    if not ctx.has_function():
-        sys.stderr.write('No command specified; pass -h or --help for usage\n')
-        return None
-
      return ctx
  
  
@@ -8004,14 +8204,15 @@ def main():
      av = sys.argv[1:]
  
      ctx = cephadm_init(av)
-    if not ctx:  # error, exit
+    if not ctx.has_function():
+        sys.stderr.write('No command specified; pass -h or --help for usage\n')
          sys.exit(1)
  
      try:
          # podman or docker?
          ctx.container_engine = find_container_engine(ctx)
          if ctx.func not in \
-                [command_check_host, command_prepare_host, command_add_repo]:
+                [command_check_host, command_prepare_host, command_add_repo, command_install]:
              check_container_engine(ctx)
          # command handler
          r = ctx.func(ctx)