]> git.proxmox.com Git - ceph.git/commitdiff
update sources to v12.1.3
authorFabian Grünbichler <f.gruenbichler@proxmox.com>
Mon, 14 Aug 2017 06:44:16 +0000 (08:44 +0200)
committerFabian Grünbichler <f.gruenbichler@proxmox.com>
Mon, 14 Aug 2017 06:44:16 +0000 (08:44 +0200)
503 files changed:
ceph/CMakeLists.txt
ceph/PendingReleaseNotes
ceph/alpine/APKBUILD
ceph/ceph.spec
ceph/ceph.spec.in
ceph/cmake/modules/Distutils.cmake
ceph/debian/ceph-base.dirs
ceph/debian/ceph-osd.install
ceph/debian/changelog
ceph/debian/rules
ceph/doc/cephfs/client-auth.rst
ceph/doc/dev/index.rst
ceph/doc/dev/logging.rst [new file with mode: 0644]
ceph/doc/index.rst
ceph/doc/install/manual-deployment.rst
ceph/doc/install/manual-freebsd-deployment.rst
ceph/doc/man/8/CMakeLists.txt
ceph/doc/man/8/rados.rst
ceph/doc/man/8/radosgw-admin.rst
ceph/doc/man/8/rbd-ggate.rst [new file with mode: 0644]
ceph/doc/rados/configuration/bluestore-config-ref.rst [new file with mode: 0644]
ceph/doc/rados/configuration/filesystem-recommendations.rst [deleted file]
ceph/doc/rados/configuration/index.rst
ceph/doc/rados/configuration/osd-config-ref.rst
ceph/doc/rados/configuration/storage-devices.rst [new file with mode: 0644]
ceph/doc/rados/index.rst
ceph/doc/rados/operations/crush-map.rst
ceph/doc/rados/operations/health-checks.rst
ceph/doc/rados/operations/pools.rst
ceph/doc/rados/operations/user-management.rst
ceph/doc/radosgw/adminops.rst
ceph/doc/rbd/api/index.rst [new file with mode: 0644]
ceph/doc/rbd/api/librbdpy.rst [new file with mode: 0644]
ceph/doc/rbd/index.rst [new file with mode: 0644]
ceph/doc/rbd/librbdpy.rst [deleted file]
ceph/doc/rbd/man/index.rst [new file with mode: 0644]
ceph/doc/rbd/rbd-config-ref.rst
ceph/doc/rbd/rbd-mirroring.rst
ceph/doc/rbd/rbd.rst [deleted file]
ceph/doc/release-notes.rst
ceph/doc/start/quick-ceph-deploy.rst
ceph/doc/start/quick-rbd.rst
ceph/qa/cephfs/overrides/whitelist_health.yaml [new file with mode: 0644]
ceph/qa/cephfs/overrides/whitelist_wrongly_marked_down.yaml
ceph/qa/clusters/fixed-2.yaml
ceph/qa/clusters/fixed-3.yaml
ceph/qa/erasure-code/ec-rados-plugin=jerasure-k=4-m=2.yaml [new file with mode: 0644]
ceph/qa/standalone/crush/crush-classes.sh
ceph/qa/standalone/osd/osd-fast-mark-down.sh
ceph/qa/suites/big/rados-thrash/openstack.yaml [new file with mode: 0644]
ceph/qa/suites/ceph-disk/basic/tasks/ceph-disk.yaml
ceph/qa/suites/fs/32bits/overrides/whitelist_health.yaml [new symlink]
ceph/qa/suites/fs/basic_functional/overrides/whitelist_health.yaml [changed from file to symlink]
ceph/qa/suites/fs/basic_functional/tasks/auto-repair.yaml
ceph/qa/suites/fs/basic_functional/tasks/client-limits.yaml
ceph/qa/suites/fs/basic_functional/tasks/journal-repair.yaml
ceph/qa/suites/fs/basic_workload/overrides/whitelist_health.yaml [new symlink]
ceph/qa/suites/fs/multiclient/overrides/whitelist_health.yaml [new symlink]
ceph/qa/suites/fs/multifs/overrides/whitelist_health.yaml [new symlink]
ceph/qa/suites/fs/multifs/tasks/failover.yaml
ceph/qa/suites/fs/permission/overrides/whitelist_health.yaml [new symlink]
ceph/qa/suites/fs/snaps/overrides/whitelist_health.yaml [new symlink]
ceph/qa/suites/fs/thrash/overrides/whitelist_health.yaml [changed from file to symlink]
ceph/qa/suites/fs/traceless/overrides/whitelist_health.yaml [new symlink]
ceph/qa/suites/fs/verify/overrides/whitelist_health.yaml [new symlink]
ceph/qa/suites/kcephfs/recovery/tasks/journal-repair.yaml
ceph/qa/suites/powercycle/osd/tasks/rados_api_tests.yaml
ceph/qa/suites/powercycle/osd/thrashosds-health.yaml [new symlink]
ceph/qa/suites/powercycle/osd/whitelist_health.yaml
ceph/qa/suites/rados/basic/clusters/openstack.yaml
ceph/qa/suites/rados/basic/tasks/rados_python.yaml
ceph/qa/suites/rados/basic/tasks/rados_stress_watch.yaml
ceph/qa/suites/rados/basic/tasks/rados_workunit_loadgen_big.yaml
ceph/qa/suites/rados/basic/tasks/rados_workunit_loadgen_mix.yaml
ceph/qa/suites/rados/basic/tasks/rados_workunit_loadgen_mostlyread.yaml
ceph/qa/suites/rados/mgr/tasks/failover.yaml
ceph/qa/suites/rados/monthrash/ceph.yaml
ceph/qa/suites/rados/monthrash/thrashers/force-sync-many.yaml
ceph/qa/suites/rados/monthrash/thrashers/many.yaml
ceph/qa/suites/rados/monthrash/thrashers/one.yaml
ceph/qa/suites/rados/monthrash/thrashers/sync-many.yaml
ceph/qa/suites/rados/monthrash/thrashers/sync.yaml
ceph/qa/suites/rados/monthrash/workloads/pool-create-delete.yaml
ceph/qa/suites/rados/monthrash/workloads/rados_5925.yaml
ceph/qa/suites/rados/monthrash/workloads/rados_api_tests.yaml
ceph/qa/suites/rados/monthrash/workloads/rados_mon_workunits.yaml
ceph/qa/suites/rados/multimon/tasks/mon_recovery.yaml
ceph/qa/suites/rados/objectstore/ceph_objectstore_tool.yaml
ceph/qa/suites/rados/rest/mgr-restful.yaml
ceph/qa/suites/rados/singleton-nomsgr/all/cache-fs-trunc.yaml
ceph/qa/suites/rados/singleton-nomsgr/all/export-after-evict.yaml
ceph/qa/suites/rados/singleton-nomsgr/all/full-tiering.yaml
ceph/qa/suites/rados/singleton-nomsgr/all/health-warnings.yaml
ceph/qa/suites/rados/singleton-nomsgr/all/multi-backfill-reject.yaml
ceph/qa/suites/rados/singleton-nomsgr/all/valgrind-leaks.yaml
ceph/qa/suites/rados/singleton/all/divergent_priors.yaml
ceph/qa/suites/rados/singleton/all/divergent_priors2.yaml
ceph/qa/suites/rados/singleton/all/dump-stuck.yaml
ceph/qa/suites/rados/singleton/all/ec-lost-unfound.yaml
ceph/qa/suites/rados/singleton/all/lost-unfound-delete.yaml
ceph/qa/suites/rados/singleton/all/lost-unfound.yaml
ceph/qa/suites/rados/singleton/all/osd-backfill.yaml
ceph/qa/suites/rados/singleton/all/osd-recovery-incomplete.yaml
ceph/qa/suites/rados/singleton/all/osd-recovery.yaml
ceph/qa/suites/rados/singleton/all/peer.yaml
ceph/qa/suites/rados/singleton/all/radostool.yaml
ceph/qa/suites/rados/singleton/all/rebuild-mondb.yaml
ceph/qa/suites/rados/singleton/all/reg11184.yaml
ceph/qa/suites/rados/singleton/all/resolve_stuck_peering.yaml
ceph/qa/suites/rados/singleton/all/rest-api.yaml
ceph/qa/suites/rados/singleton/all/test_envlibrados_for_rocksdb.yaml
ceph/qa/suites/rados/singleton/all/watch-notify-same-primary.yaml
ceph/qa/suites/rados/thrash-erasure-code-big/cluster/12-osds.yaml
ceph/qa/suites/rados/thrash-erasure-code-big/cluster/openstack.yaml
ceph/qa/suites/rados/thrash-erasure-code-big/workloads/ec-rados-plugin=jerasure-k=4-m=2.yaml [new symlink]
ceph/qa/suites/rados/thrash-erasure-code/thrashers/mapgap.yaml [deleted file]
ceph/qa/suites/rados/thrash/clusters/openstack.yaml
ceph/qa/suites/rados/thrash/d-require-luminous/at-end.yaml
ceph/qa/suites/rados/thrash/workloads/cache-agent-big.yaml
ceph/qa/suites/rados/thrash/workloads/rados_api_tests.yaml
ceph/qa/suites/rados/upgrade/jewel-x-singleton/6-finish-upgrade.yaml
ceph/qa/suites/rados/verify/clusters/openstack.yaml
ceph/qa/suites/rados/verify/tasks/mon_recovery.yaml
ceph/qa/suites/rados/verify/tasks/rados_api_tests.yaml
ceph/qa/suites/rbd/basic/cachepool/small.yaml
ceph/qa/suites/rbd/basic/tasks/rbd_api_tests_old_format.yaml
ceph/qa/suites/rbd/basic/tasks/rbd_python_api_tests_old_format.yaml
ceph/qa/suites/rbd/cli/pool/ec-data-pool.yaml
ceph/qa/suites/rbd/cli/pool/small-cache-pool.yaml
ceph/qa/suites/rbd/librbd/clusters/openstack.yaml
ceph/qa/suites/rbd/librbd/pool/ec-data-pool.yaml
ceph/qa/suites/rbd/librbd/pool/small-cache-pool.yaml
ceph/qa/suites/rbd/librbd/workloads/c_api_tests.yaml
ceph/qa/suites/rbd/librbd/workloads/c_api_tests_with_defaults.yaml
ceph/qa/suites/rbd/librbd/workloads/c_api_tests_with_journaling.yaml
ceph/qa/suites/rbd/openstack/base/install.yaml
ceph/qa/suites/rbd/qemu/clusters/openstack.yaml
ceph/qa/suites/rbd/qemu/pool/ec-cache-pool.yaml
ceph/qa/suites/rbd/qemu/pool/small-cache-pool.yaml
ceph/qa/suites/rbd/singleton-bluestore/% [new file with mode: 0644]
ceph/qa/suites/rbd/singleton-bluestore/all/issue-20295.yaml [new file with mode: 0644]
ceph/qa/suites/rbd/singleton-bluestore/objectstore/bluestore-comp.yaml [new symlink]
ceph/qa/suites/rbd/singleton-bluestore/objectstore/bluestore.yaml [new symlink]
ceph/qa/suites/rbd/singleton-bluestore/openstack.yaml [new file with mode: 0644]
ceph/qa/suites/rbd/singleton/all/rbd_mirror.yaml
ceph/qa/suites/rbd/thrash/clusters/openstack.yaml
ceph/qa/suites/rbd/thrash/thrashers/cache.yaml
ceph/qa/suites/rbd/thrash/workloads/rbd_api_tests.yaml
ceph/qa/suites/rbd/thrash/workloads/rbd_api_tests_copy_on_read.yaml
ceph/qa/suites/rbd/thrash/workloads/rbd_api_tests_journaling.yaml
ceph/qa/suites/rbd/thrash/workloads/rbd_api_tests_no_locking.yaml
ceph/qa/suites/rbd/valgrind/workloads/c_api_tests.yaml
ceph/qa/suites/rbd/valgrind/workloads/c_api_tests_with_defaults.yaml
ceph/qa/suites/rbd/valgrind/workloads/c_api_tests_with_journaling.yaml
ceph/qa/suites/rbd/valgrind/workloads/rbd_mirror.yaml
ceph/qa/suites/smoke/basic/clusters/openstack.yaml
ceph/qa/suites/smoke/basic/tasks/mon_thrash.yaml
ceph/qa/suites/upgrade/jewel-x/parallel/0-cluster/start.yaml
ceph/qa/suites/upgrade/jewel-x/parallel/1-jewel-install/jewel.yaml
ceph/qa/suites/upgrade/jewel-x/point-to-point-x/point-to-point-upgrade.yaml
ceph/qa/suites/upgrade/kraken-x/parallel/1-kraken-install/kraken.yaml
ceph/qa/suites/upgrade/kraken-x/stress-split-erasure-code/thrashosds-health.yaml [new symlink]
ceph/qa/suites/upgrade/kraken-x/stress-split/0-cluster/start.yaml
ceph/qa/suites/upgrade/kraken-x/stress-split/thrashosds-health.yaml [new symlink]
ceph/qa/tasks/ceph.py
ceph/qa/tasks/ceph_test_case.py
ceph/qa/tasks/cephfs/test_exports.py
ceph/qa/tasks/cephfs/test_fragment.py
ceph/qa/tasks/cephfs/test_misc.py
ceph/qa/tasks/cephfs/test_volume_client.py
ceph/qa/tasks/s3tests.py
ceph/qa/tasks/thrashosds-health.yaml
ceph/qa/workunits/ceph-disk/ceph-disk.sh
ceph/qa/workunits/cephtool/test.sh
ceph/qa/workunits/mon/crush_ops.sh
ceph/qa/workunits/mon/rbd_snaps_ops.sh
ceph/qa/workunits/rbd/cli_generic.sh
ceph/qa/workunits/rbd/issue-20295.sh [new file with mode: 0755]
ceph/qa/workunits/rbd/rbd-ggate.sh [new file with mode: 0755]
ceph/qa/workunits/rbd/rbd_mirror.sh
ceph/qa/workunits/rbd/rbd_mirror_helpers.sh
ceph/qa/workunits/rbd/test_admin_socket.sh
ceph/run-make-check.sh
ceph/src/.git_version
ceph/src/CMakeLists.txt
ceph/src/ceph-create-keys
ceph/src/ceph-disk/ceph_disk/main.py
ceph/src/ceph-disk/tests/test_main.py
ceph/src/ceph-volume/CMakeLists.txt [new file with mode: 0644]
ceph/src/ceph-volume/MANIFEST.in [new file with mode: 0644]
ceph/src/ceph-volume/bin/ceph-volume [new file with mode: 0755]
ceph/src/ceph-volume/bin/ceph-volume-systemd [new file with mode: 0755]
ceph/src/ceph-volume/ceph_volume/__init__.py [new file with mode: 0644]
ceph/src/ceph-volume/ceph_volume/configuration.py [new file with mode: 0644]
ceph/src/ceph-volume/ceph_volume/decorators.py [new file with mode: 0644]
ceph/src/ceph-volume/ceph_volume/devices/__init__.py [new file with mode: 0644]
ceph/src/ceph-volume/ceph_volume/devices/lvm/__init__.py [new file with mode: 0644]
ceph/src/ceph-volume/ceph_volume/devices/lvm/activate.py [new file with mode: 0644]
ceph/src/ceph-volume/ceph_volume/devices/lvm/api.py [new file with mode: 0644]
ceph/src/ceph-volume/ceph_volume/devices/lvm/common.py [new file with mode: 0644]
ceph/src/ceph-volume/ceph_volume/devices/lvm/create.py [new file with mode: 0644]
ceph/src/ceph-volume/ceph_volume/devices/lvm/main.py [new file with mode: 0644]
ceph/src/ceph-volume/ceph_volume/devices/lvm/prepare.py [new file with mode: 0644]
ceph/src/ceph-volume/ceph_volume/devices/lvm/trigger.py [new file with mode: 0644]
ceph/src/ceph-volume/ceph_volume/exceptions.py [new file with mode: 0644]
ceph/src/ceph-volume/ceph_volume/log.py [new file with mode: 0644]
ceph/src/ceph-volume/ceph_volume/main.py [new file with mode: 0644]
ceph/src/ceph-volume/ceph_volume/process.py [new file with mode: 0644]
ceph/src/ceph-volume/ceph_volume/systemd/__init__.py [new file with mode: 0644]
ceph/src/ceph-volume/ceph_volume/systemd/main.py [new file with mode: 0644]
ceph/src/ceph-volume/ceph_volume/systemd/systemctl.py [new file with mode: 0644]
ceph/src/ceph-volume/ceph_volume/terminal.py [new file with mode: 0644]
ceph/src/ceph-volume/ceph_volume/tests/__init__.py [new file with mode: 0644]
ceph/src/ceph-volume/ceph_volume/tests/conftest.py [new file with mode: 0644]
ceph/src/ceph-volume/ceph_volume/tests/devices/__init__.py [new file with mode: 0644]
ceph/src/ceph-volume/ceph_volume/tests/devices/lvm/__init__.py [new file with mode: 0644]
ceph/src/ceph-volume/ceph_volume/tests/devices/lvm/test_api.py [new file with mode: 0644]
ceph/src/ceph-volume/ceph_volume/tests/devices/lvm/test_prepare.py [new file with mode: 0644]
ceph/src/ceph-volume/ceph_volume/tests/devices/lvm/test_trigger.py [new file with mode: 0644]
ceph/src/ceph-volume/ceph_volume/tests/functional/Vagrantfile [new file with mode: 0644]
ceph/src/ceph-volume/ceph_volume/tests/functional/centos7/create/Vagrantfile [new symlink]
ceph/src/ceph-volume/ceph_volume/tests/functional/centos7/create/group_vars/all [new file with mode: 0644]
ceph/src/ceph-volume/ceph_volume/tests/functional/centos7/create/hosts [new file with mode: 0644]
ceph/src/ceph-volume/ceph_volume/tests/functional/centos7/create/vagrant_variables.yml [new file with mode: 0644]
ceph/src/ceph-volume/ceph_volume/tests/functional/scripts/generate_ssh_config.sh [new file with mode: 0644]
ceph/src/ceph-volume/ceph_volume/tests/functional/tox.ini [new file with mode: 0644]
ceph/src/ceph-volume/ceph_volume/tests/functional/xenial/create/Vagrantfile [new symlink]
ceph/src/ceph-volume/ceph_volume/tests/functional/xenial/create/group_vars/all [new file with mode: 0644]
ceph/src/ceph-volume/ceph_volume/tests/functional/xenial/create/hosts [new file with mode: 0644]
ceph/src/ceph-volume/ceph_volume/tests/functional/xenial/create/vagrant_variables.yml [new file with mode: 0644]
ceph/src/ceph-volume/ceph_volume/tests/systemd/test_main.py [new file with mode: 0644]
ceph/src/ceph-volume/ceph_volume/tests/test_configuration.py [new file with mode: 0644]
ceph/src/ceph-volume/ceph_volume/tests/test_decorators.py [new file with mode: 0644]
ceph/src/ceph-volume/ceph_volume/tests/test_main.py [new file with mode: 0644]
ceph/src/ceph-volume/ceph_volume/tests/test_terminal.py [new file with mode: 0644]
ceph/src/ceph-volume/ceph_volume/tests/util/test_system.py [new file with mode: 0644]
ceph/src/ceph-volume/ceph_volume/util/__init__.py [new file with mode: 0644]
ceph/src/ceph-volume/ceph_volume/util/constants.py [new file with mode: 0644]
ceph/src/ceph-volume/ceph_volume/util/prepare.py [new file with mode: 0644]
ceph/src/ceph-volume/ceph_volume/util/system.py [new file with mode: 0644]
ceph/src/ceph-volume/setup.py [new file with mode: 0644]
ceph/src/ceph-volume/tox.ini [new file with mode: 0644]
ceph/src/ceph.in
ceph/src/ceph_osd.cc
ceph/src/client/Client.cc
ceph/src/client/Client.h
ceph/src/cls/lua/cls_lua.cc
ceph/src/common/backport14.h
ceph/src/common/buffer.cc
ceph/src/common/cmdparse.cc
ceph/src/common/mClockPriorityQueue.h
ceph/src/common/options.cc
ceph/src/crush/CrushCompiler.cc
ceph/src/crush/CrushCompiler.h
ceph/src/crush/CrushTreeDumper.h
ceph/src/crush/CrushWrapper.cc
ceph/src/crush/CrushWrapper.h
ceph/src/dmclock/CMakeLists.txt
ceph/src/dmclock/cmake/modules/BuildGTest.cmake [new file with mode: 0644]
ceph/src/dmclock/sim/src/sim_client.h
ceph/src/dmclock/sim/src/sim_server.h
ceph/src/dmclock/sim/src/ssched/ssched_server.h
ceph/src/dmclock/sim/src/test_dmclock_main.cc
ceph/src/dmclock/sim/src/test_ssched_main.cc
ceph/src/dmclock/src/CMakeLists.txt
ceph/src/dmclock/src/dmclock_client.h
ceph/src/dmclock/src/dmclock_server.h
ceph/src/dmclock/src/dmclock_util.h
ceph/src/dmclock/test/CMakeLists.txt
ceph/src/dmclock/test/dmcPrCtl.h [new file with mode: 0644]
ceph/src/dmclock/test/dmtest-config.h.in [new file with mode: 0644]
ceph/src/dmclock/test/test_dmclock_server.cc
ceph/src/include/cephfs/libcephfs.h
ceph/src/include/fs_types.h
ceph/src/include/rbd/librbd.h
ceph/src/include/rbd/librbd.hpp
ceph/src/java/java/com/ceph/fs/CephMount.java
ceph/src/java/native/libcephfs_jni.cc
ceph/src/libcephfs.cc
ceph/src/librados/RadosClient.cc
ceph/src/libradosstriper/RadosStriperImpl.cc
ceph/src/librbd/CMakeLists.txt
ceph/src/librbd/Operations.cc
ceph/src/librbd/Utils.h
ceph/src/librbd/api/Group.cc [deleted file]
ceph/src/librbd/api/Group.h [deleted file]
ceph/src/librbd/image/CloneRequest.cc
ceph/src/librbd/librbd.cc
ceph/src/mds/CInode.cc
ceph/src/mds/FSMap.cc
ceph/src/mds/MDCache.cc
ceph/src/mds/MDCache.h
ceph/src/mds/MDLog.cc
ceph/src/mds/MDSDaemon.cc
ceph/src/mds/MDSDaemon.h
ceph/src/mds/MDSMap.cc
ceph/src/mds/MDSMap.h
ceph/src/mds/MDSRank.cc
ceph/src/mds/ScrubStack.cc
ceph/src/mds/Server.cc
ceph/src/mds/Server.h
ceph/src/mds/SessionMap.cc
ceph/src/mds/SessionMap.h
ceph/src/mds/StrayManager.cc
ceph/src/mds/StrayManager.h
ceph/src/messages/MMDSCacheRejoin.h
ceph/src/messages/MMDSTableRequest.h
ceph/src/messages/MMonGetVersion.h
ceph/src/messages/MMonGetVersionReply.h
ceph/src/messages/MMonHealth.h
ceph/src/messages/MMonPaxos.h
ceph/src/messages/MMonProbe.h
ceph/src/messages/MMonSync.h
ceph/src/messages/MOSDAlive.h
ceph/src/messages/MOSDFailure.h
ceph/src/messages/MOSDMap.h
ceph/src/messages/MOSDMarkMeDown.h
ceph/src/messages/MOSDOp.h
ceph/src/messages/MOSDOpReply.h
ceph/src/messages/MOSDPGBackfill.h
ceph/src/messages/MOSDPGCreate.h
ceph/src/messages/MOSDPGInfo.h
ceph/src/messages/MOSDPGLog.h
ceph/src/messages/MOSDPGNotify.h
ceph/src/messages/MOSDPGQuery.h
ceph/src/messages/MOSDPGRemove.h
ceph/src/messages/MOSDPGScan.h
ceph/src/messages/MOSDPGTemp.h
ceph/src/messages/MOSDPGTrim.h
ceph/src/messages/MOSDPing.h
ceph/src/messages/MOSDRepScrub.h
ceph/src/messages/MOSDScrub.h
ceph/src/messages/MOSDSubOp.h
ceph/src/messages/MOSDSubOpReply.h
ceph/src/messages/MPGStats.h
ceph/src/messages/MPoolOp.h
ceph/src/messages/MPoolOpReply.h
ceph/src/messages/MStatfs.h
ceph/src/messages/MTimeCheck.h
ceph/src/mgr/PyModules.cc
ceph/src/mgr/PyModules.h
ceph/src/mgr/PyState.cc
ceph/src/mon/AuthMonitor.cc
ceph/src/mon/Elector.cc
ceph/src/mon/MDSMonitor.cc
ceph/src/mon/MDSMonitor.h
ceph/src/mon/MgrMonitor.cc
ceph/src/mon/MgrMonitor.h
ceph/src/mon/MgrStatMonitor.cc
ceph/src/mon/MonCap.cc
ceph/src/mon/MonCommand.h
ceph/src/mon/MonCommands.h
ceph/src/mon/Monitor.cc
ceph/src/mon/Monitor.h
ceph/src/mon/OSDMonitor.cc
ceph/src/mon/PGMap.cc
ceph/src/mon/PGMap.h
ceph/src/mon/PGMonitorCommands.h [new file with mode: 0644]
ceph/src/mon/PGStatService.h
ceph/src/mon/health_check.h
ceph/src/msg/async/rdma/Infiniband.h
ceph/src/msg/async/rdma/RDMAStack.h
ceph/src/os/ObjectStore.h
ceph/src/os/bluestore/BitmapFreelistManager.cc
ceph/src/os/bluestore/BlueFS.cc
ceph/src/os/bluestore/BlueFS.h
ceph/src/os/bluestore/BlueRocksEnv.cc
ceph/src/os/bluestore/BlueStore.cc
ceph/src/os/bluestore/BlueStore.h
ceph/src/os/filestore/FileStore.cc
ceph/src/os/filestore/FileStore.h
ceph/src/os/filestore/GenericFileStoreBackend.cc
ceph/src/os/filestore/GenericFileStoreBackend.h
ceph/src/osd/OSD.cc
ceph/src/osd/OSD.h
ceph/src/osd/OSDMap.cc
ceph/src/osd/PG.cc
ceph/src/osd/PG.h
ceph/src/osd/PGLog.h
ceph/src/osdc/Objecter.cc
ceph/src/osdc/Objecter.h
ceph/src/pybind/mgr/dashboard/health.html
ceph/src/pybind/rados/rados.pyx
ceph/src/rbd_fuse/CMakeLists.txt
ceph/src/rbd_fuse/rbd-fuse.cc
ceph/src/rgw/CMakeLists.txt
ceph/src/rgw/rgw_admin.cc
ceph/src/rgw/rgw_auth.cc
ceph/src/rgw/rgw_auth_registry.h
ceph/src/rgw/rgw_auth_s3.h
ceph/src/rgw/rgw_bucket.cc
ceph/src/rgw/rgw_bucket.h
ceph/src/rgw/rgw_common.cc
ceph/src/rgw/rgw_common.h
ceph/src/rgw/rgw_crypt.cc
ceph/src/rgw/rgw_es_main.cc
ceph/src/rgw/rgw_iam_policy.cc
ceph/src/rgw/rgw_iam_policy.h
ceph/src/rgw/rgw_op.cc
ceph/src/rgw/rgw_op.h
ceph/src/rgw/rgw_rados.cc
ceph/src/rgw/rgw_rados.h
ceph/src/rgw/rgw_rest.cc
ceph/src/rgw/rgw_rest.h
ceph/src/rgw/rgw_rest_s3.cc
ceph/src/rgw/rgw_rest_s3.h
ceph/src/rgw/rgw_rest_swift.cc
ceph/src/rgw/rgw_rest_user.cc
ceph/src/rgw/rgw_string.cc [new file with mode: 0644]
ceph/src/rgw/rgw_string.h
ceph/src/rgw/rgw_swift_auth.cc
ceph/src/rgw/rgw_swift_auth.h
ceph/src/rgw/rgw_user.cc
ceph/src/rgw/rgw_user.h
ceph/src/test/CMakeLists.txt
ceph/src/test/cli-integration/rbd/formatted-output.t
ceph/src/test/cli/crushtool/arg-order-checks.t
ceph/src/test/cli/rbd/help.t
ceph/src/test/cls_rbd/test_cls_rbd.cc
ceph/src/test/common/CMakeLists.txt
ceph/src/test/common/test_backport14.cc [new file with mode: 0644]
ceph/src/test/crush/CrushWrapper.cc
ceph/src/test/encoding/readable.sh
ceph/src/test/journal/mock/MockJournaler.h
ceph/src/test/librados/aio.cc
ceph/src/test/librados_test_stub/TestMemIoCtxImpl.h
ceph/src/test/librbd/CMakeLists.txt
ceph/src/test/librbd/fsx.cc
ceph/src/test/librbd/test_Groups.cc [deleted file]
ceph/src/test/librbd/test_internal.cc
ceph/src/test/librbd/test_librbd.cc
ceph/src/test/librbd/test_main.cc
ceph/src/test/mon/moncap.cc
ceph/src/test/mon/test-mon-msg.cc
ceph/src/test/os/TestLFNIndex.cc
ceph/src/test/osd/TestPGLog.cc
ceph/src/test/pybind/test_rbd.py
ceph/src/test/rbd-ggate.sh [new file with mode: 0755]
ceph/src/test/rbd_mirror/CMakeLists.txt
ceph/src/test/rbd_mirror/image_replayer/test_mock_BootstrapRequest.cc
ceph/src/test/rbd_mirror/image_replayer/test_mock_CreateImageRequest.cc
ceph/src/test/rbd_mirror/image_replayer/test_mock_GetMirrorImageIdRequest.cc [new file with mode: 0644]
ceph/src/test/rbd_mirror/image_replayer/test_mock_PrepareLocalImageRequest.cc
ceph/src/test/rbd_mirror/image_replayer/test_mock_PrepareRemoteImageRequest.cc [new file with mode: 0644]
ceph/src/test/rbd_mirror/mock/MockSafeTimer.h
ceph/src/test/rbd_mirror/test_ImageDeleter.cc
ceph/src/test/rbd_mirror/test_ImageReplayer.cc
ceph/src/test/rbd_mirror/test_fixture.cc
ceph/src/test/rbd_mirror/test_mock_ImageReplayer.cc
ceph/src/test/rbd_mirror/test_mock_InstanceReplayer.cc
ceph/src/test/rbd_mirror/test_mock_InstanceWatcher.cc
ceph/src/test/rgw/CMakeLists.txt
ceph/src/test/rgw/test_rgw_iam_policy.cc
ceph/src/tools/CMakeLists.txt
ceph/src/tools/ceph_objectstore_tool.cc
ceph/src/tools/cephfs/Dumper.cc
ceph/src/tools/cephfs/JournalScanner.h
ceph/src/tools/rbd/CMakeLists.txt
ceph/src/tools/rbd/action/Ggate.cc [new file with mode: 0644]
ceph/src/tools/rbd/action/Group.cc [deleted file]
ceph/src/tools/rbd/action/ImageMeta.cc
ceph/src/tools/rbd/action/Info.cc
ceph/src/tools/rbd/action/List.cc
ceph/src/tools/rbd/action/Remove.cc
ceph/src/tools/rbd_ggate/CMakeLists.txt [new file with mode: 0644]
ceph/src/tools/rbd_ggate/Driver.cc [new file with mode: 0644]
ceph/src/tools/rbd_ggate/Driver.h [new file with mode: 0644]
ceph/src/tools/rbd_ggate/Request.h [new file with mode: 0644]
ceph/src/tools/rbd_ggate/Server.cc [new file with mode: 0644]
ceph/src/tools/rbd_ggate/Server.h [new file with mode: 0644]
ceph/src/tools/rbd_ggate/Watcher.cc [new file with mode: 0644]
ceph/src/tools/rbd_ggate/Watcher.h [new file with mode: 0644]
ceph/src/tools/rbd_ggate/debug.cc [new file with mode: 0644]
ceph/src/tools/rbd_ggate/debug.h [new file with mode: 0644]
ceph/src/tools/rbd_ggate/ggate_drv.c [new file with mode: 0644]
ceph/src/tools/rbd_ggate/ggate_drv.h [new file with mode: 0644]
ceph/src/tools/rbd_ggate/main.cc [new file with mode: 0644]
ceph/src/tools/rbd_mirror/CMakeLists.txt
ceph/src/tools/rbd_mirror/ImageReplayer.cc
ceph/src/tools/rbd_mirror/ImageReplayer.h
ceph/src/tools/rbd_mirror/InstanceReplayer.cc
ceph/src/tools/rbd_mirror/InstanceReplayer.h
ceph/src/tools/rbd_mirror/InstanceWatcher.cc
ceph/src/tools/rbd_mirror/InstanceWatcher.h
ceph/src/tools/rbd_mirror/PoolReplayer.cc
ceph/src/tools/rbd_mirror/PoolReplayer.h
ceph/src/tools/rbd_mirror/PoolWatcher.h
ceph/src/tools/rbd_mirror/image_replayer/BootstrapRequest.cc
ceph/src/tools/rbd_mirror/image_replayer/BootstrapRequest.h
ceph/src/tools/rbd_mirror/image_replayer/CreateImageRequest.cc
ceph/src/tools/rbd_mirror/image_replayer/CreateImageRequest.h
ceph/src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.cc [new file with mode: 0644]
ceph/src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h [new file with mode: 0644]
ceph/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.cc
ceph/src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.cc [new file with mode: 0644]
ceph/src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.h [new file with mode: 0644]
ceph/src/tools/rbd_mirror/instance_watcher/Types.cc
ceph/src/tools/rbd_mirror/instance_watcher/Types.h
ceph/src/tools/rbd_mirror/types.h
ceph/src/tools/rbd_nbd/rbd-nbd.cc
ceph/src/tracing/librbd.tp
ceph/systemd/CMakeLists.txt
ceph/systemd/ceph-volume@.service [new file with mode: 0644]

index f04d09925a3e4814615cefa32e6e7d7efe785a66..0aaf7dbc68269babc90a660cc298ccbea3a8877a 100644 (file)
@@ -1,7 +1,7 @@
 cmake_minimum_required(VERSION 2.8.11)
 
 project(ceph)
-set(VERSION 12.1.2)
+set(VERSION 12.1.3)
 
 if(POLICY CMP0046)
   # Tweak policies (this one disables "missing" dependency warning)
index d705d58fa6747c8f653ca6a9947890275d8fabed..ac1a79f6d468d6e839d3b4d3de68656276387178 100644 (file)
->= 12.0.0
-------
-* The "journaler allow split entries" config setting has been removed.
-* The 'apply' mode of cephfs-journal-tool has been removed
-* Added new configuration "public bind addr" to support dynamic environments
-  like Kubernetes. When set the Ceph MON daemon could bind locally to an IP
-  address and advertise a different IP address "public addr" on the network.
-* RGW: bucket index resharding now uses the reshard namespace in upgrade scenarios as well;
-  this is a changed behaviour from RC1, where a new pool for reshard was created.
+>= 12.1.2
+---------
+* When running 'df' on a CephFS filesystem comprising exactly one data pool,
+  the result now reflects the file storage space used and available in that
+  data pool (fuse client only).
+* Added new commands "pg force-recovery" and "pg force-backfill". Use them
+  to boost recovery or backfill priority of specified pgs, so they're
+  recovered/backfilled before any other. Note that these commands don't
+  interrupt ongoing recovery/backfill, but merely queue specified pgs
+  before others so they're recovered/backfilled as soon as possible.
+  New commands "pg cancel-force-recovery" and "pg cancel-force-backfill"
+  restore default recovery/backfill priority of previously forced pgs.
 
-12.0.0
-------
-
- * When assigning a network to the public network and not to
-   the cluster network the network specification of the public
-   network will be used for the cluster network as well.
-   In older versions this would lead to cluster services
-   being bound to 0.0.0.0:<port>, thus making the
-   cluster service even more publicly available than the
-   public services. When only specifying a cluster network it
-   will still result in the public services binding to 0.0.0.0.
-
-*  Some variants of the omap_get_keys and omap_get_vals librados
-   functions have been deprecated in favor of omap_get_vals2 and
-   omap_get_keys2.  The new methods include an output argument
-   indicating whether there are additional keys left to fetch.
-   Previously this had to be inferred from the requested key count vs
-   the number of keys returned, but this breaks with new OSD-side
-   limits on the number of keys or bytes that can be returned by a
-   single omap request.  These limits were introduced by kraken but
-   are effectively disabled by default (by setting a very large limit
-   of 1 GB) because users of the newly deprecated interface cannot
-   tell whether they should fetch more keys or not.  In the case of
-   the standalone calls in the C++ interface
-   (IoCtx::get_omap_{keys,vals}), librados has been updated to loop on
-   the client side to provide a correct result via multiple calls to
-   the OSD.  In the case of the methods used for building
-   multi-operation transactions, however, client-side looping is not
-   practical, and the methods have been deprecated.  Note that use of
-   either the IoCtx methods on older librados versions or the
-   deprecated methods on any version of librados will lead to
-   incomplete results if/when the new OSD limits are enabled.
-
-* In previous versions, if a client sent an op to the wrong OSD, the OSD
-  would reply with ENXIO.  The rationale here is that the client or OSD is
-  clearly buggy and we want to surface the error as clearly as possible.
-  We now only send the ENXIO reply if the osd_enxio_on_misdirected_op option
-  is enabled (it's off by default).  This means that a VM using librbd that
-  previously would have gotten an EIO and gone read-only will now see a
-  blocked/hung IO instead.
-
-*  When configuring ceph-fuse mounts in /etc/fstab, a new syntax is
-   available that uses "ceph.<arg>=<val>" in the options column, instead
-   of putting configuration in the device column.  The old style syntax
-   still works.  See the documentation page "Mount CephFS in your
-   file systems table" for details.
-
-12.0.1
-------
-
-* The original librados rados_objects_list_open (C) and objects_begin
-  (C++) object listing API, deprecated in Hammer, has finally been
-  removed.  Users of this interface must update their software to use
-  either the rados_nobjects_list_open (C) and nobjects_begin (C++) API or
-  the new rados_object_list_begin (C) and object_list_begin (C++) API
-  before updating the client-side librados library to Luminous.
-
-  Object enumeration (via any API) with the latest librados version
-  and pre-Hammer OSDs is no longer supported.  Note that no in-tree
-  Ceph services rely on object enumeration via the deprecated APIs, so
-  only external librados users might be affected.
-
-  The newest (and recommended) rados_object_list_begin (C) and
-  object_list_begin (C++) API is only usable on clusters with the
-  SORTBITWISE flag enabled (Jewel and later).  (Note that this flag is
-  required to be set before upgrading beyond Jewel.)
-
-* The rados copy-get-classic operation has been removed since it has not been
-  used by the OSD since before hammer.  It is unlikely any librados user is
-  using this operation explicitly since there is also the more modern copy-get.
-
-* The RGW api for getting object torrent has changed its params from 'get_torrent'
-  to 'torrent' so that it can be compatible with Amazon S3. Now the request for 
-  object torrent is like 'GET /ObjectName?torrent'.
-
-* The configuration option "osd pool erasure code stripe width" has
-  been replaced by "osd pool erasure code stripe unit", and given the
-  ability to be overridden by the erasure code profile setting
-  "stripe_unit". For more details see "Erasure Code Profiles" in the
-  documentation.
-
-* rbd and cephfs can use erasure coding with bluestore. This may be
-  enabled by setting 'allow_ec_overwrites' to 'true' for a pool. Since
-  this relies on bluestore's checksumming to do deep scrubbing,
-  enabling this on a pool stored on filestore is not allowed.
-
-* The 'rados df' JSON output now prints numeric values as numbers instead of
-  strings.
-
-* There was a bug introduced in Jewel (#19119) that broke the mapping behavior
-  when an "out" OSD that still existed in the CRUSH map was removed with 'osd rm'.
-  This could result in 'misdirected op' and other errors.  The bug is now fixed,
-  but the fix itself introduces the same risk because the behavior may vary between
-  clients and OSDs.  To avoid problems, please ensure that all OSDs are removed
-  from the CRUSH map before deleting them.  That is, be sure to do::
-
-     ceph osd crush rm osd.123
-
-  before::
-
-     ceph osd rm osd.123
-
-12.0.2
-------
-
-* The original librados rados_objects_list_open (C) and objects_begin
-  (C++) object listing API, deprecated in Hammer, has finally been
-  removed.  Users of this interface must update their software to use
-  either the rados_nobjects_list_open (C) and nobjects_begin (C++) API or
-  the new rados_object_list_begin (C) and object_list_begin (C++) API
-  before updating the client-side librados library to Luminous.
-
-  Object enumeration (via any API) with the latest librados version
-  and pre-Hammer OSDs is no longer supported.  Note that no in-tree
-  Ceph services rely on object enumeration via the deprecated APIs, so
-  only external librados users might be affected.
-
-  The newest (and recommended) rados_object_list_begin (C) and
-  object_list_begin (C++) API is only usable on clusters with the
-  SORTBITWISE flag enabled (Jewel and later).  (Note that this flag is
-  required to be set before upgrading beyond Jewel.)
-* CephFS clients without the 'p' flag in their authentication capability
-  string will no longer be able to set quotas or any layout fields.  This
-  flag previously only restricted modification of the pool and namespace
-  fields in layouts.
-* CephFS directory fragmentation (large directory support) is enabled
-  by default on new filesystems.  To enable it on existing filesystems
-  use "ceph fs set <fs_name> allow_dirfrags".
-* CephFS will generate a health warning if you have fewer standby daemons
-  than it thinks you wanted.  By default this will be 1 if you ever had
-  a standby, and 0 if you did not.  You can customize this using
-  ``ceph fs set <fs> standby_count_wanted <number>``.  Setting it
-  to zero will effectively disable the health check.
-* The "ceph mds tell ..." command has been removed.  It is superceded
-  by "ceph tell mds.<id> ..."
-
-12.1.0
-------
-
-* The ``mon_osd_max_op_age`` option has been renamed to
-  ``mon_osd_warn_op_age`` (default: 32 seconds), to indicate we
-  generate a warning at this age.  There is also a new
-  ``mon_osd_err_op_age_ratio`` that is a expressed as a multitple of
-  ``mon_osd_warn_op_age`` (default: 128, for roughly 60 minutes) to
-  control when an error is generated.
-
-* The default maximum size for a single RADOS object has been reduced from
-  100GB to 128MB.  The 100GB limit was completely impractical in practice
-  while the 128MB limit is a bit high but not unreasonable.  If you have an
-  application written directly to librados that is using objects larger than
-  128MB you may need to adjust ``osd_max_object_size``.
-
-* The semantics of the 'rados ls' and librados object listing
-  operations have always been a bit confusing in that "whiteout"
-  objects (which logically don't exist and will return ENOENT if you
-  try to access them) are included in the results.  Previously
-  whiteouts only occurred in cache tier pools.  In luminous, logically
-  deleted but snapshotted objects now result in a whiteout object, and
-  as a result they will appear in 'rados ls' results, even though
-  trying to read such an object will result in ENOENT.  The 'rados
-  listsnaps' operation can be used in such a case to enumerate which
-  snapshots are present.
-
-  This may seem a bit strange, but is less strange than having a
-  deleted-but-snapshotted object not appear at all and be completely
-  hidden from librados's ability to enumerate objects.  Future
-  versions of Ceph will likely include an alternative object
-  enumeration interface that makes it more natural and efficient to
-  enumerate all objects along with their snapshot and clone metadata.
-
-* The deprecated 'crush_ruleset' property has finally been removed; please use
-  'crush_rule' instead for the 'osd pool get ...' and 'osd pool set ..' commands.
-
-* The 'osd pool default crush replicated ruleset' option has been
-  removed and replaced by the 'osd pool default crush rule' option.
-  By default it is -1, which means the mon will pick the first type
-  replicated rule in the CRUSH map for replicated pools.  Erasure
-  coded pools have rules that are automatically created for them if they are
-  not specified at pool creation time.
-
-* The `status` ceph-mgr module is enabled by default, and initially provides two
-  commands: `ceph tell mgr osd status` and `ceph tell mgr fs status`.  These
-  are high level colorized views to complement the existing CLI.
-
-12.1.1
-------
-
-* choose_args encoding has been changed to make it architecture-independent.
-  If you deployed Luminous dev releases or 12.1.0 rc release and made use of
-  the CRUSH choose_args feature, you need to remove all choose_args mappings
-  from your CRUSH map before starting the upgrade.
-
-* The 'ceph health' structured output (JSON or XML) no longer contains
-  a 'timechecks' section describing the time sync status.  This
-  information is now available via the 'ceph time-sync-status'
-  command.
-
-* Certain extra fields in the 'ceph health' structured output that
-  used to appear if the mons were low on disk space (which duplicated
-  the information in the normal health warning messages) are now gone.
-
-* The "ceph -w" output no longer contains audit log entries by default.
-  Add a "--watch-channel=audit" or "--watch-channel=*" to see them.
-
-12.1.2
-------
-
-* New "ceph -w" behavior - the "ceph -w" output no longer contains I/O rates,
-  available space, pg info, etc. because these are no longer logged to the
-  central log (which is what "ceph -w" shows). The same information can be
-  obtained by running "ceph pg stat"; alternatively, I/O rates per pool can
-  be determined using "ceph osd pool stats". Although these commands do not
-  self-update like "ceph -w" did, they do have the ability to return formatted
-  output by providing a "--format=<format>" option.
-
-* Pools are now expected to be associated with the application using them.
-  Upon completing the upgrade to Luminous, the cluster will attempt to associate
-  existing pools to known applications (i.e. CephFS, RBD, and RGW). In-use pools
-  that are not associated to an application will generate a health warning. Any
-  unassociated pools can be manually associated using the new
-  "ceph osd pool application enable" command. For more details see
-  "Associate Pool to Application" in the documentation.
-
-* ceph-mgr now has a Zabbix plugin. Using zabbix_sender it sends trapper
-  events to a Zabbix server containing high-level information of the Ceph
-  cluster. This makes it easy to monitor a Ceph cluster's status and send
-  out notifications in case of a malfunction.
-
-* The 'mon_warn_osd_usage_min_max_delta' config option has been
-  removed and the associated health warning has been disabled because
-  it does not address clusters undergoing recovery or CRUSH rules that do
-  not target all devices in the cluster.
-
-* Specifying user authorization capabilities for RBD clients has been
-  simplified. The general syntax for using RBD capability profiles is
-  "mon 'profile rbd' osd 'profile rbd[-read-only][ pool={pool-name}[, ...]]'".
-  For more details see "User Management" in the documentation.
-
-* ``ceph config-key put`` has been deprecated in favor of ``ceph config-key set``.
\ No newline at end of file
index e080499d8caeafa64e47968c21a52d5c3b7d7e08..c3a300c8870785c42a919723bfe30a313c49dc37 100644 (file)
@@ -1,7 +1,7 @@
 # Contributor: John Coyle <dx9err@gmail.com>
 # Maintainer: John Coyle <dx9err@gmail.com>
 pkgname=ceph
-pkgver=12.1.2
+pkgver=12.1.3
 pkgrel=0
 pkgdesc="Ceph is a distributed object store and file system"
 pkgusers="ceph"
@@ -63,7 +63,7 @@ makedepends="
        xmlstarlet
        yasm
 "
-source="ceph-12.1.2.tar.bz2"
+source="ceph-12.1.3.tar.bz2"
 subpackages="
        $pkgname-base
        $pkgname-common
@@ -116,7 +116,7 @@ _sysconfdir=/etc
 _udevrulesdir=/etc/udev/rules.d
 _python_sitelib=/usr/lib/python2.7/site-packages
 
-builddir=$srcdir/ceph-12.1.2
+builddir=$srcdir/ceph-12.1.3
 
 build() {
        export CEPH_BUILD_VIRTUALENV=$builddir
index 993c00cd8430c2cb6dd4494dba95175befcd647c..8473b8ac06758a8d1e5a4bab83fb1661f959d952 100644 (file)
@@ -13,7 +13,7 @@
 # This file is under the GNU Lesser General Public License, version 2.1
 #
 # Please submit bugfixes or comments via http://tracker.ceph.com/
-# 
+#
 %bcond_without ocf
 %bcond_without cephfs_java
 %if 0%{?suse_version}
@@ -61,7 +61,7 @@
 # main package definition
 #################################################################################
 Name:          ceph
-Version:       12.1.2
+Version:       12.1.3
 Release:       0%{?dist}
 %if 0%{?fedora} || 0%{?rhel}
 Epoch:         2
@@ -76,7 +76,7 @@ License:      LGPL-2.1 and CC-BY-SA-1.0 and GPL-2.0 and BSL-1.0 and BSD-3-Clause and
 Group:         System/Filesystems
 %endif
 URL:           http://ceph.com/
-Source0:       http://ceph.com/download/ceph-12.1.2.tar.bz2
+Source0:       http://ceph.com/download/ceph-12.1.3.tar.bz2
 %if 0%{?suse_version}
 %if 0%{?is_opensuse}
 ExclusiveArch:  x86_64 aarch64 ppc64 ppc64le
@@ -172,7 +172,7 @@ BuildRequires:      python-PrettyTable
 BuildRequires: python-Sphinx
 BuildRequires:  rdma-core-devel
 %endif
-%if 0%{?fedora} || 0%{?rhel} 
+%if 0%{?fedora} || 0%{?rhel}
 Requires:      systemd
 BuildRequires:  boost-random
 BuildRequires: btrfs-progs
@@ -772,7 +772,7 @@ python-rbd, python-rgw or python-cephfs instead.
 # common
 #################################################################################
 %prep
-%autosetup -p1 -n ceph-12.1.2
+%autosetup -p1 -n ceph-12.1.3
 
 %build
 %if 0%{with cephfs_java}
@@ -910,6 +910,7 @@ mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/bootstrap-osd
 mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/bootstrap-mds
 mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/bootstrap-rgw
 mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/bootstrap-mgr
+mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/bootstrap-rbd
 
 %if 0%{?suse_version}
 # create __pycache__ directories and their contents
@@ -933,6 +934,8 @@ rm -rf %{buildroot}
 %{_libexecdir}/systemd/system-preset/50-ceph.preset
 %{_sbindir}/ceph-create-keys
 %{_sbindir}/ceph-disk
+%{_sbindir}/ceph-volume
+%{_sbindir}/ceph-volume-systemd
 %{_sbindir}/rcceph
 %dir %{_libexecdir}/ceph
 %{_libexecdir}/ceph/ceph_common.sh
@@ -961,9 +964,13 @@ rm -rf %{buildroot}
 %config %{_sysconfdir}/sysconfig/SuSEfirewall2.d/services/ceph-osd-mds
 %endif
 %{_unitdir}/ceph-disk@.service
+%{_unitdir}/ceph-volume@.service
 %{_unitdir}/ceph.target
 %{python_sitelib}/ceph_detect_init*
 %{python_sitelib}/ceph_disk*
+%dir %{python_sitelib}/ceph_volume
+%{python_sitelib}/ceph_volume/*
+%{python_sitelib}/ceph_volume-*
 %{_mandir}/man8/ceph-deploy.8*
 %{_mandir}/man8/ceph-detect-init.8*
 %{_mandir}/man8/ceph-create-keys.8*
@@ -978,6 +985,7 @@ rm -rf %{buildroot}
 %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/bootstrap-mds
 %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/bootstrap-rgw
 %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/bootstrap-mgr
+%attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/bootstrap-rbd
 
 %post base
 /sbin/ldconfig
@@ -989,6 +997,7 @@ fi
 %endif
 %if 0%{?fedora} || 0%{?rhel}
 %systemd_post ceph-disk@\*.service ceph.target
+%systemd_post ceph-volume@\*.service ceph.target
 %endif
 if [ $1 -eq 1 ] ; then
 /usr/bin/systemctl start ceph.target >/dev/null 2>&1 || :
@@ -1000,6 +1009,7 @@ fi
 %endif
 %if 0%{?fedora} || 0%{?rhel}
 %systemd_preun ceph-disk@\*.service ceph.target
+%systemd_preun ceph-volume@\*.service ceph.target
 %endif
 
 %postun base
@@ -1020,7 +1030,7 @@ if [ $FIRST_ARG -ge 1 ] ; then
     source $SYSCONF_CEPH
   fi
   if [ "X$CEPH_AUTO_RESTART_ON_UPGRADE" = "Xyes" ] ; then
-    /usr/bin/systemctl try-restart ceph-disk@\*.service > /dev/null 2>&1 || :
+    /usr/bin/systemctl try-restart ceph-disk@\*.service ceph-volume@\*.service > /dev/null 2>&1 || :
   fi
 fi
 
index 93aabf8279d6e6c7ae44e75d4928c7c9a09af3fe..66babf6c21be4f4ea4f4cefc37ce4e52c63ffc13 100644 (file)
@@ -13,7 +13,7 @@
 # This file is under the GNU Lesser General Public License, version 2.1
 #
 # Please submit bugfixes or comments via http://tracker.ceph.com/
-# 
+#
 %bcond_without ocf
 %bcond_without cephfs_java
 %if 0%{?suse_version}
@@ -172,7 +172,7 @@ BuildRequires:      python-PrettyTable
 BuildRequires: python-Sphinx
 BuildRequires:  rdma-core-devel
 %endif
-%if 0%{?fedora} || 0%{?rhel} 
+%if 0%{?fedora} || 0%{?rhel}
 Requires:      systemd
 BuildRequires:  boost-random
 BuildRequires: btrfs-progs
@@ -910,6 +910,7 @@ mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/bootstrap-osd
 mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/bootstrap-mds
 mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/bootstrap-rgw
 mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/bootstrap-mgr
+mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/bootstrap-rbd
 
 %if 0%{?suse_version}
 # create __pycache__ directories and their contents
@@ -933,6 +934,8 @@ rm -rf %{buildroot}
 %{_libexecdir}/systemd/system-preset/50-ceph.preset
 %{_sbindir}/ceph-create-keys
 %{_sbindir}/ceph-disk
+%{_sbindir}/ceph-volume
+%{_sbindir}/ceph-volume-systemd
 %{_sbindir}/rcceph
 %dir %{_libexecdir}/ceph
 %{_libexecdir}/ceph/ceph_common.sh
@@ -961,9 +964,13 @@ rm -rf %{buildroot}
 %config %{_sysconfdir}/sysconfig/SuSEfirewall2.d/services/ceph-osd-mds
 %endif
 %{_unitdir}/ceph-disk@.service
+%{_unitdir}/ceph-volume@.service
 %{_unitdir}/ceph.target
 %{python_sitelib}/ceph_detect_init*
 %{python_sitelib}/ceph_disk*
+%dir %{python_sitelib}/ceph_volume
+%{python_sitelib}/ceph_volume/*
+%{python_sitelib}/ceph_volume-*
 %{_mandir}/man8/ceph-deploy.8*
 %{_mandir}/man8/ceph-detect-init.8*
 %{_mandir}/man8/ceph-create-keys.8*
@@ -978,6 +985,7 @@ rm -rf %{buildroot}
 %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/bootstrap-mds
 %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/bootstrap-rgw
 %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/bootstrap-mgr
+%attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/bootstrap-rbd
 
 %post base
 /sbin/ldconfig
@@ -989,6 +997,7 @@ fi
 %endif
 %if 0%{?fedora} || 0%{?rhel}
 %systemd_post ceph-disk@\*.service ceph.target
+%systemd_post ceph-volume@\*.service ceph.target
 %endif
 if [ $1 -eq 1 ] ; then
 /usr/bin/systemctl start ceph.target >/dev/null 2>&1 || :
@@ -1000,6 +1009,7 @@ fi
 %endif
 %if 0%{?fedora} || 0%{?rhel}
 %systemd_preun ceph-disk@\*.service ceph.target
+%systemd_preun ceph-volume@\*.service ceph.target
 %endif
 
 %postun base
@@ -1020,7 +1030,7 @@ if [ $FIRST_ARG -ge 1 ] ; then
     source $SYSCONF_CEPH
   fi
   if [ "X$CEPH_AUTO_RESTART_ON_UPGRADE" = "Xyes" ] ; then
-    /usr/bin/systemctl try-restart ceph-disk@\*.service > /dev/null 2>&1 || :
+    /usr/bin/systemctl try-restart ceph-disk@\*.service ceph-volume@\*.service > /dev/null 2>&1 || :
   fi
 fi
 
index d6de8da27e8fc096a7e0f570d1ee82d7cd4eded3..24d1a50654865722fa547e141df6dca5d0b5cd6e 100644 (file)
@@ -1,13 +1,15 @@
 include(CMakeParseArguments)
 
 function(distutils_install_module name)
-  set(py_srcs setup.py README.rst requirements.txt test-requirements.txt ${name})
+  set(py_srcs setup.py README.rst requirements.txt test-requirements.txt bin ${name})
   foreach(src ${py_srcs})
-    list(APPEND py_clone ${CMAKE_CURRENT_BINARY_DIR}/${src})
-    add_custom_command(
-      OUTPUT ${src}
-      DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${src}
-      COMMAND ${CMAKE_COMMAND} -E create_symlink ${CMAKE_CURRENT_SOURCE_DIR}/${src} ${src})
+    if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${src})
+      list(APPEND py_clone ${CMAKE_CURRENT_BINARY_DIR}/${src})
+      add_custom_command(
+        OUTPUT ${src}
+        DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${src}
+        COMMAND ${CMAKE_COMMAND} -E create_symlink ${CMAKE_CURRENT_SOURCE_DIR}/${src} ${src})
+    endif()
   endforeach()
   add_custom_target(${name}-clone ALL
     DEPENDS ${py_clone})
index a60a331caac3bad477298e65bdfd879e6da5a0fe..2ae6860022e038126cfdd67d1f32f1a6c866f46a 100644 (file)
@@ -2,4 +2,5 @@ var/lib/ceph/bootstrap-mds
 var/lib/ceph/bootstrap-mgr
 var/lib/ceph/bootstrap-osd
 var/lib/ceph/bootstrap-rgw
+var/lib/ceph/bootstrap-rbd
 var/lib/ceph/tmp
index b87ec1715b6bc2de9a7f95c91039776ac9db28a2..bda15ad1789011840d5bc99e67ae3d573cdc571e 100644 (file)
@@ -10,6 +10,9 @@ usr/lib/libos_tp.so*
 usr/lib/libosd_tp.so*
 usr/lib/python*/dist-packages/ceph_disk*
 usr/sbin/ceph-disk
+usr/sbin/ceph-volume
+usr/lib/python*/dist-packages/ceph_volume/*
+usr/lib/python*/dist-packages/ceph_volume-*
 usr/share/man/man8/ceph-clsinfo.8
 usr/share/man/man8/ceph-disk.8
 usr/share/man/man8/ceph-osd.8
index 25b6f5830bc4b7a47ff014af64a832d8137643ca..481ec56e4c66272b502ef210d2ada9b4fd869777 100644 (file)
@@ -1,3 +1,9 @@
+ceph (12.1.3-1) stable; urgency=medium
+
+  * New upstream release
+
+ -- Ceph Release Team <ceph-maintainers@ceph.com>  Thu, 10 Aug 2017 19:22:38 +0000
+
 ceph (12.1.2-1) stable; urgency=medium
 
   * New upstream release
index 2e4ec9d22a33ecaa2f689604db0b212bc770fea7..92bc0b5877660b60adfd5aac156d35af382b6a0f 100755 (executable)
@@ -96,10 +96,12 @@ override_dh_installinit:
        install -m0644 systemd/ceph-mon@.service debian/ceph-mon/lib/systemd/system
        install -m0644 systemd/ceph-osd@.service debian/ceph-osd/lib/systemd/system
        install -m0644 systemd/ceph-disk@.service debian/ceph-osd/lib/systemd/system
+       install -m0644 systemd/ceph-volume@.service debian/ceph-osd/lib/systemd/system
        install -m0644 systemd/rbdmap.service debian/ceph-common/lib/systemd/system
        sed -i s./etc/sysconfig/./etc/default/.g debian/ceph-mon/lib/systemd/system/ceph-mon@.service
        sed -i s./etc/sysconfig/./etc/default/.g debian/ceph-osd/lib/systemd/system/ceph-osd@.service
        sed -i s./etc/sysconfig/./etc/default/.g debian/ceph-osd/lib/systemd/system/ceph-disk@.service
+       sed -i s./etc/sysconfig/./etc/default/.g debian/ceph-osd/lib/systemd/system/ceph-volume@.service
        install -m0644 systemd/ceph-mon.target debian/ceph-mon/lib/systemd/system
        install -m0644 systemd/ceph-osd.target debian/ceph-osd/lib/systemd/system
 
index bf9f35584018d93b1c750218ffc6a9c7d9f3deca..fbf694b51b47f661e687b30ab5ad9a3353a10c3a 100644 (file)
@@ -25,19 +25,18 @@ Syntax
 ------
 
 To grant rw access to the specified directory only, we mention the specified
-directory while creating key for a client following the undermentioned syntax. ::
+directory while creating key for a client using the following syntax. ::
 
-./ceph auth get-or-create client.*client_name* mon 'allow r' mds 'allow r, allow rw path=/*specified_directory*' osd 'allow rw pool=data'
+ ceph fs authorize *filesystem_name* client.*client_name* /*specified_directory* rw
 
-for example, to restrict client ``foo`` to writing only in the ``bar`` directory,
-we will use: ::
+for example, to restrict client ``foo`` to writing only in the ``bar`` directory of filesystem ``cephfs``, use ::
 
-./ceph auth get-or-create client.foo mon 'allow r' mds 'allow r, allow rw path=/bar' osd 'allow rw pool=data'
+ ceph fs authorize cephfs client.foo / r /bar rw
 
 To completely restrict the client to the ``bar`` directory, omit the
-unqualified "allow r" clause: ::
+root directory ::
 
-./ceph auth get-or-create client.foo mon 'allow r' mds 'allow rw path=/bar' osd 'allow rw pool=data'
+ ceph fs authorize cephfs client.foo /bar rw
 
 Note that if a client's read access is restricted to a path, they will only
 be able to mount the filesystem when specifying a readable path in the
@@ -47,13 +46,13 @@ mount command (see below).
 See `User Management - Add a User to a Keyring`_. for additional details on user management
 
 To restrict a client to the specfied sub-directory only, we mention the specified
-directory while mounting following the undermentioned syntax. ::
+directory while mounting using the following syntax. ::
 
-./ceph-fuse -n client.*client_name* *mount_path* -r *directory_to_be_mounted*
+ ./ceph-fuse -n client.*client_name* *mount_path* -r *directory_to_be_mounted*
 
 for example, to restrict client ``foo`` to ``mnt/bar`` directory, we will use. ::
 
-./ceph-fuse -n client.foo mnt -r /bar
+ ./ceph-fuse -n client.foo mnt -r /bar
 
 Free space reporting
 --------------------
@@ -74,32 +73,6 @@ If quotas are not enabled, or no quota is set on the sub-directory mounted,
 then the overall usage of the filesystem will be reported irrespective of
 the value of this setting.
 
-OSD restriction
-===============
-
-To prevent clients from writing or reading data to pools other than
-those in use for CephFS, set an OSD authentication capability that
-restricts access to the CephFS data pool(s):
-
-::
-
-    client.0
-        key: AQAz7EVWygILFRAAdIcuJ12opU/JKyfFmxhuaw==
-        caps: [mds] allow rw
-        caps: [mon] allow r
-        caps: [osd] allow rw pool=data1, allow rw pool=data2
-
-.. note::
-
-    Without a corresponding MDS path restriction, the OSD capabilities above do
-    **not** restrict file deletions outside of the ``data1`` and ``data2``
-    pools.
-
-You may also restrict clients from writing data by using 'r' instead of
-'rw' in OSD capabilities.  This does not affect the ability of the client
-to update filesystem metadata for these files, but it will prevent them
-from persistently writing data in a way that would be visible to other clients.
-
 Layout and Quota restriction (the 'p' flag)
 ===========================================
 
index 59813b656596a9bf36097b35eac43b2cd3f05005..b76f2f2fbdafe11f94855ca11158c9924bbde6fc 100644 (file)
@@ -216,6 +216,12 @@ The rest (including the actual backporting) will be taken care of by the
 .. _`tracker issue`: http://tracker.ceph.com/
 .. _`Stable Releases and Backports`: http://tracker.ceph.com/projects/ceph-releases/wiki
 
+Guidance for use of cluster log
+-------------------------------
+
+If your patches emit messages to the Ceph cluster log, please consult
+this guidance: :doc:`/dev/logging`.
+
 
 What is merged where and when ?
 ===============================
diff --git a/ceph/doc/dev/logging.rst b/ceph/doc/dev/logging.rst
new file mode 100644 (file)
index 0000000..9c2a6f3
--- /dev/null
@@ -0,0 +1,106 @@
+
+Use of the cluster log
+======================
+
+(Note: none of this applies to the local "dout" logging.  This is about
+the cluster log that we send through the mon daemons)
+
+Severity
+--------
+
+Use ERR for situations where the cluster cannot do its job for some reason.
+For example: we tried to do a write, but it returned an error, or we tried
+to read something, but it's corrupt so we can't, or we scrubbed a PG but
+the data was inconsistent so we can't recover.
+
+Use WRN for incidents that the cluster can handle, but have some abnormal/negative
+aspect, such as a temporary degradation of service, or an unexpected internal
+value.  For example, a metadata error that can be auto-fixed, or a slow operation.
+
+Use INFO for ordinary cluster operations that do not indicate a fault in
+Ceph.  It is especially important that INFO level messages are clearly
+worded and do not cause confusion or alarm.
+
+Frequency
+---------
+
+It is important that messages of all severities are not excessively
+frequent.  Consumers may be using a rotating log buffer that contains
+messages of all severities, so even DEBUG messages could interfere
+with proper display of the latest INFO messages if the DEBUG messages
+are too frequent.
+
+Remember that if you have a bad state (as opposed to event), that is
+what health checks are for -- do not spam the cluster log to indicate
+a continuing unhealthy state.
+
+Do not emit cluster log messages for events that scale with
+the number of clients or level of activity on the system, or for
+events that occur regularly in normal operation.  For example, it
+would be inappropriate to emit an INFO message about every
+new client that connects (scales with #clients), or to emit an INFO
+message about every CephFS subtree migration (occurs regularly).
+
+Language and formatting
+-----------------------
+
+(Note: these guidelines matter much less for DEBUG-level messages than
+ for INFO and above.  Concentrate your efforts on making INFO/WRN/ERR
+ messages as readable as possible.)
+
+Use the passive voice.  For example, use "Object xyz could not be read", rather
+than "I could not read the object xyz".
+
+Print long/big identifiers, such as inode numbers, as hex, prefixed
+with an 0x so that the user can tell it is hex.  We do this because
+the 0x makes it unambiguous (no equivalent for decimal), and because
+the hex form is more likely to fit on the screen.
+
+Print size quantities as a human readable MB/GB/etc, including the unit
+at the end of the number.  Exception: if you are specifying an offset,
+where precision is essential to the meaning, then you can specify
+the value in bytes (but print it as hex).
+
+Make a good faith effort to fit your message on a single line.  It does
+not have to be guaranteed, but it should at least usually be
+the case.  That means, generally, no printing of lists unless there
+are only a few items in the list.
+
+Use nouns that are meaningful to the user, and defined in the
+documentation.  Common acronyms are OK -- don't waste screen space
+typing "Rados Object Gateway" instead of RGW.  Do not use internal
+class names like "MDCache" or "Objecter".  It is okay to mention
+internal structures if they are the direct subject of the message,
+for example in a corruption, but use plain English.
+Example: instead of "Objecter requests" say "OSD client requests"
+Example: it is okay to mention internal structure in the context
+        of "Corrupt session table" (but don't say "Corrupt SessionTable")
+
+Where possible, describe the consequence for system availability, rather
+than only describing the underlying state.  For example, rather than
+saying "MDS myfs.0 is replaying", say that "myfs is degraded, waiting
+for myfs.0 to finish starting".
+
+While common acronyms are fine, don't randomly truncate words.  It's not
+"dir ino", it's "directory inode".
+
+If you're logging something that "should never happen", i.e. a situation
+where it would be an assertion, but we're helpfully not crashing, then
+make that clear in the language -- this is probably not a situation
+that the user can remediate themselves.
+
+Avoid UNIX/programmer jargon.  Instead of "errno", just say "error" (or
+preferably give something more descriptive than the number!)
+
+Do not mention cluster map epochs unless they are essential to
+the meaning of the message.  For example, "OSDMap epoch 123 is corrupt"
+would be okay (the epoch is the point of the message), but saying "OSD
+123 is down in OSDMap epoch 456" would not be (the osdmap and epoch
+concepts are an implementation detail, the down-ness of the OSD
+is the real message).  Feel free to send additional detail to
+the daemon's local log (via `dout`/`derr`).
+
+If you log a problem that may go away in the future, make sure you
+also log when it goes away.  Whatever priority you logged the original
+message at, log the "going away" message at INFO.
+
index d070c4726183f145414634b2ce6112b424d2da6f..253e2a4f54911bea0db778610562105b512cc533 100644 (file)
@@ -82,7 +82,7 @@ about Ceph, see our `Architecture`_ section.
 
 
 .. _Ceph Object Store: radosgw
-.. _Ceph Block Device: rbd/rbd
+.. _Ceph Block Device: rbd
 .. _Ceph Filesystem: cephfs
 .. _Getting Started: start
 .. _Architecture: architecture
@@ -96,7 +96,7 @@ about Ceph, see our `Architecture`_ section.
    install/index
    rados/index
    cephfs/index
-   rbd/rbd
+   rbd/index
    radosgw/index
    mgr/index
    api/index
index bf5b77f58015a64b92d6667bd64485f83993eda3..2e8bb86729cd6dbda0d744ac24c42f8278cb269c 100644 (file)
@@ -162,7 +162,7 @@ The procedure is as follows:
 #. Generate an administrator keyring, generate a ``client.admin`` user and add
    the user to the keyring. :: 
 
-       sudo ceph-authtool --create-keyring /etc/ceph/ceph.client.admin.keyring --gen-key -n client.admin --set-uid=0 --cap mon 'allow *' --cap osd 'allow *' --cap mds 'allow' --cap mgr 'allow *'
+       sudo ceph-authtool --create-keyring /etc/ceph/ceph.client.admin.keyring --gen-key -n client.admin --set-uid=0 --cap mon 'allow *' --cap osd 'allow *' --cap mds 'allow *' --cap mgr 'allow *'
 
 
 #. Add the ``client.admin`` key to the ``ceph.mon.keyring``. :: 
index bb323e23aa03b8faaa4ad447d93fd00909e01e15..99386aef5db86fc1fa66f3538c97d2b6b3952cbe 100644 (file)
@@ -211,7 +211,7 @@ The procedure is as follows:
 #. Generate an administrator keyring, generate a ``client.admin`` user and add
    the user to the keyring. :: 
 
-       sudo ceph-authtool --create-keyring /etc/ceph/ceph.client.admin.keyring --gen-key -n client.admin --set-uid=0 --cap mon 'allow *' --cap osd 'allow *' --cap mds 'allow' --cap mgr 'allow *'
+       sudo ceph-authtool --create-keyring /etc/ceph/ceph.client.admin.keyring --gen-key -n client.admin --set-uid=0 --cap mon 'allow *' --cap osd 'allow *' --cap mds 'allow *' --cap mgr 'allow *'
 
 
 #. Add the ``client.admin`` key to the ``ceph.mon.keyring``. :: 
index 76bab3a9b35603cdf3f53b684487b44425d476ec..8a2204c713730204df510f18bb27a78d1bef9ef3 100644 (file)
@@ -54,12 +54,17 @@ if(WITH_RBD)
   list(APPEND man_srcs
        ceph-rbdnamer.rst
        rbd-mirror.rst
-       rbd-nbd.rst
        rbd-replay-many.rst
        rbd-replay-prep.rst
        rbd-replay.rst
        rbdmap.rst
        rbd.rst)
+  if(LINUX)
+    list(APPEND man_srcs rbd-nbd.rst)
+  endif()
+  if(FREEBSD)
+    list(APPEND man_srcs rbd-ggate.rst)
+  endif()
 endif()
 
 foreach(man ${man_srcs})
index 07004ec1ea237d195a51369b20df6d9515d115dc..949010537c48731786bfa76fc8cf4c03289c59a0 100644 (file)
@@ -136,7 +136,9 @@ Pool specific commands
   Note: *write* and *seq* must be run on the same host otherwise the
   objects created by *write* will have names that will fail *seq*.
 
-:command:`cleanup`
+:command:`cleanup` [ --run-name *run_name* ] [ --prefix *prefix* ]
+  Clean up a previous benchmark operation.
+  Note: the default run-name is "benchmark_last_metadata"
 
 :command:`listxattr` *name*
   List all extended attributes of an object.
index 0a34df80b78a0e780c7932e2e62589a786c22ec3..ab33f7e8cd77a87f6601fb679b8bf00883d96dfe 100644 (file)
@@ -382,6 +382,10 @@ Options
 
        List of caps (e.g., "usage=read, write; user=read".
 
+.. option:: --compression=<compression-algorithm>
+
+    Placement target compression algorithm (lz4|snappy|zlib|zstd)
+
 .. option:: --yes-i-really-mean-it
 
        Required for certain operations.
diff --git a/ceph/doc/man/8/rbd-ggate.rst b/ceph/doc/man/8/rbd-ggate.rst
new file mode 100644 (file)
index 0000000..67d0c81
--- /dev/null
@@ -0,0 +1,79 @@
+:orphan:
+
+==================================================
+ rbd-ggate -- map rbd images via FreeBSD GEOM Gate
+==================================================
+
+.. program:: rbd-ggate
+
+Synopsis
+========
+
+| **rbd-ggate** [--read-only] [--exclusive] [--device *ggate device*] map *image-spec* | *snap-spec*
+| **rbd-ggate** unmap *ggate device*
+| **rbd-ggate** list
+
+Description
+===========
+
+**rbd-ggate** is a client for RADOS block device (rbd) images. It will
+map an rbd image to a ggate (FreeBSD GEOM Gate class) device, allowing
+access to it as a regular local block device.
+
+Commands
+========
+
+map
+---
+
+Spawn a process responsible for the creation of ggate device and
+forwarding I/O requests between the GEOM Gate kernel subsystem and
+RADOS.
+
+unmap
+-----
+
+Destroy ggate device and terminate the process responsible for it.
+
+list
+----
+
+List mapped ggate devices.
+
+Options
+=======
+
+.. option:: --device *ggate device*
+
+   Specify ggate device path.
+
+.. option:: --read-only
+
+   Map read-only.
+
+.. option:: --exclusive
+
+   Forbid writes by other clients.
+
+Image and snap specs
+====================
+
+| *image-spec* is [*pool-name*]/*image-name*
+| *snap-spec*  is [*pool-name*]/*image-name*\ @\ *snap-name*
+
+The default for *pool-name* is "rbd".  If an image name contains a slash
+character ('/'), *pool-name* is required.
+
+Availability
+============
+
+**rbd-ggate** is part of Ceph, a massively scalable, open-source,
+distributed storage system. Please refer to the Ceph documentation at
+http://ceph.com/docs for more information.
+
+
+See also
+========
+
+:doc:`rbd <rbd>`\(8)
+:doc:`ceph <ceph>`\(8)
diff --git a/ceph/doc/rados/configuration/bluestore-config-ref.rst b/ceph/doc/rados/configuration/bluestore-config-ref.rst
new file mode 100644 (file)
index 0000000..8d8ace6
--- /dev/null
@@ -0,0 +1,297 @@
+==========================
+BlueStore Config Reference
+==========================
+
+Devices
+=======
+
+BlueStore manages either one, two, or (in certain cases) three storage
+devices.
+
+In the simplest case, BlueStore consumes a single (primary) storage
+device.  The storage device is normally partitioned into two parts:
+
+#. A small partition is formatted with XFS and contains basic metadata
+   for the OSD.  This *data directory* includes information about the
+   OSD (its identifier, which cluster it belongs to, and its private
+   keyring).
+
+#. The rest of the device is normally a large partition that is
+   managed directly by BlueStore and contains all of the actual
+   data.  This *primary device* is normally identified by a
+   ``block`` symlink in the data directory.
+
+It is also possible to deploy BlueStore across two additional devices:
+
+* A *WAL device* can be used for BlueStore's internal journal or
+  write-ahead log.  It is identified by the ``block.wal`` symlink in
+  the data directory.  It is only useful to use a WAL device if the
+  device is faster than the primary device (e.g., when it is on an SSD
+  and the primary device is an HDD).
+* A *DB device* can be used for storing BlueStore's internal metadata.
+  BlueStore (or rather, the embedded RocksDB) will put as much
+  metadata as it can on the DB device to improve performance.  If the
+  DB device fills up, metadata will spill back onto the primary device
+  (where it would have been otherwise).  Again, it is only helpful to
+  provision a DB device if it is faster than the primary device.
+
+If there is only a small amount of fast storage available (e.g., less
+than a gigabyte), we recommend using it as a WAL device.  If there is
+more, provisioning a DB device makes more sense.  The BlueStore
+journal will always be placed on the fastest device available, so
+using a DB device will provide the same benefit that the WAL device
+would while *also* allowing additional metadata to be stored there (if
+it will fit).
+
+A single-device BlueStore OSD can be provisioned with::
+
+  ceph-disk prepare --bluestore <device>
+
+To specify a WAL device and/or DB device, ::
+
+  ceph-disk prepare --bluestore <device> --block.wal <wal-device> --block.db <db-device>
+
+Cache size
+==========
+
+The amount of memory consumed by each OSD for BlueStore's cache is
+determined by the ``bluestore_cache_size`` configuration option.  If
+that config option is not set (i.e., remains at 0), there is a
+different default value that is used depending on whether an HDD or
+SSD is used for the primary device (set by the
+``bluestore_cache_size_ssd`` and ``bluestore_cache_size_hdd`` config
+options).
+
+BlueStore and the rest of the Ceph OSD do the best they can currently
+to stick to the budgeted memory.  Note that on top of the configured
+cache size, there is also memory consumed by the OSD itself, and
+generally some overhead due to memory fragmentation and other
+allocator overhead.
+
+The configured cache memory budget can be used in a few different ways:
+
+* Key/Value metadata (i.e., RocksDB's internal cache)
+* BlueStore metadata
+* BlueStore data (i.e., recently read or written object data)
+
+Cache memory usage is governed by the following options:
+``bluestore_cache_meta_ratio``, ``bluestore_cache_kv_ratio``, and
+``bluestore_cache_kv_max``.  The fraction of the cache devoted to data
+is 1.0 minus the meta and kv ratios.  The memory devoted to kv
+metadata (the RocksDB cache) is capped by ``bluestore_cache_kv_max``
+since our testing indicates there are diminishing returns beyond a
+certain point.
+
+``bluestore_cache_size``
+
+:Description: The amount of memory BlueStore will use for its cache.  If zero, ``bluestore_cache_size_hdd`` or ``bluestore_cache_size_ssd`` will be used instead.
+:Type: Integer
+:Required: Yes
+:Default: ``0``
+
+``bluestore_cache_size_hdd``
+
+:Description: The default amount of memory BlueStore will use for its cache when backed by an HDD.
+:Type: Integer
+:Required: Yes
+:Default: ``1 * 1024 * 1024 * 1024`` (1 GB)
+
+``bluestore_cache_size_ssd``
+
+:Description: The default amount of memory BlueStore will use for its cache when backed by an SSD.
+:Type: Integer
+:Required: Yes
+:Default: ``3 * 1024 * 1024 * 1024`` (3 GB)
+
+``bluestore_cache_meta_ratio``
+
+:Description: The ratio of cache devoted to metadata.
+:Type: Floating point
+:Required: Yes
+:Default: ``.01``
+
+``bluestore_cache_kv_ratio``
+
+:Description: The ratio of cache devoted to key/value data (rocksdb).
+:Type: Floating point
+:Required: Yes
+:Default: ``.99``
+
+``bluestore_cache_kv_max``
+
+:Description: The maximum amount of cache devoted to key/value data (rocksdb).
+:Type: Integer
+:Required: Yes
+:Default: ``512 * 1024*1024`` (512 MB)
+
+
+Checksums
+=========
+
+BlueStore checksums all metadata and data written to disk.  Metadata
+checksumming is handled by RocksDB and uses `crc32c`. Data
+checksumming is done by BlueStore and can make use of `crc32c`,
+`xxhash32`, or `xxhash64`.  The default is `crc32c` and should be
+suitable for most purposes.
+
+Full data checksumming does increase the amount of metadata that
+BlueStore must store and manage.  When possible, e.g., when clients
+hint that data is written and read sequentially, BlueStore will
+checksum larger blocks, but in many cases it must store a checksum
+value (usually 4 bytes) for every 4 kilobyte block of data.
+
+It is possible to use a smaller checksum value by truncating the
+checksum to two or one byte, reducing the metadata overhead.  The
+trade-off is that the probability that a random error will not be
+detected is higher with a smaller checksum, going from about one in
+four billion with a 32-bit (4 byte) checksum to one in 65,536 for a
+16-bit (2 byte) checksum or one in 256 for an 8-bit (1 byte) checksum.
+The smaller checksum values can be used by selecting `crc32c_16` or
+`crc32c_8` as the checksum algorithm.
+
+The *checksum algorithm* can be set either via a per-pool
+``csum_type`` property or the global config option.  For example, ::
+
+  ceph osd pool set <pool-name> csum_type <algorithm>
+
+``bluestore_csum_type``
+
+:Description: The default checksum algorithm to use.
+:Type: String
+:Required: Yes
+:Valid Settings: ``none``, ``crc32c``, ``crc32c_16``, ``crc32c_8``, ``xxhash32``, ``xxhash64``
+:Default: ``crc32c``
+
+
+Inline Compression
+==================
+
+BlueStore supports inline compression using `snappy`, `zlib`, or
+`lz4`. Please note that the `lz4` compression plugin is not
+distributed in the official release.
+
+Whether data in BlueStore is compressed is determined by a combination
+of the *compression mode* and any hints associated with a write
+operation.  The modes are:
+
+* **none**: Never compress data.
+* **passive**: Do not compress data unless the write operation has a
+  *compressible* hint set.
+* **aggressive**: Compress data unless the write operation has an
+  *incompressible* hint set.
+* **force**: Try to compress data no matter what.
+
+For more information about the *compressible* and *incompressible* IO
+hints, see `rados_set_alloc_hint <../../api/librados/#rados_set_alloc_hint>`_.
+
+Note that regardless of the mode, if the size of the data chunk is not
+reduced sufficiently it will not be used and the original
+(uncompressed) data will be stored.  For example, if the ``bluestore
+compression required ratio`` is set to ``.7`` then the compressed data
+must be 70% of the size of the original (or smaller).
+
+The *compression mode*, *compression algorithm*, *compression required
+ratio*, *min blob size*, and *max blob size* can be set either via a
+per-pool property or a global config option.  Pool properties can be
+set with::
+
+  ceph osd pool set <pool-name> compression_algorithm <algorithm>
+  ceph osd pool set <pool-name> compression_mode <mode>
+  ceph osd pool set <pool-name> compression_required_ratio <ratio>
+  ceph osd pool set <pool-name> compression_min_blob_size <size>
+  ceph osd pool set <pool-name> compression_max_blob_size <size>
+
+``bluestore compression algorithm``
+
+:Description: The default compressor to use (if any) if the per-pool property
+              ``compression_algorithm`` is not set. Note that zstd is *not*
+              recommended for bluestore due to high CPU overhead when
+              compressing small amounts of data.
+:Type: String
+:Required: No
+:Valid Settings: ``lz4``, ``snappy``, ``zlib``, ``zstd``
+:Default: ``snappy``
+
+``bluestore compression mode``
+
+:Description: The default policy for using compression if the per-pool property
+              ``compression_mode`` is not set. ``none`` means never use
+              compression.  ``passive`` means use compression when
+              `clients hint`_ that data is compressible.  ``aggressive`` means
+              use compression unless clients hint that data is not compressible.
+              ``force`` means use compression under all circumstances even if
+              the clients hint that the data is not compressible.
+:Type: String
+:Required: No
+:Valid Settings: ``none``, ``passive``, ``aggressive``, ``force``
+:Default: ``none``
+
+``bluestore compression required ratio``
+
+:Description: The ratio of the size of the data chunk after
+              compression relative to the original size must be at
+              least this small in order to store the compressed
+              version.
+
+:Type: Floating point
+:Required: No
+:Default: .875
+
+``bluestore compression min blob size``
+
+:Description: Chunks smaller than this are never compressed.
+              The per-pool property ``compression_min_blob_size`` overrides
+              this setting.
+
+:Type: Unsigned Integer
+:Required: No
+:Default: 0
+
+``bluestore compression min blob size hdd``
+
+:Description: Default value of ``bluestore compression min blob size``
+              for rotational media.
+
+:Type: Unsigned Integer
+:Required: No
+:Default: 128K
+
+``bluestore compression min blob size ssd``
+
+:Description: Default value of ``bluestore compression min blob size``
+              for non-rotational (solid state) media.
+
+:Type: Unsigned Integer
+:Required: No
+:Default: 8K
+
+``bluestore compression max blob size``
+
+:Description: Chunks larger than this are broken into smaller blobs of size
+              ``bluestore compression max blob size`` before being compressed.
+              The per-pool property ``compression_max_blob_size`` overrides
+              this setting.
+
+:Type: Unsigned Integer
+:Required: No
+:Default: 0
+
+``bluestore compression max blob size hdd``
+
+:Description: Default value of ``bluestore compression max blob size``
+              for rotational media.
+
+:Type: Unsigned Integer
+:Required: No
+:Default: 512K
+
+``bluestore compression max blob size ssd``
+
+:Description: Default value of ``bluestore compression max blob size``
+              for non-rotational (solid state) media.
+
+:Type: Unsigned Integer
+:Required: No
+:Default: 64K
+
+.. _clients hint: ../../api/librados/#rados_set_alloc_hint
diff --git a/ceph/doc/rados/configuration/filesystem-recommendations.rst b/ceph/doc/rados/configuration/filesystem-recommendations.rst
deleted file mode 100644 (file)
index c967d60..0000000
+++ /dev/null
@@ -1,62 +0,0 @@
-===========================================
- Hard Disk and File System Recommendations
-===========================================
-
-.. index:: hard drive preparation
-
-Hard Drive Prep
-===============
-
-Ceph aims for data safety, which means that when the :term:`Ceph Client`
-receives notice that data was written to a storage drive, that data was actually
-written to the storage drive. For old kernels (<2.6.33), disable the write cache
-if the journal is on a raw drive. Newer kernels should work fine.
-
-Use ``hdparm`` to disable write caching on the hard disk::
-
-       sudo hdparm -W 0 /dev/hda 0
-
-In production environments, we recommend running a :term:`Ceph OSD Daemon` with
-separate drives for the operating system and the data. If you run data and an
-operating system on a single disk, we recommend creating a separate partition
-for your data.
-
-.. index:: filesystems
-
-Filesystems
-===========
-
-Ceph OSD Daemons rely heavily upon the stability and performance of the
-underlying filesystem.
-
-Recommended
------------
-
-We currently recommend ``XFS`` for production deployments.
-
-Not recommended
----------------
-
-We recommand *against* using ``btrfs`` due to the lack of a stable
-version to test against and frequent bugs in the ENOSPC handling.
-
-We recommend *against* using ``ext4`` due to limitations in the size
-of xattrs it can store, and the problems this causes with the way Ceph
-handles long RADOS object names.  Although these issues will generally
-not surface with Ceph clusters using only short object names (e.g., an
-RBD workload that does not include long RBD image names), other users
-like RGW make extensive use of long object names and can break.
-
-Starting with the Jewel release, the ``ceph-osd`` daemon will refuse
-to start if the configured max object name cannot be safely stored on
-``ext4``.  If the cluster is only being used with short object names
-(e.g., RBD only), you can continue using ``ext4`` by setting the
-following configuration option::
-
-  osd max object name len = 256
-  osd max object namespace len = 64
-
-.. note:: This may result in difficult-to-diagnose errors if you try
-          to use RGW or other librados clients that do not properly
-          handle or politely surface any resulting ENAMETOOLONG
-          errors.
index b609b155e4b8d7751595a2cbfb490135b0e7f54a..48b58efb707434a1f21b2b66ef98357671341ac7 100644 (file)
@@ -32,7 +32,7 @@ For general object store configuration, refer to the following:
 .. toctree::
    :maxdepth: 1
 
-   Disks and Filesystems <filesystem-recommendations>
+   Storage devices <storage-devices>
    ceph-conf
 
 
@@ -51,7 +51,8 @@ To optimize the performance of your cluster, refer to the following:
    mon-lookup-dns
    Heartbeat Settings <mon-osd-interaction>
    OSD Settings <osd-config-ref>
-   Filestore Settings <filestore-config-ref>
+   BlueStore Settings <bluestore-config-ref>
+   FileStore Settings <filestore-config-ref>
    Journal Settings <journal-ref>
    Pool, PG & CRUSH Settings <pool-pg-config-ref.rst>
    Messaging Settings <ms-ref>
index 30e679fd2732e1d01e47ac8828642c16e907371b..fae7078930b25c8bc19aa1181b660163ea14a342 100644 (file)
@@ -988,6 +988,15 @@ perform well in a degraded state.
 :Type: Float
 :Default: ``0``
 
+
+``osd recovery sleep hybrid``
+
+:Description: Time in seconds to sleep before next recovery or backfill op
+              when osd data is on HDD and osd journal is on SSD.
+
+:Type: Float
+:Default: ``0.025``
+
 Tiering
 =======
 
diff --git a/ceph/doc/rados/configuration/storage-devices.rst b/ceph/doc/rados/configuration/storage-devices.rst
new file mode 100644 (file)
index 0000000..83c0c9b
--- /dev/null
@@ -0,0 +1,83 @@
+=================
+ Storage Devices
+=================
+
+There are two Ceph daemons that store data on disk:
+
+* **Ceph OSDs** (or Object Storage Daemons) are where most of the
+  data is stored in Ceph.  Generally speaking, each OSD is backed by
+  a single storage device, like a traditional hard disk (HDD) or
+  solid state disk (SSD).  OSDs can also be backed by a combination
+  of devices, like a HDD for most data and an SSD (or partition of an
+  SSD) for some metadata.  The number of OSDs in a cluster is
+  generally a function of how much data will be stored, how big each
+  storage device will be, and the level and type of redundancy
+  (replication or erasure coding).
+* **Ceph Monitor** daemons manage critical cluster state like cluster
+  membership and authentication information.  For smaller clusters a
+  few gigabytes is all that is needed, although for larger clusters
+  the monitor database can reach tens or possibly hundreds of
+  gigabytes.
+
+
+OSD Backends
+============
+
+There are two ways that OSDs can manage the data they store.  Starting
+with the Luminous 12.2.z release, the new default (and recommended) backend is
+*BlueStore*.  Prior to Luminous, the default (and only option) was
+*FileStore*.
+
+BlueStore
+---------
+
+BlueStore is a special-purpose storage backend designed specifically
+for managing data on disk for Ceph OSD workloads.  It is motivated by
+experience supporting and managing OSDs using FileStore over the
+last ten years.  Key BlueStore features include:
+
+* Direct management of storage devices.  BlueStore consumes raw block
+  devices or partitions.  This avoids any intervening layers of
+  abstraction (such as local file systems like XFS) that may limit
+  performance or add complexity.
+* Metadata management with RocksDB.  We embed RocksDB's key/value database
+  in order to manage internal metadata, such as the mapping from object
+  names to block locations on disk.
+* Full data and metadata checksumming.  By default all data and
+  metadata written to BlueStore is protected by one or more
+  checksums.  No data or metadata will be read from disk or returned
+  to the user without being verified.
+* Inline compression.  Data written may be optionally compressed
+  before being written to disk.
+* Multi-device metadata tiering.  BlueStore allows its internal
+  journal (write-ahead log) to be written to a separate, high-speed
+  device (like an SSD, NVMe, or NVDIMM) to increase performance.  If
+  a significant amount of faster storage is available, internal
+  metadata can also be stored on the faster device.
+* Efficient copy-on-write.  RBD and CephFS snapshots rely on a
+  copy-on-write *clone* mechanism that is implemented efficiently in
+  BlueStore.  This results in efficient IO both for regular snapshots
+  and for erasure coded pools (which rely on cloning to implement
+  efficient two-phase commits).
+
+For more information, see :doc:`bluestore-config-ref`.
+
+FileStore
+---------
+
+FileStore is the legacy approach to storing objects in Ceph.  It
+relies on a standard file system (normally XFS) in combination with a
+key/value database (traditionally LevelDB, now RocksDB) for some
+metadata.
+
+FileStore is well-tested and widely used in production but suffers
+from many performance deficiencies due to its overall design and
+reliance on a traditional file system for storing object data.
+
+Although FileStore is generally capable of functioning on most
+POSIX-compatible file systems (including btrfs and ext4), we only
+recommend that XFS be used.  Both btrfs and ext4 have known bugs and
+deficiencies and their use may lead to data loss.  By default all Ceph
+provisioning tools will use XFS.
+
+For more information, see :doc:`filestore-config-ref`.
index 9ff756c97d36b9de8fe07b0c4ea9dd3cec6fc2f0..929bb7efacbcffd0d898deea46ce38b57579311f 100644 (file)
@@ -70,7 +70,7 @@ the Ceph Storage Cluster.
 
        </td></tr></tbody></table>
 
-.. _Ceph Block Devices: ../rbd/rbd
+.. _Ceph Block Devices: ../rbd/
 .. _Ceph Filesystem: ../cephfs/
 .. _Ceph Object Storage: ../radosgw/
 .. _Deployment: ../rados/deployment/
index d07844c351a69dac5258125cf60258266ad00938..05fa4ff691aef6a21590b1e796f3c7286224c31b 100644 (file)
@@ -206,6 +206,43 @@ You can view the contents of the rules with::
 
   ceph osd crush rule dump
 
+Device classes
+--------------
+
+Each device can optionally have a *class* associated with it.  By
+default, OSDs automatically set their class on startup to either
+`hdd`, `ssd`, or `nvme` based on the type of device they are backed
+by.
+
+The device class for one or more OSDs can be explicitly set with::
+
+  ceph osd crush set-device-class <class> <osd-name> [...]
+
+Once a device class is set, it cannot be changed to another class
+until the old class is unset with::
+
+  ceph osd crush rm-device-class <osd-name> [...]
+
+This allows administrators to set device classes without the class
+being changed on OSD restart or by some other script.
+
+A placement rule that targets a specific device class can be created with::
+
+  ceph osd crush rule create-replicated <rule-name> <root> <failure-domain> <class>
+
+A pool can then be changed to use the new rule with::
+
+  ceph osd pool set <pool-name> crush_rule <rule-name>
+
+Device classes are implemented by creating a "shadow" CRUSH hierarchy
+for each device class in use that contains only devices of that class.
+Rules can then distribute data over the shadow hierarchy.  One nice
+thing about this approach is that it is fully backward compatible with
+old Ceph clients.  You can view the CRUSH hierarchy with shadow items
+with::
+
+  ceph osd crush tree --show-shadow
+
 
 Weights sets
 ------------
index 68106352e1749e522c6e7f9e67ccf57c2e6a34e1..6164355798680374f3889d8b079cb1ceaaac7758 100644 (file)
@@ -220,7 +220,7 @@ or delete some existing data to reduce utilization.
 
 
 Data health (pools & placement groups)
-------------------------------
+--------------------------------------
 
 PG_AVAILABILITY
 _______________
@@ -523,23 +523,3 @@ happen if they are misplaced or degraded (see *PG_AVAILABILITY* and
 You can manually initiate a scrub of a clean PG with::
 
   ceph pg deep-scrub <pgid>
-
-CephFS
-------
-
-FS_WITH_FAILED_MDS
-__________________
-
-
-FS_DEGRADED
-___________
-
-
-MDS_INSUFFICIENT_STANDBY
-________________________
-
-
-MDS_DAMAGED
-___________
-
-
index ce0ae9095240240d121473de1b31a393450cb39b..70155937cad0d81ded21f7e88137b69440a9df10 100644 (file)
@@ -275,6 +275,37 @@ To set a value to a pool, execute the following::
        
 You may set values for the following keys: 
 
+.. _compression_algorithm:
+
+``compression_algorithm``
+:Description: Sets inline compression algorithm to use for underlying BlueStore.
+              This setting overrides the `global setting <rados/configuration/bluestore-config-ref/#inline-compression>`_ of ``bluestore compression algorithm``.
+
+:Type: String
+:Valid Settings: ``lz4``, ``snappy``, ``zlib``, ``zstd``
+
+``compression_mode``
+
+:Description: Sets the policy for the inline compression algorithm for underlying BlueStore.
+              This setting overrides the `global setting <rados/configuration/bluestore-config-ref/#inline-compression>`_ of ``bluestore compression mode``.
+
+:Type: String
+:Valid Settings: ``none``, ``passive``, ``aggressive``, ``force``
+
+``compression_min_blob_size``
+
+:Description: Chunks smaller than this are never compressed.
+              This setting overrides the `global setting <rados/configuration/bluestore-config-ref/#inline-compression>`_ of ``bluestore compression min blob *``.
+
+:Type: Unsigned Integer
+
+``compression_max_blob_size``
+
+:Description: Chunks larger than this are broken into smaller blobs of size
+              ``compression_max_blob_size`` before being compressed.
+
+:Type: Unsigned Integer
+
 .. _size:
 
 ``size``
index a4c5884004600a17de2bd5d327931ae4825921d6..8a35a501ab110dfc9ddbac9c566801bd109da869 100644 (file)
@@ -113,10 +113,8 @@ Capability syntax follows the form::
        osd 'allow {access} [pool={pool-name} [namespace={namespace-name}]]'
        osd 'profile {name} [pool={pool-name} [namespace={namespace-name}]]'
 
-- **Metadata Server Caps:** Metadata server capability simply requires ``allow``, 
-  or blank and does not parse anything further. :: 
-
-       mds 'allow'
+- **Metadata Server Caps:** For administrators, use ``allow *``.  For all
+  other users, such as CephFS clients, consult :doc:`/cephfs/client-auth`
 
 
 .. note:: The Ceph Object Gateway daemon (``radosgw``) is a client of the 
index 28c02164b0d84749afd3c3194d4ccaeb5abb15d7..422dd16527a446264c2cbbad123d792b4b2d9f6e 100644 (file)
@@ -321,6 +321,11 @@ generated key is added to the keyring without replacing an existing key pair.
 If ``access-key`` is specified and refers to an existing key owned by the user
 then it will be modified.
 
+.. versionadded:: Luminous
+
+A ``tenant`` may either be specified as a part of uid or as an additional
+request param.
+
 :caps: users=write
 
 Syntax
@@ -342,6 +347,7 @@ Request Parameters
 :Type: String
 :Example: ``foo_user``
 :Required: Yes
+A tenant name may also be specified as a part of ``uid``, using the syntax ``tenant$user``; refer to `Multitenancy`_ for more details.
 
 ``display-name``
 
@@ -408,6 +414,14 @@ Request Parameters
 :Example: False [False]
 :Required: No
 
+.. versionadded:: Jewel
+``tenant``
+
+:Description: The tenant that the user is a part of.
+:Type: string
+:Example: tenant1
+:Required: No
+
 Response Entities
 ~~~~~~~~~~~~~~~~~
 
@@ -418,6 +432,11 @@ If successful, the response contains the user information.
 :Description: A container for the user data information.
 :Type: Container
 
+``tenant``
+:Description: The tenant that the user is a part of.
+:Type: String
+:Parent: ``user``
+
 ``user_id``
 
 :Description: The user id.
@@ -1924,3 +1943,4 @@ Standard Error Responses
 
 .. _Admin Guide: ../admin
 .. _Quota Management: ../admin#quota-management
+.. _Multitenancy: ./multitenancy
diff --git a/ceph/doc/rbd/api/index.rst b/ceph/doc/rbd/api/index.rst
new file mode 100644 (file)
index 0000000..71f6809
--- /dev/null
@@ -0,0 +1,8 @@
+========================
+ Ceph Block Device APIs
+========================
+
+.. toctree::
+   :maxdepth: 2
+
+   librbd (Python) <librbdpy>
diff --git a/ceph/doc/rbd/api/librbdpy.rst b/ceph/doc/rbd/api/librbdpy.rst
new file mode 100644 (file)
index 0000000..fa90331
--- /dev/null
@@ -0,0 +1,82 @@
+================
+ Librbd (Python)
+================
+
+.. highlight:: python
+
+The `rbd` python module provides file-like access to RBD images.
+
+
+Example: Creating and writing to an image
+=========================================
+
+To use `rbd`, you must first connect to RADOS and open an IO
+context::
+
+    cluster = rados.Rados(conffile='my_ceph.conf')
+    cluster.connect()
+    ioctx = cluster.open_ioctx('mypool')
+
+Then you instantiate an :class:`rbd.RBD` object, which you use to create the
+image::
+
+    rbd_inst = rbd.RBD()
+    size = 4 * 1024**3  # 4 GiB
+    rbd_inst.create(ioctx, 'myimage', size)
+
+To perform I/O on the image, you instantiate an :class:`rbd.Image` object::
+
+    image = rbd.Image(ioctx, 'myimage')
+    data = 'foo' * 200
+    image.write(data, 0)
+
+This writes 'foo' to the first 600 bytes of the image. Note that data
+cannot be ``unicode`` - `Librbd` does not know how to deal with
+characters wider than a ``char``.
+
+In the end, you will want to close the image, the IO context and the connection to RADOS::
+
+    image.close()
+    ioctx.close()
+    cluster.shutdown()
+
+To be safe, each of these calls would need to be in a separate ``finally``
+block::
+
+    cluster = rados.Rados(conffile='my_ceph.conf')
+    try:
+        ioctx = cluster.open_ioctx('my_pool')
+        try:
+            rbd_inst = rbd.RBD()
+            size = 4 * 1024**3  # 4 GiB
+            rbd_inst.create(ioctx, 'myimage', size)
+            image = rbd.Image(ioctx, 'myimage')
+            try:
+                data = 'foo' * 200
+                image.write(data, 0)
+            finally:
+                image.close()
+        finally:
+            ioctx.close()
+    finally:
+        cluster.shutdown()
+
+This can be cumbersome, so the :class:`Rados`, :class:`Ioctx`, and
+:class:`Image` classes can be used as context managers that close/shutdown
+automatically (see :pep:`343`). Using them as context managers, the
+above example becomes::
+
+    with rados.Rados(conffile='my_ceph.conf') as cluster:
+        with cluster.open_ioctx('mypool') as ioctx:
+            rbd_inst = rbd.RBD()
+            size = 4 * 1024**3  # 4 GiB
+            rbd_inst.create(ioctx, 'myimage', size)
+            with rbd.Image(ioctx, 'myimage') as image:
+                data = 'foo' * 200
+                image.write(data, 0)
+
+API Reference
+=============
+
+.. automodule:: rbd
+    :members: RBD, Image, SnapIterator
diff --git a/ceph/doc/rbd/index.rst b/ceph/doc/rbd/index.rst
new file mode 100644 (file)
index 0000000..5d9d433
--- /dev/null
@@ -0,0 +1,74 @@
+===================
+ Ceph Block Device
+===================
+
+.. index:: Ceph Block Device; introduction
+
+A block is a sequence of bytes (for example, a 512-byte block of data).
+Block-based storage interfaces are the most common way to store data with
+rotating media such as hard disks, CDs, floppy disks, and even traditional
+9-track tape. The ubiquity of block device interfaces makes a virtual block
+device an ideal candidate to interact with a mass data storage system like Ceph.
+
+Ceph block devices are thin-provisioned, resizable and store data striped over
+multiple OSDs in a Ceph cluster.  Ceph block devices leverage
+:abbr:`RADOS (Reliable Autonomic Distributed Object Store)` capabilities
+such as snapshotting, replication and consistency. Ceph's 
+:abbr:`RADOS (Reliable Autonomic Distributed Object Store)` Block Devices (RBD) 
+interact with OSDs using kernel modules or the ``librbd`` library.
+
+.. ditaa::  +------------------------+ +------------------------+
+            |     Kernel Module      | |        librbd          |
+            +------------------------+-+------------------------+
+            |                   RADOS Protocol                  |
+            +------------------------+-+------------------------+
+            |          OSDs          | |        Monitors        |
+            +------------------------+ +------------------------+
+
+.. note:: Kernel modules can use Linux page caching. For ``librbd``-based 
+   applications, Ceph supports `RBD Caching`_.
+
+Ceph's block devices deliver high performance with infinite scalability to
+`kernel modules`_, or to :abbr:`KVMs (kernel virtual machines)` such as `QEMU`_, and
+cloud-based computing systems like `OpenStack`_ and `CloudStack`_ that rely on
+libvirt and QEMU to integrate with Ceph block devices. You can use the same cluster
+to operate the `Ceph RADOS Gateway`_, the `Ceph FS filesystem`_, and Ceph block
+devices simultaneously.
+
+.. important:: To use Ceph Block Devices, you must have access to a running 
+   Ceph cluster.
+
+.. toctree::
+       :maxdepth: 1
+
+       Commands <rados-rbd-cmds>
+       Kernel Modules <rbd-ko>
+       Snapshots<rbd-snapshot>
+        Mirroring <rbd-mirroring>
+       QEMU <qemu-rbd>
+       libvirt <libvirt>
+       Cache Settings <rbd-config-ref/>
+       OpenStack <rbd-openstack>
+       CloudStack <rbd-cloudstack>
+       RBD Replay <rbd-replay>
+
+.. toctree::
+       :maxdepth: 2
+
+       Manpages <man/index>
+
+.. toctree::
+       :maxdepth: 2
+
+       APIs <api/index>
+
+       
+       
+
+.. _RBD Caching: ../rbd-config-ref/
+.. _kernel modules: ../rbd-ko/
+.. _QEMU: ../qemu-rbd/
+.. _OpenStack: ../rbd-openstack
+.. _CloudStack: ../rbd-cloudstack
+.. _Ceph RADOS Gateway: ../../radosgw/
+.. _Ceph FS filesystem: ../../cephfs/
diff --git a/ceph/doc/rbd/librbdpy.rst b/ceph/doc/rbd/librbdpy.rst
deleted file mode 100644 (file)
index fa90331..0000000
+++ /dev/null
@@ -1,82 +0,0 @@
-================
- Librbd (Python)
-================
-
-.. highlight:: python
-
-The `rbd` python module provides file-like access to RBD images.
-
-
-Example: Creating and writing to an image
-=========================================
-
-To use `rbd`, you must first connect to RADOS and open an IO
-context::
-
-    cluster = rados.Rados(conffile='my_ceph.conf')
-    cluster.connect()
-    ioctx = cluster.open_ioctx('mypool')
-
-Then you instantiate an :class:rbd.RBD object, which you use to create the
-image::
-
-    rbd_inst = rbd.RBD()
-    size = 4 * 1024**3  # 4 GiB
-    rbd_inst.create(ioctx, 'myimage', size)
-
-To perform I/O on the image, you instantiate an :class:rbd.Image object::
-
-    image = rbd.Image(ioctx, 'myimage')
-    data = 'foo' * 200
-    image.write(data, 0)
-
-This writes 'foo' to the first 600 bytes of the image. Note that data
-cannot be :type:unicode - `Librbd` does not know how to deal with
-characters wider than a :c:type:char.
-
-In the end, you will want to close the image, the IO context and the connection to RADOS::
-
-    image.close()
-    ioctx.close()
-    cluster.shutdown()
-
-To be safe, each of these calls would need to be in a separate :finally
-block::
-
-    cluster = rados.Rados(conffile='my_ceph_conf')
-    try:
-        ioctx = cluster.open_ioctx('my_pool')
-        try:
-            rbd_inst = rbd.RBD()
-            size = 4 * 1024**3  # 4 GiB
-            rbd_inst.create(ioctx, 'myimage', size)
-            image = rbd.Image(ioctx, 'myimage')
-            try:
-                data = 'foo' * 200
-                image.write(data, 0)
-            finally:
-                image.close()
-        finally:
-            ioctx.close()
-    finally:
-        cluster.shutdown()
-
-This can be cumbersome, so the :class:`Rados`, :class:`Ioctx`, and
-:class:`Image` classes can be used as context managers that close/shutdown
-automatically (see :pep:`343`). Using them as context managers, the
-above example becomes::
-
-    with rados.Rados(conffile='my_ceph.conf') as cluster:
-        with cluster.open_ioctx('mypool') as ioctx:
-            rbd_inst = rbd.RBD()
-            size = 4 * 1024**3  # 4 GiB
-            rbd_inst.create(ioctx, 'myimage', size)
-            with rbd.Image(ioctx, 'myimage') as image:
-                data = 'foo' * 200
-                image.write(data, 0)
-
-API Reference
-=============
-
-.. automodule:: rbd
-    :members: RBD, Image, SnapIterator
diff --git a/ceph/doc/rbd/man/index.rst b/ceph/doc/rbd/man/index.rst
new file mode 100644 (file)
index 0000000..33a192a
--- /dev/null
@@ -0,0 +1,16 @@
+============================
+ Ceph Block Device Manpages
+============================
+
+.. toctree::
+   :maxdepth: 1
+
+   rbd <../../man/8/rbd>
+   rbd-fuse <../../man/8/rbd-fuse>
+   rbd-nbd <../../man/8/rbd-nbd>
+   rbd-ggate <../../man/8/rbd-ggate>
+   ceph-rbdnamer <../../man/8/ceph-rbdnamer>
+   rbd-replay-prep <../../man/8/rbd-replay-prep>
+   rbd-replay <../../man/8/rbd-replay>
+   rbd-replay-many <../../man/8/rbd-replay-many>
+   rbdmap <../../man/8/rbdmap>
index 6ce2fdc2c2c26627745cd7868a81dceef4bfd6bb..db942f88c786b251884e275847679de73faded08 100644 (file)
@@ -98,7 +98,7 @@ section of your configuration file. The settings include:
 :Required: No
 :Default: ``true``
 
-.. _Block Device: ../../rbd/rbd/
+.. _Block Device: ../../rbd
 
 
 Read-ahead Settings
index 5f1c1148907d0cc9a16c961072e5ada5cd6a51e5..e4db92832768a118240c0af36f899c31fd7d1707 100644 (file)
@@ -288,13 +288,24 @@ distribution package.
 
 .. important:: Each ``rbd-mirror`` daemon requires the ability to connect
    to both clusters simultaneously.
-.. warning:: Only run a single ``rbd-mirror`` daemon per Ceph cluster. A
-   future Ceph release will add support for horizontal scale-out of the
-   ``rbd-mirror`` daemon.
+.. warning:: Pre-Luminous releases: only run a single ``rbd-mirror`` daemon per
+   Ceph cluster.
+
+Each ``rbd-mirror`` daemon should use a unique Ceph user ID. To
+`create a Ceph user`_, with ``ceph`` specify the ``auth get-or-create``
+command, user name, monitor caps, and OSD caps::
+
+  ceph auth get-or-create client.rbd-mirror.{unique id} mon 'profile rbd' osd 'profile rbd'
+
+The ``rbd-mirror`` daemon can be managed by ``systemd`` by specifying the user
+ID as the daemon instance::
+
+  systemctl enable ceph-rbd-mirror@rbd-mirror.{unique id}
 
 .. _rbd: ../../man/8/rbd
 .. _ceph-conf: ../../rados/configuration/ceph-conf/#running-multiple-clusters
 .. _explicitly enabled: #enable-image-mirroring
 .. _force resync command: #force-image-resync
 .. _demote the image: #image-promotion-and-demotion
+.. _create a Ceph user: ../../rados/operations/user-management#add-a-user
 
diff --git a/ceph/doc/rbd/rbd.rst b/ceph/doc/rbd/rbd.rst
deleted file mode 100644 (file)
index e27e8c6..0000000
+++ /dev/null
@@ -1,72 +0,0 @@
-===================
- Ceph Block Device
-===================
-
-.. index:: Ceph Block Device; introduction
-
-A block is a sequence of bytes (for example, a 512-byte block of data).
-Block-based storage interfaces are the most common way to store data with
-rotating media such as hard disks, CDs, floppy disks, and even traditional
-9-track tape. The ubiquity of block device interfaces makes a virtual block
-device an ideal candidate to interact with a mass data storage system like Ceph.
-
-Ceph block devices are thin-provisioned, resizable and store data striped over
-multiple OSDs in a Ceph cluster.  Ceph block devices leverage
-:abbr:`RADOS (Reliable Autonomic Distributed Object Store)` capabilities
-such as snapshotting, replication and consistency. Ceph's 
-:abbr:`RADOS (Reliable Autonomic Distributed Object Store)` Block Devices (RBD) 
-interact with OSDs using kernel modules or the ``librbd`` library.
-
-.. ditaa::  +------------------------+ +------------------------+
-            |     Kernel Module      | |        librbd          |
-            +------------------------+-+------------------------+
-            |                   RADOS Protocol                  |
-            +------------------------+-+------------------------+
-            |          OSDs          | |        Monitors        |
-            +------------------------+ +------------------------+
-
-.. note:: Kernel modules can use Linux page caching. For ``librbd``-based 
-   applications, Ceph supports `RBD Caching`_.
-
-Ceph's block devices deliver high performance with infinite scalability to
-`kernel modules`_, or to :abbr:`KVMs (kernel virtual machines)` such as `QEMU`_, and
-cloud-based computing systems like `OpenStack`_ and `CloudStack`_ that rely on
-libvirt and QEMU to integrate with Ceph block devices. You can use the same cluster
-to operate the `Ceph RADOS Gateway`_, the `Ceph FS filesystem`_, and Ceph block
-devices simultaneously.
-
-.. important:: To use Ceph Block Devices, you must have access to a running 
-   Ceph cluster.
-
-.. toctree::
-       :maxdepth: 1
-
-       Commands <rados-rbd-cmds>
-       Kernel Modules <rbd-ko>
-       Snapshots<rbd-snapshot>
-        Mirroring <rbd-mirroring>
-       QEMU <qemu-rbd>
-       libvirt <libvirt>
-       Cache Settings <rbd-config-ref/>
-       OpenStack <rbd-openstack>
-       CloudStack <rbd-cloudstack>
-       Manpage rbd <../../man/8/rbd>
-       Manpage rbd-fuse <../../man/8/rbd-fuse>
-       Manpage rbd-nbd <../../man/8/rbd-nbd>
-       Manpage ceph-rbdnamer <../../man/8/ceph-rbdnamer>
-       RBD Replay <rbd-replay>
-       Manpage rbd-replay-prep <../../man/8/rbd-replay-prep>
-       Manpage rbd-replay <../../man/8/rbd-replay>
-       Manpage rbd-replay-many <../../man/8/rbd-replay-many>
-       Manpage rbdmap <../../man/8/rbdmap>
-       librbd <librbdpy>
-       
-       
-
-.. _RBD Caching: ../rbd-config-ref/
-.. _kernel modules: ../rbd-ko/
-.. _QEMU: ../qemu-rbd/
-.. _OpenStack: ../rbd-openstack
-.. _CloudStack: ../rbd-cloudstack
-.. _Ceph RADOS Gateway: ../../radosgw/
-.. _Ceph FS filesystem: ../../cephfs/
index 299b0a3d82aa6ac54c8d04461dfa0c2537726a06..db0dbbc8b049b9c917007dc0665c822b4946486a 100644 (file)
@@ -2,11 +2,11 @@
  Release Notes
 ===============
 
-v12.1.0 Luminous (RC)
+v12.1.2 Luminous (RC)
 =====================
 
-This is the first release candidate for Luminous, the next long term
-stable release.
+This is the third release candidate for Luminous, the next long term stable
+release.
 
 Ceph Luminous (v12.2.0) will be the foundation for the next long-term
 stable release series.  There have been major changes since Kraken
@@ -18,26 +18,32 @@ Major Changes from Kraken
 
 - *General*:
 
-  * Ceph now has a simple, built-in web-based dashboard for monitoring
-    cluster status.  See :doc:`/mgr/dashboard/`.
+  * Ceph now has a simple, `built-in web-based dashboard
+    <../mgr/dashboard>`_ for monitoring cluster status.
 
 - *RADOS*:
 
   * *BlueStore*:
 
-    - The new *BlueStore* backend for *ceph-osd* is now stable and the new
-      default for newly created OSDs.  BlueStore manages data stored by each OSD
-      by directly managing the physical HDDs or SSDs without the use of an
-      intervening file system like XFS.  This provides greater performance
-      and features. FIXME DOCS
-    - BlueStore supports *full data and metadata checksums* of all
+    - The new *BlueStore* backend for *ceph-osd* is now stable and the
+      new default for newly created OSDs.  BlueStore manages data
+      stored by each OSD by directly managing the physical HDDs or
+      SSDs without the use of an intervening file system like XFS.
+      This provides greater performance and features. See
+      :doc:`/rados/configuration/storage-devices` and
+      :doc:`/rados/configuration/bluestore-config-ref`.
+    - BlueStore supports `full data and metadata checksums
+      <../rados/configuration/bluestore-config-ref/#checksums>`_ of all
       data stored by Ceph.
-    - BlueStore supports inline compression using zlib, snappy, or LZ4.  (Ceph
-      also supports zstd for RGW compression but zstd is not recommended for
-      BlueStore for performance reasons.)  FIXME DOCS
+    - BlueStore supports `inline compression
+      <../rados/configuration/bluestore-config-ref/#inline-compression>`_ using
+      zlib, snappy, or LZ4. (Ceph also supports zstd for `RGW compression
+      <../man/8/radosgw-admin/#options>`_ but zstd is not recommended for
+      BlueStore for performance reasons.)
 
-  * *Erasure coded* pools now have full support for *overwrites*,
-    allowing them to be used with RBD and CephFS.  See :doc:`/rados/operations/erasure-code/#erasure-coding-with-overwrites`.
+  * *Erasure coded* pools now have `full support for overwrites
+    <../rados/operations/erasure-code/#erasure-coding-with-overwrites>`_,
+    allowing them to be used with RBD and CephFS.
 
   * *ceph-mgr*:
 
@@ -46,34 +52,38 @@ Major Changes from Kraken
       down, metrics will not refresh and some metrics-related calls
       (e.g., ``ceph df``) may block.  We recommend deploying several instances of
       *ceph-mgr* for reliability.  See the notes on `Upgrading`_ below.
-    - The *ceph-mgr* daemon includes a REST-based management API.  The
-      API is still experimental and somewhat limited but will form the basis
-      for API-based management of Ceph going forward.  See :doc:`/mgr/restful`.
-    - *ceph-mgr* also includes a Prometheus exporter plugin, which can
-      provide Ceph perfcounters to Prometheus.  See :doc:`/mgr/prometheus`.
+    - The *ceph-mgr* daemon includes a `REST-based management API
+      <../mgr/restful>`_. The API is still experimental and somewhat limited but
+      will form the basis for API-based management of Ceph going forward.
+    - *ceph-mgr* also includes a `Prometheus exporter <../mgr/prometheus>`_
+      plugin, which can provide Ceph perfcounters to Prometheus.
 
   * The overall *scalability* of the cluster has improved. We have
     successfully tested clusters with up to 10,000 OSDs.
-  * Each OSD can now have a *device class* associated with it (e.g.,
-    `hdd` or `ssd`), allowing CRUSH rules to trivially map data to a
-    subset of devices in the system.  Manually writing CRUSH rules or
-    manual editing of the CRUSH is normally not required.  See
-    :doc:`/rados/operations/crush-map/#crush-structure`.
+  * Each OSD can now have a `device class
+    <../rados/operations/crush-map/#device-classes>`_ associated with
+    it (e.g., `hdd` or `ssd`), allowing CRUSH rules to trivially map
+    data to a subset of devices in the system.  Manually writing CRUSH
+    rules or manual editing of the CRUSH is normally not required.
   * You can now *optimize CRUSH weights* to maintain a *near-perfect
     distribution of data* across OSDs.  FIXME DOCS
-  * There is also a new `upmap` exception mechanism that allows
-    individual PGs to be moved around to achieve a *perfect
-    distribution* (this requires luminous clients). See
-    :doc:`/rados/operations/upmap`.
+  * There is also a new `upmap <../rados/operations/upmap>`_ exception
+    mechanism that allows individual PGs to be moved around to achieve
+    a *perfect distribution* (this requires luminous clients).
   * Each OSD now adjusts its default configuration based on whether the
     backing device is an HDD or SSD.  Manual tuning generally not required.
-  * The prototype `mClock QoS queueing algorithm </rados/configuration/osd-config-ref/#qos-based-on-mclock>` is now available.
+  * The prototype `mClock QoS queueing algorithm
+    <../rados/configuration/osd-config-ref/#qos-based-on-mclock>`_ is now
+    available.
   * There is now a *backoff* mechanism that prevents OSDs from being
     overloaded by requests to objects or PGs that are not currently able to
     process IO.
-  * There is a simplified OSD replacement process that is more robust (see :doc:`/rados/operations/add-or-rm-osds/#replacing-an-osd`).
+  * There is a simplified `OSD replacement process
+    <../rados/operations/add-or-rm-osds/#replacing-an-osd>`_ that is more
+    robust.
   * You can query the supported features and (apparent) releases of
-    all connected daemons and clients with `ceph features </man/8/ceph#features>`_.
+    all connected daemons and clients with `ceph features
+    <../man/8/ceph#features>`_.
   * You can configure the oldest Ceph client version you wish to allow to
     connect to the cluster via ``ceph osd set-require-min-compat-client`` and
     Ceph will prevent you from enabling features that will break compatibility
@@ -115,6 +125,8 @@ Major Changes from Kraken
   * RBD mirroring's rbd-mirror daemon is now highly available. We
     recommend deploying several instances of rbd-mirror for
     reliability.
+  * RBD mirroring's rbd-mirror daemon should utilize unique Ceph user
+    IDs per instance to support the new mirroring dashboard.
   * The default 'rbd' pool is no longer created automatically during
     cluster creation. Additionally, the name of the default pool used
     by the rbd CLI when no pool is specified can be overridden via a
@@ -148,7 +160,8 @@ Major Changes from Kraken
 
 - *Miscellaneous*:
 
-  * Release packages are now being built for *Debian Stretch*.  The
+  * Release packages are now being built for *Debian Stretch*.  Note
+    that QA is limited to CentOS and Ubuntu (xenial and trusty).  The
     distributions we build for now includes:
 
     - CentOS 7 (x86_64 and aarch64)
@@ -157,8 +170,6 @@ Major Changes from Kraken
     - Ubuntu 16.04 Xenial (x86_64 and aarch64)
     - Ubuntu 14.04 Trusty (x86_64)
 
-    Note that QA is limited to CentOS and Ubuntu (xenial and trusty).
-
   * *CLI changes*:
 
     - The ``ceph -s`` or ``ceph status`` command has a fresh look.
@@ -208,6 +219,8 @@ Major Changes from Kraken
       disable the named mgr module.  The module must be present in the
       configured `mgr_module_path` on the host(s) where `ceph-mgr` is
       running.
+    - ``ceph osd crush ls <node>`` will list items (OSDs or other CRUSH nodes)
+      directly beneath a given CRUSH node.
     - ``ceph osd crush swap-bucket <src> <dest>`` will swap the
       contents of two CRUSH buckets in the hierarchy while preserving
       the buckets' ids.  This allows an entire subtree of devices to
@@ -234,14 +247,12 @@ Major Changes from Kraken
     - ``ceph osd reweightn`` will specify the `reweight` values for
       multiple OSDs in a single command.  This is equivalent to a series of
       ``ceph osd reweight`` commands.
-    - ``ceph osd crush class {rm,ls,ls-osd}`` manage the new
-      CRUSH *device class* feature.  ``ceph crush set-device-class
-      <class> <osd> [<osd>...]`` will set the class for particular devices.
-      Note that if you specify a non-existent class, it will be created
-      automatically. ``ceph crush rm-device-class <osd> [<osd>...]``
-      will instead remove the class for particular devices.
-      And if a class contains no more devices, it will be automatically
-      destoryed.
+    - ``ceph osd crush {set,rm}-device-class`` manage the new
+      CRUSH *device class* feature. Note that manually creating or deleting
+      a device class name is generally not necessary as it will be smart
+      enough to be self-managed. ``ceph osd crush class ls`` and 
+      ``ceph osd crush class ls-osd`` will output all existing device classes 
+      and a list of OSD ids under the given device class respectively.
     - ``ceph osd crush rule create-replicated`` replaces the old
       ``ceph osd crush rule create-simple`` command to create a CRUSH
       rule for a replicated pool.  Notably it takes a `class` argument
@@ -418,8 +429,64 @@ upgrade to Luminous.
 Upgrade compatibility notes, Kraken to Luminous
 -----------------------------------------------
 
+* The configuration option ``osd pool erasure code stripe width`` has
+  been replaced by ``osd pool erasure code stripe unit``, and given
+  the ability to be overridden by the erasure code profile setting
+  ``stripe_unit``. For more details see `erasure code profiles
+  <../rados/operations/erasure-code/#erasure-code-profiles>`_.
+
+* rbd and cephfs can use erasure coding with bluestore. This may be
+  enabled by setting ``allow_ec_overwrites`` to ``true`` for a pool. Since
+  this relies on bluestore's checksumming to do deep scrubbing,
+  enabling this on a pool stored on filestore is not allowed.
+
+* The ``rados df`` JSON output now prints numeric values as numbers instead of
+  strings.
+
+* The ``mon_osd_max_op_age`` option has been renamed to
+  ``mon_osd_warn_op_age`` (default: 32 seconds), to indicate we
+  generate a warning at this age.  There is also a new
+  ``mon_osd_err_op_age_ratio`` that is expressed as a multiple of
+  ``mon_osd_warn_op_age`` (default: 128, for roughly 60 minutes) to
+  control when an error is generated.
+
+* The default maximum size for a single RADOS object has been reduced from
+  100GB to 128MB.  The 100GB limit was completely impractical in practice
+  while the 128MB limit is a bit high but not unreasonable.  If you have an
+  application written directly to librados that is using objects larger than
+  128MB you may need to adjust ``osd_max_object_size``.
+
+* The semantics of the ``rados ls`` and librados object listing
+  operations have always been a bit confusing in that "whiteout"
+  objects (which logically don't exist and will return ENOENT if you
+  try to access them) are included in the results.  Previously
+  whiteouts only occurred in cache tier pools.  In luminous, logically
+  deleted but snapshotted objects now result in a whiteout object, and
+  as a result they will appear in ``rados ls`` results, even though
+  trying to read such an object will result in ENOENT.  The ``rados
+  listsnaps`` operation can be used in such a case to enumerate which
+  snapshots are present.
+
+  This may seem a bit strange, but is less strange than having a
+  deleted-but-snapshotted object not appear at all and be completely
+  hidden from librados's ability to enumerate objects.  Future
+  versions of Ceph will likely include an alternative object
+  enumeration interface that makes it more natural and efficient to
+  enumerate all objects along with their snapshot and clone metadata.
+
+* The deprecated ``crush_ruleset`` property has finally been removed;
+  please use  ``crush_rule`` instead for the ``osd pool get ...`` and ``osd
+  pool set ...`` commands.
+
+* The ``osd pool default crush replicated ruleset`` option has been
+  removed and replaced by the ``osd pool default crush rule`` option.
+  By default it is -1, which means the mon will pick the first type
+  replicated rule in the CRUSH map for replicated pools.  Erasure
+  coded pools have rules that are automatically created for them if
+  they are not specified at pool creation time.
+
 * We no longer test the FileStore ceph-osd backend in combination with
-  ``btrfs``.  We recommend against using btrfs.  If you are using
+  btrfs.  We recommend against using btrfs.  If you are using
   btrfs-based OSDs and want to upgrade to luminous you will need to
   add the follwing to your ceph.conf::
 
@@ -488,6 +555,7 @@ Upgrade compatibility notes, Kraken to Luminous
     either the rados_nobjects_list_open (C) and nobjects_begin (C++) API or
     the new rados_object_list_begin (C) and object_list_begin (C++) API
     before updating the client-side librados library to Luminous.
+
     Object enumeration (via any API) with the latest librados version
     and pre-Hammer OSDs is no longer supported.  Note that no in-tree
     Ceph services rely on object enumeration via the deprecated APIs, so
@@ -519,6 +587,678 @@ Upgrade compatibility notes, Kraken to Luminous
     by "ceph tell mds.<id> ..."
 
 
+Notable Changes since v12.1.0 (RC1)
+-----------------------------------
+
+* choose_args encoding has been changed to make it architecture-independent.
+  If you deployed Luminous dev releases or 12.1.0 rc release and made use of
+  the CRUSH choose_args feature, you need to remove all choose_args mappings
+  from your CRUSH map before starting the upgrade.
+
+* The 'ceph health' structured output (JSON or XML) no longer contains
+  a 'timechecks' section describing the time sync status.  This
+  information is now available via the 'ceph time-sync-status'
+  command.
+
+* Certain extra fields in the 'ceph health' structured output that
+  used to appear if the mons were low on disk space (which duplicated
+  the information in the normal health warning messages) are now gone.
+
+* The "ceph -w" output no longer contains audit log entries by default.
+  Add a "--watch-channel=audit" or "--watch-channel=*" to see them.
+
+* The 'apply' mode of cephfs-journal-tool has been removed.
+
+* Added new configuration "public bind addr" to support dynamic environments
+  like Kubernetes. When set the Ceph MON daemon could bind locally to an IP
+  address and advertise a different IP address "public addr" on the network.
+
+* New "ceph -w" behavior - the "ceph -w" output no longer contains I/O rates,
+  available space, pg info, etc. because these are no longer logged to the
+  central log (which is what "ceph -w" shows). The same information can be
+  obtained by running "ceph pg stat"; alternatively, I/O rates per pool can
+  be determined using "ceph osd pool stats". Although these commands do not
+  self-update like "ceph -w" did, they do have the ability to return formatted
+  output by providing a "--format=<format>" option.
+
+* Pools are now expected to be associated with the application using them.
+  Upon completing the upgrade to Luminous, the cluster will attempt to associate
+  existing pools to known applications (i.e. CephFS, RBD, and RGW). In-use pools
+  that are not associated to an application will generate a health warning. Any
+  unassociated pools can be manually associated using the new
+  "ceph osd pool application enable" command. For more details see
+  "Associate Pool to Application" in the documentation.
+
+* ceph-mgr now has a Zabbix plugin. Using zabbix_sender it sends trapper
+  events to a Zabbix server containing high-level information of the Ceph
+  cluster. This makes it easy to monitor a Ceph cluster's status and send
+  out notifications in case of a malfunction.
+
+* The 'mon_warn_osd_usage_min_max_delta' config option has been
+  removed and the associated health warning has been disabled because
+  it does not address clusters undergoing recovery or CRUSH rules that do
+  not target all devices in the cluster.
+
+* Specifying user authorization capabilities for RBD clients has been
+  simplified. The general syntax for using RBD capability profiles is
+  "mon 'profile rbd' osd 'profile rbd[-read-only][ pool={pool-name}[, ...]]'".
+  For more details see "User Management" in the documentation.
+
+* ``ceph config-key put`` has been deprecated in favor of ``ceph config-key set``.
+
+
+Notable Changes since v12.1.1 (RC2)
+-----------------------------------
+
+* New "ceph -w" behavior - the "ceph -w" output no longer contains I/O rates,
+  available space, pg info, etc. because these are no longer logged to the
+  central log (which is what "ceph -w" shows). The same information can be
+  obtained by running "ceph pg stat"; alternatively, I/O rates per pool can
+  be determined using "ceph osd pool stats". Although these commands do not
+  self-update like "ceph -w" did, they do have the ability to return formatted
+  output by providing a "--format=<format>" option.
+
+* Pools are now expected to be associated with the application using them.
+  Upon completing the upgrade to Luminous, the cluster will attempt to associate
+  existing pools to known applications (i.e. CephFS, RBD, and RGW). In-use pools
+  that are not associated to an application will generate a health warning. Any
+  unassociated pools can be manually associated using the new
+  "ceph osd pool application enable" command. For more details see
+  "Associate Pool to Application" in the documentation.
+
+* ceph-mgr now has a Zabbix plugin. Using zabbix_sender it sends trapper
+  events to a Zabbix server containing high-level information of the Ceph
+  cluster. This makes it easy to monitor a Ceph cluster's status and send
+  out notifications in case of a malfunction.
+
+* The 'mon_warn_osd_usage_min_max_delta' config option has been
+  removed and the associated health warning has been disabled because
+  it does not address clusters undergoing recovery or CRUSH rules that do
+  not target all devices in the cluster.
+
+* Specifying user authorization capabilities for RBD clients has been
+  simplified. The general syntax for using RBD capability profiles is
+  "mon 'profile rbd' osd 'profile rbd[-read-only][ pool={pool-name}[, ...]]'".
+  For more details see "User Management" in the documentation.
+
+* RGW: bucket index resharding now uses the reshard namespace in the log pool
+  in upgrade scenarios as well; this is a change in behaviour from RC1, where a
+  new pool for reshard was created.
+
+* RGW multisite now supports enabling or disabling sync at a bucket level.
+
+Other Notable Changes
+---------------------
+* bluestore: bluestore/BlueFS: pass string as const ref (`pr#16600 <https://github.com/ceph/ceph/pull/16600>`_, dingdangzhang)
+* bluestore: common/options: make "blue{fs,store}_allocator" LEVEL_DEV (`issue#20660 <http://tracker.ceph.com/issues/20660>`_, `pr#16645 <https://github.com/ceph/ceph/pull/16645>`_, Kefu Chai)
+* bluestore: os/bluestore/BlueStore: Avoid double counting state_kv_queued_lat (`pr#16374 <https://github.com/ceph/ceph/pull/16374>`_, Jianpeng Ma)
+* bluestore: os/bluestore/BlueStore: remove unused code (`pr#16522 <https://github.com/ceph/ceph/pull/16522>`_, Jianpeng Ma)
+* bluestore: os/bluestore: move aio.h/cc from fs dir to bluestore dir (`pr#16409 <https://github.com/ceph/ceph/pull/16409>`_, Pan Liu)
+* bluestore: os/bluestore/StupidAllocator: rounded down len to an align boundary (`issue#20660 <http://tracker.ceph.com/issues/20660>`_, `pr#16593 <https://github.com/ceph/ceph/pull/16593>`_, Zhu Shangzhong)
+* bluestore: os/bluestore: use reference to avoid string copy (`pr#16364 <https://github.com/ceph/ceph/pull/16364>`_, Pan Liu)
+* build/ops: ceph-disk: don't activate suppressed journal devices (`issue#19489 <http://tracker.ceph.com/issues/19489>`_, `pr#16123 <https://github.com/ceph/ceph/pull/16123>`_, David Disseldorp)
+* build/ops: do_cmake.sh: fix syntax for /bin/sh (doesn't have +=) (`pr#16433 <https://github.com/ceph/ceph/pull/16433>`_, Dan Mick)
+* build/ops: include/assert: test c++ before using static_cast<> (`pr#16424 <https://github.com/ceph/ceph/pull/16424>`_, Kefu Chai)
+* build/ops: install-deps.sh: add missing dependencies for FreeBSD (`pr#16545 <https://github.com/ceph/ceph/pull/16545>`_, Alan Somers)
+* build/ops,rbd,rgw: CMakeLists: trim rbd/rgw forced dependencies (`pr#16574 <https://github.com/ceph/ceph/pull/16574>`_, Patrick Donnelly)
+* build/ops: rpm: Drop legacy libxio support (`pr#16449 <https://github.com/ceph/ceph/pull/16449>`_, Nathan Cutler)
+* build/ops: rpm: fix typo WTIH_BABELTRACE (`pr#16366 <https://github.com/ceph/ceph/pull/16366>`_, Nathan Cutler)
+* build/ops: rpm: put mgr python build dependencies in make_check bcond (`issue#20425 <http://tracker.ceph.com/issues/20425>`_, `pr#15940 <https://github.com/ceph/ceph/pull/15940>`_, Nathan Cutler, Tim Serong)
+* build/ops,tests: qa: make run-standalone work on FreeBSD (`pr#16595 <https://github.com/ceph/ceph/pull/16595>`_, Willem Jan Withagen)
+* cmake: disable -fvar-tracking-assignments for config.cc (`pr#16695 <https://github.com/ceph/ceph/pull/16695>`_, Kefu Chai)
+* cmake: use CMAKE_INSTALL_INCLUDEDIR (`pr#16483 <https://github.com/ceph/ceph/pull/16483>`_, David Disseldorp)
+* common: buffer: silence unused var warning on FreeBSD (`pr#16452 <https://github.com/ceph/ceph/pull/16452>`_, Willem Jan Withagen)
+* common: common/common_init: disable default dout logging for UTILITY_NODOUT too (`issue#20771 <http://tracker.ceph.com/issues/20771>`_, `pr#16578 <https://github.com/ceph/ceph/pull/16578>`_, Sage Weil)
+* common: common/options: refactors to set the properties in a more structured way (`pr#16482 <https://github.com/ceph/ceph/pull/16482>`_, Kefu Chai)
+* common:   common/WorkQueue: use threadpoolname + threadaddr for heartbeat_han… (`pr#16563 <https://github.com/ceph/ceph/pull/16563>`_, huangjun)
+* common,core: osd,mds,mgr: do not dereference null rotating_keys (`issue#20667 <http://tracker.ceph.com/issues/20667>`_, `pr#16455 <https://github.com/ceph/ceph/pull/16455>`_, Sage Weil)
+* common: fix Option set_long_description (`pr#16668 <https://github.com/ceph/ceph/pull/16668>`_, Yan Jun)
+* common: follow up to new options infrastructure (`pr#16527 <https://github.com/ceph/ceph/pull/16527>`_, John Spray)
+* common: HashIndex.cc: add compat.h for ENODATA (`pr#16697 <https://github.com/ceph/ceph/pull/16697>`_, Willem Jan Withagen)
+* common: libradosstriper: fix format injection vulnerability (`issue#20240 <http://tracker.ceph.com/issues/20240>`_, `pr#15674 <https://github.com/ceph/ceph/pull/15674>`_, Stan K)
+* common,mon: crush,mon: add weight-set introspection and manipulation commands (`pr#16326 <https://github.com/ceph/ceph/pull/16326>`_, Sage Weil)
+* common: mon/MonClient: scale backoff interval down when we have a healthy mon session (`issue#20371 <http://tracker.ceph.com/issues/20371>`_, `pr#16576 <https://github.com/ceph/ceph/pull/16576>`_, Kefu Chai, Sage Weil)
+* common: prevent unset_dumpable from generating warnings (`pr#16462 <https://github.com/ceph/ceph/pull/16462>`_, Willem Jan Withagen)
+* common,rbd: osdc/Objecter: unify disparate EAGAIN handling paths into one (`pr#16627 <https://github.com/ceph/ceph/pull/16627>`_, Sage Weil)
+* common: remove config opt conversion utility (`pr#16480 <https://github.com/ceph/ceph/pull/16480>`_, John Spray)
+* common: Revamp config option definitions (`issue#20627 <http://tracker.ceph.com/issues/20627>`_, `pr#16211 <https://github.com/ceph/ceph/pull/16211>`_, John Spray, Kefu Chai, Sage Weil)
+* common,rgw: cls/refcount: store and use list of retired tags (`issue#20107 <http://tracker.ceph.com/issues/20107>`_, `pr#15673 <https://github.com/ceph/ceph/pull/15673>`_, Yehuda Sadeh)
+* common: the latency dumped by "ceph osd perf" is not real (`issue#20749 <http://tracker.ceph.com/issues/20749>`_, `pr#16512 <https://github.com/ceph/ceph/pull/16512>`_, Pan Liu)
+* common: use std::move() for better performance (`pr#16620 <https://github.com/ceph/ceph/pull/16620>`_, Xinying Song)
+* core: auth: Remove unused function in AuthSessionHandler (`pr#16666 <https://github.com/ceph/ceph/pull/16666>`_, Luo Kexue)
+* core: ceph: allow '-' with -i and -o for stdin/stdout (`pr#16359 <https://github.com/ceph/ceph/pull/16359>`_, Sage Weil)
+* core: ceph-disk: support osd new (`pr#15432 <https://github.com/ceph/ceph/pull/15432>`_, Loic Dachary, Sage Weil)
+* core: common/options: remove mon_warn_osd_usage_min_max_delta from options.cc too (`pr#16488 <https://github.com/ceph/ceph/pull/16488>`_, Sage Weil)
+* core: kv: resolve a crash issue in ~LevelDBStore() (`pr#16553 <https://github.com/ceph/ceph/pull/16553>`_, wumingqiao)
+* core: kv/RocksDBStore: use vector instead of VLA for holding slices (`pr#16615 <https://github.com/ceph/ceph/pull/16615>`_, Kefu Chai)
+* core: messages: default-initialize MOSDPGRecoveryDelete[Reply] members (`pr#16584 <https://github.com/ceph/ceph/pull/16584>`_, Greg Farnum)
+* core: mgr/MgrClient: do not attempt to access a global variable for config (`pr#16544 <https://github.com/ceph/ceph/pull/16544>`_, Jason Dillaman)
+* core,mgr,tests: qa: flush out monc's dropped msgs on msgr failure injection (`issue#20371 <http://tracker.ceph.com/issues/20371>`_, `pr#16484 <https://github.com/ceph/ceph/pull/16484>`_, Joao Eduardo Luis)
+* core,mon: crush, mon: simplify device class manipulation commands (`pr#16388 <https://github.com/ceph/ceph/pull/16388>`_, xie xingguo)
+* core: mon, osd: misc fixes (`pr#16283 <https://github.com/ceph/ceph/pull/16283>`_, xie xingguo)
+* core,mon,rbd: mon,osd: new rbd-based cephx cap profiles (`pr#15991 <https://github.com/ceph/ceph/pull/15991>`_, Jason Dillaman)
+* core: msg/async: fix the bug of inaccurate calculation of l_msgr_send_bytes (`pr#16526 <https://github.com/ceph/ceph/pull/16526>`_, Jin Cai)
+* core: objclass: modify omap_get_{keys,vals} api (`pr#16667 <https://github.com/ceph/ceph/pull/16667>`_, Yehuda Sadeh, Casey Bodley)
+* core: osd/PG: fix warning so we discard_event() on a no-op state change (`pr#16655 <https://github.com/ceph/ceph/pull/16655>`_, Sage Weil)
+* core: osd/PG: ignore CancelRecovery in NotRecovering (`issue#20804 <http://tracker.ceph.com/issues/20804>`_, `pr#16638 <https://github.com/ceph/ceph/pull/16638>`_, Sage Weil)
+* core: osd/PGLog: fix inaccurate missing assert (`issue#20753 <http://tracker.ceph.com/issues/20753>`_, `pr#16539 <https://github.com/ceph/ceph/pull/16539>`_, Josh Durgin)
+* core:   osd/PrimaryLogPG: fix recovering hang when have unfound objects (`pr#16558 <https://github.com/ceph/ceph/pull/16558>`_, huangjun)
+* core: osd/PrimaryLogPG: skip deleted missing objects in pg[n]ls (`issue#20739 <http://tracker.ceph.com/issues/20739>`_, `pr#16490 <https://github.com/ceph/ceph/pull/16490>`_, Josh Durgin)
+* core,performance: kv/RocksDBStore: Table options for indexing and filtering (`pr#16450 <https://github.com/ceph/ceph/pull/16450>`_, Mark Nelson)
+* core,performance: osd/PG: make prioritized recovery possible (`pr#13723 <https://github.com/ceph/ceph/pull/13723>`_, Piotr Dałek)
+* core: PGLog: store extra duplicate ops beyond the normal log entries (`pr#16172 <https://github.com/ceph/ceph/pull/16172>`_, Josh Durgin, J. Eric Ivancich)
+* core,rgw,tests: qa/suits/rados/basic/tasks/rgw_snaps: wait for pools to be created (`pr#16509 <https://github.com/ceph/ceph/pull/16509>`_, Sage Weil)
+* core,tests: ceph_test_rados_api_watch_notify: flush after unwatch (`issue#20105 <http://tracker.ceph.com/issues/20105>`_, `pr#16402 <https://github.com/ceph/ceph/pull/16402>`_, Sage Weil)
+* core,tests: ceph_test_rados: max_stride_size must be more than min_stride_size (`issue#20775 <http://tracker.ceph.com/issues/20775>`_, `pr#16590 <https://github.com/ceph/ceph/pull/16590>`_, Lianne Wang)
+* core,tests: qa: move ceph-helpers-based make check tests to qa/standalone; run via teuthology (`pr#16513 <https://github.com/ceph/ceph/pull/16513>`_, Sage Weil)
+* core,tests: qa/suites/rados: at-end: ignore PG_{AVAILABILITY,DEGRADED} (`issue#20693 <http://tracker.ceph.com/issues/20693>`_, `pr#16575 <https://github.com/ceph/ceph/pull/16575>`_, Sage Weil)
+* core,tests: qa/tasks/ceph_manager: wait for osd to start after objectstore-tool sequence (`issue#20705 <http://tracker.ceph.com/issues/20705>`_, `pr#16454 <https://github.com/ceph/ceph/pull/16454>`_, Sage Weil)
+* core,tests: qa/tasks/ceph: wait for mgr to activate and pg stats to flush in health() (`issue#20744 <http://tracker.ceph.com/issues/20744>`_, `pr#16514 <https://github.com/ceph/ceph/pull/16514>`_, Sage Weil)
+* core,tests: qa/tasks/dump_stuck: fix dump_stuck test bug (`pr#16559 <https://github.com/ceph/ceph/pull/16559>`_, huangjun)
+* core,tests: qa/workunits/cephtool/test.sh: add sudo for daemon compact (`pr#16500 <https://github.com/ceph/ceph/pull/16500>`_, Sage Weil)
+* core,tests: test: add separate ceph-helpers-based smoke test (`pr#16572 <https://github.com/ceph/ceph/pull/16572>`_, Sage Weil)
+* core: throttle: Minimal destructor fix for Luminous (`pr#16661 <https://github.com/ceph/ceph/pull/16661>`_, Adam C. Emerson)
+* core: vstart.sh: start mgr after mon, before osds (`pr#16613 <https://github.com/ceph/ceph/pull/16613>`_, Sage Weil)
+* crush: a couple of weight-set fixes (`pr#16623 <https://github.com/ceph/ceph/pull/16623>`_, xie xingguo)
+* crush: enforce buckets-before-rules rule (`pr#16453 <https://github.com/ceph/ceph/pull/16453>`_, Sage Weil)
+* crush: s/ruleset/id/ in decompiled output; prevent compilation when ruleset != id (`pr#16400 <https://github.com/ceph/ceph/pull/16400>`_, Sage Weil)
+* doc: Add amitkumar50 affiliation to .organizationmap (`pr#16475 <https://github.com/ceph/ceph/pull/16475>`_, Amit Kumar)
+* doc: add doc requirements on PR submitters (`pr#16394 <https://github.com/ceph/ceph/pull/16394>`_, John Spray)
+* doc: added mgr caps to manual deployment documentation (`pr#16660 <https://github.com/ceph/ceph/pull/16660>`_, Nick Erdmann)
+* doc: add instructions for replacing an OSD (`pr#16314 <https://github.com/ceph/ceph/pull/16314>`_, Kefu Chai)
+* doc: add rbd new trash cli and cleanups  in release-notes.rst (`issue#20702 <http://tracker.ceph.com/issues/20702>`_, `pr#16498 <https://github.com/ceph/ceph/pull/16498>`_, songweibin)
+* doc: Add Zabbix ceph-mgr plugin to PendingReleaseNotes (`pr#16412 <https://github.com/ceph/ceph/pull/16412>`_, Wido den Hollander)
+* doc: AUTHORS: update CephFS PTL (`pr#16399 <https://github.com/ceph/ceph/pull/16399>`_, Patrick Donnelly)
+* doc: ceph-disk: use '-' for feeding ceph cli with stdin (`pr#16362 <https://github.com/ceph/ceph/pull/16362>`_, Kefu Chai)
+* doc: common/options.cc: document bluestore config options (`pr#16489 <https://github.com/ceph/ceph/pull/16489>`_, Sage Weil)
+* doc: Describe mClock's use within Ceph in great detail (`pr#16707 <https://github.com/ceph/ceph/pull/16707>`_, J. Eric Ivancich)
+* doc: doc/install/manual-deployment: update osd creation steps (`pr#16573 <https://github.com/ceph/ceph/pull/16573>`_, Sage Weil)
+* doc: doc/mon: fix ceph-authtool command in rebuild mon's sample (`pr#16503 <https://github.com/ceph/ceph/pull/16503>`_, huanwen ren)
+* doc: doc/qa: cover `config help` command (`pr#16727 <https://github.com/ceph/ceph/pull/16727>`_, John Spray)
+* doc: doc/rados: add page for health checks and update monitoring.rst (`pr#16566 <https://github.com/ceph/ceph/pull/16566>`_, John Spray)
+* doc: doc/rados/operations/health-checks: osd section (`pr#16611 <https://github.com/ceph/ceph/pull/16611>`_, Sage Weil)
+* doc: doc/release-notes: fix upmap and osd replacement links; add fixme (`pr#16730 <https://github.com/ceph/ceph/pull/16730>`_, Sage Weil)
+* doc: [docs/quick-start]: update quick start to add a note for mgr create command for luminous+ builds (`pr#16350 <https://github.com/ceph/ceph/pull/16350>`_, Vasu Kulkarni)
+* doc: Documentation updates for July 2017 releases (`pr#16401 <https://github.com/ceph/ceph/pull/16401>`_, Bryan Stillwell)
+* doc: document mClock related options (`pr#16552 <https://github.com/ceph/ceph/pull/16552>`_, Kefu Chai)
+* doc: Fixed a typo in yum repo filename script (`pr#16431 <https://github.com/ceph/ceph/pull/16431>`_, Jeff Green)
+* doc: fix typo in config.rst (`pr#16721 <https://github.com/ceph/ceph/pull/16721>`_, Jos Collin)
+* doc: fix typos in config.rst (`pr#16681 <https://github.com/ceph/ceph/pull/16681>`_, Song Shun)
+* doc: mailmap: add affiliation for Zhu Shangzhong (`pr#16537 <https://github.com/ceph/ceph/pull/16537>`_, Zhu Shangzhong)
+* doc: .mailmap, .organizationmap: Update ztczll affiliation (`pr#16038 <https://github.com/ceph/ceph/pull/16038>`_, zhanglei)
+* doc: PendingReleaseNotes: "ceph -w" behavior has changed drastically (`pr#16425 <https://github.com/ceph/ceph/pull/16425>`_, Joao Eduardo Luis, Nathan Cutler)
+* doc: Remove contractions from the documentation (`pr#16629 <https://github.com/ceph/ceph/pull/16629>`_, John Wilkins)
+* doc: remove docs on non-existant command (`pr#16616 <https://github.com/ceph/ceph/pull/16616>`_, Luo Kexue, Kefu Chai)
+* doc: reword mds deactivate docs; add optional fs_name argument (`issue#20607 <http://tracker.ceph.com/issues/20607>`_, `pr#16471 <https://github.com/ceph/ceph/pull/16471>`_, Jan Fajerski)
+* doc: rgw clarify limitations when creating tenant names (`pr#16418 <https://github.com/ceph/ceph/pull/16418>`_, Abhishek Lekshmanan)
+* doc: update ceph(8) man page with new sub-commands (`pr#16437 <https://github.com/ceph/ceph/pull/16437>`_, Kefu Chai)
+* doc: Update .organizationmap (`pr#16507 <https://github.com/ceph/ceph/pull/16507>`_, luokexue)
+* doc: update the pool names created by vstart.sh by default (`pr#16652 <https://github.com/ceph/ceph/pull/16652>`_, Zhu Shangzhong)
+* doc: update the rados namespace docs (`pr#15838 <https://github.com/ceph/ceph/pull/15838>`_, Abhishek Lekshmanan)
+* doc: upmap docs; various missing links for release notes (`pr#16637 <https://github.com/ceph/ceph/pull/16637>`_, Sage Weil)
+* doc: various fixes (`pr#16723 <https://github.com/ceph/ceph/pull/16723>`_, Kefu Chai)
+* librados: add missing implementations for C service daemon API methods (`pr#16543 <https://github.com/ceph/ceph/pull/16543>`_, Jason Dillaman)
+* librbd: add compare and write API (`pr#14868 <https://github.com/ceph/ceph/pull/14868>`_, Zhengyong Wang, Jason Dillaman)
+* librbd: add LIBRBD_SUPPORTS_WRITESAME support (`pr#16583 <https://github.com/ceph/ceph/pull/16583>`_, Xiubo Li)
+* mgr: add per-DaemonState lock (`pr#16432 <https://github.com/ceph/ceph/pull/16432>`_, Sage Weil)
+* mgr: fix lock cycle (`pr#16508 <https://github.com/ceph/ceph/pull/16508>`_, Sage Weil)
+* mgr: mgr/dashboard: add OSD list view (`pr#16373 <https://github.com/ceph/ceph/pull/16373>`_, John Spray)
+* mgr: mgr_module interface to report health alerts (`pr#16487 <https://github.com/ceph/ceph/pull/16487>`_, Sage Weil)
+* mgr: mgr/PyState: shut up about get_config on nonexistent keys (`pr#16641 <https://github.com/ceph/ceph/pull/16641>`_, Sage Weil)
+* mgr: mon/MgrMonitor: fix standby addition to mgrmap (`issue#20647 <http://tracker.ceph.com/issues/20647>`_, `pr#16397 <https://github.com/ceph/ceph/pull/16397>`_, Sage Weil)
+* mgr,mon: mon/AuthMonitor: generate bootstrap-mgr key on upgrade (`issue#20666 <http://tracker.ceph.com/issues/20666>`_, `pr#16395 <https://github.com/ceph/ceph/pull/16395>`_, Joao Eduardo Luis)
+* mgr,mon: mon/MgrMonitor: reset mgrdigest timer with new subscription (`issue#20633 <http://tracker.ceph.com/issues/20633>`_, `pr#16582 <https://github.com/ceph/ceph/pull/16582>`_, Sage Weil)
+* mgr: perf schema fns/change notification and Prometheus plugin (`pr#16406 <https://github.com/ceph/ceph/pull/16406>`_, Dan Mick)
+* mgr: pybind/mgr/zabbix: fix health in non-compat mode (`issue#20767 <http://tracker.ceph.com/issues/20767>`_, `pr#16580 <https://github.com/ceph/ceph/pull/16580>`_, Sage Weil)
+* mgr,pybind,rbd: mgr/dashboard: show rbd image features (`pr#16468 <https://github.com/ceph/ceph/pull/16468>`_, Yanhu Cao)
+* mgr,rbd: mgr/dashboard: RBD iSCSI daemon status page (`pr#16547 <https://github.com/ceph/ceph/pull/16547>`_, Jason Dillaman)
+* mgr,rbd: mgr/dashboard: rbd mirroring status page (`pr#16360 <https://github.com/ceph/ceph/pull/16360>`_, Jason Dillaman)
+* mgr: vstart.sh: fix mgr vs restful command startup race (`pr#16564 <https://github.com/ceph/ceph/pull/16564>`_, Sage Weil)
+* mon: add force-create-pg back (`issue#20605 <http://tracker.ceph.com/issues/20605>`_, `pr#16353 <https://github.com/ceph/ceph/pull/16353>`_, Kefu Chai)
+* mon: add mgr metdata commands, and overall 'versions' command for all daemon versions (`pr#16460 <https://github.com/ceph/ceph/pull/16460>`_, Sage Weil)
+* mon: a few health fixes (`pr#16415 <https://github.com/ceph/ceph/pull/16415>`_, xie xingguo)
+* mon: 'config-key put' -> 'config-key set' (`pr#16569 <https://github.com/ceph/ceph/pull/16569>`_, Sage Weil)
+* mon: do not dereference empty mgr_commands (`pr#16501 <https://github.com/ceph/ceph/pull/16501>`_, Sage Weil)
+* mon: Fix deep_age copy paste error (`pr#16434 <https://github.com/ceph/ceph/pull/16434>`_, Brad Hubbard)
+* mon: Fix output text and doc (`pr#16367 <https://github.com/ceph/ceph/pull/16367>`_, Yan Jun)
+* mon: '\* list' -> '\* ls' (`pr#16423 <https://github.com/ceph/ceph/pull/16423>`_, Sage Weil)
+* mon: load mgr commands at runtime (`pr#16028 <https://github.com/ceph/ceph/pull/16028>`_, John Spray, Sage Weil)
+* mon: mon/HealthMonitor: avoid sending unnecessary MMonHealthChecks to leader (`pr#16478 <https://github.com/ceph/ceph/pull/16478>`_, xie xingguo)
+* mon: mon/HealthMonitor: trigger a proposal if stat updated (`pr#16477 <https://github.com/ceph/ceph/pull/16477>`_, Kefu Chai)
+* mon: mon/LogMonitor: don't read list's end() for log last (`pr#16376 <https://github.com/ceph/ceph/pull/16376>`_, Joao Eduardo Luis)
+* mon: mon/MDSMonitor: close object section of formatter (`pr#16516 <https://github.com/ceph/ceph/pull/16516>`_, Chang Liu)
+* mon: mon/MgrMonitor: only induce mgr epoch shortly after mkfs (`pr#16356 <https://github.com/ceph/ceph/pull/16356>`_, Sage Weil)
+* mon: mon/OSDMonitor: ensure UP is not set for newly-created OSDs (`issue#20751 <http://tracker.ceph.com/issues/20751>`_, `pr#16534 <https://github.com/ceph/ceph/pull/16534>`_, Sage Weil)
+* mon: mon/OSDMonitor: issue pool application related warning (`pr#16520 <https://github.com/ceph/ceph/pull/16520>`_, xie xingguo)
+* mon: mon/OSDMonitor: remove zeroed new_state updates (`issue#20751 <http://tracker.ceph.com/issues/20751>`_, `pr#16518 <https://github.com/ceph/ceph/pull/16518>`_, Sage Weil)
+* mon: mon/PGMap: remove skewed utilizatoin warning (`issue#20730 <http://tracker.ceph.com/issues/20730>`_, `pr#16461 <https://github.com/ceph/ceph/pull/16461>`_, Sage Weil)
+* mon: OSDMonitor: check mon_max_pool_pg_num when set pool pg_num (`pr#16511 <https://github.com/ceph/ceph/pull/16511>`_, chenhg)
+* mon: prime pg_temp and a few health warning fixes (`pr#16530 <https://github.com/ceph/ceph/pull/16530>`_, xie xingguo)
+* mon: show destroyed status in tree view; do not auto-out destroyed osds (`pr#16446 <https://github.com/ceph/ceph/pull/16446>`_, xie xingguo)
+* mon: stop issuing not-[deep]-scrubbed warnings if disabled (`pr#16465 <https://github.com/ceph/ceph/pull/16465>`_, xie xingguo)
+* mon: support pool application metadata key/values (`pr#15763 <https://github.com/ceph/ceph/pull/15763>`_, Jason Dillaman)
+* msg: messages/: always set header.version in encode_payload() (`issue#19939 <http://tracker.ceph.com/issues/19939>`_, `pr#16421 <https://github.com/ceph/ceph/pull/16421>`_, Kefu Chai)
+* msg: mgr/status: row has incorrect number of values (`issue#20750 <http://tracker.ceph.com/issues/20750>`_, `pr#16529 <https://github.com/ceph/ceph/pull/16529>`_, liuchang0812)
+* msg: msg/async: use auto iterator having more simple code and good performance (`pr#16524 <https://github.com/ceph/ceph/pull/16524>`_, dingdangzhang)
+* osd: add default_device_class to metadata (`pr#16634 <https://github.com/ceph/ceph/pull/16634>`_, Neha Ojha)
+* osd: add dump filter for tracked ops (`pr#16561 <https://github.com/ceph/ceph/pull/16561>`_, Yan Jun)
+* osd: Add recovery sleep configuration option for HDDs and SSDs (`pr#16328 <https://github.com/ceph/ceph/pull/16328>`_, Neha Ojha)
+* osd: cmpext operator should ignore -ENOENT on read (`pr#16622 <https://github.com/ceph/ceph/pull/16622>`_, Jason Dillaman)
+* osd: combine conditional statements (`pr#16391 <https://github.com/ceph/ceph/pull/16391>`_, Yan Jun)
+* osd: do not send pg_created unless luminous (`issue#20785 <http://tracker.ceph.com/issues/20785>`_, `pr#16677 <https://github.com/ceph/ceph/pull/16677>`_, Kefu Chai)
+* osd: EC read handling: don't grab an objectstore error to use as the read error (`pr#16663 <https://github.com/ceph/ceph/pull/16663>`_, David Zafman)
+* osd: fix a couple bugs with persisting the missing set when it contains deletes (`issue#20704 <http://tracker.ceph.com/issues/20704>`_, `pr#16459 <https://github.com/ceph/ceph/pull/16459>`_, Josh Durgin)
+* osd: fix OpRequest and tracked op dump information (`pr#16504 <https://github.com/ceph/ceph/pull/16504>`_, Yan Jun)
+* osd: fix pg ref leaks when osd shutdown (`issue#20684 <http://tracker.ceph.com/issues/20684>`_, `pr#16408 <https://github.com/ceph/ceph/pull/16408>`_, Yang Honggang)
+* osd: Log audit (`pr#16281 <https://github.com/ceph/ceph/pull/16281>`_, Brad Hubbard)
+* osd: moved OpFinisher logic from OSDOp to OpContext (`issue#20783 <http://tracker.ceph.com/issues/20783>`_, `pr#16617 <https://github.com/ceph/ceph/pull/16617>`_, Jason Dillaman)
+* osd: populate last_epoch_split during build_initial_pg_history (`issue#20754 <http://tracker.ceph.com/issues/20754>`_, `pr#16519 <https://github.com/ceph/ceph/pull/16519>`_, Sage Weil)
+* osd: PrimaryLogPG, PGBackend: complete callback even if interval changes (`issue#20747 <http://tracker.ceph.com/issues/20747>`_, `pr#16536 <https://github.com/ceph/ceph/pull/16536>`_, Josh Durgin)
+* osd: process deletes during recovery instead of peering (`issue#19971 <http://tracker.ceph.com/issues/19971>`_, `pr#15952 <https://github.com/ceph/ceph/pull/15952>`_, Josh Durgin)
+* osd: rephrase "wrongly marked me down" clog message (`pr#16365 <https://github.com/ceph/ceph/pull/16365>`_, John Spray)
+* osd: scrub_to specifies clone ver, but transaction include head write… (`issue#20041 <http://tracker.ceph.com/issues/20041>`_, `pr#16404 <https://github.com/ceph/ceph/pull/16404>`_, David Zafman)
+* osd: support cmpext operation on EC-backed pools (`pr#15693 <https://github.com/ceph/ceph/pull/15693>`_, Zhengyong Wang, Jason Dillaman)
+* performance,rgw: rgw_file: permit dirent offset computation (`pr#16275 <https://github.com/ceph/ceph/pull/16275>`_, Matt Benjamin)
+* pybind: pybind/mgr/restful: fix typo (`pr#16560 <https://github.com/ceph/ceph/pull/16560>`_, Nick Erdmann)
+* rbd: cls/rbd: silence warning from -Wunused-variable (`pr#16670 <https://github.com/ceph/ceph/pull/16670>`_, Yan Jun)
+* rbd: cls/rbd: trash_list should be iterable (`issue#20643 <http://tracker.ceph.com/issues/20643>`_, `pr#16372 <https://github.com/ceph/ceph/pull/16372>`_, Jason Dillaman)
+* rbd: fixed coverity 'Argument cannot be negative' warning (`pr#16686 <https://github.com/ceph/ceph/pull/16686>`_, amitkuma)
+* rbd: make it more understandable when adding peer returns error (`pr#16313 <https://github.com/ceph/ceph/pull/16313>`_, songweibin)
+* rbd-mirror: guard the deletion of non-primary images (`pr#16398 <https://github.com/ceph/ceph/pull/16398>`_, Jason Dillaman)
+* rbd-mirror: initialize timer context pointer to null (`pr#16603 <https://github.com/ceph/ceph/pull/16603>`_, Jason Dillaman)
+* rbd: modified some commands' description into imperative sentence (`pr#16694 <https://github.com/ceph/ceph/pull/16694>`_, songweibin)
+* rbd,tests: qa/tasks/rbd_fio: bump default fio version to 2.21 (`pr#16656 <https://github.com/ceph/ceph/pull/16656>`_, Ilya Dryomov)
+* rbd,tests: qa: thrash tests for backoff and upmap (`pr#16428 <https://github.com/ceph/ceph/pull/16428>`_, Ilya Dryomov)
+* rbd,tests: qa/workunits: adjust path to ceph-helpers.sh (`pr#16599 <https://github.com/ceph/ceph/pull/16599>`_, Sage Weil)
+* rgw: acl grants num limit (`pr#16291 <https://github.com/ceph/ceph/pull/16291>`_, Enming Zhang)
+* rgw: check placement existence when create bucket (`pr#16385 <https://github.com/ceph/ceph/pull/16385>`_, Jiaying Ren)
+* rgw: check placement target existence during bucket creation (`pr#16384 <https://github.com/ceph/ceph/pull/16384>`_, Jiaying Ren)
+* rgw: delete object in error path (`issue#20620 <http://tracker.ceph.com/issues/20620>`_, `pr#16324 <https://github.com/ceph/ceph/pull/16324>`_, Yehuda Sadeh)
+* rgw: Do not decrement stats cache when the cache values are zero (`issue#20661 <http://tracker.ceph.com/issues/20661>`_, `pr#16389 <https://github.com/ceph/ceph/pull/16389>`_, Pavan Rallabhandi)
+* rgw: Drop dump_usage_bucket_info() to silence warning from -Wunused-function (`pr#16497 <https://github.com/ceph/ceph/pull/16497>`_, Wei Qiaomiao)
+* rgw: drop unused find_replacement() and some function docs (`pr#16386 <https://github.com/ceph/ceph/pull/16386>`_, Jiaying Ren)
+* rgw: fix asctime when logging in rgw_lc (`pr#16422 <https://github.com/ceph/ceph/pull/16422>`_, Abhishek Lekshmanan)
+* rgw: fix error message in removing bucket with --bypass-gc flag (`issue#20688 <http://tracker.ceph.com/issues/20688>`_, `pr#16419 <https://github.com/ceph/ceph/pull/16419>`_, Abhishek Varshney)
+* rgw: fix err when copy object in bucket with specified placement rule (`issue#20378 <http://tracker.ceph.com/issues/20378>`_, `pr#15837 <https://github.com/ceph/ceph/pull/15837>`_, fang yuxiang)
+* rgw: Fix for Policy Parse exception in case of multiple statements (`pr#16689 <https://github.com/ceph/ceph/pull/16689>`_, Pritha Srivastava)
+* rgw: fix memory leaks during Swift Static Website's error handling (`issue#20757 <http://tracker.ceph.com/issues/20757>`_, `pr#16531 <https://github.com/ceph/ceph/pull/16531>`_, Radoslaw Zarzynski)
+* rgw: fix parse/eval of policy conditions with IfExists (`issue#20708 <http://tracker.ceph.com/issues/20708>`_, `pr#16463 <https://github.com/ceph/ceph/pull/16463>`_, Casey Bodley)
+* rgw: fix radosgw will crash when service is restarted during lifecycl… (`issue#20756 <http://tracker.ceph.com/issues/20756>`_, `pr#16495 <https://github.com/ceph/ceph/pull/16495>`_, Wei Qiaomiao)
+* rgw: fix rgw hang when do RGWRealmReloader::reload after go SIGHUP (`issue#20686 <http://tracker.ceph.com/issues/20686>`_, `pr#16417 <https://github.com/ceph/ceph/pull/16417>`_, fang.yuxiang)
+* rgw: fix segfault in RevokeThread during its shutdown procedure (`issue#19831 <http://tracker.ceph.com/issues/19831>`_, `pr#15033 <https://github.com/ceph/ceph/pull/15033>`_, Radoslaw Zarzynski)
+* rgw: fix the UTF8 check on bucket entry name in rgw_log_op() (`issue#20779 <http://tracker.ceph.com/issues/20779>`_, `pr#16604 <https://github.com/ceph/ceph/pull/16604>`_, Radoslaw Zarzynski)
+* rgw: modify email to empty by admin RESTful api doesn't work (`pr#16309 <https://github.com/ceph/ceph/pull/16309>`_, fang.yuxiang)
+* rgw: never let http_redirect_code of RGWRedirectInfo to stay uninitialized (`issue#20774 <http://tracker.ceph.com/issues/20774>`_, `pr#16601 <https://github.com/ceph/ceph/pull/16601>`_, Radoslaw Zarzynski)
+* rgw: raise debug level of RGWPostObj_ObjStore_S3::get_policy (`pr#16203 <https://github.com/ceph/ceph/pull/16203>`_, Shasha Lu)
+* rgw: req xml params size limitation error msg (`pr#16310 <https://github.com/ceph/ceph/pull/16310>`_, Enming Zhang)
+* rgw: restore admin socket path in mrgw.sh (`pr#16540 <https://github.com/ceph/ceph/pull/16540>`_, Casey Bodley)
+* rgw: rgw_file: properly & |'d flags (`issue#20663 <http://tracker.ceph.com/issues/20663>`_, `pr#16448 <https://github.com/ceph/ceph/pull/16448>`_, Matt Benjamin)
+* rgw: rgw multisite: feature of bucket sync enable/disable (`pr#15801 <https://github.com/ceph/ceph/pull/15801>`_, Zhang Shaowen, Casey Bodley, Zengran Zhang)
+* rgw: should unlock when reshard_log->update() returns non-zero in RGWB… (`pr#16502 <https://github.com/ceph/ceph/pull/16502>`_, Wei Qiaomiao)
+* rgw: test,rgw: fix rgw placement rule pool config option (`pr#16380 <https://github.com/ceph/ceph/pull/16380>`_, Jiaying Ren)
+* rgw: usage (`issue#16191 <http://tracker.ceph.com/issues/16191>`_, `pr#14287 <https://github.com/ceph/ceph/pull/14287>`_, Ji Chen, Orit Wasserman)
+* rgw: use a namespace for rgw reshard pool for upgrades as well (`issue#20289 <http://tracker.ceph.com/issues/20289>`_, `pr#16368 <https://github.com/ceph/ceph/pull/16368>`_, Karol Mroz, Abhishek Lekshmanan)
+* rgw: Use comparison instead of assignment (`pr#16653 <https://github.com/ceph/ceph/pull/16653>`_, amitkuma)
+* tests: add setup/teardown for asok dir (`pr#16523 <https://github.com/ceph/ceph/pull/16523>`_, Kefu Chai)
+* tests: cephtool/test.sh: Only delete a test pool when no longer needed (`pr#16443 <https://github.com/ceph/ceph/pull/16443>`_, Willem Jan Withagen)
+* tests: qa: Added luminous to the mix in schedule_subset.sh (`pr#16430 <https://github.com/ceph/ceph/pull/16430>`_, Yuri Weinstein)
+* tests: qa,doc: document and fix tests for pool application warnings (`pr#16568 <https://github.com/ceph/ceph/pull/16568>`_, Sage Weil)
+* tests: qa/run-standalone.sh: fix the find option to be compatible with GNU find (`pr#16646 <https://github.com/ceph/ceph/pull/16646>`_, Kefu Chai)
+* tests: qa/suites/rados/singleton/all/erasure-code-nonregression: fix typo (`pr#16579 <https://github.com/ceph/ceph/pull/16579>`_, Sage Weil)
+* tests: qa/suites/upgrade/jewel-x: misc fixes for new health checks (`pr#16429 <https://github.com/ceph/ceph/pull/16429>`_, Sage Weil)
+* tests: qa/tasks/ceph-deploy: Fix bluestore options for ceph-deploy (`pr#16571 <https://github.com/ceph/ceph/pull/16571>`_, Vasu Kulkarni)
+* tests: qa/tasks/reg11184: use literal 'foo' instead of pool_name (`pr#16451 <https://github.com/ceph/ceph/pull/16451>`_, Kefu Chai)
+* tests: qa/workunits/cephtool/test.sh: "ceph osd stat" output changed, update accordingly (`pr#16444 <https://github.com/ceph/ceph/pull/16444>`_, Willem Jan Withagen, Kefu Chai)
+* tests: qa/workunits/cephtool/test.sh: disable 'fs status' until bug is fixed (`issue#20761 <http://tracker.ceph.com/issues/20761>`_, `pr#16541 <https://github.com/ceph/ceph/pull/16541>`_, Sage Weil)
+* tests: qa/workunits/cephtool/test.sh: fix test to watch audit channel (`pr#16470 <https://github.com/ceph/ceph/pull/16470>`_, Sage Weil)
+* tests: test: ceph osd stat out has changed, fix tests for that (`pr#16403 <https://github.com/ceph/ceph/pull/16403>`_, Willem Jan Withagen)
+* tests: test: create asok files in a temp directory under $TMPDIR (`issue#16895 <http://tracker.ceph.com/issues/16895>`_, `pr#16445 <https://github.com/ceph/ceph/pull/16445>`_, Kefu Chai)
+* tests: test: Fixes for test_pidfile (`issue#20770 <http://tracker.ceph.com/issues/20770>`_, `pr#16587 <https://github.com/ceph/ceph/pull/16587>`_, David Zafman)
+* tests: test/osd: kill compile warning (`pr#16669 <https://github.com/ceph/ceph/pull/16669>`_, Yan Jun)
+* tests: test/rados: fix wrong parameter order of RETURN1_IF_NOT_VAL (`pr#16589 <https://github.com/ceph/ceph/pull/16589>`_, Yan Jun)
+* tests: test: reg11184 might not always find pg 2.0 prior to import (`pr#16610 <https://github.com/ceph/ceph/pull/16610>`_, David Zafman)
+* tests: test: s/osd_objectstore_type/osd_objectstore (`pr#16469 <https://github.com/ceph/ceph/pull/16469>`_, xie xingguo)
+* tests: test: test_pidfile running 2nd mon has unreliable log output (`pr#16635 <https://github.com/ceph/ceph/pull/16635>`_, David Zafman)
+* tools: ceph-disk: change the lockbox partition number to 5 (`issue#20556 <http://tracker.ceph.com/issues/20556>`_, `pr#16247 <https://github.com/ceph/ceph/pull/16247>`_, Shangzhong Zhu)
+* tools: ceph-disk: Fix for missing 'not' in \*_is_diskdevice checks (`issue#20706 <http://tracker.ceph.com/issues/20706>`_, `pr#16481 <https://github.com/ceph/ceph/pull/16481>`_, Nikita Gerasimov)
+* tools: ceph_disk/main.py: FreeBSD root has wheel for group (`pr#16609 <https://github.com/ceph/ceph/pull/16609>`_, Willem Jan Withagen)
+* tools: ceph-disk: s/ceph_osd_mkfs/command_check_call/ (`issue#20685 <http://tracker.ceph.com/issues/20685>`_, `pr#16427 <https://github.com/ceph/ceph/pull/16427>`_, Zhu Shangzhong)
+* tools: ceph-release-notes: escape _ for unintended links (`issue#17499 <http://tracker.ceph.com/issues/17499>`_, `pr#16528 <https://github.com/ceph/ceph/pull/16528>`_, Kefu Chai)
+* tools: ceph-release-notes: port it to py3 (`pr#16261 <https://github.com/ceph/ceph/pull/16261>`_, Kefu Chai)
+* tools: ceph-release-notes: refactor and fix regressions (`pr#16411 <https://github.com/ceph/ceph/pull/16411>`_, Nathan Cutler)
+* tools: os/bluestore/bluestore_tool: add sanity check to get rid of occasional crash (`pr#16013 <https://github.com/ceph/ceph/pull/16013>`_, xie xingguo)
+* tools: script: add docker core dump debugger (`pr#16375 <https://github.com/ceph/ceph/pull/16375>`_, Patrick Donnelly)
+
+
+v12.1.2 Luminous (RC)
+=====================
+
+This is the second release candidate for Luminous, the next long term
+stable release.
+
+
+Other Notable Changes
+---------------------
+
+* bluestore,common,performance: isa-l: update isa-l to v2.18 (`pr#15895 <https://github.com/ceph/ceph/pull/15895>`_, Ganesh Mahalingam, Tushar Gohad)
+* bluestore: os/bluestore/BlueFS: clean up log_writer aios from compaction (`issue#20454 <http://tracker.ceph.com/issues/20454>`_, `pr#16017 <https://github.com/ceph/ceph/pull/16017>`_, Sage Weil)
+* bluestore: os/bluestore/BlueFS: clear current log entries before dump all fnode (`pr#15973 <https://github.com/ceph/ceph/pull/15973>`_, Jianpeng Ma)
+* bluestore: os/bluestore: cleanup min_alloc_size; some formatting nits (`pr#15826 <https://github.com/ceph/ceph/pull/15826>`_, xie xingguo)
+* bluestore: os/bluestore: clear up redundant size assignment in KernelDevice (`pr#16121 <https://github.com/ceph/ceph/pull/16121>`_, Shasha Lu)
+* bluestore: os/blueStore: Failure retry for opening file (`pr#16237 <https://github.com/ceph/ceph/pull/16237>`_, Yankun Li)
+* bluestore: os/bluestore: fix deferred_aio deadlock (`pr#16051 <https://github.com/ceph/ceph/pull/16051>`_, Sage Weil)
+* bluestore: os/bluestore: Make BitmapFreelistManager kv iterator short lived (`pr#16243 <https://github.com/ceph/ceph/pull/16243>`_, Mark Nelson)
+* bluestore: os/bluestore: misc fix and cleanups (`pr#16315 <https://github.com/ceph/ceph/pull/16315>`_, Jianpeng Ma)
+* bluestore: os/bluestore: move object exist in assign nid (`pr#16117 <https://github.com/ceph/ceph/pull/16117>`_, Jianpeng Ma)
+* bluestore: os/bluestore: narrow cache lock range; make sure min_alloc_size p2 aligned (`pr#15911 <https://github.com/ceph/ceph/pull/15911>`_, xie xingguo)
+* bluestore: os/bluestore: only submit deferred if there is any (`pr#16269 <https://github.com/ceph/ceph/pull/16269>`_, Sage Weil)
+* bluestore: os/bluestore: reduce some overhead for _do_clone_range() and _do_remove() (`pr#15944 <https://github.com/ceph/ceph/pull/15944>`_, xie xingguo)
+* bluestore: os/bluestore: slightly refactor Blob::try_reuse_blob (`pr#15836 <https://github.com/ceph/ceph/pull/15836>`_, xie xingguo)
+* bluestore: os/bluestore: use bufferlist functions whenever possible (`pr#16158 <https://github.com/ceph/ceph/pull/16158>`_, Jianpeng Ma)
+* bluestore,performance: os/bluestore: cap rocksdb cache size (`pr#15786 <https://github.com/ceph/ceph/pull/15786>`_, Mark Nelson)
+* bluestore,performance: os/bluestore: default cache size of 3gb (`pr#15976 <https://github.com/ceph/ceph/pull/15976>`_, Sage Weil)
+* bluestore,performance: os/bluestore: differ default cache size for hdd/ssd backends (`pr#16157 <https://github.com/ceph/ceph/pull/16157>`_, xie xingguo)
+* bluestore,performance: os/bluestore/KernelDevice: batch aio submit (`pr#16032 <https://github.com/ceph/ceph/pull/16032>`_, Haodong Tang)
+* bluestore,performance: os/bluestore: optimized (encode|decode)_escaped (`pr#15759 <https://github.com/ceph/ceph/pull/15759>`_, Piotr Dałek)
+* build/ops: build: build erasure-code isa lib without versions (`pr#16205 <https://github.com/ceph/ceph/pull/16205>`_, James Page)
+* build/ops: build: execute dh_systemd_{enable,start} after dh_install (`issue#19585 <http://tracker.ceph.com/issues/19585>`_, `pr#16218 <https://github.com/ceph/ceph/pull/16218>`_, James Page)
+* build/ops: ceph.in: allow developer mode from outside build tree (`issue#20472 <http://tracker.ceph.com/issues/20472>`_, `pr#16055 <https://github.com/ceph/ceph/pull/16055>`_, Dan Mick)
+* build/ops: ceph_release: we are in the 'rc' phase (12.1.z) (`pr#15957 <https://github.com/ceph/ceph/pull/15957>`_, Sage Weil)
+* build/ops,core: osd/OSD: auto class on osd start up (`pr#16014 <https://github.com/ceph/ceph/pull/16014>`_, xie xingguo)
+* build/ops: debian: workaround the bug in dpkg-maintscript-helper (`issue#20453 <http://tracker.ceph.com/issues/20453>`_, `pr#16072 <https://github.com/ceph/ceph/pull/16072>`_, Kefu Chai)
+* build/ops: debian: wrap-and-sort all files (`pr#16110 <https://github.com/ceph/ceph/pull/16110>`_, James Page)
+* build/ops: os/bluestore: fix build errors when spdk is on (`pr#16118 <https://github.com/ceph/ceph/pull/16118>`_, Ilsoo Byun)
+* build/ops,rbd,tests: test/librbd: re-enable internal tests in ceph_test_librbd (`pr#16255 <https://github.com/ceph/ceph/pull/16255>`_, Mykola Golub)
+* build/ops,rgw,tests,tools: vstart: allow to start multiple radosgw when RGW=x (`pr#15632 <https://github.com/ceph/ceph/pull/15632>`_, Adam Kupczyk)
+* build/ops,rgw,tools: vstart: add --rgw_compression to set rgw compression plugin (`pr#15929 <https://github.com/ceph/ceph/pull/15929>`_, Casey Bodley)
+* build/ops: rpm: bump epoch ahead of RHEL base (`issue#20508 <http://tracker.ceph.com/issues/20508>`_, `pr#16126 <https://github.com/ceph/ceph/pull/16126>`_, Ken Dreyer)
+* build/ops: rpm: Fix undefined FIRST_ARG (`issue#20077 <http://tracker.ceph.com/issues/20077>`_, `pr#16208 <https://github.com/ceph/ceph/pull/16208>`_, Boris Ranto)
+* build/ops: rpm: obsolete libcephfs1 (`pr#16074 <https://github.com/ceph/ceph/pull/16074>`_, Nathan Cutler)
+* build/ops: rpm: sane packaging of %{_docdir}/ceph directory (`pr#15900 <https://github.com/ceph/ceph/pull/15900>`_, Nathan Cutler)
+* build/ops: systemd: Add explicit Before=ceph.target (`pr#15835 <https://github.com/ceph/ceph/pull/15835>`_, Tim Serong)
+* build/ops: systemd/ceph-mgr: remove automagic mgr creation hack (`issue#19994 <http://tracker.ceph.com/issues/19994>`_, `pr#16023 <https://github.com/ceph/ceph/pull/16023>`_, Sage Weil)
+* build/ops,tests,tools: vstart.sh: Work around mgr restful not available (`pr#15877 <https://github.com/ceph/ceph/pull/15877>`_, Willem Jan Withagen)
+* cephfs: Remove "experimental" warnings from multimds (`pr#15154 <https://github.com/ceph/ceph/pull/15154>`_, John Spray, "Yan, Zheng")
+* cleanup: test,mon,msg: kill clang analyzer warnings (`pr#16320 <https://github.com/ceph/ceph/pull/16320>`_, Kefu Chai)
+* cmake: fix the build with -DWITH_ZFS=ON (`pr#15907 <https://github.com/ceph/ceph/pull/15907>`_, Kefu Chai)
+* cmake: Rewrite HAVE_BABELTRACE option to WITH_ (`pr#15305 <https://github.com/ceph/ceph/pull/15305>`_, Willem Jan Withagen)
+* common: auth/RotatingKeyRing: use std::move() to set secrets (`pr#15866 <https://github.com/ceph/ceph/pull/15866>`_, Kefu Chai)
+* common: ceph.in, mgr: misc cleanups (`pr#16229 <https://github.com/ceph/ceph/pull/16229>`_, liuchang0812)
+* common: common,config: OPT_FLOAT and OPT_DOUBLE output format in config show (`issue#20104 <http://tracker.ceph.com/issues/20104>`_, `pr#15647 <https://github.com/ceph/ceph/pull/15647>`_, Yanhu Cao)
+* common: common/config_opt: remove unused config (`pr#15874 <https://github.com/ceph/ceph/pull/15874>`_, alex.wu)
+* common: common/config_opts: drop unused opt (`pr#15876 <https://github.com/ceph/ceph/pull/15876>`_, Yanhu Cao)
+* common: common/Mutex.cc: fixed the error in comment (`pr#16214 <https://github.com/ceph/ceph/pull/16214>`_, Pan Liu)
+* common: common/Timer: do not add event if already shutdown (`issue#20432 <http://tracker.ceph.com/issues/20432>`_, `pr#16201 <https://github.com/ceph/ceph/pull/16201>`_, Kefu Chai)
+* common: compressor/zlib: remove g_ceph_context/g_conf from compressor plugin (`pr#16245 <https://github.com/ceph/ceph/pull/16245>`_, Casey Bodley)
+* common,core: osd/osd_types: add flag name (IGNORE_REDIRECT) (`pr#15795 <https://github.com/ceph/ceph/pull/15795>`_, Myoungwon Oh)
+* common: fix log warnings (`pr#16056 <https://github.com/ceph/ceph/pull/16056>`_, xie xingguo)
+* common: initialize array in struct BackTrace (`pr#15864 <https://github.com/ceph/ceph/pull/15864>`_, Jos Collin)
+* common: libradosstriper: fix format injection vulnerability (`issue#20240 <http://tracker.ceph.com/issues/20240>`_, `pr#15674 <https://github.com/ceph/ceph/pull/15674>`_, Stan K)
+* common: misc cleanups in common, global, os, osd submodules (`pr#16321 <https://github.com/ceph/ceph/pull/16321>`_, Yan Jun)
+* common: msg/async: make recv_stamp more precise (`pr#15810 <https://github.com/ceph/ceph/pull/15810>`_, Pan Liu)
+* common: osdc/Objecter: release message if it's not handled (`issue#19741 <http://tracker.ceph.com/issues/19741>`_, `pr#15890 <https://github.com/ceph/ceph/pull/15890>`_, Kefu Chai)
+* common: osd/OSDMap: print require_osd_release (`pr#15974 <https://github.com/ceph/ceph/pull/15974>`_, Sage Weil)
+* common: Passing null pointer option_name to operator << in md_config_t::parse_option() (`pr#15881 <https://github.com/ceph/ceph/pull/15881>`_, Jos Collin)
+* common,rdma: msg/async/rdma: use lists properly (`pr#15908 <https://github.com/ceph/ceph/pull/15908>`_, Adir lev, Adir Lev)
+* common,tests: ceph_test_rados_api_c_read_operations: do not assert per-op rval is correct (`issue#19518 <http://tracker.ceph.com/issues/19518>`_, `pr#16196 <https://github.com/ceph/ceph/pull/16196>`_, Sage Weil)
+* common: Update the error string when res_nsearch() or res_search() fails (`pr#15878 <https://github.com/ceph/ceph/pull/15878>`_, huanwen ren)
+* core: ceph-disk/ceph_disk/main.py: Replace ST_ISBLK() test by is_diskdevice() (`pr#15587 <https://github.com/ceph/ceph/pull/15587>`_, Willem Jan Withagen)
+* core: ceph_disk/main.py: Allow FreeBSD zap a OSD disk (`pr#15642 <https://github.com/ceph/ceph/pull/15642>`_, Willem Jan Withagen)
+* core: ceph-disk: set the default systemd unit timeout to 3h (`issue#20229 <http://tracker.ceph.com/issues/20229>`_, `pr#15585 <https://github.com/ceph/ceph/pull/15585>`_, Loic Dachary)
+* core: Context: C_ContextsBase: delete enclosed contexts in dtor (`issue#20432 <http://tracker.ceph.com/issues/20432>`_, `pr#16159 <https://github.com/ceph/ceph/pull/16159>`_, Kefu Chai)
+* core: crush/CrushWrapper: chooseargs encoding fix (`pr#15984 <https://github.com/ceph/ceph/pull/15984>`_, Ilya Dryomov)
+* core: crush/CrushWrapper: make get_immediate_parent[_id] ignore per-class shadow hierarchy (`issue#20546 <http://tracker.ceph.com/issues/20546>`_, `pr#16221 <https://github.com/ceph/ceph/pull/16221>`_, Sage Weil)
+* core: kv/RocksDBStore: abort if rocksdb EIO, don't return incorrect result (`pr#15862 <https://github.com/ceph/ceph/pull/15862>`_, Haomai Wang)
+* core: make the conversion from wire error to host OS work (`pr#15780 <https://github.com/ceph/ceph/pull/15780>`_, Willem Jan Withagen)
+* core: messages/MOSDPing.h: drop unused fields (`pr#15843 <https://github.com/ceph/ceph/pull/15843>`_, Piotr Dałek)
+* core,mgr: mgr,librados: service map (`pr#15858 <https://github.com/ceph/ceph/pull/15858>`_, Yehuda Sadeh, John Spray, Sage Weil)
+* core,mgr,mon: mgr,mon: enable/disable mgr modules via 'ceph mgr module ...' commands (`pr#15958 <https://github.com/ceph/ceph/pull/15958>`_, Sage Weil)
+* core,mgr: mon/PGMap: slightly better debugging around pgmap updates (`pr#15820 <https://github.com/ceph/ceph/pull/15820>`_, Sage Weil)
+* core: mon/MonClient: respect the priority in SRV RR (`issue#5249 <http://tracker.ceph.com/issues/5249>`_, `pr#15964 <https://github.com/ceph/ceph/pull/15964>`_, Kefu Chai)
+* core: mon/MonmapMonitor: use __func__ instead of hard code function name (`pr#16037 <https://github.com/ceph/ceph/pull/16037>`_, Yanhu Cao)
+* core,mon: mon/MDSMonitor: fix segv when multiple MDSs raise same alert (`pr#16302 <https://github.com/ceph/ceph/pull/16302>`_, Sage Weil)
+* core,mon: mon/MgrStatMonitor: avoid dup health warnings during luminous upgrade (`issue#20435 <http://tracker.ceph.com/issues/20435>`_, `pr#15986 <https://github.com/ceph/ceph/pull/15986>`_, Sage Weil)
+* core,mon: mon, osd: misc fixes (`pr#16078 <https://github.com/ceph/ceph/pull/16078>`_, xie xingguo)
+* core: mon, osd: misc fixes and cleanups (`pr#16160 <https://github.com/ceph/ceph/pull/16160>`_, xie xingguo)
+* core: mon/OSDMonitor: _apply_remap -> _apply_upmap; less code redundancy (`pr#15846 <https://github.com/ceph/ceph/pull/15846>`_, xie xingguo)
+* core: mon/OSDMonitor: do not allow crush device classes until luminous (`pr#16188 <https://github.com/ceph/ceph/pull/16188>`_, Sage Weil)
+* core: osd/ECTransaction: cleanup the redundant check which works in overwrite IO context (`pr#15765 <https://github.com/ceph/ceph/pull/15765>`_, tang.jin)
+* core: osd/filestore: Revert "os/filestore: move ondisk in front" (`issue#20524 <http://tracker.ceph.com/issues/20524>`_, `pr#16156 <https://github.com/ceph/ceph/pull/16156>`_, Kefu Chai)
+* core: osd/PG: Add two new mClock implementations of the PG sharded operator queue (`pr#14997 <https://github.com/ceph/ceph/pull/14997>`_, J. Eric Ivancich)
+* core: osd/PG: set clean when last_epoch_clean is updated (`issue#19023 <http://tracker.ceph.com/issues/19023>`_, `pr#15555 <https://github.com/ceph/ceph/pull/15555>`_, Samuel Just)
+* core: osd/PrimaryLogPG solve cache tier osd high memory consumption (`issue#20464 <http://tracker.ceph.com/issues/20464>`_, `pr#16011 <https://github.com/ceph/ceph/pull/16011>`_, Peng Xie)
+* core: osd/ReplicatedBackend: reset thread heartbeat after every omap entry … (`issue#20375 <http://tracker.ceph.com/issues/20375>`_, `pr#15823 <https://github.com/ceph/ceph/pull/15823>`_, Josh Durgin)
+* core: os/filestore: call committed_thru when no journal entries are replayed (`pr#15781 <https://github.com/ceph/ceph/pull/15781>`_, Kuan-Kai Chiu)
+* core: os/filestore: do not free event if not added (`pr#16235 <https://github.com/ceph/ceph/pull/16235>`_, Kefu Chai)
+* core: os/filestore: Exclude BTRFS on FreeBSD (`pr#16171 <https://github.com/ceph/ceph/pull/16171>`_, Willem Jan Withagen)
+* core: os/filestore/FileJournal: FileJournal::open() close journal file before return error (`issue#20504 <http://tracker.ceph.com/issues/20504>`_, `pr#16120 <https://github.com/ceph/ceph/pull/16120>`_, Yang Honggang)
+* core: os/filestore/FileStore.cc: remove a redundant judgement when get max latency (`pr#15961 <https://github.com/ceph/ceph/pull/15961>`_, Jianpeng Ma)
+* core: os/filestore: require experimental flag for btrfs (`pr#16086 <https://github.com/ceph/ceph/pull/16086>`_, Sage Weil)
+* core,performance: os/filestore/HashIndex: randomize split threshold by a configurable amount (`issue#15835 <http://tracker.ceph.com/issues/15835>`_, `pr#15689 <https://github.com/ceph/ceph/pull/15689>`_, Josh Durgin)
+* core,performance: os/filestore: queue ondisk completion before apply work (`pr#13918 <https://github.com/ceph/ceph/pull/13918>`_, Pan Liu)
+* core,performance: src/OSD: add more useful perf counters for performance tuning (`pr#15915 <https://github.com/ceph/ceph/pull/15915>`_, Pan Liu)
+* core,rbd: mon,osd: do not create rbd pool by default (`pr#15894 <https://github.com/ceph/ceph/pull/15894>`_, Greg Farnum, Sage Weil, David Zafman)
+* core: src/vstart.sh: kill dead upmap option (`pr#15848 <https://github.com/ceph/ceph/pull/15848>`_, xie xingguo)
+* core: "Stringify needs access to << before reference" src/include/stringify.h (`pr#16334 <https://github.com/ceph/ceph/pull/16334>`_, Willem Jan Withagen)
+* core,tests: do all valgrind runs on centos (`issue#20360 <http://tracker.ceph.com/issues/20360>`_, `issue#18126 <http://tracker.ceph.com/issues/18126>`_, `pr#16046 <https://github.com/ceph/ceph/pull/16046>`_, Sage Weil)
+* core,tests: qa/objectstore/filestore-btrfs: test btrfs on trusty only (`issue#20169 <http://tracker.ceph.com/issues/20169>`_, `pr#15814 <https://github.com/ceph/ceph/pull/15814>`_, Sage Weil)
+* core,tests: qa: stop testing btrfs (`issue#20169 <http://tracker.ceph.com/issues/20169>`_, `pr#16044 <https://github.com/ceph/ceph/pull/16044>`_, Sage Weil)
+* core,tests: qa/suites/powercycle/osd/tasks/radosbench: consume less space (`issue#20302 <http://tracker.ceph.com/issues/20302>`_, `pr#15821 <https://github.com/ceph/ceph/pull/15821>`_, Sage Weil)
+* core,tests: qa/suites/rados/singleton/all/reg11184: whitelist health warnings (`pr#16306 <https://github.com/ceph/ceph/pull/16306>`_, Sage Weil)
+* core,tests: qa/suites/rados/thrash/workload/\*: enable rados.py cache tiering ops (`issue#11793 <http://tracker.ceph.com/issues/11793>`_, `pr#16244 <https://github.com/ceph/ceph/pull/16244>`_, Sage Weil)
+* core,tests: qa/tasks/ceph_manager: wait longer for pg stats to flush (`pr#16322 <https://github.com/ceph/ceph/pull/16322>`_, Sage Weil)
+* core,tests: qa/tasks/ceph.py: no osd id to 'osd create' command (`issue#20548 <http://tracker.ceph.com/issues/20548>`_, `pr#16233 <https://github.com/ceph/ceph/pull/16233>`_, Sage Weil)
+* core,tests: qa/tasks/ceph: simplify ceph deployment slightly (`pr#15853 <https://github.com/ceph/ceph/pull/15853>`_, Sage Weil)
+* core,tests: qa/tasks/dump_stuck: fix for active+clean+remapped (`issue#20431 <http://tracker.ceph.com/issues/20431>`_, `pr#15955 <https://github.com/ceph/ceph/pull/15955>`_, Sage Weil)
+* core,tests: qa/tasks/radosbench: longer timeout (`pr#16213 <https://github.com/ceph/ceph/pull/16213>`_, Sage Weil)
+* crush: silence warning from -Woverflow (`pr#16329 <https://github.com/ceph/ceph/pull/16329>`_, Jos Collin)
+* doc: dev: add notes on PR make check validation test (`pr#16079 <https://github.com/ceph/ceph/pull/16079>`_, Nathan Cutler)
+* doc: doc/mgr/dashboard: update dashboard docs to reflect new defaults (`pr#16241 <https://github.com/ceph/ceph/pull/16241>`_, Sage Weil)
+* doc: doc/rados.8: add offset option for put command (`pr#16155 <https://github.com/ceph/ceph/pull/16155>`_, Jianpeng Ma)
+* doc: doc/release-notes: add Images creation timestamp note (`pr#15963 <https://github.com/ceph/ceph/pull/15963>`_, clove)
+* doc: doc/release-notes: fix ceph-deploy command (`pr#15987 <https://github.com/ceph/ceph/pull/15987>`_, Sage Weil)
+* doc: doc/release-notes: Luminous release notes typo fixes "ceph config-key ls"->"ceph config-key list" (`pr#16330 <https://github.com/ceph/ceph/pull/16330>`_, scienceluo)
+* doc: doc/release-notes: Luminous release notes typo fixes (`pr#16338 <https://github.com/ceph/ceph/pull/16338>`_, Luo Kexue)
+* doc: doc/release-notes: update luminous notes (`pr#15851 <https://github.com/ceph/ceph/pull/15851>`_, Sage Weil)
+* doc: doc/releases: Update releases from Feb 2017 to July 2017 (`pr#16303 <https://github.com/ceph/ceph/pull/16303>`_, Bryan Stillwell)
+* doc: docs: mgr dashboard (`pr#15920 <https://github.com/ceph/ceph/pull/15920>`_, Wido den Hollander)
+* doc: fix link for ceph-mgr cephx authorization (`pr#16246 <https://github.com/ceph/ceph/pull/16246>`_, Greg Farnum)
+* doc: Jewel v10.2.8 release notes (`pr#16274 <https://github.com/ceph/ceph/pull/16274>`_, Nathan Cutler)
+* doc: Jewel v10.2.9 release notes (`pr#16318 <https://github.com/ceph/ceph/pull/16318>`_, Nathan Cutler)
+* doc: kill sphinx warnings (`pr#16198 <https://github.com/ceph/ceph/pull/16198>`_, Kefu Chai)
+* doc: Luminous release notes typo fixes (`pr#15899 <https://github.com/ceph/ceph/pull/15899>`_, Abhishek Lekshmanan)
+* doc: mailmap: add Myoungwon Oh's mailmap and affiliation (`pr#15934 <https://github.com/ceph/ceph/pull/15934>`_, Myoungwon Oh)
+* doc: mailmap, organizationmap: add affiliation for Tushar Gohad (`pr#16081 <https://github.com/ceph/ceph/pull/16081>`_, Tushar Gohad)
+* doc: .mailmap, .organizationmap: Update Fan Yang information and affiliation (`pr#16067 <https://github.com/ceph/ceph/pull/16067>`_, Fan Yang)
+* doc: .mailmap, .organizationmap: Update Song Weibin information and affiliation (`pr#16311 <https://github.com/ceph/ceph/pull/16311>`_, songweibin)
+* doc: mgr/restful: bind to :: and update docs (`pr#16267 <https://github.com/ceph/ceph/pull/16267>`_, Sage Weil)
+* doc: update intro, quick start docs (`pr#16224 <https://github.com/ceph/ceph/pull/16224>`_, Sage Weil)
+* doc: v12.1.0 release notes notable changes addition again (`pr#15857 <https://github.com/ceph/ceph/pull/15857>`_, Abhishek Lekshmanan)
+* librados: add log channel to rados_monitor_log2 callback (`pr#15926 <https://github.com/ceph/ceph/pull/15926>`_, Sage Weil)
+* librados: redirect balanced reads to acting primary when targeting object isn't recovered (`issue#17968 <http://tracker.ceph.com/issues/17968>`_, `pr#15489 <https://github.com/ceph/ceph/pull/15489>`_, Xuehan Xu)
+* librbd: fail IO request when exclusive lock cannot be obtained (`pr#15860 <https://github.com/ceph/ceph/pull/15860>`_, Jason Dillaman)
+* mgr: clean up daemon start process (`issue#20383 <http://tracker.ceph.com/issues/20383>`_, `pr#16020 <https://github.com/ceph/ceph/pull/16020>`_, John Spray)
+* mgr: clean up fsstatus module (`pr#15925 <https://github.com/ceph/ceph/pull/15925>`_, John Spray)
+* mgr: cluster log message on plugin load error (`pr#15927 <https://github.com/ceph/ceph/pull/15927>`_, John Spray)
+* mgr: dashboard improvements (`pr#16043 <https://github.com/ceph/ceph/pull/16043>`_, John Spray)
+* mgr: drop repeated log info. and unnecessary write permission (`pr#15896 <https://github.com/ceph/ceph/pull/15896>`_, Yan Jun)
+* mgr: enable ceph_send_command() to send pg command (`pr#15865 <https://github.com/ceph/ceph/pull/15865>`_, Kefu Chai)
+* mgr: increase debug level for ticks 0 -> 10 (`pr#16301 <https://github.com/ceph/ceph/pull/16301>`_, Dan Mick)
+* mgr: mgr/ClusterState: do not mangle PGMap outside of Incremental (`issue#20208 <http://tracker.ceph.com/issues/20208>`_, `pr#16262 <https://github.com/ceph/ceph/pull/16262>`_, Sage Weil)
+* mgr: mgr/dashboard: add OSD list view (`pr#16373 <https://github.com/ceph/ceph/pull/16373>`_, John Spray)
+* mgr: mon/mgr: add detail error information (`pr#16048 <https://github.com/ceph/ceph/pull/16048>`_, Yan Jun)
+* mgr,mon: mgr,mon: debug init and mgrdigest subscriptions (`issue#20633 <http://tracker.ceph.com/issues/20633>`_, `pr#16351 <https://github.com/ceph/ceph/pull/16351>`_, Sage Weil)
+* mgr: pybind/mgr/dashboard: bind to :: by default (`pr#16223 <https://github.com/ceph/ceph/pull/16223>`_, Sage Weil)
+* mgr,rbd: pybind/mgr/dashboard: initial block integration (`pr#15521 <https://github.com/ceph/ceph/pull/15521>`_, Jason Dillaman)
+* mgr: Zabbix monitoring module (`pr#16019 <https://github.com/ceph/ceph/pull/16019>`_, Wido den Hollander)
+* mon: add support public_bind_addr option (`pr#16189 <https://github.com/ceph/ceph/pull/16189>`_, Bassam Tabbara)
+* mon: a few more upmap (and other) fixes (`pr#16239 <https://github.com/ceph/ceph/pull/16239>`_, xie xingguo)
+* mon: clean up in ceph_mon.cc (`pr#14102 <https://github.com/ceph/ceph/pull/14102>`_, huanwen ren)
+* mon: collect mon metadata as part of the election (`issue#20434 <http://tracker.ceph.com/issues/20434>`_, `pr#16148 <https://github.com/ceph/ceph/pull/16148>`_, Sage Weil)
+* mon: debug session feature tracking (`issue#20475 <http://tracker.ceph.com/issues/20475>`_, `pr#16128 <https://github.com/ceph/ceph/pull/16128>`_, Sage Weil)
+* mon: Division by zero in PGMapDigest::dump_pool_stats_full() (`pr#15901 <https://github.com/ceph/ceph/pull/15901>`_, Jos Collin)
+* mon: do crushtool test with fork and timeout, but w/o exec of crushtool (`issue#19964 <http://tracker.ceph.com/issues/19964>`_, `pr#16025 <https://github.com/ceph/ceph/pull/16025>`_, Sage Weil)
+* mon: Filter `log last` output by severity and channel (`pr#15924 <https://github.com/ceph/ceph/pull/15924>`_, John Spray)
+* mon: fix hang on deprecated/removed 'pg set_\*full_ratio' commands (`issue#20600 <http://tracker.ceph.com/issues/20600>`_, `pr#16300 <https://github.com/ceph/ceph/pull/16300>`_, Sage Weil)
+* mon: fix kvstore type in mon compact command (`pr#15954 <https://github.com/ceph/ceph/pull/15954>`_, liuchang0812)
+* mon: Fix status output warning for mon_warn_osd_usage_min_max_delta (`issue#20544 <http://tracker.ceph.com/issues/20544>`_, `pr#16220 <https://github.com/ceph/ceph/pull/16220>`_, David Zafman)
+* mon: handle cases where store->get() may return error (`issue#19601 <http://tracker.ceph.com/issues/19601>`_, `pr#14678 <https://github.com/ceph/ceph/pull/14678>`_, Jos Collin)
+* mon: include device class in tree view; hide shadow hierarchy (`pr#16016 <https://github.com/ceph/ceph/pull/16016>`_, Sage Weil)
+* mon: maintain the "cluster" PerfCounters when using ceph-mgr (`issue#20562 <http://tracker.ceph.com/issues/20562>`_, `pr#16249 <https://github.com/ceph/ceph/pull/16249>`_, Greg Farnum)
+* mon: mon,crush: create crush rules using device classes for replicated and ec pools via cli (`pr#16027 <https://github.com/ceph/ceph/pull/16027>`_, Sage Weil)
+* mon: mon/MgrStatMonitor: do not crash on luminous dev version upgrades (`pr#16287 <https://github.com/ceph/ceph/pull/16287>`_, Sage Weil)
+* mon: mon/Monitor: recreate mon session if features changed (`issue#20433 <http://tracker.ceph.com/issues/20433>`_, `pr#16230 <https://github.com/ceph/ceph/pull/16230>`_, Joao Eduardo Luis)
+* mon: mon/OSDMonitor: a couple of upmap and other fixes (`pr#15917 <https://github.com/ceph/ceph/pull/15917>`_, xie xingguo)
+* mon: mon/OSDMonitor: guard 'osd crush set-device-class' (`pr#16217 <https://github.com/ceph/ceph/pull/16217>`_, Sage Weil)
+* mon: mon/OSDMonitor: "osd crush class rename" support (`pr#15875 <https://github.com/ceph/ceph/pull/15875>`_, xie xingguo)
+* mon: mon/OSDMonitor: two pool opts related fix (`pr#15968 <https://github.com/ceph/ceph/pull/15968>`_, xie xingguo)
+* mon: mon/PaxosService: use __func__ instead of hard code function name (`pr#15863 <https://github.com/ceph/ceph/pull/15863>`_, Yanhu Cao)
+* mon: revamp health check/warning system (`pr#15643 <https://github.com/ceph/ceph/pull/15643>`_, John Spray, Sage Weil)
+* mon: show the leader info on mon stat command (`pr#14178 <https://github.com/ceph/ceph/pull/14178>`_, song baisen)
+* mon: skip crush smoke test when running under valgrind (`issue#20602 <http://tracker.ceph.com/issues/20602>`_, `pr#16346 <https://github.com/ceph/ceph/pull/16346>`_, Sage Weil)
+* mon,tests: qa/suites: add test exercising workunits/mon/auth_caps.sh (`pr#15754 <https://github.com/ceph/ceph/pull/15754>`_, Kefu Chai)
+* msg: make listen backlog an option, increase from 128 to 512 (`issue#20330 <http://tracker.ceph.com/issues/20330>`_, `pr#15743 <https://github.com/ceph/ceph/pull/15743>`_, Haomai Wang)
+* msg: msg/async: increase worker reference with local listen table enabled backend (`issue#20390 <http://tracker.ceph.com/issues/20390>`_, `pr#15897 <https://github.com/ceph/ceph/pull/15897>`_, Haomai Wang)
+* msg: msg/async/rdma: Data path fixes (`pr#15903 <https://github.com/ceph/ceph/pull/15903>`_, Adir lev)
+* msg: msg/async/rdma: register buffer as continuous (`pr#15967 <https://github.com/ceph/ceph/pull/15967>`_, Adir Lev)
+* msg: msg/async/rdma: remove assert from ibv_dealloc_pd in ProtectionDomain (`pr#15832 <https://github.com/ceph/ceph/pull/15832>`_, DanielBar-On)
+* msg: msg/MOSDOpReply: fix missing trace decode (`pr#15999 <https://github.com/ceph/ceph/pull/15999>`_, Yan Jun)
+* msg: QueueStrategy::wait() joins all threads (`issue#20534 <http://tracker.ceph.com/issues/20534>`_, `pr#16194 <https://github.com/ceph/ceph/pull/16194>`_, Casey Bodley)
+* msg: Revert "msg/async: increase worker reference with local listen table enabled backend" (`issue#20603 <http://tracker.ceph.com/issues/20603>`_, `pr#16323 <https://github.com/ceph/ceph/pull/16323>`_, Haomai Wang)
+* osd: Check for and automatically repair object info soid during scrub (`issue#20471 <http://tracker.ceph.com/issues/20471>`_, `pr#16052 <https://github.com/ceph/ceph/pull/16052>`_, David Zafman)
+* osd: check queue_transaction return value (`pr#15873 <https://github.com/ceph/ceph/pull/15873>`_, zhanglei)
+* osd: clear_queued_recovery() in on_shutdown() (`issue#20432 <http://tracker.ceph.com/issues/20432>`_, `pr#16093 <https://github.com/ceph/ceph/pull/16093>`_, Kefu Chai)
+* osd: compact osd feature (`issue#19592 <http://tracker.ceph.com/issues/19592>`_, `pr#16045 <https://github.com/ceph/ceph/pull/16045>`_, liuchang0812)
+* osd: Corrupt objects stop snaptrim and mark pg snaptrim_error (`issue#13837 <http://tracker.ceph.com/issues/13837>`_, `pr#15635 <https://github.com/ceph/ceph/pull/15635>`_, David Zafman)
+* osd: dump the field name of object watchers  and cleanups (`pr#15946 <https://github.com/ceph/ceph/pull/15946>`_, Yan Jun)
+* osd: Execute crush_location_hook when configured in ceph.conf (`pr#15951 <https://github.com/ceph/ceph/pull/15951>`_, Wido den Hollander)
+* osd: On EIO from read recover the primary replica from another copy (`issue#18165 <http://tracker.ceph.com/issues/18165>`_, `pr#14760 <https://github.com/ceph/ceph/pull/14760>`_, David Zafman)
+* osd: osd does not use MPing Messages, do not include unused include (`pr#15833 <https://github.com/ceph/ceph/pull/15833>`_, linbing)
+* osd: Preserve OSDOp information for historic ops (`pr#15265 <https://github.com/ceph/ceph/pull/15265>`_, Guo-Fu Tseng)
+* osd: restart boot process if waiting for luminous mons (`issue#20631 <http://tracker.ceph.com/issues/20631>`_, `pr#16341 <https://github.com/ceph/ceph/pull/16341>`_, Sage Weil)
+* osd: unlock sdata_op_ordering_lock with sdata_lock hold to avoid miss… (`pr#15891 <https://github.com/ceph/ceph/pull/15891>`_, Ming Lin)
+* pybind: ceph.in: Check return value when connecting (`pr#16130 <https://github.com/ceph/ceph/pull/16130>`_, Douglas Fuller)
+* pybind: ceph-rest-api: Various REST API fixes (`pr#15910 <https://github.com/ceph/ceph/pull/15910>`_, Wido den Hollander)
+* pybind: pybind/mgr/dashboard: fix get kernel_version error (`pr#16094 <https://github.com/ceph/ceph/pull/16094>`_, Peng Zhang)
+* pybind: restore original API for backwards compatibility (`issue#20421 <http://tracker.ceph.com/issues/20421>`_, `pr#15932 <https://github.com/ceph/ceph/pull/15932>`_, Jason Dillaman)
+* rbd: do not attempt to load key if auth is disabled (`issue#19035 <http://tracker.ceph.com/issues/19035>`_, `pr#16024 <https://github.com/ceph/ceph/pull/16024>`_, Jason Dillaman)
+* rbd-mirror: ignore permission errors on rbd_mirroring object (`issue#20571 <http://tracker.ceph.com/issues/20571>`_, `pr#16264 <https://github.com/ceph/ceph/pull/16264>`_, Jason Dillaman)
+* rbd,tests: qa/suites/rbd: restrict python memcheck validation to CentOS (`pr#15923 <https://github.com/ceph/ceph/pull/15923>`_, Jason Dillaman)
+* rbd,tests: qa/tasks: rbd-mirror daemon not properly run in foreground mode (`issue#20630 <http://tracker.ceph.com/issues/20630>`_, `pr#16340 <https://github.com/ceph/ceph/pull/16340>`_, Jason Dillaman)
+* rbd,tests: test: fix compile warning in ceph_test_cls_rbd (`pr#15919 <https://github.com/ceph/ceph/pull/15919>`_, Jason Dillaman)
+* rbd,tests: test: fix failing rbd devstack teuthology test (`pr#15956 <https://github.com/ceph/ceph/pull/15956>`_, Jason Dillaman)
+* rbd,tools: tools/rbd_mirror: initialize non-static class member m_do_resync in ImageReplayer (`pr#15889 <https://github.com/ceph/ceph/pull/15889>`_, Jos Collin)
+* rbd,tools: tools/rbd_nbd: add --version show support (`pr#16254 <https://github.com/ceph/ceph/pull/16254>`_, Jin Cai)
+* rgw: add a new error code for non-existed subuser (`pr#16095 <https://github.com/ceph/ceph/pull/16095>`_, Zhao Chao)
+* rgw: add a new error code for non-existed user (`issue#20468 <http://tracker.ceph.com/issues/20468>`_, `pr#16033 <https://github.com/ceph/ceph/pull/16033>`_, Zhao Chao)
+* rgw: add missing RGWPeriod::reflect() based on new atomic update_latest_epoch() (`issue#19816 <http://tracker.ceph.com/issues/19816>`_, `issue#19817 <http://tracker.ceph.com/issues/19817>`_, `pr#14915 <https://github.com/ceph/ceph/pull/14915>`_, Casey Bodley)
+* rgw: auto reshard old buckets (`pr#15665 <https://github.com/ceph/ceph/pull/15665>`_, Orit Wasserman)
+* rgw: cleanup rgw-admin duplicated judge during OLH GET/READLOG (`pr#15700 <https://github.com/ceph/ceph/pull/15700>`_, Jiaying Ren)
+* rgw: cls: ceph::timespan tag_timeout wrong units (`issue#20380 <http://tracker.ceph.com/issues/20380>`_, `pr#16026 <https://github.com/ceph/ceph/pull/16026>`_, Matt Benjamin)
+* rgw: Compress crash bug refactor (`issue#20098 <http://tracker.ceph.com/issues/20098>`_, `pr#15569 <https://github.com/ceph/ceph/pull/15569>`_, Adam Kupczyk)
+* rgw: Correcting the condition in ceph_assert while parsing an AWS Principal (`pr#15997 <https://github.com/ceph/ceph/pull/15997>`_, Pritha Srivastava)
+* rgw: Do not fetch bucket stats by default upon bucket listing (`issue#20377 <http://tracker.ceph.com/issues/20377>`_, `pr#15834 <https://github.com/ceph/ceph/pull/15834>`_, Pavan Rallabhandi)
+* rgw: drop unused function RGWRemoteDataLog::get_shard_info() (`pr#16236 <https://github.com/ceph/ceph/pull/16236>`_, Shasha Lu)
+* rgw: drop unused rgw_pool parameter, local variables and member variable (`pr#16154 <https://github.com/ceph/ceph/pull/16154>`_, Jiaying Ren)
+* rgw: external auth engines of S3 honor rgw_keystone_implicit_tenants (`issue#17779 <http://tracker.ceph.com/issues/17779>`_, `pr#15572 <https://github.com/ceph/ceph/pull/15572>`_, Radoslaw Zarzynski)
+* rgw: Fix a bug that multipart upload may exceed the quota (`issue#19602 <http://tracker.ceph.com/issues/19602>`_, `pr#12010 <https://github.com/ceph/ceph/pull/12010>`_, Zhang Shaowen)
+* rgw: Fix duplicate tag removal during GC (`issue#20107 <http://tracker.ceph.com/issues/20107>`_, `pr#15912 <https://github.com/ceph/ceph/pull/15912>`_, Jens Rosenboom)
+* rgw: fix error handling in get_params() of RGWPostObj_ObjStore_S3 (`pr#15670 <https://github.com/ceph/ceph/pull/15670>`_, Radoslaw Zarzynski)
+* rgw: fix error handling in the link() method of RGWBucket (`issue#20279 <http://tracker.ceph.com/issues/20279>`_, `pr#15669 <https://github.com/ceph/ceph/pull/15669>`_, Radoslaw Zarzynski)
+* rgw: fixes for AWSBrowserUploadAbstractor auth (`issue#20372 <http://tracker.ceph.com/issues/20372>`_, `pr#15882 <https://github.com/ceph/ceph/pull/15882>`_, Radoslaw Zarzynski, Casey Bodley)
+* rgw: fix infinite loop in rest api for log list (`issue#20386 <http://tracker.ceph.com/issues/20386>`_, `pr#15983 <https://github.com/ceph/ceph/pull/15983>`_, xierui, Casey Bodley)
+* rgw: fix leaks with incomplete multiparts (`issue#17164 <http://tracker.ceph.com/issues/17164>`_, `pr#15630 <https://github.com/ceph/ceph/pull/15630>`_, Abhishek Varshney)
+* rgw: fix marker encoding problem (`issue#20463 <http://tracker.ceph.com/issues/20463>`_, `pr#15998 <https://github.com/ceph/ceph/pull/15998>`_, Marcus Watts)
+* rgw: fix memory leak in copy_obj_to_remote_dest (`pr#9974 <https://github.com/ceph/ceph/pull/9974>`_, weiqiaomiao)
+* rgw: fix not initialized vars which cause rgw crash with ec data pool (`issue#20542 <http://tracker.ceph.com/issues/20542>`_, `pr#16177 <https://github.com/ceph/ceph/pull/16177>`_, Aleksei Gutikov)
+* rgw: fix potential null pointer dereference in rgw_admin (`pr#15667 <https://github.com/ceph/ceph/pull/15667>`_, Radoslaw Zarzynski)
+* rgw: fix radosgw-admin data sync run crash (`issue#20423 <http://tracker.ceph.com/issues/20423>`_, `pr#15938 <https://github.com/ceph/ceph/pull/15938>`_, Shasha Lu)
+* rgw: fix s3 object uploads with chunked transfers and v4 signatures (`issue#20447 <http://tracker.ceph.com/issues/20447>`_, `pr#15965 <https://github.com/ceph/ceph/pull/15965>`_, Marcus Watts)
+* rgw: fix wrong error code for expired Swift TempURL's links (`issue#20384 <http://tracker.ceph.com/issues/20384>`_, `pr#15850 <https://github.com/ceph/ceph/pull/15850>`_, Radoslaw Zarzynski)
+* rgw: fix zone didn't update realm_id when added to zonegroup (`issue#17995 <http://tracker.ceph.com/issues/17995>`_, `pr#12139 <https://github.com/ceph/ceph/pull/12139>`_, Tianshan Qu)
+* rgw: implement get/put object tags for S3 (`pr#13753 <https://github.com/ceph/ceph/pull/13753>`_, Abhishek Lekshmanan)
+* rgw: /info claims we do support Swift's accounts ACLs (`issue#20394 <http://tracker.ceph.com/issues/20394>`_, `pr#15887 <https://github.com/ceph/ceph/pull/15887>`_, Radoslaw Zarzynski)
+* rgw: initialize non-static class members in ESQueryCompiler (`pr#15884 <https://github.com/ceph/ceph/pull/15884>`_, Jos Collin)
+* rgw: initialize Non-static class member val in ESQueryNodeLeafVal_Int (`pr#15888 <https://github.com/ceph/ceph/pull/15888>`_, Jos Collin)
+* rgw: initialize Non-static class member worker in RGWReshard (`pr#15886 <https://github.com/ceph/ceph/pull/15886>`_, Jos Collin)
+* rgw: Initialize pointer fields (`pr#16021 <https://github.com/ceph/ceph/pull/16021>`_, Jos Collin)
+* rgw: lease_stack: use reset method instead of assignment (`pr#16185 <https://github.com/ceph/ceph/pull/16185>`_, Nathan Cutler)
+* rgw: lock is not released when set sync marker is failed (`issue#18077 <http://tracker.ceph.com/issues/18077>`_, `pr#12197 <https://github.com/ceph/ceph/pull/12197>`_, Zhang Shaowen)
+* rgw: log_meta only for more than one zone (`issue#20357 <http://tracker.ceph.com/issues/20357>`_, `pr#15777 <https://github.com/ceph/ceph/pull/15777>`_, Orit Wasserman, Leo Zhang)
+* rgw: multipart copy-part remove '/' for s3 java sdk request header (`issue#20075 <http://tracker.ceph.com/issues/20075>`_, `pr#15283 <https://github.com/ceph/ceph/pull/15283>`_, root)
+* rgw:multisite: fix RGWRadosRemoveOmapKeysCR and change cn to intrusive_ptr (`issue#20539 <http://tracker.ceph.com/issues/20539>`_, `pr#16197 <https://github.com/ceph/ceph/pull/16197>`_, Shasha Lu)
+* rgw: omit X-Account-Access-Control if there is no grant to serialize (`issue#20395 <http://tracker.ceph.com/issues/20395>`_, `pr#15883 <https://github.com/ceph/ceph/pull/15883>`_, Radoslaw Zarzynski)
+* rgw: radosgw: fix compilation with cryptopp (`pr#15960 <https://github.com/ceph/ceph/pull/15960>`_, Adam Kupczyk)
+* rgw: reject request if decoded URI contains \0 in the middle (`issue#20418 <http://tracker.ceph.com/issues/20418>`_, `pr#15953 <https://github.com/ceph/ceph/pull/15953>`_, Radoslaw Zarzynski)
+* rgw: remove a redundant judgement in rgw_rados.cc:delete_obj (`pr#11124 <https://github.com/ceph/ceph/pull/11124>`_, Zhang Shaowen)
+* rgw: remove the useless output when listing zonegroups (`pr#16331 <https://github.com/ceph/ceph/pull/16331>`_, Zhang Shaowen)
+* rgw: Replace get_zonegroup().is_master_zonegroup() with is_meta_master() in RGWBulkDelete::Deleter::delete_single() (`pr#16062 <https://github.com/ceph/ceph/pull/16062>`_, Fan Yang)
+* rgw: rgw_file: add compression interop to RGW NFS (`issue#20462 <http://tracker.ceph.com/issues/20462>`_, `pr#15989 <https://github.com/ceph/ceph/pull/15989>`_, Matt Benjamin)
+* rgw: rgw_file:  add service map registration (`pr#16251 <https://github.com/ceph/ceph/pull/16251>`_, Matt Benjamin)
+* rgw: rgw_file: avoid a recursive lane lock in LRU drain (`issue#20374 <http://tracker.ceph.com/issues/20374>`_, `pr#15819 <https://github.com/ceph/ceph/pull/15819>`_, Matt Benjamin)
+* rgw: rgw_file: fix misuse of make_key_name before make_fhk (`pr#15108 <https://github.com/ceph/ceph/pull/15108>`_, Gui Hecheng)
+* rgw: rgw_file skip policy read for virtual components (`pr#16034 <https://github.com/ceph/ceph/pull/16034>`_, Gui Hecheng)
+* rgw: rgw:fix s3 aws v2 signature priority between header['X-Amz-Date'] and header['Date'] (`issue#20176 <http://tracker.ceph.com/issues/20176>`_, `pr#15467 <https://github.com/ceph/ceph/pull/15467>`_, yuliyang)
+* rgw:  rgw: fix the subdir without slash of s3 website url (`issue#20307 <http://tracker.ceph.com/issues/20307>`_, `pr#15703 <https://github.com/ceph/ceph/pull/15703>`_, liuhong)
+* rgw: rgw/rgw_frontend.h: Return negative value for empty uid in RGWLoadGenFrontend::init() (`pr#16204 <https://github.com/ceph/ceph/pull/16204>`_, jimifm)
+* rgw: rgw/rgw_op: fix whitespace and indentation warning (`pr#15928 <https://github.com/ceph/ceph/pull/15928>`_, Sage Weil)
+* rgw: rgw/rgw_rados: Remove duplicate calls in RGWRados::finalize() (`pr#15281 <https://github.com/ceph/ceph/pull/15281>`_, jimifm)
+* rgw: rgw,test: fix rgw placement rule pool config option (`pr#16084 <https://github.com/ceph/ceph/pull/16084>`_, Jiaying Ren)
+* rgw: S3 lifecycle now supports expiration date (`pr#15807 <https://github.com/ceph/ceph/pull/15807>`_, Zhang Shaowen)
+* rgw: silence compile warning from -Wmaybe-uninitialized (`pr#15996 <https://github.com/ceph/ceph/pull/15996>`_, Jiaying Ren)
+* rgw: silence warning from -Wmaybe-uninitialized (`pr#15949 <https://github.com/ceph/ceph/pull/15949>`_, Jos Collin)
+* rgw,tests: qa/tasks: S3A hadoop task to test s3a with Ceph (`pr#14624 <https://github.com/ceph/ceph/pull/14624>`_, Vasu Kulkarni)
+* rgw,tests: vstart: remove rgw_enable_static_website (`pr#15856 <https://github.com/ceph/ceph/pull/15856>`_, Casey Bodley)
+* rgw: Uninitialized member in LCRule (`pr#15827 <https://github.com/ceph/ceph/pull/15827>`_, Jos Collin)
+* rgw: use 64-bit offsets for compression (`issue#20231 <http://tracker.ceph.com/issues/20231>`_, `pr#15656 <https://github.com/ceph/ceph/pull/15656>`_, Adam Kupczyk, fang yuxiang)
+* rgw: use uncompressed size for range_to_ofs() in slo/dlo (`pr#15931 <https://github.com/ceph/ceph/pull/15931>`_, Casey Bodley)
+* rgw: using RGW_OBJ_NS_MULTIPART in check_bad_index_multipart (`pr#15774 <https://github.com/ceph/ceph/pull/15774>`_, Shasha Lu)
+* rgw: verify md5 in post obj (`issue#19739 <http://tracker.ceph.com/issues/19739>`_, `pr#14961 <https://github.com/ceph/ceph/pull/14961>`_, Yehuda Sadeh)
+* rgw: Wip rgw fix prefix list (`issue#19432 <http://tracker.ceph.com/issues/19432>`_, `pr#15916 <https://github.com/ceph/ceph/pull/15916>`_, Giovani Rinaldi, Orit Wasserman)
+* tests: ceph-disk: use communicate() instead of wait() for output (`pr#16347 <https://github.com/ceph/ceph/pull/16347>`_, Kefu Chai)
+* tests: cls_lock: move lock_info_t definition to cls_lock_types.h (`pr#16091 <https://github.com/ceph/ceph/pull/16091>`_, runsisi)
+* tests: fix rados/upgrade/jewel-x-singleton and make workunit task handle repo URLs not ending in ".git" (`issue#20554 <http://tracker.ceph.com/issues/20554>`_, `issue#20368 <http://tracker.ceph.com/issues/20368>`_, `pr#16228 <https://github.com/ceph/ceph/pull/16228>`_, Nathan Cutler, Sage Weil)
+* tests: mgr,os,test: kill clang analyzer warnings (`pr#16227 <https://github.com/ceph/ceph/pull/16227>`_, Kefu Chai)
+* tests: move swift.py task from teuthology to ceph, phase one (master) (`issue#20392 <http://tracker.ceph.com/issues/20392>`_, `pr#15859 <https://github.com/ceph/ceph/pull/15859>`_, Nathan Cutler, Sage Weil, Warren Usui, Greg Farnum, Ali Maredia, Tommi Virtanen, Zack Cerza, Sam Lang, Yehuda Sadeh, Joe Buck, Josh Durgin)
+* tests: [qa/ceph-deploy]:  run create mgr nodes as well (`pr#16216 <https://github.com/ceph/ceph/pull/16216>`_, Vasu Kulkarni)
+* tests: qa: do not restrict valgrind runs to centos (`issue#18126 <http://tracker.ceph.com/issues/18126>`_, `pr#15893 <https://github.com/ceph/ceph/pull/15893>`_, Greg Farnum)
+* tests: qa/suites/rados/singleton/all/mon-auth-caps: more osds so we can go clean (`pr#16225 <https://github.com/ceph/ceph/pull/16225>`_, Sage Weil)
+* tests: qa/suites/upgrade/hammer-jewel-x: add luminous.yaml (`issue#20342 <http://tracker.ceph.com/issues/20342>`_, `pr#15764 <https://github.com/ceph/ceph/pull/15764>`_, Kefu Chai)
+* tests: qa/tasks/ceph: don't hard-code cluster name when copying fsid (`pr#16212 <https://github.com/ceph/ceph/pull/16212>`_, Jason Dillaman)
+* tests: qa/tasks/ceph: should be "Waiting for all PGs", not "all osds" (`pr#16122 <https://github.com/ceph/ceph/pull/16122>`_, Kefu Chai)
+* tests: qa/tasks/radosbench: increase timeout (`pr#15885 <https://github.com/ceph/ceph/pull/15885>`_, Sage Weil)
+* tests: qa/workunits/ceph-helpers: enable experimental features for osd (`pr#16319 <https://github.com/ceph/ceph/pull/16319>`_, Kefu Chai)
+* tests: qa/workunits/ceph-helpers: test wait_for_health_ok differently (`pr#16317 <https://github.com/ceph/ceph/pull/16317>`_, Kefu Chai)
+* tests: rgw.py: put client roles in a separate list (`issue#20417 <http://tracker.ceph.com/issues/20417>`_, `pr#15913 <https://github.com/ceph/ceph/pull/15913>`_, Nathan Cutler)
+* tests: rgw/singleton: drop duplicate filestore-xfs.yaml (`pr#15959 <https://github.com/ceph/ceph/pull/15959>`_, Nathan Cutler)
+* tests: test: Division by zero in Legacy::encode_n() (`pr#15902 <https://github.com/ceph/ceph/pull/15902>`_, Jos Collin)
+* tests: test/fio: print all perfcounters rather than objectstore itself (`pr#16339 <https://github.com/ceph/ceph/pull/16339>`_, Jianpeng Ma)
+* tests: test/fio: remove experimental option for bluestore & rocksdb (`pr#16263 <https://github.com/ceph/ceph/pull/16263>`_, Pan Liu)
+* tests: test: Fix reg11184 test to remove extraneous pg (`pr#16265 <https://github.com/ceph/ceph/pull/16265>`_, David Zafman)
+* tests: test/msgr: fixed the hang issue for perf_msg_client (`pr#16358 <https://github.com/ceph/ceph/pull/16358>`_, Pan Liu)
+* tests: test/osd/osd-scrub-repair.sh: disable ec_overwrite tests on FreeBSD (`pr#15445 <https://github.com/ceph/ceph/pull/15445>`_, Willem Jan Withagen)
+* tests: test/osd/osd-scrub-repair.sh: Fix diff options on FreeBSD (`pr#15914 <https://github.com/ceph/ceph/pull/15914>`_, Willem Jan Withagen)
+* tests,tools: test, ceph-osdomap-tool: kill clang warnings (`pr#15905 <https://github.com/ceph/ceph/pull/15905>`_, Kefu Chai)
+* tools: ceph-conf: fix typo in usage: 'mon add' should be 'mon addr' (`pr#15935 <https://github.com/ceph/ceph/pull/15935>`_, Peng Zhang)
+* tools: ceph-create-keys: add an argument to override default 10-minute timeout (`pr#16049 <https://github.com/ceph/ceph/pull/16049>`_, Douglas Fuller)
+* tools: ceph.in: filter out audit from ceph -w (`pr#16345 <https://github.com/ceph/ceph/pull/16345>`_, John Spray)
+* tools: ceph-release-notes: escape asterisks not for inline emphasis (`pr#16199 <https://github.com/ceph/ceph/pull/16199>`_, Kefu Chai)
+* tools: ceph-release-notes: handle an edge case (`pr#16277 <https://github.com/ceph/ceph/pull/16277>`_, Nathan Cutler)
+* tools: Cleanup dead code in ceph-objectstore-tool (`pr#15812 <https://github.com/ceph/ceph/pull/15812>`_, David Zafman)
+* tools: libradosstriper: fix MultiAioCompletion leaks on failure (`pr#15471 <https://github.com/ceph/ceph/pull/15471>`_, Kefu Chai)
+* tools: tools/rados: some cleanups (`pr#16147 <https://github.com/ceph/ceph/pull/16147>`_, Yan Jun)
+* tools: vstart.sh: bind restful, dashboard to ::, not 127.0.0.1 (`pr#16349 <https://github.com/ceph/ceph/pull/16349>`_, Sage Weil)
+
+
+v12.1.0 Luminous (RC)
+=====================
+
+This is the first release candidate for Luminous, the next long term
+stable release.
+
+
+
+
 Notable Changes since Kraken
 ----------------------------
 
index f86d7567431c0d293d392b3770cc6e9f6472728b..50b7f307f6ef2828e9cca7474913ed1bfeb227c2 100644 (file)
@@ -101,6 +101,7 @@ configuration details, perform the following steps using ``ceph-deploy``.
    - ``ceph.bootstrap-osd.keyring``
    - ``ceph.bootstrap-mds.keyring``
    - ``ceph.bootstrap-rgw.keyring``
+   - ``ceph.bootstrap-rbd.keyring``
 
 .. note:: If this process fails with a message similar to "Unable to
    find /etc/ceph/ceph.client.admin.keyring", please ensure that the
index debc0fc206f874b04e5355f0739063bb20b3539e..5534fa9e572e934e05f5aa0ff331a0d693762e62 100644 (file)
@@ -89,7 +89,7 @@ See `block devices`_ for additional details.
 
 .. _Storage Cluster Quick Start: ../quick-ceph-deploy
 .. _create a pool: ../../rados/operations/pools/#create-a-pool
-.. _block devices: ../../rbd/rbd
+.. _block devices: ../../rbd
 .. _FAQ: http://wiki.ceph.com/How_Can_I_Give_Ceph_a_Try
 .. _OS Recommendations: ../os-recommendations
 .. _rbdmap manpage: ../../man/8/rbdmap
diff --git a/ceph/qa/cephfs/overrides/whitelist_health.yaml b/ceph/qa/cephfs/overrides/whitelist_health.yaml
new file mode 100644 (file)
index 0000000..ddd8eab
--- /dev/null
@@ -0,0 +1,9 @@
+overrides:
+  ceph:
+    log-whitelist:
+      - overall HEALTH_
+      - \(FS_DEGRADED\)
+      - \(MDS_FAILED\)
+      - \(MDS_DEGRADED\)
+      - \(FS_WITH_FAILED_MDS\)
+      - \(MDS_DAMAGE\)
index a41bd34981cd1d3902e1b8a26ccfa3eef0de0e85..9e090d7ded072a64bf08420c0c4cefa3c255c9a1 100644 (file)
@@ -2,8 +2,8 @@ overrides:
   ceph:
     log-whitelist:
       - overall HEALTH_
-      - (OSD_DOWN)
-      - (OSD_
+      - \(OSD_DOWN\)
+      - \(OSD_
       - but it is still running
 # MDS daemon 'b' is not responding, replacing it as rank 0 with standby 'a'
       - is not responding
index 3b08e287d516584ec4dc30721025611576fa4968..5d5fcca9c950fd9b3adafbf8d3d1910e4317c2cd 100644 (file)
@@ -1,12 +1,12 @@
 roles:
-- [mon.a, mon.c, mgr.y, osd.0, osd.1, osd.2, client.0]
-- [mon.b, mgr.x, osd.3, osd.4, osd.5, client.1]
+- [mon.a, mon.c, mgr.y, osd.0, osd.1, osd.2, osd.3, client.0]
+- [mon.b, mgr.x, osd.4, osd.5, osd.6, osd.7, client.1]
 openstack:
 - volumes: # attached to each instance
-    count: 3
+    count: 4
     size: 10 # GB
 overrides:
   ceph:
     conf:
       osd:
-        osd shutdown pgref assert: true
\ No newline at end of file
+        osd shutdown pgref assert: true
index cd4f4e8ec2ec902419c43245da85a602698eab32..ddc79a84b60c64ec7bcfd8227fd6953d571e04e2 100644 (file)
@@ -1,13 +1,13 @@
 roles:
-- [mon.a, mon.c, mgr.x, osd.0, osd.1, osd.2]
-- [mon.b, mgr.y, osd.3, osd.4, osd.5]
+- [mon.a, mon.c, mgr.x, osd.0, osd.1, osd.2, osd.3]
+- [mon.b, mgr.y, osd.4, osd.5, osd.6, osd.7]
 - [client.0]
 openstack:
 - volumes: # attached to each instance
-    count: 3
+    count: 4
     size: 10 # GB
 overrides:
   ceph:
     conf:
       osd:
-        osd shutdown pgref assert: true
\ No newline at end of file
+        osd shutdown pgref assert: true
diff --git a/ceph/qa/erasure-code/ec-rados-plugin=jerasure-k=4-m=2.yaml b/ceph/qa/erasure-code/ec-rados-plugin=jerasure-k=4-m=2.yaml
new file mode 100644 (file)
index 0000000..dfcc616
--- /dev/null
@@ -0,0 +1,25 @@
+tasks:
+- rados:
+    clients: [client.0]
+    ops: 4000
+    objects: 50
+    ec_pool: true
+    write_append_excl: false
+    erasure_code_profile:
+      name: jerasure21profile
+      plugin: jerasure
+      k: 4
+      m: 2
+      technique: reed_sol_van
+      crush-failure-domain: osd
+    op_weights:
+      read: 100
+      write: 0
+      append: 100
+      delete: 50
+      snap_create: 50
+      snap_remove: 50
+      rollback: 50
+      copy_from: 50
+      setattr: 25
+      rmattr: 25
index ee9c9092e6e5528a60e87d908c6691f09c352c62..24696fcd3193592954266db660e3291850c5340e 100755 (executable)
@@ -162,39 +162,21 @@ function TEST_mon_classes() {
     ceph osd crush tree --show-shadow | grep -q '~ccc' || return 1
     ceph osd crush rm-device-class 0 || return 1
     ceph osd tree | grep -q 'aaa' && return 1
-    ceph osd crush dump | grep -q '~aaa' && return 1
-    ceph osd crush tree --show-shadow | grep -q '~aaa' && return 1
-    ceph osd crush class ls | grep -q 'aaa' && return 1
+    ceph osd crush class ls | grep -q 'aaa' && return 1 # class 'aaa' should gone
     ceph osd crush rm-device-class 1 || return 1
     ceph osd tree | grep -q 'bbb' && return 1
-    ceph osd crush dump | grep -q '~bbb' && return 1
-    ceph osd crush tree --show-shadow | grep -q '~bbb' && return 1
-    ceph osd crush class ls | grep -q 'bbb' && return 1
+    ceph osd crush class ls | grep -q 'bbb' && return 1 # class 'bbb' should gone
     ceph osd crush rm-device-class 2 || return 1
     ceph osd tree | grep -q 'ccc' && return 1
-    ceph osd crush dump | grep -q '~ccc' && return 1
-    ceph osd crush tree --show-shadow | grep -q '~ccc' && return 1
-    ceph osd crush class ls | grep -q 'ccc' && return 1
+    ceph osd crush class ls | grep -q 'ccc' && return 1 # class 'ccc' should gone
     ceph osd crush set-device-class asdf all || return 1
     ceph osd tree | grep -q 'asdf' || return 1
     ceph osd crush dump | grep -q '~asdf' || return 1
     ceph osd crush tree --show-shadow | grep -q '~asdf' || return 1
+    ceph osd crush rule create-replicated asdf-rule default host asdf || return 1
     ceph osd crush rm-device-class all || return 1
     ceph osd tree | grep -q 'asdf' && return 1
-    ceph osd crush dump | grep -q '~asdf' && return 1
-    ceph osd crush tree --show-shadow | grep -q '~asdf' && return 1
-
-    # test 'class rm' automatically recycles shadow trees
-    ceph osd crush set-device-class asdf 0 1 2 || return 1
-    ceph osd tree | grep -q 'asdf' || return 1
-    ceph osd crush dump | grep -q '~asdf' || return 1
-    ceph osd crush tree --show-shadow | grep -q '~asdf' || return 1
-    ceph osd crush class ls | grep -q 'asdf' || return 1
-    ceph osd crush class rm asdf || return 1
-    ceph osd tree | grep -q 'asdf' && return 1
-    ceph osd crush dump | grep -q '~asdf' && return 1
-    ceph osd crush tree --show-shadow | grep -q '~asdf' && return 1
-    ceph osd crush class ls | grep -q 'asdf' && return 1
+    ceph osd crush class ls | grep -q 'asdf' || return 1 # still referenced by asdf-rule
 
     ceph osd crush set-device-class abc osd.2 || return 1
     ceph osd crush move osd.2 root=foo rack=foo-rack host=foo-host || return 1
@@ -211,35 +193,11 @@ function TEST_mon_classes() {
     ceph osd crush dump | grep -q 'foo-host~abc' || return 1
     ceph osd crush tree --show-shadow | grep -q 'foo-host~abc' || return 1
     ceph osd crush rm-device-class osd.2 || return 1
-    ceph osd crush dump | grep -q 'foo~abc' && return 1
-    ceph osd crush tree --show-shadow | grep -q 'foo~abc' && return 1
-    ceph osd crush dump | grep -q 'foo-rack~abc' && return 1
-    ceph osd crush tree --show-shadow | grep -q 'foo-rack~abc' && return 1
-    ceph osd crush dump | grep -q 'foo-host~abc' && return 1
-    ceph osd crush tree --show-shadow | grep -q 'foo-host~abc' && return 1
     # restore class, so we can continue to test create-replicated
     ceph osd crush set-device-class abc osd.2 || return 1
 
     ceph osd crush rule create-replicated foo-rule foo host abc || return 1
 
-    # test class_is_in_use
-    ceph osd crush set-device-class hdd osd.0 || return 1
-    ceph osd crush set-device-class ssd osd.1 || return 1
-    ceph osd crush rule create-replicated foo-hdd1 default host hdd || return 1
-    ceph osd crush rule create-replicated foo-hdd2 default host hdd || return 1
-    ceph osd crush rule create-replicated foo-ssd default host ssd || return 1
-    expect_failure $dir EBUSY ceph osd crush class rm hdd || return 1
-    expect_failure $dir EBUSY ceph osd crush class rm ssd || return 1
-    ceph osd crush rule rm foo-hdd1 || return 1
-    expect_failure $dir EBUSY ceph osd crush class rm hdd || return 1 # still referenced by foo-hdd2
-    ceph osd crush rule rm foo-hdd2 || return 1
-    ceph osd crush rule rm foo-ssd || return 1
-    ceph osd crush class rm hdd || return 1
-    ceph osd crush class rm ssd || return 1
-    expect_failure $dir EBUSY ceph osd crush class rm abc || return 1 # still referenced by foo-rule
-    ceph osd crush rule rm foo-rule || return 1
-    ceph osd crush class rm abc || return 1
-
     # test set-device-class implicitly change class
     ceph osd crush set-device-class hdd osd.0 || return 1
     expect_failure $dir EBUSY ceph osd crush set-device-class nvme osd.0 || return 1
index 3b8e5a31b50013ca8c4ebe24303773f34f7d9d0d..9f413d08089a15c55593c700fb846647db6e54a8 100755 (executable)
@@ -27,25 +27,6 @@ function run() {
     export CEPH_ARGS
     CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
     CEPH_ARGS+="--mon-host=$CEPH_MON "
-    #
-    # Disable device auto class feature for this testing,
-    # as it will automatically make root clones based on new class types
-    # and hence affect the down osd counting.
-    # E.g.,
-    #
-    # ID WEIGHT  TYPE NAME                                            UP/DOWN REWEIGHT PRIMARY-AFFINITY
-    # -4 3.00000 root default~hdd
-    # -3 3.00000     host gitbuilder-ceph-rpm-centos7-amd64-basic~hdd
-    #  0 1.00000         osd.0                                           down  1.00000          1.00000
-    #  1 1.00000         osd.1                                             up  1.00000          1.00000
-    #  2 1.00000         osd.2                                             up  1.00000          1.00000
-    # -1 3.00000 root default
-    # -2 3.00000     host gitbuilder-ceph-rpm-centos7-amd64-basic
-    #  0 1.00000         osd.0                                           down  1.00000          1.00000
-    #  1 1.00000         osd.1                                             up  1.00000          1.00000
-    #  2 1.00000         osd.2                                             up  1.00000          1.00000
-    #
-    CEPH_ARGS+="--osd-class-update-on-start=false "
 
     OLD_ARGS=$CEPH_ARGS
     CEPH_ARGS+="--osd-fast-fail-on-connection-refused=false "
diff --git a/ceph/qa/suites/big/rados-thrash/openstack.yaml b/ceph/qa/suites/big/rados-thrash/openstack.yaml
new file mode 100644 (file)
index 0000000..4d6edcd
--- /dev/null
@@ -0,0 +1,8 @@
+openstack:
+  - machine:
+      disk: 40 # GB
+      ram: 8000 # MB
+      cpus: 1
+    volumes: # attached to each instance
+      count: 3
+      size: 10 # GB
index b79ce8dc0d41e0d9492175bd0c6dd69e4544f800..c61c376aaa35c7a96f5e978c5ec3400aa1ba0629 100644 (file)
@@ -18,8 +18,8 @@ tasks:
     fs: xfs # this implicitly means /dev/vd? are used instead of directories
     wait-for-scrub: false
     log-whitelist:
-      - (OSD_
-      - (PG_
+      - \(OSD_
+      - \(PG_
     conf:
        global:
            mon pg warn min per osd: 2
diff --git a/ceph/qa/suites/fs/32bits/overrides/whitelist_health.yaml b/ceph/qa/suites/fs/32bits/overrides/whitelist_health.yaml
new file mode 120000 (symlink)
index 0000000..440e747
--- /dev/null
@@ -0,0 +1 @@
+../../../../cephfs/overrides/whitelist_health.yaml
\ No newline at end of file
deleted file mode 100644 (file)
index b5bf1fa7b5eff0adf0e074915e94938f14b87e42..0000000000000000000000000000000000000000
+++ /dev/null
@@ -1,9 +0,0 @@
-overrides:
-  ceph:
-    log-whitelist:
-      - overall HEALTH_
-      - (FS_DEGRADED)
-      - (MDS_FAILED)
-      - (MDS_DEGRADED)
-      - (FS_WITH_FAILED_MDS)
-      - (MDS_DAMAGE)
new file mode 120000 (symlink)
index 0000000000000000000000000000000000000000..440e747fbb7603b8f81aca25f392ab1c22e03b46
--- /dev/null
@@ -0,0 +1 @@
+../../../../cephfs/overrides/whitelist_health.yaml
\ No newline at end of file
index 629804ec752e3cc170e00de243b41b2f352905c0..90d0e7bcb04561f3137a552e619da8c93adfa8fa 100644 (file)
@@ -4,6 +4,7 @@ overrides:
       - force file system read-only
       - bad backtrace
       - MDS in read-only mode
+      - \(MDS_READ_ONLY\)
 
 
 tasks:
index 4483dbb6f8f9790141474bcab1caf54f9deb2e80..d22bc353c77dd325e5621c30ff12954808ecd699 100644 (file)
@@ -9,6 +9,7 @@ overrides:
       - failing to respond to cache pressure
       - slow requests are blocked
       - failing to respond to capability release
+      - \(MDS_CLIENT_OLDEST_TID\)
 
 tasks:
   - cephfs_test_runner:
index 324ff75dfe91558e030e79905e1e96edac27ee6b..66f819d0658d975803e844d25429858290015f0c 100644 (file)
@@ -2,7 +2,7 @@
 overrides:
   ceph:
     log-whitelist:
-      - bad backtrace on dir ino
+      - bad backtrace on directory inode
       - error reading table object
       - Metadata damage detected
       - slow requests are blocked
diff --git a/ceph/qa/suites/fs/basic_workload/overrides/whitelist_health.yaml b/ceph/qa/suites/fs/basic_workload/overrides/whitelist_health.yaml
new file mode 120000 (symlink)
index 0000000..440e747
--- /dev/null
@@ -0,0 +1 @@
+../../../../cephfs/overrides/whitelist_health.yaml
\ No newline at end of file
diff --git a/ceph/qa/suites/fs/multiclient/overrides/whitelist_health.yaml b/ceph/qa/suites/fs/multiclient/overrides/whitelist_health.yaml
new file mode 120000 (symlink)
index 0000000..440e747
--- /dev/null
@@ -0,0 +1 @@
+../../../../cephfs/overrides/whitelist_health.yaml
\ No newline at end of file
diff --git a/ceph/qa/suites/fs/multifs/overrides/whitelist_health.yaml b/ceph/qa/suites/fs/multifs/overrides/whitelist_health.yaml
new file mode 120000 (symlink)
index 0000000..440e747
--- /dev/null
@@ -0,0 +1 @@
+../../../../cephfs/overrides/whitelist_health.yaml
\ No newline at end of file
index 3358b664e1f3c6a3b86be8473f45b5c0d9353818..8833fd63b781513cb27d49dbc2f02bff605d5d00 100644 (file)
@@ -2,6 +2,7 @@ overrides:
   ceph:
     log-whitelist:
       - not responding, replacing
+      - \(MDS_INSUFFICIENT_STANDBY\)
   ceph-fuse:
     disabled: true
 tasks:
diff --git a/ceph/qa/suites/fs/permission/overrides/whitelist_health.yaml b/ceph/qa/suites/fs/permission/overrides/whitelist_health.yaml
new file mode 120000 (symlink)
index 0000000..440e747
--- /dev/null
@@ -0,0 +1 @@
+../../../../cephfs/overrides/whitelist_health.yaml
\ No newline at end of file
diff --git a/ceph/qa/suites/fs/snaps/overrides/whitelist_health.yaml b/ceph/qa/suites/fs/snaps/overrides/whitelist_health.yaml
new file mode 120000 (symlink)
index 0000000..440e747
--- /dev/null
@@ -0,0 +1 @@
+../../../../cephfs/overrides/whitelist_health.yaml
\ No newline at end of file
deleted file mode 100644 (file)
index fc8119daca809d59c421ebf1dbf62aef950d9045..0000000000000000000000000000000000000000
+++ /dev/null
@@ -1,8 +0,0 @@
-overrides:
-  ceph:
-    log-whitelist:
-      - overall HEALTH_
-      - (FS_DEGRADED)
-      - (MDS_FAILED)
-      - (MDS_DEGRADED)
-      - (FS_WITH_FAILED_MDS)
new file mode 120000 (symlink)
index 0000000000000000000000000000000000000000..440e747fbb7603b8f81aca25f392ab1c22e03b46
--- /dev/null
@@ -0,0 +1 @@
+../../../../cephfs/overrides/whitelist_health.yaml
\ No newline at end of file
diff --git a/ceph/qa/suites/fs/traceless/overrides/whitelist_health.yaml b/ceph/qa/suites/fs/traceless/overrides/whitelist_health.yaml
new file mode 120000 (symlink)
index 0000000..440e747
--- /dev/null
@@ -0,0 +1 @@
+../../../../cephfs/overrides/whitelist_health.yaml
\ No newline at end of file
diff --git a/ceph/qa/suites/fs/verify/overrides/whitelist_health.yaml b/ceph/qa/suites/fs/verify/overrides/whitelist_health.yaml
new file mode 120000 (symlink)
index 0000000..440e747
--- /dev/null
@@ -0,0 +1 @@
+../../../../cephfs/overrides/whitelist_health.yaml
\ No newline at end of file
index 324ff75dfe91558e030e79905e1e96edac27ee6b..66f819d0658d975803e844d25429858290015f0c 100644 (file)
@@ -2,7 +2,7 @@
 overrides:
   ceph:
     log-whitelist:
-      - bad backtrace on dir ino
+      - bad backtrace on directory inode
       - error reading table object
       - Metadata damage detected
       - slow requests are blocked
index c893a460434cc6b57d5c364aa2c99102a3c5a010..06f3f57388b9cb704cdfb12739d959c6efe6e617 100644 (file)
@@ -2,7 +2,7 @@ overrides:
   ceph:
     log-whitelist:
       - reached quota
-      - (POOL_APP_NOT_ENABLED)
+      - \(POOL_APP_NOT_ENABLED\)
 tasks:
 - ceph-fuse:
 - workunit:
diff --git a/ceph/qa/suites/powercycle/osd/thrashosds-health.yaml b/ceph/qa/suites/powercycle/osd/thrashosds-health.yaml
new file mode 120000 (symlink)
index 0000000..ebf7f34
--- /dev/null
@@ -0,0 +1 @@
+../../../tasks/thrashosds-health.yaml
\ No newline at end of file
index 1c333a0f329e9aa6625161449015b3d7c06cd374..74090830107cc9c56fb38cfb548cf6a7662ca3fb 100644 (file)
@@ -1,4 +1,4 @@
 overrides:
   ceph:
     log-whitelist:
-      - (MDS_TRIM)
+      - \(MDS_TRIM\)
index e4d84236abcef05f65b099f78cfe1d329d2a3350..e559d9126e86f68b9b2b4abffcf5de85775b3e6d 100644 (file)
@@ -1,4 +1,4 @@
 openstack:
   - volumes: # attached to each instance
-      count: 3
+      count: 4
       size: 10 # GB
index e633ae2d42b2c156d5af881bb61ea839e204e203..8c70304d07910e7c085fe0c5ab01bfacf59dd085 100644 (file)
@@ -3,10 +3,11 @@ overrides:
     log-whitelist:
     - but it is still running
     - overall HEALTH_
-    - (OSDMAP_FLAGS)
-    - (PG_
-    - (OSD_
-    - (OBJECT_
+    - \(OSDMAP_FLAGS\)
+    - \(PG_
+    - \(OSD_
+    - \(OBJECT_
+    - \(POOL_APP_NOT_ENABLED\)
 tasks:
 - workunit:
     clients:
index ded794c17412f68fc9a551a5f69f79854dc98177..bee513eb9e93d57c44d2f90e5502006247f5766a 100644 (file)
@@ -2,8 +2,8 @@ overrides:
   ceph:
     log-whitelist:
       - overall HEALTH_
-      - (CACHE_POOL_NO_HIT_SET)
-      - (TOO_FEW_PGS)
+      - \(CACHE_POOL_NO_HIT_SET\)
+      - \(TOO_FEW_PGS\)
 tasks:
 - workunit:
     clients:
index 545e50fedafb9ee3a20ecdd8109f73e729bb3686..2dade6dee79875538057cdf8d862d8c9e016f241 100644 (file)
@@ -3,7 +3,7 @@ overrides:
     log-whitelist:
     - but it is still running
     - overall HEALTH_
-    - (POOL_APP_NOT_ENABLED)
+    - \(POOL_APP_NOT_ENABLED\)
 tasks:
 - workunit:
     clients:
index 4e826b2b9ce885d1cfe0516256d2b492ef182279..6b764a875a00f6c532269b4c96591960a29e083a 100644 (file)
@@ -3,7 +3,7 @@ overrides:
     log-whitelist:
     - but it is still running
     - overall HEALTH_
-    - (POOL_APP_NOT_ENABLED)
+    - \(POOL_APP_NOT_ENABLED\)
 tasks:
 - workunit:
     clients:
index d89ac340b2cbb02d5753ed742b9d279134d40807..c82023c2a2e45047713d47a9618895867b6cd0a8 100644 (file)
@@ -3,7 +3,7 @@ overrides:
     log-whitelist:
     - but it is still running
     - overall HEALTH_
-    - (POOL_APP_NOT_ENABLED)
+    - \(POOL_APP_NOT_ENABLED\)
 tasks:
 - workunit:
     clients:
index fd5eb8515c90554f752e02d41f57b89730bee385..34be4715177a7f6bd00ba7522da486098f5edbe6 100644 (file)
@@ -7,8 +7,10 @@ tasks:
       wait-for-scrub: false
       log-whitelist:
         - overall HEALTH_
-        - (MGR_DOWN)
-        - (PG_
+        - \(MGR_DOWN\)
+        - \(PG_
+        - replacing it with standby
+        - No standby daemons available
   - cephfs_test_runner:
       modules:
         - tasks.mgr.test_failover
index a2c0efc7779b94ad961823d8bf31ed5e7a317356..9c08e93f3848377e04a7864e5d32be237ecfbe5f 100644 (file)
@@ -4,6 +4,11 @@ overrides:
       mon:
         mon min osdmap epochs: 25
         paxos service trim min: 5
+# thrashing monitors may make mgr have trouble w/ its keepalive
+    log-whitelist:
+      - daemon x is unresponsive
+      - overall HEALTH_
+      - \(MGR_DOWN\)
 tasks:
 - install:
 - ceph:
index 38570fcf615ef2ac9c7fd4751fedd55834f5cb27..2d1ba8824029cd791ee1f7bafd0a838b69af006c 100644 (file)
@@ -2,8 +2,8 @@ overrides:
   ceph:
     log-whitelist:
       - overall HEALTH_
-      - (MON_DOWN)
-      - (TOO_FEW_PGS)
+      - \(MON_DOWN\)
+      - \(TOO_FEW_PGS\)
 tasks:
 - mon_thrash:
     revive_delay: 90
index e940c42ad7435252fcbf88426f48efd44528ca0e..fa829b34b1b39a9586cc83ecc6208258b1e5ebf0 100644 (file)
@@ -2,7 +2,7 @@ overrides:
   ceph:
     log-whitelist:
       - overall HEALTH_
-      - (MON_DOWN)
+      - \(MON_DOWN\)
     conf:
       osd:
         mon client ping interval: 4
index 92c9eb3a808ff490d22e6526ad41834552527cbf..041cee0b3e4e754b20605f63cc602734ac1cf160 100644 (file)
@@ -2,7 +2,7 @@ overrides:
   ceph:
     log-whitelist:
       - overall HEALTH_
-      - (MON_DOWN)
+      - \(MON_DOWN\)
 tasks:
 - mon_thrash:
     revive_delay: 20
index 68020cd665143267b878db27a2cb451d524b507e..14f41f7fb2fb43eda21c047e47604f01fd0e139c 100644 (file)
@@ -2,7 +2,7 @@ overrides:
   ceph:
     log-whitelist:
       - overall HEALTH_
-      - (MON_DOWN)
+      - \(MON_DOWN\)
     conf:
       mon:
         paxos min: 10
index b07f8b511f65e803a7c82d38c1df3e616ddccb12..08b1522c7fa38ac577fc9693dac72cadf5c21505 100644 (file)
@@ -2,7 +2,7 @@ overrides:
   ceph:
     log-whitelist:
       - overall HEALTH_
-      - (MON_DOWN)
+      - \(MON_DOWN\)
     conf:
       mon:
         paxos min: 10
index abe8b6cb887ad0f0e5f0a4dd8fc306d92b20209e..c6b00b486aaefbfd7dbd77cc53b95535f3581fe5 100644 (file)
@@ -3,7 +3,7 @@ overrides:
     log-whitelist:
       - slow request
       - overall HEALTH_
-      - (POOL_APP_NOT_ENABLED)
+      - \(POOL_APP_NOT_ENABLED\)
 tasks:
 - exec:
     client.0:
index 00b9519075f6bb192263731c5429966852c43c2c..940d3a8e43677a61e914463edfc5eeaa73c313d4 100644 (file)
@@ -2,7 +2,7 @@ overrides:
   ceph:
      log-whitelist:
        - overall HEALTH_
-       - (POOL_APP_NOT_ENABLED)
+       - \(POOL_APP_NOT_ENABLED\)
 tasks:
 - exec:
     client.0:
index b2994181f6b56f70bf950979b2b1f51f406850f3..3b821bc0c72f4d13b179b1beab82de5d03efbc6d 100644 (file)
@@ -3,12 +3,14 @@ overrides:
     log-whitelist:
       - reached quota
       - overall HEALTH_
-      - (CACHE_POOL_NO_HIT_SET)
-      - (POOL_FULL)
-      - (REQUEST_SLOW)
-      - (MON_DOWN)
-      - (PG_
-      - (POOL_APP_NOT_ENABLED)
+      - \(CACHE_POOL_NO_HIT_SET\)
+      - \(CACHE_POOL_NEAR_FULL\)
+      - \(POOL_FULL\)
+      - \(REQUEST_SLOW\)
+      - \(MON_DOWN\)
+      - \(PG_
+      - \(POOL_APP_NOT_ENABLED\)
+      - \(SMALLER_PGP_NUM\)
     conf:
       global:
         debug objecter: 20
index fc5e7f25e66b0a3044915bdc3aaa0ac2cb9cfb20..b05eb38be358b23c2feed5b5d7bc907f299c568b 100644 (file)
@@ -3,8 +3,8 @@ overrides:
     log-whitelist:
     - but it is still running
     - overall HEALTH_
-    - (PG_
-    - (MON_DOWN)
+    - \(PG_
+    - \(MON_DOWN\)
 tasks:
 - workunit:
     clients:
index 4234bf73e6816b22ab34f88905ac313dfe9787c5..137f58dc77f4eea43dea4893d40ef614f1741685 100644 (file)
@@ -3,5 +3,5 @@ tasks:
 - ceph:
     log-whitelist:
       - overall HEALTH_
-      - (MON_DOWN)
+      - \(MON_DOWN\)
 - mon_recovery:
index 2001faa3fe81bfd009738fbeb0c4ce6fc754ba3a..f3163c96b555c0221f65c87ce09eb748550d01e5 100644 (file)
@@ -14,9 +14,10 @@ tasks:
         osd max object namespace len: 64
     log-whitelist:
       - overall HEALTH_
-      - (OSDMAP_FLAGS)
-      - (OSD_
-      - (PG_
-      - (TOO_FEW_PGS)
+      - \(OSDMAP_FLAGS\)
+      - \(OSD_
+      - \(PG_
+      - \(TOO_FEW_PGS\)
+      - \(POOL_APP_NOT_ENABLED\)
 - ceph_objectstore_tool:
     objects: 20
index e62dd068f11069c3f8f7d46c8866c8c01be7a8a2..90906d66eb2aef0da150b5049bcbfb3d99b7a6ab 100644 (file)
@@ -5,13 +5,18 @@ tasks:
 - ceph:
     log-whitelist:
       - overall HEALTH_
-      - (MGR_DOWN)
+      - \(MGR_DOWN\)
 - exec:
     mon.a:
       - ceph restful create-key admin
       - ceph restful create-self-signed-cert
-- ceph.restart: [mgr.x]
+      - ceph restful restart
 - workunit:
     clients:
       client.a:
         - rest/test-restful.sh
+- exec:
+    mon.a:
+      - ceph restful delete-key admin
+      - ceph restful list-keys | jq ".admin" | grep null
+
index ac64165aaaa6a6d47ebd7ab24968196fd6b917d8..58648031507d237a213dd8f125e9a3de99183888 100644 (file)
@@ -5,7 +5,7 @@ tasks:
 - ceph:
     log-whitelist:
       - overall HEALTH_
-      - (CACHE_POOL_NO_HIT_SET)
+      - \(CACHE_POOL_NO_HIT_SET\)
     conf:
       global:
         osd max object name len: 460
index e7fafe9ac48d716b97afbbf4c92046343a098f52..e766bdc5729c766d7776e4704ef13fc7bed3e955 100644 (file)
@@ -10,7 +10,7 @@ tasks:
 - ceph:
     log-whitelist:
       - overall HEALTH_
-      - (CACHE_POOL_NO_HIT_SET)
+      - \(CACHE_POOL_NO_HIT_SET\)
     conf:
       global:
         osd max object name len: 460
index b811199d1ea9499c6702fb1a6a9da327f0fb1fe0..b245866082f3b985e2d35c7d2171d3a377189650 100644 (file)
@@ -6,9 +6,10 @@ overrides:
     log-whitelist:
       - is full
       - overall HEALTH_
-      - (POOL_FULL)
-      - (POOL_NEAR_FULL)
-      - (CACHE_POOL_NO_HIT_SET)
+      - \(POOL_FULL\)
+      - \(POOL_NEAR_FULL\)
+      - \(CACHE_POOL_NO_HIT_SET\)
+      - \(CACHE_POOL_NEAR_FULL\)
 tasks:
 - install:
 - ceph:
index 00157dfaf567356eae3c2b375cca86f81f7ae65d..a28582fd610ec402cfbf7b14184609515d06a8a6 100644 (file)
@@ -11,9 +11,9 @@ tasks:
     log-whitelist:
       - but it is still running
       - overall HEALTH_
-      - (OSDMAP_FLAGS)
-      - (OSD_
-      - (PG_
+      - \(OSDMAP_FLAGS\)
+      - \(OSD_
+      - \(PG_
 - workunit:
     clients:
       all:
index 3a9dbde316451d9adcd5808cd7c0ca6945922b97..f480dbbf9c43400118c84db4c7f67a537115122b 100644 (file)
@@ -13,9 +13,9 @@ tasks:
 - ceph:
     log-whitelist:
       - overall HEALTH_
-      - (PG_
-      - (OSD_
-      - (OBJECT_
+      - \(PG_
+      - \(OSD_
+      - \(OBJECT_
     conf:
       osd:
         osd debug reject backfill probability: .3
@@ -40,3 +40,5 @@ tasks:
 - exec:
     client.0:
       - sudo ceph osd pool set foo size 2
+- sleep:
+    duration: 300
index 347c499528e5603e1afb32c19e69afa399e981eb..b0d5de305d842f6d03c0e6a2bb158be9eea99f8f 100644 (file)
@@ -9,7 +9,7 @@ overrides:
   ceph:
     log-whitelist:
       - overall HEALTH_
-      - (PG_
+      - \(PG_
     conf:
       global:
         osd heartbeat grace: 40
index f15fb889615875eb311046c0907f0d46270c0d20..604a9e452131fca7c195a6e365e19e2b285fb658 100644 (file)
@@ -14,10 +14,11 @@ overrides:
   ceph:
     log-whitelist:
       - overall HEALTH_
-      - (OSDMAP_FLAGS)
-      - (OSD_
-      - (PG_
-      - (OBJECT_DEGRADED)
+      - \(OSDMAP_FLAGS\)
+      - \(OSD_
+      - \(PG_
+      - \(OBJECT_DEGRADED\)
+      - \(POOL_APP_NOT_ENABLED\)
     conf:
       osd:
         debug osd: 5
index 90d8b1838b9860002798eb8eb723dff794e22d4c..e2f0245df70e8736a87e82fef07791a13070bf44 100644 (file)
@@ -14,10 +14,11 @@ overrides:
   ceph:
     log-whitelist:
       - overall HEALTH_
-      - (OSDMAP_FLAGS)
-      - (OSD_
-      - (PG_
-      - (OBJECT_DEGRADED)
+      - \(OSDMAP_FLAGS\)
+      - \(OSD_
+      - \(PG_
+      - \(OBJECT_DEGRADED\)
+      - \(POOL_APP_NOT_ENABLED\)
     conf:
       osd:
         debug osd: 5
index 67546062c600ab84a032812276674a58a5940ec6..59085ffaeeabf597f70f741c01827c12d58caa97 100644 (file)
@@ -13,7 +13,7 @@ tasks:
     log-whitelist:
       - but it is still running
       - overall HEALTH_
-      - (OSDMAP_FLAGS)
-      - (OSD_
-      - (PG_
+      - \(OSDMAP_FLAGS\)
+      - \(OSD_
+      - \(PG_
 - dump_stuck:
index e095fd0d58690f3eae9d6345a713899cf52340c4..68644c860eec32e1b8bb03d73de24d7f5f4ec588 100644 (file)
@@ -17,8 +17,8 @@ tasks:
     log-whitelist:
       - objects unfound and apparently lost
       - overall HEALTH_
-      - (OSDMAP_FLAGS)
-      - (OSD_
-      - (PG_
-      - (OBJECT_
+      - \(OSDMAP_FLAGS\)
+      - \(OSD_
+      - \(PG_
+      - \(OBJECT_
 - ec_lost_unfound:
index 5502b5c9b0ffc42db082f5b61c838ec0b38ce8cb..bcaef784f9b5106e4641580ddf49f7af44f106bf 100644 (file)
@@ -16,8 +16,8 @@ tasks:
     log-whitelist:
       - objects unfound and apparently lost
       - overall HEALTH_
-      - (OSDMAP_FLAGS)
-      - (OSD_
-      - (PG_
-      - (OBJECT_
+      - \(OSDMAP_FLAGS\)
+      - \(OSD_
+      - \(PG_
+      - \(OBJECT_
 - rep_lost_unfound_delete:
index bb0bb2c0afe4978e565346c779afd79fae4fd4e6..a4a309dee81185c2f4b299481185b1e606dc7f9e 100644 (file)
@@ -16,8 +16,8 @@ tasks:
     log-whitelist:
       - objects unfound and apparently lost
       - overall HEALTH_
-      - (OSDMAP_FLAGS)
-      - (OSD_
-      - (PG_
-      - (OBJECT_
+      - \(OSDMAP_FLAGS\)
+      - \(OSD_
+      - \(PG_
+      - \(OBJECT_
 - lost_unfound:
index 9b3a1f56e7cfe2c3d6132c3310bdf9b99fcc1c3a..5b374071502e67c538366578f66ed4f3fe931b9f 100644 (file)
@@ -16,10 +16,10 @@ tasks:
     log-whitelist:
       - but it is still running
       - overall HEALTH_
-      - (OSDMAP_FLAGS)
-      - (OSD_
-      - (PG_
-      - (OBJECT_
+      - \(OSDMAP_FLAGS\)
+      - \(OSD_
+      - \(PG_
+      - \(OBJECT_
     conf:
       osd:
         osd min pg log entries: 5
index a779f06affd170fa80d4b2fd01f3653ee9ecdf04..ed5b216b24981c228f33f94cd7484d5672be8ac4 100644 (file)
@@ -17,10 +17,10 @@ tasks:
     log-whitelist:
       - but it is still running
       - overall HEALTH_
-      - (OSDMAP_FLAGS)
-      - (OSD_
-      - (PG_
-      - (OBJECT_
+      - \(OSDMAP_FLAGS\)
+      - \(OSD_
+      - \(PG_
+      - \(OBJECT_
     conf:
       osd:
         osd min pg log entries: 5
index eb0f6c337309bddb314c5ee850d26e8a34b42d32..a887ac5e3e4a7dfab21e862a3631c52c6a97c8cd 100644 (file)
@@ -16,10 +16,10 @@ tasks:
     log-whitelist:
       - but it is still running
       - overall HEALTH_
-      - (OSDMAP_FLAGS)
-      - (OSD_
-      - (PG_
-      - (OBJECT_DEGRADED)
+      - \(OSDMAP_FLAGS\)
+      - \(OSD_
+      - \(PG_
+      - \(OBJECT_DEGRADED\)
     conf:
       osd:
         osd min pg log entries: 5
index e87cd543ce6c91fb30e30a0e54ee01699842b465..645034a4007007ef7919b5b10626d1af8b1e58b1 100644 (file)
@@ -19,7 +19,7 @@ tasks:
     log-whitelist:
       - objects unfound and apparently lost
       - overall HEALTH_
-      - (OSDMAP_FLAGS)
-      - (OSD_
-      - (PG_
+      - \(OSDMAP_FLAGS\)
+      - \(OSD_
+      - \(PG_
 - peer:
index bf4813a1fe2ea2a5a5b5a4bf3e53ec1383975848..182779531dbae22814a4d5c582ee5616f04522fa 100644 (file)
@@ -18,8 +18,8 @@ tasks:
     - had wrong cluster addr
     - reached quota
     - overall HEALTH_
-    - (POOL_FULL)
-    - (POOL_APP_NOT_ENABLED)
+    - \(POOL_FULL\)
+    - \(POOL_APP_NOT_ENABLED\)
 - workunit:
     clients:
       all:
index 6847cef8da29a335b73aa605e34343ebf381394e..78d77c8811f04daf657132cd6d8dd82b708e4622 100644 (file)
@@ -17,10 +17,10 @@ tasks:
     log-whitelist:
       - no reply from
       - overall HEALTH_
-      - (MON_DOWN)
-      - (OSDMAP_FLAGS)
-      - (OSD_
-      - (PG_
+      - \(MON_DOWN\)
+      - \(OSDMAP_FLAGS\)
+      - \(OSD_
+      - \(PG_
 - full_sequential:
   - radosbench:
       clients: [client.0]
index 1abf5739f00a38a5d4a9697ffcf1634f046da0f3..f3c8575f110423b0560649a3b5429625cb70fa9b 100644 (file)
@@ -17,11 +17,11 @@ overrides:
         debug osd: 5
     log-whitelist:
       - overall HEALTH_
-      - (OSDMAP_FLAGS)
-      - (OSD_
-      - (PG_
-      - (SMALLER_PGP_NUM)
-      - (OBJECT_
+      - \(OSDMAP_FLAGS\)
+      - \(OSD_
+      - \(PG_
+      - \(SMALLER_PGP_NUM\)
+      - \(OBJECT_
 tasks:
 - install:
 - ceph:
index 97da1379096082045091fb5f99345ce581aa6d81..3eddce821bf711d255215c9501e970edf12f6b28 100644 (file)
@@ -8,9 +8,10 @@ tasks:
     fs: xfs
     log-whitelist:
       - overall HEALTH_
-      - (OSDMAP_FLAGS)
-      - (OSD_
-      - (PG_
-      - (OBJECT_DEGRADED)
+      - \(OSDMAP_FLAGS\)
+      - \(OSD_
+      - \(PG_
+      - \(OBJECT_DEGRADED\)
+      - \(POOL_APP_NOT_ENABLED\)
 - resolve_stuck_peering:
 
index fbcec11171d0b313adeeeb59313d1911079014e0..d988d1ad1a6a4116f1b95ba9b0ccca1edcbc4948 100644 (file)
@@ -19,10 +19,10 @@ tasks:
       - but it is still running
       - had wrong client addr
       - overall HEALTH_
-      - (OSDMAP_FLAGS)
-      - (OSD_
-      - (PG_
-      - (OBJECT_DEGRADED)
+      - \(OSDMAP_FLAGS\)
+      - \(OSD_
+      - \(PG_
+      - \(OBJECT_DEGRADED\)
     conf:
       client.rest0:
         debug ms: 1
index 8273b2ef6e545c2070e655393092a4ddec8b8772..42c8ae3947bc61c027da413f051b5332ca04807e 100644 (file)
@@ -12,7 +12,7 @@ tasks:
 - ceph:
     log-whitelist:
       - overall HEALTH_
-      - (POOL_APP_NOT_ENABLED)
+      - \(POOL_APP_NOT_ENABLED\)
 - workunit:
     clients:
       all:
index 3efdb955fe62c15e315f89808cc322e5ae4c3d14..48ef78ff90ccb2f0f9c9c76097b257abf8a16ae3 100644 (file)
@@ -24,9 +24,9 @@ tasks:
     log-whitelist:
       - objects unfound and apparently lost
       - overall HEALTH_
-      - (OSDMAP_FLAGS)
-      - (OSD_
-      - (PG_
-      - (OBJECT_DEGRADED)
+      - \(OSDMAP_FLAGS\)
+      - \(OSD_
+      - \(PG_
+      - \(OBJECT_DEGRADED\)
 - watch_notify_same_primary:
     clients: [client.0]
index db86583f22c887f22c8b68cb06e71dec1dafda7f..1c45ee3524ac6e3240017f6b30a414fdae0e13d3 100644 (file)
@@ -1,5 +1,4 @@
 roles:
-- [osd.0, osd.1, osd.2, client.0, mon.a]
-- [osd.3, osd.4, osd.5, mon.b]
-- [osd.6, osd.7, osd.8, mon.c]
-- [osd.9, osd.10, osd.11, mgr.x]
+- [osd.0, osd.1, osd.2, osd.3, client.0, mon.a]
+- [osd.4, osd.5, osd.6, osd.7, mon.b, mgr.x]
+- [osd.8, osd.9, osd.10, osd.11, mon.c]
index e4d84236abcef05f65b099f78cfe1d329d2a3350..e559d9126e86f68b9b2b4abffcf5de85775b3e6d 100644 (file)
@@ -1,4 +1,4 @@
 openstack:
   - volumes: # attached to each instance
-      count: 3
+      count: 4
       size: 10 # GB
diff --git a/ceph/qa/suites/rados/thrash-erasure-code-big/workloads/ec-rados-plugin=jerasure-k=4-m=2.yaml b/ceph/qa/suites/rados/thrash-erasure-code-big/workloads/ec-rados-plugin=jerasure-k=4-m=2.yaml
new file mode 120000 (symlink)
index 0000000..a4d836c
--- /dev/null
@@ -0,0 +1 @@
+../../../../erasure-code/ec-rados-plugin=jerasure-k=4-m=2.yaml
\ No newline at end of file
diff --git a/ceph/qa/suites/rados/thrash-erasure-code/thrashers/mapgap.yaml b/ceph/qa/suites/rados/thrash-erasure-code/thrashers/mapgap.yaml
deleted file mode 100644 (file)
index fc2bd43..0000000
+++ /dev/null
@@ -1,21 +0,0 @@
-overrides:
-  ceph:
-    conf:
-      mon:
-        mon min osdmap epochs: 2
-      osd:
-        osd map cache size: 1
-        osd scrub min interval: 60
-        osd scrub max interval: 120
-        osd max backfills: 5
-    log-whitelist:
-    - but it is still running
-    - objects unfound and apparently lost
-    - osd_map_cache_size
-tasks:
-- thrashosds:
-    timeout: 1800
-    chance_pgnum_grow: 1
-    chance_pgpnum_fix: 1
-    chance_test_map_discontinuity: 0.5
-    min_in: 4
index f4d1349b48262fa81a7afbf05beda5bc314608b5..b0f3b9b4da228ac05591a27012f7bcf432cc1b1f 100644 (file)
@@ -1,4 +1,4 @@
 openstack:
   - volumes: # attached to each instance
-      count: 3
+      count: 4
       size: 30 # GB
index a18aa3342a01094b9178f97e0246520a3c15968a..4e60cd6560a30ce5cda406483f9499003da80511 100644 (file)
@@ -25,7 +25,7 @@ overrides:
 # setting luminous triggers peering, which *might* trigger health alerts
     log-whitelist:
       - overall HEALTH_
-      - (PG_AVAILABILITY)
-      - (PG_DEGRADED)
+      - \(PG_AVAILABILITY\)
+      - \(PG_DEGRADED\)
   thrashosds:
     chance_thrash_cluster_full: 0
index bbfe7bf50d1a11607924c1e33e2663be4980ebb9..0cef20773003b7e69b411c70612ebdc8e7530017 100644 (file)
@@ -5,8 +5,8 @@ overrides:
 tasks:
 - exec:
     client.0:
-      - sudo ceph osd erasure-code-profile set teuthologyprofile crush-failure-domain=osd m=1 k=2
-      - sudo ceph osd pool create base 4 4 erasure teuthologyprofile
+      - sudo ceph osd erasure-code-profile set myprofile crush-failure-domain=osd m=2 k=2
+      - sudo ceph osd pool create base 4 4 erasure myprofile
       - sudo ceph osd pool application enable base rados
       - sudo ceph osd pool set base min_size 2
       - sudo ceph osd pool create cache 4
index e096e4e56ed9595973c81b93572d3d49ee83b880..23c705d1eb56fb5c6cf4f1dc6d643beded0e37ef 100644 (file)
@@ -2,7 +2,7 @@ overrides:
   ceph:
     log-whitelist:
       - reached quota
-      - (POOL_APP_NOT_ENABLED)
+      - \(POOL_APP_NOT_ENABLED\)
     crush_tunables: hammer
     conf:
       client:
index d4c69463c972eab3544db3456d3dc98fa4308655..a1108158c80fd6970cef7390e9edfbd092b299df 100644 (file)
@@ -2,6 +2,12 @@ meta:
 - desc: |
     install upgrade on remaining node
     restartin remaining osds
+overrides:
+  ceph:
+    log-whitelist:
+      - overall HEALTH_
+      - \(FS_DEGRADED\)
+      - \(MDS_
 tasks:
 - install.upgrade:
     osd.3:
index e4d84236abcef05f65b099f78cfe1d329d2a3350..e559d9126e86f68b9b2b4abffcf5de85775b3e6d 100644 (file)
@@ -1,4 +1,4 @@
 openstack:
   - volumes: # attached to each instance
-      count: 3
+      count: 4
       size: 10 # GB
index 4063f4e6e6b031fcf488f40a06a479530cdc2af7..266a4e4798dc88bf7e84a847118c49ac117ae8bc 100644 (file)
@@ -2,9 +2,9 @@ overrides:
   ceph:
     log-whitelist:
       - overall HEALTH_
-      - (MON_DOWN)
-      - (OSDMAP_FLAGS)
-      - (SMALLER_PGP_NUM)
-      - (POOL_APP_NOT_ENABLED)
+      - \(MON_DOWN\)
+      - \(OSDMAP_FLAGS\)
+      - \(SMALLER_PGP_NUM\)
+      - \(POOL_APP_NOT_ENABLED\)
 tasks:
 - mon_recovery:
index ea81ad1ff96e706a4101deff33cbc84955bee0c0..05b843ebd3eb46bd503b0789c9896fdcc0384459 100644 (file)
@@ -3,12 +3,12 @@ overrides:
     log-whitelist:
       - reached quota
       - overall HEALTH_
-      - (CACHE_POOL_NO_HIT_SET)
-      - (POOL_FULL)
-      - (SMALLER_PGP_NUM)
-      - (REQUEST_SLOW)
-      - (CACHE_POOL_NEAR_FULL)
-      - (POOL_APP_NOT_ENABLED)
+      - \(CACHE_POOL_NO_HIT_SET\)
+      - \(POOL_FULL\)
+      - \(SMALLER_PGP_NUM\)
+      - \(REQUEST_SLOW\)
+      - \(CACHE_POOL_NEAR_FULL\)
+      - \(POOL_APP_NOT_ENABLED\)
     conf:
       client:
         debug ms: 1
index 5c8f924abadcd3b3e165a99ee8ad00fcec00aea1..1b5056573543048929c95acbb31c5f3a8595a9f4 100644 (file)
@@ -2,7 +2,8 @@ overrides:
   ceph:
     log-whitelist:
       - overall HEALTH_
-      - (CACHE_POOL_NO_HIT_SET)
+      - \(CACHE_POOL_NEAR_FULL\)
+      - \(CACHE_POOL_NO_HIT_SET\)
 tasks:
 - exec:
     client.0:
index 9d34002a15882490a5b8e7a940b95a53a96c67da..fe1e26d5382e5deab49cc7e68b84b159f3d32e41 100644 (file)
@@ -2,7 +2,8 @@ overrides:
   ceph:
     log-whitelist:
       - overall HEALTH_
-      - (CACHE_POOL_NO_HIT_SET)
+      - \(CACHE_POOL_NO_HIT_SET\)
+      - \(POOL_APP_NOT_ENABLED\)
 tasks:
 - workunit:
     clients:
index f60a5ffa7e1b2b09e468998efb812967ef3c317e..7ab3185ec10e5a21af0c365cfefb4fd5ea95631c 100644 (file)
@@ -1,7 +1,7 @@
 overrides:
   ceph:
     log-whitelist:
-      - (REQUEST_SLOW)
+      - \(REQUEST_SLOW\)
 tasks:
 - workunit:
     clients:
index 9e90369ef50183d8e146d2cc57516990dc817306..7fdb1f1a41553eb07833b9dcfaaf4125f6ba4ace 100644 (file)
@@ -14,7 +14,7 @@ overrides:
     fs: xfs
     log-whitelist:
       - overall HEALTH_
-      - (CACHE_POOL_NO_HIT_SET)
+      - \(CACHE_POOL_NO_HIT_SET\)
     conf:
       client:
         rbd default data pool: datapool
index 5c8f924abadcd3b3e165a99ee8ad00fcec00aea1..1b5056573543048929c95acbb31c5f3a8595a9f4 100644 (file)
@@ -2,7 +2,8 @@ overrides:
   ceph:
     log-whitelist:
       - overall HEALTH_
-      - (CACHE_POOL_NO_HIT_SET)
+      - \(CACHE_POOL_NEAR_FULL\)
+      - \(CACHE_POOL_NO_HIT_SET\)
 tasks:
 - exec:
     client.0:
index f4d1349b48262fa81a7afbf05beda5bc314608b5..b0f3b9b4da228ac05591a27012f7bcf432cc1b1f 100644 (file)
@@ -1,4 +1,4 @@
 openstack:
   - volumes: # attached to each instance
-      count: 3
+      count: 4
       size: 30 # GB
index 75dfc6a45534e26818a5632eabcb1ae8ad494ea4..a63ab270340745cb2b10872c2091fb9a5d2be64b 100644 (file)
@@ -4,6 +4,7 @@ tasks:
       - sudo ceph osd erasure-code-profile set teuthologyprofile crush-failure-domain=osd m=1 k=2
       - sudo ceph osd pool create datapool 4 4 erasure teuthologyprofile
       - sudo ceph osd pool set datapool allow_ec_overwrites true
+      - rbd pool init datapool
 
 overrides:
   thrashosds:
index 5c8f924abadcd3b3e165a99ee8ad00fcec00aea1..1b5056573543048929c95acbb31c5f3a8595a9f4 100644 (file)
@@ -2,7 +2,8 @@ overrides:
   ceph:
     log-whitelist:
       - overall HEALTH_
-      - (CACHE_POOL_NO_HIT_SET)
+      - \(CACHE_POOL_NEAR_FULL\)
+      - \(CACHE_POOL_NO_HIT_SET\)
 tasks:
 - exec:
     client.0:
index b70e8d52b80aff7a05e3fb6895b9605eb5348d0d..04af9c85bb2e2e9e053ba2a839d0dc75cbe50125 100644 (file)
@@ -2,7 +2,8 @@ overrides:
   ceph:
     log-whitelist:
       - overall HEALTH_
-      - (CACHE_POOL_NO_HIT_SET)
+      - \(CACHE_POOL_NO_HIT_SET\)
+      - \(POOL_APP_NOT_ENABLED\)
 tasks:
 - workunit:
     clients:
index c2af3573dc5df4d94b745b255bdcd43b375cb50f..6ae7f46274ea319f5d69630936e4782a3c25241c 100644 (file)
@@ -2,7 +2,8 @@ overrides:
   ceph:
     log-whitelist:
       - overall HEALTH_
-      - (CACHE_POOL_NO_HIT_SET)
+      - \(CACHE_POOL_NO_HIT_SET\)
+      - \(POOL_APP_NOT_ENABLED\)
 tasks:
 - workunit:
     clients:
index f1121a40396581d98afe55e557e8e0795c81ca74..578115ee6cbd0575780d8aa572b6792f1cd39786 100644 (file)
@@ -2,7 +2,8 @@ overrides:
   ceph:
     log-whitelist:
       - overall HEALTH_
-      - (CACHE_POOL_NO_HIT_SET)
+      - \(CACHE_POOL_NO_HIT_SET\)
+      - \(POOL_APP_NOT_ENABLED\)
 tasks:
 - workunit:
     clients:
index 2030acb90836ccb748eab56844f8342763b3dd8c..90f80dcf2b851f5a4be1dadeb9180941830995aa 100644 (file)
@@ -1,3 +1,7 @@
 tasks:
 - install:
 - ceph:
+overrides:
+  ceph:
+    log-whitelist:
+      - (POOL_APP_NOT_ENABLED)
index f87995808a15bfba0e492b4d020b39f1729bf05c..9c39c7e5fb6813f1b8ac77e67eca4e6ffc938a42 100644 (file)
@@ -4,5 +4,5 @@ openstack:
       ram: 30000 # MB
       cpus: 1
     volumes: # attached to each instance
-      count: 3
+      count: 4
       size: 30 # GB
index cfa0fcd7025bc161580c0079470a82d67437b0bd..c75e6fd4732b1e116cbd8eda7800816fb0648114 100644 (file)
@@ -2,7 +2,8 @@ overrides:
   ceph:
     log-whitelist:
       - overall HEALTH_
-      - (CACHE_POOL_NO_HIT_SET)
+      - \(CACHE_POOL_NEAR_FULL\)
+      - \(CACHE_POOL_NO_HIT_SET\)
 tasks:
 - exec:
     client.0:
index 5c8f924abadcd3b3e165a99ee8ad00fcec00aea1..1b5056573543048929c95acbb31c5f3a8595a9f4 100644 (file)
@@ -2,7 +2,8 @@ overrides:
   ceph:
     log-whitelist:
       - overall HEALTH_
-      - (CACHE_POOL_NO_HIT_SET)
+      - \(CACHE_POOL_NEAR_FULL\)
+      - \(CACHE_POOL_NO_HIT_SET\)
 tasks:
 - exec:
     client.0:
diff --git a/ceph/qa/suites/rbd/singleton-bluestore/% b/ceph/qa/suites/rbd/singleton-bluestore/%
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/ceph/qa/suites/rbd/singleton-bluestore/all/issue-20295.yaml b/ceph/qa/suites/rbd/singleton-bluestore/all/issue-20295.yaml
new file mode 100644 (file)
index 0000000..9af52e0
--- /dev/null
@@ -0,0 +1,14 @@
+roles:
+- [mon.a, mgr.x, osd.0, osd.1, osd.2, client.0]
+- [mon.b, mgr.y, osd.3, osd.4, osd.5]
+- [mon.c, mgr.z, osd.6, osd.7, osd.8]
+- [osd.9, osd.10, osd.11]
+tasks:
+- install:
+- ceph:
+    log-whitelist:
+      - 'application not enabled'
+- workunit:
+    timeout: 30m
+    clients:
+      all: [rbd/issue-20295.sh]
diff --git a/ceph/qa/suites/rbd/singleton-bluestore/objectstore/bluestore-comp.yaml b/ceph/qa/suites/rbd/singleton-bluestore/objectstore/bluestore-comp.yaml
new file mode 120000 (symlink)
index 0000000..b23b2a7
--- /dev/null
@@ -0,0 +1 @@
+../../../../objectstore/bluestore-comp.yaml
\ No newline at end of file
diff --git a/ceph/qa/suites/rbd/singleton-bluestore/objectstore/bluestore.yaml b/ceph/qa/suites/rbd/singleton-bluestore/objectstore/bluestore.yaml
new file mode 120000 (symlink)
index 0000000..bd7d7e0
--- /dev/null
@@ -0,0 +1 @@
+../../../../objectstore/bluestore.yaml
\ No newline at end of file
diff --git a/ceph/qa/suites/rbd/singleton-bluestore/openstack.yaml b/ceph/qa/suites/rbd/singleton-bluestore/openstack.yaml
new file mode 100644 (file)
index 0000000..f4d1349
--- /dev/null
@@ -0,0 +1,4 @@
+openstack:
+  - volumes: # attached to each instance
+      count: 3
+      size: 30 # GB
index 5006dd8017b3928ad2f46a16c04db3386215c748..0800cbfce10c84a9afb3fa1f66490757c0e60ba3 100644 (file)
@@ -6,7 +6,8 @@ tasks:
     fs: xfs
     log-whitelist:
       - overall HEALTH_
-      - (CACHE_POOL_NO_HIT_SET)
+      - \(CACHE_POOL_NO_HIT_SET\)
+      - \(POOL_APP_NOT_ENABLED\)
 - workunit:
     clients:
       all: [rbd/test_rbd_mirror.sh]
index 39e43d021ac5d367f034f71c3e6fea365063038e..40fef4770b9f471dbf195c7fbc96535c6a6e53bc 100644 (file)
@@ -4,5 +4,5 @@ openstack:
       ram: 8000 # MB
       cpus: 1
     volumes: # attached to each instance
-      count: 3
+      count: 4
       size: 30 # GB
index 7ed2c709b24b2ebc7eca75c89a9e6fd49a87c7f5..e723e0929e0921fb01cc75ee333000024707b6ef 100644 (file)
@@ -1,8 +1,11 @@
 overrides:
   ceph:
     log-whitelist:
-    - but it is still running
-    - objects unfound and apparently lost
+      - but it is still running
+      - objects unfound and apparently lost
+      - overall HEALTH_
+      - (CACHE_POOL_NEAR_FULL)
+      - (CACHE_POOL_NO_HIT_SET)
 tasks:
 - exec:
     client.0:
index c2af3573dc5df4d94b745b255bdcd43b375cb50f..6ae7f46274ea319f5d69630936e4782a3c25241c 100644 (file)
@@ -2,7 +2,8 @@ overrides:
   ceph:
     log-whitelist:
       - overall HEALTH_
-      - (CACHE_POOL_NO_HIT_SET)
+      - \(CACHE_POOL_NO_HIT_SET\)
+      - \(POOL_APP_NOT_ENABLED\)
 tasks:
 - workunit:
     clients:
index 7f64ef3f13618613a0af9b962403b1ad50a0c51b..a9021548d85762b81e6bb0fa413a3c14165db91c 100644 (file)
@@ -9,7 +9,8 @@ overrides:
   ceph:
     log-whitelist:
       - overall HEALTH_
-      - (CACHE_POOL_NO_HIT_SET)
+      - \(CACHE_POOL_NO_HIT_SET\)
+      - \(POOL_APP_NOT_ENABLED\)
     conf:
       client:
         rbd clone copy on read: true
index f1121a40396581d98afe55e557e8e0795c81ca74..578115ee6cbd0575780d8aa572b6792f1cd39786 100644 (file)
@@ -2,7 +2,8 @@ overrides:
   ceph:
     log-whitelist:
       - overall HEALTH_
-      - (CACHE_POOL_NO_HIT_SET)
+      - \(CACHE_POOL_NO_HIT_SET\)
+      - \(POOL_APP_NOT_ENABLED\)
 tasks:
 - workunit:
     clients:
index b70e8d52b80aff7a05e3fb6895b9605eb5348d0d..04af9c85bb2e2e9e053ba2a839d0dc75cbe50125 100644 (file)
@@ -2,7 +2,8 @@ overrides:
   ceph:
     log-whitelist:
       - overall HEALTH_
-      - (CACHE_POOL_NO_HIT_SET)
+      - \(CACHE_POOL_NO_HIT_SET\)
+      - \(POOL_APP_NOT_ENABLED\)
 tasks:
 - workunit:
     clients:
index b70e8d52b80aff7a05e3fb6895b9605eb5348d0d..04af9c85bb2e2e9e053ba2a839d0dc75cbe50125 100644 (file)
@@ -2,7 +2,8 @@ overrides:
   ceph:
     log-whitelist:
       - overall HEALTH_
-      - (CACHE_POOL_NO_HIT_SET)
+      - \(CACHE_POOL_NO_HIT_SET\)
+      - \(POOL_APP_NOT_ENABLED\)
 tasks:
 - workunit:
     clients:
index c2af3573dc5df4d94b745b255bdcd43b375cb50f..6ae7f46274ea319f5d69630936e4782a3c25241c 100644 (file)
@@ -2,7 +2,8 @@ overrides:
   ceph:
     log-whitelist:
       - overall HEALTH_
-      - (CACHE_POOL_NO_HIT_SET)
+      - \(CACHE_POOL_NO_HIT_SET\)
+      - \(POOL_APP_NOT_ENABLED\)
 tasks:
 - workunit:
     clients:
index f1121a40396581d98afe55e557e8e0795c81ca74..578115ee6cbd0575780d8aa572b6792f1cd39786 100644 (file)
@@ -2,7 +2,8 @@ overrides:
   ceph:
     log-whitelist:
       - overall HEALTH_
-      - (CACHE_POOL_NO_HIT_SET)
+      - \(CACHE_POOL_NO_HIT_SET\)
+      - \(POOL_APP_NOT_ENABLED\)
 tasks:
 - workunit:
     clients:
index 8adc7209ad7df8530ec44717994c21a2c2058758..e0943439a57bdb739a2e14cb35c96095e84ad168 100644 (file)
@@ -2,7 +2,8 @@ overrides:
   ceph:
     log-whitelist:
       - overall HEALTH_
-      - (CACHE_POOL_NO_HIT_SET)
+      - \(CACHE_POOL_NO_HIT_SET\)
+      - \(POOL_APP_NOT_ENABLED\)
 tasks:
 - workunit:
     clients:
index 4d6edcd07b61432b8f5d43c1ef87ba387fc8f94b..7d652b491c93d04d056c3bec91dc562eca5d405e 100644 (file)
@@ -4,5 +4,5 @@ openstack:
       ram: 8000 # MB
       cpus: 1
     volumes: # attached to each instance
-      count: 3
+      count: 4
       size: 10 # GB
index 2c0ab2d8a189a168df735e5663ecb97cade9a946..591931d9f28018898769458bf4f7d45d19a2ca04 100644 (file)
@@ -2,7 +2,7 @@ overrides:
   ceph:
     log-whitelist:
       - reached quota
-      - (POOL_APP_NOT_ENABLED)
+      - \(POOL_APP_NOT_ENABLED\)
     conf:
       global:
         ms inject delay max: 1
index a8934deed0a48e0a2f48a5007f5ec9e851a63864..20e81be244aebd24e2daf496c558e0e34aee8daa 100644 (file)
@@ -24,5 +24,6 @@ overrides:
     - scrub mismatch
     - ScrubResult
     - wrongly marked
+    - (MDS_FAILED)
     conf:
     fs: xfs
index d90dc18b5be028c82181cca00dc206afaf24f321..a7ff9f485d77136384bb2b35f8f49b9c0235522e 100644 (file)
@@ -13,8 +13,19 @@ tasks:
     add_osds_to_crush: true
     log-whitelist:
       - overall HEALTH_
-      - (FS_
-      - (MDS_
+      - \(FS_
+      - \(MDS_
+      - \(OSD_
+      - \(MON_DOWN\)
+      - \(CACHE_POOL_
+      - \(POOL_
+      - \(MGR_DOWN\)
+      - \(PG_
+      - Monitor daemon marked osd
+      - Behind on trimming
+    conf:
+      global:
+        mon warn on pool no app: false
 - print: "**** done ceph"
 - install.upgrade:
     mon.a:
index d690682930aa1e59c79777cac241b473b86b4e14..3033f14be5d717e796e867e7146ee2d4151e7416 100644 (file)
@@ -20,6 +20,7 @@ overrides:
     conf:
       mon:
         mon debug unsafe allow tier with nonempty snaps: true
+        mon warn on pool no app: false
       osd:
         osd map max advance: 1000
 roles:
index 882f2a7d2abbe9363e8f158bd753bf27b294de37..de0893c1df5ab7227086c351661055de081949e9 100644 (file)
@@ -8,6 +8,23 @@ tasks:
     branch: kraken
 - print: "**** done installing kraken"
 - ceph:
+    log-whitelist:
+      - overall HEALTH_
+      - \(FS_
+      - \(MDS_
+      - \(OSD_
+      - \(MON_DOWN\)
+      - \(CACHE_POOL_
+      - \(POOL_
+      - \(MGR_DOWN\)
+      - \(PG_
+      - \(SMALLER_PGP_NUM\)
+      - Monitor daemon marked osd
+      - Behind on trimming
+      - Manager daemon
+    conf:
+      global:
+        mon warn on pool no app: false
 - print: "**** done ceph"
 - install.upgrade:
     mon.a:
diff --git a/ceph/qa/suites/upgrade/kraken-x/stress-split-erasure-code/thrashosds-health.yaml b/ceph/qa/suites/upgrade/kraken-x/stress-split-erasure-code/thrashosds-health.yaml
new file mode 120000 (symlink)
index 0000000..e0426db
--- /dev/null
@@ -0,0 +1 @@
+../../../../tasks/thrashosds-health.yaml
\ No newline at end of file
index dbb03ec826ee4762004bd3ab60ca530e6a911f4e..b8a28f986ac8342f9670957abca713f35619522f 100644 (file)
@@ -6,6 +6,10 @@ meta:
 overrides:
   ceph:
     fs: xfs
+    log-whitelist:
+      - overall HEALTH_
+      - \(MON_DOWN\)
+      - \(MGR_DOWN\)
     conf:
       global:
         enable experimental unrecoverable data corrupting features: "*"
diff --git a/ceph/qa/suites/upgrade/kraken-x/stress-split/thrashosds-health.yaml b/ceph/qa/suites/upgrade/kraken-x/stress-split/thrashosds-health.yaml
new file mode 120000 (symlink)
index 0000000..e0426db
--- /dev/null
@@ -0,0 +1 @@
+../../../../tasks/thrashosds-health.yaml
\ No newline at end of file
index 05c4c7d90abf3eda87c8a7e81bedec39637e0715..358ec8d648e99555adf05ce5290ab418408e910a 100644 (file)
@@ -342,6 +342,13 @@ def create_rbd_pool(ctx, config):
     mon_remote.run(
         args=['sudo', 'ceph', '--cluster', cluster_name,
               'osd', 'pool', 'create', 'rbd', '8'])
+    mon_remote.run(
+        args=[
+            'sudo', 'ceph', '--cluster', cluster_name,
+            'osd', 'pool', 'application', 'enable',
+            'rbd', 'rbd', '--yes-i-really-mean-it'
+        ],
+        check_status=False)
     yield
 
 @contextlib.contextmanager
@@ -686,6 +693,7 @@ def cluster(ctx, config):
                     '-p',
                     mnt_point,
                 ])
+            log.info(str(roles_to_devs))
             log.info(str(roles_to_journals))
             log.info(role)
             if roles_to_devs.get(role):
@@ -1022,8 +1030,8 @@ def osd_scrub_pgs(ctx, config):
     indicate the last scrub completed.  Time out if no progess is made
     here after two minutes.
     """
-    retries = 20
-    delays = 10
+    retries = 40
+    delays = 20
     cluster_name = config['cluster']
     manager = ctx.managers[cluster_name]
     all_clean = False
@@ -1225,9 +1233,9 @@ def healthy(ctx, config):
     log.info('Waiting until %s daemons up and pgs clean...', cluster_name)
     manager = ctx.managers[cluster_name]
     try:
-        manager.wait_for_mgr_available()
-    except run.CommandFailedError:
-        log.info('ignoring mgr wait error, probably testing upgrade')
+        manager.wait_for_mgr_available(timeout=30)
+    except (run.CommandFailedError, AssertionError) as e:
+        log.info('ignoring mgr wait error, probably testing upgrade: %s', e)
 
     firstmon = teuthology.get_first_mon(ctx, config, cluster_name)
     (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys()
@@ -1240,8 +1248,8 @@ def healthy(ctx, config):
 
     try:
         manager.flush_all_pg_stats()
-    except run.CommandFailedError:
-        log.info('ignoring flush pg stats error, probably testing upgrade')
+    except (run.CommandFailedError, Exception) as e:
+        log.info('ignoring flush pg stats error, probably testing upgrade: %s', e)
     manager.wait_for_clean()
 
     log.info('Waiting until ceph cluster %s is healthy...', cluster_name)
index 47f3921347dbd980ed0050d7576af5e917e1a23b..0f66106c9f1f0ef0b87b5402867af105e28ece76 100644 (file)
@@ -84,7 +84,7 @@ class CephTestCase(unittest.TestCase):
         def seen_health_warning():
             health = self.ceph_cluster.mon_manager.get_mon_health()
             codes = [s for s in health['checks']]
-            summary_strings = [s[1]['message'] for s in health['checks'].iteritems()]
+            summary_strings = [s[1]['summary']['message'] for s in health['checks'].iteritems()]
             if len(summary_strings) == 0:
                 log.debug("Not expected number of summary strings ({0})".format(summary_strings))
                 return False
index 774dd8ffe1ef274e6dc5a488609c53d75a1c23b4..913999db7733b7a835bc0e89b25883e66840d082 100644 (file)
@@ -18,6 +18,9 @@ class TestExports(CephFSTestCase):
             filtered = sorted([(s['dir']['path'], s['auth_first']) for s in subtrees])
             log.info("%s =?= %s", filtered, test)
             if filtered == test:
+                # Confirm export_pin in output is correct:
+                for s in subtrees:
+                    self.assertTrue(s['export_pin'] == s['auth_first'])
                 return subtrees
             time.sleep(pause)
         raise RuntimeError("rank {0} failed to reach desired subtree state", rank)
index f2486a06331f675c669ddc8f8854784c427fff22..a62ef743216471116c27b447f209d3f0370d8b59 100644 (file)
@@ -68,8 +68,8 @@ class TestFragmentation(CephFSTestCase):
 
         frags = self.get_dir_ino("/splitdir")['dirfrags']
         self.assertEqual(len(frags), 2)
-        self.assertEqual(frags[0]['dirfrag'], "10000000000.0*")
-        self.assertEqual(frags[1]['dirfrag'], "10000000000.1*")
+        self.assertEqual(frags[0]['dirfrag'], "0x10000000000.0*")
+        self.assertEqual(frags[1]['dirfrag'], "0x10000000000.1*")
         self.assertEqual(
             sum([len(f['dentries']) for f in frags]),
             split_size + 1
index 2774423dafb3a327a85d84ee0be3ca45f4a8e109..77ca07a194a1824bb663be2461088b219f62ced5 100644 (file)
@@ -5,6 +5,7 @@ from tasks.cephfs.cephfs_test_case import CephFSTestCase
 from teuthology.orchestra.run import CommandFailedError
 import errno
 import time
+import json
 
 
 class TestMisc(CephFSTestCase):
@@ -130,3 +131,21 @@ class TestMisc(CephFSTestCase):
         time.sleep(self.mds_session_autoclose * 1.5)
         ls_data = self.fs.mds_asok(['session', 'ls'])
         self.assert_session_count(1, ls_data)
+
+    def test_filtered_df(self):
+        pool_name = self.fs.get_data_pool_name()
+        raw_df = self.fs.get_pool_df(pool_name)
+        raw_avail = float(raw_df["max_avail"])
+        out = self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'get',
+                                                  pool_name, 'size',
+                                                  '-f', 'json-pretty')
+        j = json.loads(out)
+        pool_size = int(j['size'])
+
+        proc = self.mount_a.run_shell(['df', '.'])
+        output = proc.stdout.getvalue()
+        fs_avail = output.split('\n')[1].split()[3]
+        fs_avail = float(fs_avail) * 1024
+
+        ratio = (raw_avail / pool_size) / fs_avail
+        assert 0.9 < ratio < 1.1
index 3c639bbfc418bb06c173bf72a9cd1f59a5cc4543..65dc9a9eb856161e54a5e189aa1fc74e27f39061 100644 (file)
@@ -230,13 +230,16 @@ vc.disconnect()
 
             # Write something outside volume to check this space usage is
             # not reported in the volume's DF.
-            other_bin_mb = 6
+            other_bin_mb = 8
             self.mount_a.write_n_mb("other.bin", other_bin_mb)
 
             # global: df should see all the writes (data + other).  This is a >
             # rather than a == because the global spaced used includes all pools
-            self.assertGreater(self.mount_a.df()['used'],
-                               (data_bin_mb + other_bin_mb) * 1024 * 1024)
+            def check_df():
+                used = self.mount_a.df()['used']
+                return used >= (other_bin_mb * 1024 * 1024)
+
+            self.wait_until_true(check_df, timeout=30)
 
             # Hack: do a metadata IO to kick rstats
             self.mounts[2].run_shell(["touch", "foo"])
index ee16381d1b491b027bcd84d4bce200b4f2cd5569..ef5680d01bdb90cd1346577d6adfba2a4df444a9 100644 (file)
@@ -228,7 +228,7 @@ def run_tests(ctx, config):
     """
     assert isinstance(config, dict)
     testdir = teuthology.get_testdir(ctx)
-    attrs = ["!fails_on_rgw"]
+    attrs = ["!fails_on_rgw", "!lifecycle"]
     for client, client_config in config.iteritems():
         args = [
             'S3TEST_CONF={tdir}/archive/s3-tests.{client}.conf'.format(tdir=testdir, client=client),
index 7113e5948aa2b939271925c3bbf7746697e1b9ca..3c9c4d22689e453c173dc607f334d06ca76b3dce 100644 (file)
@@ -2,12 +2,12 @@ overrides:
   ceph:
     log-whitelist:
       - overall HEALTH_
-      - (OSDMAP_FLAGS)
-      - (OSD_
-      - (PG_
-      - (POOL_
-      - (CACHE_POOL_
-      - (SMALLER_PGP_NUM)
-      - (OBJECT_
-      - (REQUEST_SLOW)
-      - (TOO_FEW_PGS)
+      - \(OSDMAP_FLAGS\)
+      - \(OSD_
+      - \(PG_
+      - \(POOL_
+      - \(CACHE_POOL_
+      - \(SMALLER_PGP_NUM\)
+      - \(OBJECT_
+      - \(REQUEST_SLOW\)
+      - \(TOO_FEW_PGS\)
index bbb93f07829c20ac3cf2582662059f0213195b1c..7a795b925d68cb8e70f0fd1f7f3c7a68b847c8a1 100755 (executable)
@@ -1,8 +1,8 @@
 #!/bin/bash
-if [ -f $(dirname $0)/../../standalone/ceph-helpers-root.sh ]; then
-    source $(dirname $0)/../../standalone/ceph-helpers-root.sh
+if [ -f $(dirname $0)/../ceph-helpers-root.sh ]; then
+    source $(dirname $0)/../ceph-helpers-root.sh
 else
-    echo "$(dirname $0)/../../standalone/ceph-helpers-root.sh does not exist."
+    echo "$(dirname $0)/../ceph-helpers-root.sh does not exist."
     exit 1
 fi
 
index 9d8482df632721a4878b0ff36d77de884a8343f8..7c43ada2885c09e64f0b3a60291174343cacb8ab 100755 (executable)
@@ -1327,9 +1327,10 @@ function test_mon_osd_create_destroy()
   expect_false ceph auth get-key client.osd-lockbox.$uuid3
   expect_false ceph config-key exists dm-crypt/osd/$uuid3/luks
   ceph osd purge osd.$id3 --yes-i-really-mean-it
-  ceph osd purge osd.$id3 --yes-i-really-mean-it
+  ceph osd purge osd.$id3 --yes-i-really-mean-it # idempotent
 
   ceph osd purge osd.$id --yes-i-really-mean-it
+  ceph osd purge 123456 --yes-i-really-mean-it
   expect_false ceph osd find $id
   expect_false ceph auth get-key osd.$id
   expect_false ceph auth get-key client.osd-lockbox.$uuid
index 5e6f696118f3295884813efd3ae68530d4126820..a9f1512c5cc3a0e31a7c331f408ea4375ab8bb4f 100755 (executable)
@@ -21,6 +21,7 @@ ceph osd crush rule create-simple bar default host
 
 # make sure we're at luminous+ before using crush device classes
 ceph osd require-osd-release luminous
+ceph osd crush rm-device-class all
 ceph osd crush set-device-class ssd osd.0
 ceph osd crush set-device-class hdd osd.1
 ceph osd crush rule create-replicated foo-ssd default host ssd
index 3ff7e7603373cd884344028a223e16f6666f34b6..2bff335c628bb8816b5cc68db18ce48735be50ca 100755 (executable)
@@ -20,10 +20,10 @@ expect()
 
 ceph osd pool delete test test --yes-i-really-really-mean-it || true
 expect 'ceph osd pool create test 256 256' 0
+expect 'rbd --pool=test pool init' 0
 expect 'ceph osd pool mksnap test snapshot' 0
 expect 'ceph osd pool rmsnap test snapshot' 0
 
-expect 'rbd --pool=test pool init' 0
 expect 'rbd --pool=test --rbd_validate_pool=false create --size=102400 image' 0
 expect 'rbd --pool=test snap create image@snapshot' 22
 
@@ -45,11 +45,13 @@ expect 'ceph osd pool delete test test --yes-i-really-really-mean-it' 0
 
 ceph osd pool delete test-foo test-foo --yes-i-really-really-mean-it || true
 expect 'rados mkpool test-foo' 0
+expect 'rbd pool init test-foo'
 expect 'rbd --pool test-foo create --size 1024 image' 0
 expect 'rbd --pool test-foo snap create image@snapshot' 0
 
 ceph osd pool delete test-bar test-bar --yes-i-really-really-mean-it || true
 expect 'rados mkpool test-bar' 0
+expect 'rbd pool init test-bar'
 expect 'rados cppool test-foo test-bar --yes-i-really-mean-it' 0
 expect 'rbd --pool test-bar snap rm image@snapshot' 95
 expect 'ceph osd pool delete test-foo test-foo --yes-i-really-really-mean-it' 0
index 1c839d234284460f16e71ea84730097c6c071552..f958520842fb4d80796ed84f99c4d68a81221bf8 100755 (executable)
@@ -123,6 +123,7 @@ test_rename() {
     rbd rename bar2 foo2 2>&1 | grep exists
 
     rados mkpool rbd2
+    rbd pool init rbd2
     rbd create -p rbd2 -s 1 foo
     rbd rename rbd2/foo rbd2/bar
     rbd -p rbd2 ls | grep bar
@@ -359,6 +360,7 @@ test_clone() {
     rbd snap protect test1@s1
 
     rados mkpool rbd2
+    rbd pool init rbd2
     rbd clone test1@s1 rbd2/clone
     rbd -p rbd2 ls | grep clone
     rbd -p rbd2 ls -l | grep clone | grep test1@s1
diff --git a/ceph/qa/workunits/rbd/issue-20295.sh b/ceph/qa/workunits/rbd/issue-20295.sh
new file mode 100755 (executable)
index 0000000..3d617a0
--- /dev/null
@@ -0,0 +1,18 @@
+#!/bin/sh -ex
+
+TEST_POOL=ecpool
+TEST_IMAGE=test1
+PGS=12
+
+ceph osd pool create $TEST_POOL $PGS $PGS erasure
+ceph osd pool application enable $TEST_POOL rbd
+ceph osd pool set $TEST_POOL allow_ec_overwrites true
+rbd --data-pool $TEST_POOL create --size 1024G $TEST_IMAGE
+rbd bench \
+    --io-type write \
+    --io-size 4096 \
+    --io-pattern=rand \
+    --io-total 100M \
+    $TEST_IMAGE
+
+echo "OK"
diff --git a/ceph/qa/workunits/rbd/rbd-ggate.sh b/ceph/qa/workunits/rbd/rbd-ggate.sh
new file mode 100755 (executable)
index 0000000..536070a
--- /dev/null
@@ -0,0 +1,182 @@
+#!/bin/sh -ex
+
+POOL=testrbdggate$$
+IMAGE=test
+SIZE=64
+DATA=
+DEV=
+
+if which xmlstarlet > /dev/null 2>&1; then
+  XMLSTARLET=xmlstarlet
+elif which xml > /dev/null 2>&1; then
+  XMLSTARLET=xml
+else
+  echo "Missing xmlstarlet binary!"
+  exit 1
+fi
+
+_sudo()
+{
+    local cmd
+
+    if [ `id -u` -eq 0 ]
+    then
+       "$@"
+       return $?
+    fi
+
+    # Look for the command in the user path. If it fails run it as is,
+    # supposing it is in sudo path.
+    cmd=`which $1 2>/dev/null` || cmd=$1
+    shift
+    sudo -nE "${cmd}" "$@"
+}
+
+setup()
+{
+    if [ -e CMakeCache.txt ]; then
+       # running under cmake build dir
+
+       CEPH_SRC=$(readlink -f $(dirname $0)/../../../src)
+       CEPH_ROOT=${PWD}
+       CEPH_BIN=${CEPH_ROOT}/bin
+
+       export LD_LIBRARY_PATH=${CEPH_ROOT}/lib:${LD_LIBRARY_PATH}
+       export PYTHONPATH=${PYTHONPATH}:${CEPH_SRC}/pybind
+       for x in ${CEPH_ROOT}/lib/cython_modules/lib* ; do
+            PYTHONPATH="${PYTHONPATH}:${x}"
+       done
+       PATH=${CEPH_BIN}:${PATH}
+    fi
+
+    _sudo echo test sudo
+
+    trap cleanup INT TERM EXIT
+    TEMPDIR=`mktemp -d`
+    DATA=${TEMPDIR}/data
+    dd if=/dev/urandom of=${DATA} bs=1M count=${SIZE}
+    ceph osd pool create ${POOL} 64 64
+    rbd --dest-pool ${POOL} --no-progress import ${DATA} ${IMAGE}
+}
+
+cleanup()
+{
+    set +e
+    rm -Rf ${TEMPDIR}
+    if [ -n "${DEV}" ]
+    then
+       _sudo rbd-ggate unmap ${DEV}
+    fi
+    ceph osd pool delete ${POOL} ${POOL} --yes-i-really-really-mean-it
+}
+
+expect_false()
+{
+  if "$@"; then return 1; else return 0; fi
+}
+
+#
+# main
+#
+
+setup
+
+# exit status test
+expect_false rbd-ggate
+expect_false rbd-ggate INVALIDCMD
+if [ `id -u` -ne 0 ]
+then
+    expect_false rbd-ggate map ${IMAGE}
+fi
+expect_false _sudo rbd-ggate map INVALIDIMAGE
+
+# map test using the first unused device
+DEV=`_sudo rbd-ggate map ${POOL}/${IMAGE}`
+_sudo rbd-ggate list | grep "^${DEV}$"
+
+# map test specifying the device
+expect_false _sudo rbd-ggate --device ${DEV} map ${POOL}/${IMAGE}
+dev1=${DEV}
+_sudo rbd-ggate unmap ${DEV}
+_sudo rbd-ggate list | expect_false grep "^${DEV}$"
+DEV=
+# XXX: race possible when the device is reused by another process
+DEV=`_sudo rbd-ggate --device ${dev1} map ${POOL}/${IMAGE}`
+[ "${DEV}" = "${dev1}" ]
+_sudo rbd-ggate list | grep "^${DEV}$"
+
+# read test
+[ "`dd if=${DATA} bs=1M | md5`" = "`_sudo dd if=${DEV} bs=1M | md5`" ]
+
+# write test
+dd if=/dev/urandom of=${DATA} bs=1M count=${SIZE}
+_sudo dd if=${DATA} of=${DEV} bs=1M
+_sudo sync
+[ "`dd if=${DATA} bs=1M | md5`" = "`rbd -p ${POOL} --no-progress export ${IMAGE} - | md5`" ]
+
+# trim test
+provisioned=`rbd -p ${POOL} --format xml du ${IMAGE} |
+  $XMLSTARLET sel -t -m "//stats/images/image/provisioned_size" -v .`
+used=`rbd -p ${POOL} --format xml du ${IMAGE} |
+  $XMLSTARLET sel -t -m "//stats/images/image/used_size" -v .`
+[ "${used}" -eq "${provisioned}" ]
+_sudo newfs -E ${DEV}
+_sudo sync
+provisioned=`rbd -p ${POOL} --format xml du ${IMAGE} |
+  $XMLSTARLET sel -t -m "//stats/images/image/provisioned_size" -v .`
+used=`rbd -p ${POOL} --format xml du ${IMAGE} |
+  $XMLSTARLET sel -t -m "//stats/images/image/used_size" -v .`
+[ "${used}" -lt "${provisioned}" ]
+
+# resize test
+devname=$(basename ${DEV})
+size=$(geom gate list ${devname} | awk '$1 ~ /Mediasize:/ {print $2}')
+test -n "${size}"
+rbd resize ${POOL}/${IMAGE} --size $((SIZE * 2))M
+rbd info ${POOL}/${IMAGE}
+if [ -z "$RBD_GGATE_RESIZE_SUPPORTED" ]; then
+    # XXX: ggate device resize is not supported by vanilla kernel.
+    # rbd-ggate should terminate when detecting resize.
+    _sudo rbd-ggate list | expect_false grep "^${DEV}$"
+else
+    _sudo rbd-ggate list | grep "^${DEV}$"
+    size2=$(geom gate list ${devname} | awk '$1 ~ /Mediasize:/ {print $2}')
+    test -n "${size2}"
+    test ${size2} -eq $((size * 2))
+    dd if=/dev/urandom of=${DATA} bs=1M count=$((SIZE * 2))
+    _sudo dd if=${DATA} of=${DEV} bs=1M
+    _sudo sync
+    [ "`dd if=${DATA} bs=1M | md5`" = "`rbd -p ${POOL} --no-progress export ${IMAGE} - | md5`" ]
+    rbd resize ${POOL}/${IMAGE} --allow-shrink --size ${SIZE}M
+    rbd info ${POOL}/${IMAGE}
+    size2=$(geom gate list ${devname} | awk '$1 ~ /Mediasize:/ {print $2}')
+    test -n "${size2}"
+    test ${size2} -eq ${size}
+    truncate -s ${SIZE}M ${DATA}
+    [ "`dd if=${DATA} bs=1M | md5`" = "`rbd -p ${POOL} --no-progress export ${IMAGE} - | md5`" ]
+    _sudo rbd-ggate unmap ${DEV}
+fi
+DEV=
+
+# read-only option test
+DEV=`_sudo rbd-ggate map --read-only ${POOL}/${IMAGE}`
+devname=$(basename ${DEV})
+_sudo rbd-ggate list | grep "^${DEV}$"
+access=$(geom gate list ${devname} | awk '$1 == "access:" {print $2}')
+test "${access}" = "read-only"
+_sudo dd if=${DEV} of=/dev/null bs=1M
+expect_false _sudo dd if=${DATA} of=${DEV} bs=1M
+_sudo rbd-ggate unmap ${DEV}
+
+# exclusive option test
+DEV=`_sudo rbd-ggate map --exclusive ${POOL}/${IMAGE}`
+_sudo rbd-ggate list | grep "^${DEV}$"
+_sudo dd if=${DATA} of=${DEV} bs=1M
+_sudo sync
+expect_false timeout 10 \
+    rbd -p ${POOL} bench ${IMAGE} --io-type=write --io-size=1024 --io-total=1024
+_sudo rbd-ggate unmap ${DEV}
+DEV=
+rbd bench -p ${POOL} ${IMAGE} --io-type=write --io-size=1024 --io-total=1024
+
+echo OK
index 7d59200c1711389e79ddcda7b15a9a949ffd83b5..021cbaf03f740ed5bf58ba0ccc9b0ac3e5a2f03d 100755 (executable)
@@ -286,6 +286,7 @@ wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image}
 testlog "TEST: simple image resync"
 request_resync_image ${CLUSTER1} ${POOL} ${image} image_id
 wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' ${image_id}
+wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present'
 wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image}
 wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'master_position'
 compare_images ${POOL} ${image}
@@ -297,6 +298,7 @@ request_resync_image ${CLUSTER1} ${POOL} ${image} image_id
 admin_daemon ${CLUSTER1} rbd mirror start ${POOL}/${image}
 wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' ${image_id}
 admin_daemon ${CLUSTER1} rbd mirror start ${POOL}/${image}
+wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present'
 wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image}
 wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'master_position'
 compare_images ${POOL} ${image}
@@ -306,6 +308,7 @@ stop_mirror ${CLUSTER1}
 request_resync_image ${CLUSTER1} ${POOL} ${image} image_id
 start_mirror ${CLUSTER1}
 wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' ${image_id}
+wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present'
 wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image}
 wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'master_position'
 compare_images ${POOL} ${image}
@@ -327,6 +330,7 @@ wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+error' 'disconnecte
 testlog " - replay started after resync requested"
 request_resync_image ${CLUSTER1} ${POOL} ${image} image_id
 wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' ${image_id}
+wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present'
 wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image}
 wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image}
 test -n "$(get_mirror_position ${CLUSTER2} ${POOL} ${image})"
@@ -352,6 +356,7 @@ wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+error' 'disconnecte
 testlog " - replay started after resync requested"
 request_resync_image ${CLUSTER1} ${POOL} ${image} image_id
 wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' ${image_id}
+wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present'
 wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image}
 wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image}
 test -n "$(get_mirror_position ${CLUSTER2} ${POOL} ${image})"
@@ -361,8 +366,10 @@ testlog " - rbd_mirroring_resync_after_disconnect config option"
 set_image_meta ${CLUSTER2} ${POOL} ${image} \
               conf_rbd_mirroring_resync_after_disconnect true
 wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image}
+image_id=$(get_image_id ${CLUSTER1} ${pool} ${image})
 disconnect_image ${CLUSTER2} ${POOL} ${image}
-wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted'
+wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' ${image_id}
+wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present'
 wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image}
 wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image}
 test -n "$(get_mirror_position ${CLUSTER2} ${POOL} ${image})"
index b3cf017f6652a94bf2a90a78ba12109e49f39787..f825bec8549db559417cb7bd5b8ba2b22aeb187e 100755 (executable)
@@ -217,6 +217,11 @@ setup()
     CEPH_ARGS='' ceph --cluster ${CLUSTER2} osd pool create ${PARENT_POOL} 64 64
     CEPH_ARGS='' ceph --cluster ${CLUSTER2} osd pool create ${POOL} 64 64
 
+    CEPH_ARGS='' rbd --cluster ${CLUSTER1} pool init ${POOL}
+    CEPH_ARGS='' rbd --cluster ${CLUSTER2} pool init ${POOL}
+    CEPH_ARGS='' rbd --cluster ${CLUSTER1} pool init ${PARENT_POOL}
+    CEPH_ARGS='' rbd --cluster ${CLUSTER2} pool init ${PARENT_POOL}
+
     rbd --cluster ${CLUSTER1} mirror pool enable ${POOL} pool
     rbd --cluster ${CLUSTER2} mirror pool enable ${POOL} pool
     rbd --cluster ${CLUSTER1} mirror pool enable ${PARENT_POOL} image
index 9135c2ecefcf19c4374453fb4e17693ea441a443..a7ecd839c5dc5e45f4e3844069362b68e4679534 100755 (executable)
@@ -62,15 +62,17 @@ function rbd_check_perfcounter()
 function rbd_watch_start()
 {
     local image=$1
+    local asok=$(rbd_watch_asok ${image})
 
     mkfifo $(rbd_watch_fifo ${image})
     (cat $(rbd_watch_fifo ${image}) |
-           rbd watch ${image} > $(rbd_watch_out_file ${image}) 2>&1)&
+           rbd --admin-socket ${asok} watch ${image} \
+                > $(rbd_watch_out_file ${image}) 2>&1)&
 
     # find pid of the started rbd watch process
     local pid
     for i in `seq 10`; do
-       pid=$(ps auxww | awk "/[r]bd watch ${image}/ {print \$2}")
+       pid=$(ps auxww | awk "/[r]bd --admin.* watch ${image}/ {print \$2}")
        test -n "${pid}" && break
        sleep 0.1
     done
@@ -78,14 +80,12 @@ function rbd_watch_start()
     echo ${pid} > $(rbd_watch_pid_file ${image})
 
     # find watcher admin socket
-    local asok=$(ceph-conf admin_socket | sed -E "s/[0-9]+/${pid}/")
     test -n "${asok}"
     for i in `seq 10`; do
        test -S "${asok}" && break
        sleep 0.1
     done
     test -S "${asok}"
-    ln -s "${asok}" $(rbd_watch_asok ${image})
 
     # configure debug level
     ceph --admin-daemon "${asok}" config set debug_rbd 20
index b23672c4f0a8b022d25103711ab2cc4a43f85049..aa8f8ed28f82b28a5230d2354ef3b52bea935eeb 100755 (executable)
@@ -71,6 +71,8 @@ function run() {
     $DRY_RUN ./do_cmake.sh $@ || return 1
     $DRY_RUN cd build
     $DRY_RUN make $BUILD_MAKEOPTS tests || return 1
+    # prevent OSD EMFILE death on tests
+    $DRY_RUN sudo ulimit -n 32768
     if ! $DRY_RUN ctest $CHECK_MAKEOPTS --output-on-failure; then
         rm -f ${TMPDIR:-/tmp}/ceph-asok.*
         return 1
index 6c097ab5bd91e4342770a355c54841d0300fb838..5d44bb535a3c46e2765e0a751500a7b74fd65eb5 100644 (file)
@@ -1,2 +1,2 @@
-b661348f156f148d764b998b65b90451f096cb27
-v12.1.2
+c56d9c07b342c08419bbc18dcf2a4c5fae62b9cf
+v12.1.3
index 5060b41bc2347f41152a4f59b4889dd6eb5704dd..915e10aa66493da8afc072f9f206f3caeda8ad98 100644 (file)
@@ -56,7 +56,7 @@ endif(CMAKE_CXX_COMPILER_ID STREQUAL GNU)
 set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS} ${CMAKE_CXX_FLAGS}")
 
 if(NOT CMAKE_BUILD_TYPE)
-  set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING "Default BUILD_TYPE is RelWithDebInfo, other options are: Debug, Release, and MinSizeRel." FORCE) 
+  set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING "Default BUILD_TYPE is RelWithDebInfo, other options are: Debug, Release, and MinSizeRel." FORCE)
 endif()
 
 if(NOT CMAKE_BUILD_TYPE STREQUAL Debug)
@@ -722,6 +722,7 @@ endif()
 
 add_subdirectory(pybind)
 add_subdirectory(ceph-disk)
+add_subdirectory(ceph-volume)
 add_subdirectory(ceph-detect-init)
 
 ## dencoder
index 75005f5871f223d6ccd78e4ba307ed32af2db475..c14c02f28dc0dcadfef53d30e8bb6d832586ded1 100755 (executable)
@@ -305,7 +305,11 @@ def main():
         type_='mds',
         wait_count=args.timeout,
         )
-
+    bootstrap_key(
+        cluster=args.cluster,
+        type_='rbd',
+        wait_count=args.timeout,
+        )
 
 if __name__ == '__main__':
     main()
index 5ae5840b6ac894b5962f827d37fbce8cdf3794e0..8b0c5dbc38961cc94ac4304e43bc0f7f3a51271c 100755 (executable)
@@ -1279,11 +1279,14 @@ def get_dmcrypt_key(
         osd_uuid = get_oneliner(path, 'osd-uuid')
         ceph_fsid = read_one_line(path, 'ceph_fsid')
         if ceph_fsid is None:
-            raise Error('No cluster uuid assigned.')
-        cluster = find_cluster_by_uuid(ceph_fsid)
-        if cluster is None:
-            raise Error('No cluster conf found in ' + SYSCONFDIR +
-                        ' with fsid %s' % ceph_fsid)
+            LOG.warning("no `ceph_fsid` found falling back to 'ceph' "
+                        "for cluster name")
+            cluster = 'ceph'
+        else:
+            cluster = find_cluster_by_uuid(ceph_fsid)
+            if cluster is None:
+                raise Error('No cluster conf found in ' + SYSCONFDIR +
+                            ' with fsid %s' % ceph_fsid)
 
         if mode == KEY_MANAGEMENT_MODE_V1:
             key, stderr, ret = command(
@@ -1450,6 +1453,7 @@ def mount(
 
 def unmount(
     path,
+    do_rm=True,
 ):
     """
     Unmount and removes the given mount point.
@@ -1473,7 +1477,8 @@ def unmount(
             else:
                 time.sleep(0.5 + retries * 1.0)
                 retries += 1
-
+    if not do_rm:
+        return
     os.rmdir(path)
 
 
@@ -3942,7 +3947,7 @@ def main_deactivate_locked(args):
         with open(os.path.join(mounted_path, 'deactive'), 'w'):
             path_set_context(os.path.join(mounted_path, 'deactive'))
 
-    unmount(mounted_path)
+    unmount(mounted_path, do_rm=not args.once)
     LOG.info("Umount `%s` successfully.", mounted_path)
 
     if dmcrypt:
index ef6d7d4456e67af0911ab64d7a73bc26fb12d606..57e4af23bf32cb037a83770895a06c163e2eef66 100644 (file)
@@ -812,7 +812,7 @@ class TestCephDiskDeactivateAndDestroy(unittest.TestCase):
                 stop_daemon=lambda cluster, osd_id: True,
                 _remove_osd_directory_files=lambda path, cluster: True,
                 path_set_context=lambda path: True,
-                unmount=lambda path: True,
+                unmount=lambda path, do_rm: True,
                 dmcrypt_unmap=lambda part_uuid: True,
         ):
             main.main_deactivate(args)
@@ -846,7 +846,7 @@ class TestCephDiskDeactivateAndDestroy(unittest.TestCase):
                 stop_daemon=lambda cluster, osd_id: True,
                 _remove_osd_directory_files=lambda path, cluster: True,
                 path_set_context=lambda path: True,
-                unmount=lambda path: True,
+                unmount=lambda path, do_rm: True,
                 dmcrypt_unmap=lambda part_uuid: True,
         ):
             main.main_deactivate(args)
diff --git a/ceph/src/ceph-volume/CMakeLists.txt b/ceph/src/ceph-volume/CMakeLists.txt
new file mode 100644 (file)
index 0000000..20841ff
--- /dev/null
@@ -0,0 +1,4 @@
+
+include(Distutils)
+distutils_install_module(ceph_volume
+  INSTALL_SCRIPT ${CMAKE_INSTALL_FULL_SBINDIR})
diff --git a/ceph/src/ceph-volume/MANIFEST.in b/ceph/src/ceph-volume/MANIFEST.in
new file mode 100644 (file)
index 0000000..5b4a149
--- /dev/null
@@ -0,0 +1,2 @@
+include bin/ceph-volume
+include tox.ini
diff --git a/ceph/src/ceph-volume/bin/ceph-volume b/ceph/src/ceph-volume/bin/ceph-volume
new file mode 100755 (executable)
index 0000000..5905cfc
--- /dev/null
@@ -0,0 +1,6 @@
+#!/usr/bin/env python
+
+from ceph_volume import main
+
+if __name__ == '__main__':
+    main.Volume()
diff --git a/ceph/src/ceph-volume/bin/ceph-volume-systemd b/ceph/src/ceph-volume/bin/ceph-volume-systemd
new file mode 100755 (executable)
index 0000000..7da8ec6
--- /dev/null
@@ -0,0 +1,6 @@
+#!/usr/bin/env python
+
+from ceph_volume.systemd import main
+
+if __name__ == '__main__':
+    main.main()
diff --git a/ceph/src/ceph-volume/ceph_volume/__init__.py b/ceph/src/ceph-volume/ceph_volume/__init__.py
new file mode 100644 (file)
index 0000000..f550001
--- /dev/null
@@ -0,0 +1,5 @@
+from collections import namedtuple
+
+conf = namedtuple('config', ['ceph', 'cluster', 'verbosity', 'path', 'log_path'])
+
+__version__ = "1.0.0"
diff --git a/ceph/src/ceph-volume/ceph_volume/configuration.py b/ceph/src/ceph-volume/ceph_volume/configuration.py
new file mode 100644 (file)
index 0000000..eda58f0
--- /dev/null
@@ -0,0 +1,99 @@
+try:
+    import configparser
+except ImportError:
+    import ConfigParser as configparser
+import logging
+import os
+import re
+from ceph_volume import terminal
+from ceph_volume import exceptions
+
+
+logger = logging.getLogger(__name__)
+
+
+class _TrimIndentFile(object):
+    """
+    This is used to take a file-like object and removes any
+    leading tabs from each line when it's read. This is important
+    because some ceph configuration files include tabs which break
+    ConfigParser.
+    """
+    def __init__(self, fp):
+        self.fp = fp
+
+    def readline(self):
+        line = self.fp.readline()
+        return line.lstrip(' \t')
+
+    def __iter__(self):
+        return iter(self.readline, '')
+
+
+def load(abspath=None):
+    parser = Conf()
+    try:
+        parser.read_path(abspath)
+        return parser
+    except configparser.ParsingError as error:
+        terminal.error('Unable to read configuration file: %s' % abspath)
+        terminal.error(str(error))
+        logger.exception('Unable to parse INI-style file: %s' % abspath)
+
+
+class Conf(configparser.SafeConfigParser):
+    """
+    Subclasses from SafeConfigParser to give a few helpers for Ceph
+    configuration.
+    """
+
+    def read_path(self, path):
+        self.path = path
+        return self.read(path)
+
+    def is_valid(self):
+        if not os.path.exists(self.path):
+            raise exceptions.ConfigurationError(abspath=self.path)
+
+        try:
+            self.get('global', 'fsid')
+        except (configparser.NoSectionError, configparser.NoOptionError):
+            raise exceptions.ConfigurationKeyError('global', 'fsid')
+
+    def get_safe(self, section, key, default=None):
+        """
+        Attempt to get a configuration value from a certain section
+        in a ``cfg`` object but returning None if not found. Avoids the need
+        to be doing try/except {ConfigParser Exceptions} every time.
+        """
+        self.is_valid()
+        try:
+            return self.get(section, key)
+        except (configparser.NoSectionError, configparser.NoOptionError):
+            return default
+
+    def get_list(self, section, key, default=None, split=','):
+        """
+        Assumes that the value for a given key is going to be a list separated
+        by commas. It gets rid of trailing comments.  If just one item is
+        present it returns a list with a single item, if no key is found an
+        empty list is returned.
+
+        Optionally split on other characters besides ',' and return a fallback
+        value if no items are found.
+        """
+        self.is_valid()
+        value = self.get_safe(section, key, [])
+        if value == []:
+            if default is not None:
+                return default
+            return value
+
+        # strip comments
+        value = re.split(r'\s+#', value)[0]
+
+        # split on commas
+        value = value.split(split)
+
+        # strip spaces
+        return [x.strip() for x in value]
diff --git a/ceph/src/ceph-volume/ceph_volume/decorators.py b/ceph/src/ceph-volume/ceph_volume/decorators.py
new file mode 100644 (file)
index 0000000..c1e14bc
--- /dev/null
@@ -0,0 +1,87 @@
+import os
+import sys
+from ceph_volume import terminal, exceptions
+from functools import wraps
+
+
+def needs_root(func):
+    """
+    Check for super user privileges on functions/methods. Raise
+    ``SuperUserError`` with a nice message.
+    """
+    @wraps(func)
+    def is_root(*a, **kw):
+        if not os.getuid() == 0:
+            raise exceptions.SuperUserError()
+        return func(*a, **kw)
+    return is_root
+
+
+def catches(catch=None, handler=None, exit=True):
+    """
+    Very simple decorator that tries any of the exception(s) passed in as
+    a single exception class or tuple (containing multiple ones) returning the
+    exception message and optionally handling the problem if it rises with the
+    handler if it is provided.
+
+    So instead of doing something like this::
+
+        def bar():
+            try:
+                some_call()
+                print "Success!"
+            except TypeError, exc:
+                print "Error while handling some call: %s" % exc
+                sys.exit(1)
+
+    You would need to decorate it like this to have the same effect::
+
+        @catches(TypeError)
+        def bar():
+            some_call()
+            print "Success!"
+
+    If multiple exceptions need to be caught they need to be provided as a
+    tuple::
+
+        @catches((TypeError, AttributeError))
+        def bar():
+            some_call()
+            print "Success!"
+    """
+    catch = catch or Exception
+
+    def decorate(f):
+
+        @wraps(f)
+        def newfunc(*a, **kw):
+            try:
+                return f(*a, **kw)
+            except catch as e:
+                if os.environ.get('CEPH_VOLUME_DEBUG'):
+                    raise
+                if handler:
+                    return handler(e)
+                else:
+                    sys.stderr.write(make_exception_message(e))
+                    if exit:
+                        sys.exit(1)
+        return newfunc
+
+    return decorate
+
+#
+# Decorator helpers
+#
+
+
+def make_exception_message(exc):
+    """
+    An exception is passed in and this function
+    returns the proper string depending on the result
+    so it is readable enough.
+    """
+    if str(exc):
+        return '%s %s: %s\n' % (terminal.red_arrow, exc.__class__.__name__, exc)
+    else:
+        return '%s %s\n' % (terminal.red_arrow, exc.__class__.__name__)
diff --git a/ceph/src/ceph-volume/ceph_volume/devices/__init__.py b/ceph/src/ceph-volume/ceph_volume/devices/__init__.py
new file mode 100644 (file)
index 0000000..c77c344
--- /dev/null
@@ -0,0 +1 @@
+from . import lvm # noqa
diff --git a/ceph/src/ceph-volume/ceph_volume/devices/lvm/__init__.py b/ceph/src/ceph-volume/ceph_volume/devices/lvm/__init__.py
new file mode 100644 (file)
index 0000000..3c14712
--- /dev/null
@@ -0,0 +1 @@
+from .main import LVM # noqa
diff --git a/ceph/src/ceph-volume/ceph_volume/devices/lvm/activate.py b/ceph/src/ceph-volume/ceph_volume/devices/lvm/activate.py
new file mode 100644 (file)
index 0000000..7787aab
--- /dev/null
@@ -0,0 +1,114 @@
+from __future__ import print_function
+import argparse
+import os
+from textwrap import dedent
+from ceph_volume import process, conf, decorators
+from ceph_volume.util import system
+from ceph_volume.systemd import systemctl
+from . import api
+
+
+def activate_filestore(lvs):
+    # find the osd
+    osd_lv = lvs.get(lv_tags={'ceph.type': 'data'})
+    osd_id = osd_lv.tags['ceph.osd_id']
+    # it may have a volume with a journal
+    osd_journal_lv = lvs.get(lv_tags={'ceph.type': 'journal'})
+    # TODO: add sensible error reporting if this is ever the case
+    # blow up with a KeyError if this doesn't exist
+    osd_fsid = osd_lv.tags['ceph.osd_fsid']
+    if not osd_journal_lv:
+        osd_journal = osd_lv.tags.get('ceph.journal_device')
+    else:
+        osd_journal = osd_journal_lv.lv_path
+
+    if not osd_journal:
+        raise RuntimeError('unable to detect an lv or device journal for OSD %s' % osd_id)
+
+    # mount the osd
+    source = osd_lv.lv_path
+    destination = '/var/lib/ceph/osd/%s-%s' % (conf.cluster, osd_id)
+    if not system.is_mounted(source, destination=destination):
+        process.run(['sudo', 'mount', '-v', source, destination])
+
+    # ensure that the symlink for the journal is there
+    if not os.path.exists(osd_journal):
+        source = osd_journal
+        destination = '/var/lib/ceph/osd/%s-%s/journal' % (conf.cluster, osd_id)
+        process.run(['sudo', 'ln', '-s', source, destination])
+
+    # make sure that the journal has proper permissions
+    system.chown(osd_journal)
+
+    # enable the ceph-volume unit for this OSD
+    systemctl.enable_volume(osd_id, osd_fsid, 'lvm')
+
+    # start the OSD
+    systemctl.start_osd(osd_id)
+
+
+def activate_bluestore(lvs):
+    # TODO
+    pass
+
+
+class Activate(object):
+
+    help = 'Discover and mount the LVM device associated with an OSD ID and start the Ceph OSD'
+
+    def __init__(self, argv):
+        self.argv = argv
+
+    @decorators.needs_root
+    def activate(self, args):
+        lvs = api.Volumes()
+        # filter them down for the OSD ID and FSID we need to activate
+        lvs.filter(lv_tags={'ceph.osd_id': args.osd_id, 'ceph.osd_fsid': args.osd_fsid})
+        if not lvs:
+            raise RuntimeError('could not find osd.%s with fsid %s' % (args.osd_id, args.osd_fsid))
+        activate_filestore(lvs)
+
+    def main(self):
+        sub_command_help = dedent("""
+        Activate OSDs by discovering them with LVM and mounting them in their
+        appropriate destination:
+
+            ceph-volume lvm activate {ID} {FSID}
+
+        The lvs associated with the OSD need to have been prepared previously,
+        so that all needed tags and metadata exist.
+
+        """)
+        parser = argparse.ArgumentParser(
+            prog='ceph-volume lvm activate',
+            formatter_class=argparse.RawDescriptionHelpFormatter,
+            description=sub_command_help,
+        )
+
+        parser.add_argument(
+            'osd_id',
+            metavar='ID',
+            nargs='?',
+            help='The ID of the OSD, usually an integer, like 0'
+        )
+        parser.add_argument(
+            'osd_fsid',
+            metavar='FSID',
+            nargs='?',
+            help='The FSID of the OSD, similar to a SHA1'
+        )
+        parser.add_argument(
+            '--bluestore',
+            action='store_true', default=False,
+            help='bluestore objectstore (not yet implemented)',
+        )
+        parser.add_argument(
+            '--filestore',
+            action='store_true', default=True,
+            help='filestore objectstore (current default)',
+        )
+        if len(self.argv) == 0:
+            print(sub_command_help)
+            return
+        args = parser.parse_args(self.argv)
+        self.activate(args)
diff --git a/ceph/src/ceph-volume/ceph_volume/devices/lvm/api.py b/ceph/src/ceph-volume/ceph_volume/devices/lvm/api.py
new file mode 100644 (file)
index 0000000..8ff2faf
--- /dev/null
@@ -0,0 +1,484 @@
+"""
+API for CRUD lvm tag operations. Follows the Ceph LVM tag naming convention
+that prefixes tags with ``ceph.`` and uses ``=`` for assignment, and provides
+set of utilities for interacting with LVM.
+"""
+import json
+from ceph_volume import process
+from ceph_volume.exceptions import MultipleLVsError, MultipleVGsError
+
+
+def parse_tags(lv_tags):
+    """
+    Return a dictionary mapping of all the tags associated with
+    a Volume from the comma-separated tags coming from the LVM API
+
+    Input look like::
+
+       "ceph.osd_fsid=aaa-fff-bbbb,ceph.osd_id=0"
+
+    For the above example, the expected return value would be::
+
+        {
+            "ceph.osd_fsid": "aaa-fff-bbbb",
+            "ceph.osd_id": "0"
+        }
+    """
+    if not lv_tags:
+        return {}
+    tag_mapping = {}
+    tags = lv_tags.split(',')
+    for tag_assignment in tags:
+        key, value = tag_assignment.split('=', 1)
+        tag_mapping[key] = value
+
+    return tag_mapping
+
+
+def get_api_vgs():
+    """
+    Return the list of volume groups available in the system using flags to include common
+    metadata associated with them
+
+    Command and sample JSON output, should look like::
+
+        $ sudo vgs --reportformat=json
+        {
+            "report": [
+                {
+                    "vg": [
+                        {
+                            "vg_name":"VolGroup00",
+                            "pv_count":"1",
+                            "lv_count":"2",
+                            "snap_count":"0",
+                            "vg_attr":"wz--n-",
+                            "vg_size":"38.97g",
+                            "vg_free":"0 "},
+                        {
+                            "vg_name":"osd_vg",
+                            "pv_count":"3",
+                            "lv_count":"1",
+                            "snap_count":"0",
+                            "vg_attr":"wz--n-",
+                            "vg_size":"32.21g",
+                            "vg_free":"9.21g"
+                        }
+                    ]
+                }
+            ]
+        }
+
+    """
+    stdout, stderr, returncode = process.call(
+        [
+            'sudo', 'vgs', '--reportformat=json'
+        ]
+    )
+    report = json.loads(''.join(stdout))
+    for report_item in report.get('report', []):
+        # is it possible to get more than one item in "report" ?
+        return report_item['vg']
+    return []
+
+
+def get_api_lvs():
+    """
+    Return the list of logical volumes available in the system using flags to include common
+    metadata associated with them
+
+    Command and sample JSON output, should look like::
+
+        $ sudo lvs -o  lv_tags,lv_path,lv_name,vg_name --reportformat=json
+        {
+            "report": [
+                {
+                    "lv": [
+                        {
+                            "lv_tags":"",
+                            "lv_path":"/dev/VolGroup00/LogVol00",
+                            "lv_name":"LogVol00",
+                            "vg_name":"VolGroup00"},
+                        {
+                            "lv_tags":"ceph.osd_fsid=aaa-fff-0000,ceph.osd_fsid=aaa-fff-bbbb,ceph.osd_id=0",
+                            "lv_path":"/dev/osd_vg/OriginLV",
+                            "lv_name":"OriginLV",
+                            "vg_name":"osd_vg"
+                        }
+                    ]
+                }
+            ]
+        }
+
+    """
+    stdout, stderr, returncode = process.call(
+        ['sudo', 'lvs', '-o', 'lv_tags,lv_path,lv_name,vg_name', '--reportformat=json'])
+    report = json.loads(''.join(stdout))
+    for report_item in report.get('report', []):
+        # is it possible to get more than one item in "report" ?
+        return report_item['lv']
+    return []
+
+
+def get_lv(lv_name=None, vg_name=None, lv_path=None, lv_tags=None):
+    """
+    Return a matching lv for the current system, requiring ``lv_name``,
+    ``vg_name``, ``lv_path`` or ``tags``. Raises an error if more than one lv
+    is found.
+
+    It is useful to use ``tags`` when trying to find a specific logical volume,
+    but it can also lead to multiple lvs being found, since a lot of metadata
+    is shared between lvs of a distinct OSD.
+    """
+    if not any([lv_name, vg_name, lv_path, lv_tags]):
+        return None
+    lvs = Volumes()
+    return lvs.get(lv_name=lv_name, vg_name=vg_name, lv_path=lv_path, lv_tags=lv_tags)
+
+
+def create_lv(name, group, size=None, **tags):
+    """
+    Create a Logical Volume in a Volume Group. Command looks like::
+
+        lvcreate -L 50G -n gfslv vg0
+
+    ``name`` and ``group`` are required; without ``size`` (in GB) all free space is used. Tags are
+    "translated" to include the prefixes for the Ceph LVM tag API (a ``type`` tag is required).
+
+    """
+    # XXX add CEPH_VOLUME_LVM_DEBUG to enable -vvvv on lv operations
+    type_path_tag = {
+        'journal': 'ceph.journal_device',
+        'data': 'ceph.data_device',
+        'block': 'ceph.block',
+        'wal': 'ceph.wal',
+        'db': 'ceph.db',
+        'lockbox': 'ceph.lockbox_device',
+    }
+    if size:
+        process.run([
+            'sudo',
+            'lvcreate',
+            '--yes',
+            '-L',
+            '%sG' % size,
+            '-n', name, group
+        ])
+    # create the lv with all the space available, this is needed because the
+    # system call is different for LVM
+    else:
+        process.run([
+            'sudo',
+            'lvcreate',
+            '--yes',
+            '-l',
+            '100%FREE',
+            '-n', name, group
+        ])
+
+    lv = get_lv(lv_name=name, vg_name=group)
+    ceph_tags = {}
+    for k, v in tags.items():
+        ceph_tags['ceph.%s' % k] = v
+    lv.set_tags(ceph_tags)
+
+    # when creating a distinct type, the caller doesn't know what the path will
+    # be so this function will set it after creation using the mapping
+    path_tag = type_path_tag[tags['type']]
+    lv.set_tags(
+        {path_tag: lv.lv_path}
+    )
+    return lv
+
+
+def get_vg(vg_name=None, vg_tags=None):
+    """
+    Return a matching vg for the current system, requires ``vg_name`` or
+    ``tags``. Raises an error if more than one vg is found.
+
+    It is useful to use ``tags`` when trying to find a specific volume group,
+    but it can also lead to multiple vgs being found.
+    """
+    if not any([vg_name, vg_tags]):
+        return None
+    vgs = VolumeGroups()
+    return vgs.get(vg_name=vg_name, vg_tags=vg_tags)
+
+
+class VolumeGroups(list):
+    """
+    A list of all known volume groups for the current system, with the ability
+    to filter them via keyword arguments.
+    """
+
+    def __init__(self):
+        self._populate()
+
+    def _populate(self):
+        # get all the vgs in the current system
+        for vg_item in get_api_vgs():
+            self.append(VolumeGroup(**vg_item))
+
+    def _purge(self):
+        """
+        Deplete all the items in the list, used internally only so that we can
+        dynamically allocate the items when filtering without the concern of
+        messing up the contents
+        """
+        self[:] = []
+
+    def _filter(self, vg_name=None, vg_tags=None):
+        """
+        The actual method that filters using a new list. Useful so that other
+        methods that do not want to alter the contents of the list (e.g.
+        ``self.get``) can operate safely.
+
+        .. note:: ``vg_tags`` is not yet implemented
+        """
+        filtered = [i for i in self]
+        if vg_name:
+            filtered = [i for i in filtered if i.vg_name == vg_name]
+
+        # at this point, `filtered` has either all the volumes in self or is an
+        # actual filtered list if any filters were applied
+        if vg_tags:
+            tag_filtered = []
+            for k, v in vg_tags.items():
+                for volume in filtered:
+                    if volume.tags.get(k) == str(v):
+                        if volume not in tag_filtered:
+                            tag_filtered.append(volume)
+            # return the tag_filtered volumes here, the `filtered` list is no
+            # longer useable
+            return tag_filtered
+
+        return filtered
+
+    def filter(self, vg_name=None, vg_tags=None):
+        """
+        Filter out groups on top level attributes like ``vg_name`` or by
+        ``vg_tags`` where a dict is required. For example, to find a Ceph group
+        with dmcache as the type, the filter would look like::
+
+            vg_tags={'ceph.type': 'dmcache'}
+
+        .. warning:: These tags are not documented because they are currently
+                     unused, but are here to maintain API consistency
+        """
+        if not any([vg_name, vg_tags]):
+            raise TypeError('.filter() requires vg_name or vg_tags (none given)')
+        # first find the filtered volumes with the values in self
+        filtered_groups = self._filter(
+            vg_name=vg_name,
+            vg_tags=vg_tags
+        )
+        # then purge everything
+        self._purge()
+        # and add the filtered items
+        self.extend(filtered_groups)
+
+    def get(self, vg_name=None, vg_tags=None):
+        """
+        This is a bit expensive, since it will try to filter out all the
+        matching items in the list, filter them out applying anything that was
+        added and return the matching item.
+
+        This method does *not* alter the list, and it will raise an error if
+        multiple VGs are matched
+
+        It is useful to use ``tags`` when trying to find a specific volume group,
+        but it can also lead to multiple vgs being found (although unlikely)
+        """
+        if not any([vg_name, vg_tags]):
+            return None
+        vgs = self._filter(
+            vg_name=vg_name,
+            vg_tags=vg_tags
+        )
+        if not vgs:
+            return None
+        if len(vgs) > 1:
+            # this is probably never going to happen, but it is here to keep
+            # the API code consistent
+            raise MultipleVGsError(vg_name)
+        return vgs[0]
+
+
+class Volumes(list):
+    """
+    A list of all known (logical) volumes for the current system, with the ability
+    to filter them via keyword arguments.
+    """
+
+    def __init__(self):
+        self._populate()
+
+    def _populate(self):
+        # get all the lvs in the current system
+        for lv_item in get_api_lvs():
+            self.append(Volume(**lv_item))
+
+    def _purge(self):
+        """
+        Deplete all the items in the list, used internally only so that we can
+        dynamically allocate the items when filtering without the concern of
+        messing up the contents
+        """
+        self[:] = []
+
+    def _filter(self, lv_name=None, vg_name=None, lv_path=None, lv_tags=None):
+        """
+        The actual method that filters using a new list. Useful so that other
+        methods that do not want to alter the contents of the list (e.g.
+        ``self.get``) can operate safely.
+        """
+        filtered = [i for i in self]
+        if lv_name:
+            filtered = [i for i in filtered if i.lv_name == lv_name]
+
+        if vg_name:
+            filtered = [i for i in filtered if i.vg_name == vg_name]
+
+        if lv_path:
+            filtered = [i for i in filtered if i.lv_path == lv_path]
+
+        # at this point, `filtered` has either all the volumes in self or is an
+        # actual filtered list if any filters were applied
+        if lv_tags:
+            tag_filtered = []
+            for k, v in lv_tags.items():
+                for volume in filtered:
+                    if volume.tags.get(k) == str(v):
+                        if volume not in tag_filtered:
+                            tag_filtered.append(volume)
+            # return the tag_filtered volumes here, the `filtered` list is no
+            # longer useable
+            return tag_filtered
+
+        return filtered
+
+    def filter(self, lv_name=None, vg_name=None, lv_path=None, lv_tags=None):
+        """
+        Filter out volumes on top level attributes like ``lv_name`` or by
+        ``lv_tags`` where a dict is required. For example, to find a volume
+        that has an OSD ID of 0, the filter would look like::
+
+            lv_tags={'ceph.osd_id': '0'}
+
+        """
+        if not any([lv_name, vg_name, lv_path, lv_tags]):
+            raise TypeError('.filter() requires lv_name, vg_name, lv_path, or tags (none given)')
+        # first find the filtered volumes with the values in self
+        filtered_volumes = self._filter(
+            lv_name=lv_name,
+            vg_name=vg_name,
+            lv_path=lv_path,
+            lv_tags=lv_tags
+        )
+        # then purge everything
+        self._purge()
+        # and add the filtered items
+        self.extend(filtered_volumes)
+
+    def get(self, lv_name=None, vg_name=None, lv_path=None, lv_tags=None):
+        """
+        This is a bit expensive, since it will try to filter out all the
+        matching items in the list, filter them out applying anything that was
+        added and return the matching item.
+
+        This method does *not* alter the list, and it will raise an error if
+        multiple LVs are matched
+
+        It is useful to use ``tags`` when trying to find a specific logical volume,
+        but it can also lead to multiple lvs being found, since a lot of metadata
+        is shared between lvs of a distinct OSD.
+        """
+        if not any([lv_name, vg_name, lv_path, lv_tags]):
+            return None
+        lvs = self._filter(
+            lv_name=lv_name,
+            vg_name=vg_name,
+            lv_path=lv_path,
+            lv_tags=lv_tags
+        )
+        if not lvs:
+            return None
+        if len(lvs) > 1:
+            raise MultipleLVsError(lv_name, lv_path)
+        return lvs[0]
+
+
+class VolumeGroup(object):
+    """
+    Represents an LVM group, with some top-level attributes like ``vg_name``
+    """
+
+    def __init__(self, **kw):
+        for k, v in kw.items():
+            setattr(self, k, v)
+        self.name = kw['vg_name']
+        self.tags = parse_tags(kw.get('vg_tags', ''))
+
+    def __str__(self):
+        return '<%s>' % self.name
+
+    def __repr__(self):
+        return self.__str__()
+
+
+class Volume(object):
+    """
+    Represents a Logical Volume from LVM, with some top-level attributes like
+    ``lv_name`` and parsed tags as a dictionary of key/value pairs.
+    """
+
+    def __init__(self, **kw):
+        for k, v in kw.items():
+            setattr(self, k, v)
+        self.lv_api = kw
+        self.name = kw['lv_name']
+        self.tags = parse_tags(kw['lv_tags'])
+
+    def __str__(self):
+        return '<%s>' % self.lv_api['lv_path']
+
+    def __repr__(self):
+        return self.__str__()
+
+    def set_tags(self, tags):
+        """
+        :param tags: A dictionary of tag names and values, like::
+
+            {
+                "ceph.osd_fsid": "aaa-fff-bbbb",
+                "ceph.osd_id": "0"
+            }
+
+        At the end of all modifications, the tags are refreshed to reflect
+        LVM's most current view.
+        """
+        for k, v in tags.items():
+            self.set_tag(k, v)
+        # after setting all the tags, refresh them for the current object, use the
+        # lv_* identifiers to filter because those shouldn't change
+        lv_object = get_lv(lv_name=self.lv_name, lv_path=self.lv_path)
+        self.tags = lv_object.tags
+
+    def set_tag(self, key, value):
+        """
+        Set the key/value pair as an LVM tag. Does not "refresh" the values of
+        the current object for its tags. Meant to be a "fire and forget" type
+        of modification.
+        """
+        # remove it first if it exists
+        if self.tags.get(key):
+            current_value = self.tags[key]
+            tag = "%s=%s" % (key, current_value)
+            process.call(['sudo', 'lvchange', '--deltag', tag, self.lv_api['lv_path']])
+
+        process.call(
+            [
+                'sudo', 'lvchange',
+                '--addtag', '%s=%s' % (key, value), self.lv_path
+            ]
+        )
diff --git a/ceph/src/ceph-volume/ceph_volume/devices/lvm/common.py b/ceph/src/ceph-volume/ceph_volume/devices/lvm/common.py
new file mode 100644 (file)
index 0000000..599bbe6
--- /dev/null
@@ -0,0 +1,55 @@
+import argparse
+
+
+def common_parser(prog, description):
+    """
+    Both prepare and create share the same arguments, those are defined here
+    to avoid duplication. The parser is returned without parsing anything.
+    """
+    parser = argparse.ArgumentParser(
+        prog=prog,
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        description=description,
+    )
+    required_args = parser.add_argument_group('required arguments')
+    parser.add_argument(
+        '--journal',
+        help='A volume group name, path to a logical volume, or path to a device',
+    )
+    required_args.add_argument(
+        '--data',
+        required=True,
+        help='A volume group name or a path to a logical volume',
+    )
+    parser.add_argument(
+        '--journal-size',
+        default=5,
+        metavar='GB',
+        type=int,
+        help='Size (in GB) of the journal logical volume to create (defaults to 5)',
+    )
+    parser.add_argument(
+        '--bluestore',
+        action='store_true', default=False,
+        help='Use the bluestore objectstore (not currently supported)',
+    )
+    parser.add_argument(
+        '--filestore',
+        action='store_true', default=True,  # True whether or not the flag is given
+        help='Use the filestore objectstore (currently the only supported object store)',
+    )
+    parser.add_argument(
+        '--osd-id',
+        help='Reuse an existing OSD id',
+    )
+    parser.add_argument(
+        '--osd-fsid',
+        help='Reuse an existing OSD fsid',
+    )
+    # Do not parse args, so that consumers can do something before the args get
+    # parsed triggering argparse behavior
+    return parser
+
+
+create_parser = common_parser  # noqa
+prepare_parser = common_parser  # noqa
diff --git a/ceph/src/ceph-volume/ceph_volume/devices/lvm/create.py b/ceph/src/ceph-volume/ceph_volume/devices/lvm/create.py
new file mode 100644 (file)
index 0000000..1d3f6a3
--- /dev/null
@@ -0,0 +1,63 @@
+from __future__ import print_function
+from textwrap import dedent
+from ceph_volume.util import system
+from ceph_volume import decorators
+from .common import create_parser
+from .prepare import Prepare
+from .activate import Activate
+
+
+class Create(object):
+    """``ceph-volume lvm create``: runs ``Prepare`` and then ``Activate`` with the same arguments"""
+    help = 'Create a new OSD from  an LVM device'
+
+    def __init__(self, argv):
+        self.argv = argv
+
+    @decorators.needs_root
+    def create(self, args):
+        # settle the fsid once so that both steps below receive the same value
+        args.osd_fsid = args.osd_fsid or system.generate_uuid()
+        Prepare([]).prepare(args)
+        Activate([]).activate(args)
+
+    def main(self):
+        description = dedent("""
+        Create an OSD by assigning an ID and FSID, registering them with the
+        cluster with an ID and FSID, formatting and mounting the volume, adding
+        all the metadata to the logical volumes using LVM tags, and starting
+        the OSD daemon.
+
+        Most basic Usage looks like (journal will be collocated from the same volume group):
+
+            ceph-volume lvm create --data {volume group name}
+
+
+        Example calls for supported scenarios:
+
+        Dedicated volume group for Journal(s)
+        -------------------------------------
+
+          Existing logical volume (lv) or device:
+
+              ceph-volume lvm create --data {logical volume} --journal /path/to/{lv}|{device}
+
+          Or:
+
+              ceph-volume lvm create --data {data volume group} --journal {journal volume group}
+
+        Collocated (same group) for data and journal
+        --------------------------------------------
+
+              ceph-volume lvm create --data {volume group}
+
+        """)
+        parser = create_parser(
+            prog='ceph-volume lvm create',
+            description=description,
+        )
+        if not self.argv:
+            print(description)
+            return
+        # anything on the command line is handed to argparse before creating
+        self.create(parser.parse_args(self.argv))
diff --git a/ceph/src/ceph-volume/ceph_volume/devices/lvm/main.py b/ceph/src/ceph-volume/ceph_volume/devices/lvm/main.py
new file mode 100644 (file)
index 0000000..59e6932
--- /dev/null
@@ -0,0 +1,42 @@
+import argparse
+from textwrap import dedent
+from ceph_volume import terminal
+from . import activate
+from . import prepare
+from . import create
+from . import trigger
+
+
+class LVM(object):
+    """``ceph-volume lvm``: hands the arguments to the sub-commands listed in ``mapper``"""
+    help = 'Use LVM and LVM-based technologies like dmcache to deploy OSDs'
+
+    _help = dedent("""
+    Use LVM and LVM-based technologies like dmcache to deploy OSDs
+
+    {sub_help}
+    """)
+
+    mapper = {
+        'activate': activate.Activate,
+        'prepare': prepare.Prepare,
+        'create': create.Create,
+        'trigger': trigger.Trigger,
+    }
+
+    def __init__(self, argv):
+        self.argv = argv
+
+    def print_help(self, sub_help):
+        return self._help.format(sub_help=sub_help)
+
+    def main(self):
+        terminal.dispatch(self.mapper, self.argv)
+        parser = argparse.ArgumentParser(
+            description=self.print_help(terminal.subhelp(self.mapper)),
+            formatter_class=argparse.RawDescriptionHelpFormatter,
+            prog='ceph-volume lvm',
+        )
+        parser.parse_args(self.argv)
+        if len(self.argv) < 2:
+            return parser.print_help()
diff --git a/ceph/src/ceph-volume/ceph_volume/devices/lvm/prepare.py b/ceph/src/ceph-volume/ceph_volume/devices/lvm/prepare.py
new file mode 100644 (file)
index 0000000..bd84aab
--- /dev/null
@@ -0,0 +1,202 @@
+from __future__ import print_function
+import json
+import os
+from textwrap import dedent
+from ceph_volume.util import prepare as prepare_utils
+from ceph_volume.util import system
+from ceph_volume import conf, decorators
+from . import api
+from .common import prepare_parser
+
+
+def canonical_device_path(device):
+    """
+    Ensure that a device is canonical (full path) and that it exists so that
+    it can be used throughout the prepare/activate process
+    """
+    absolute = os.path.abspath(device)  # a relative name is resolved, not echoed back
+    inferred = os.path.join('/dev', device)  # still naive: only /dev is tried
+    if os.path.exists(absolute):
+        return absolute
+    elif os.path.exists(inferred):
+        return inferred
+    raise RuntimeError('Selected device does not exist: %s' % device)
+
+
+def prepare_filestore(device, journal, secrets, id_=None, fsid=None):
+    """
+    :param device: The device that gets formatted and mounted for the OSD data
+    :param journal: similar to device but can also be a regular/plain disk
+    :param secrets: A dict with the secrets needed to create the osd (e.g. cephx)
+    :param id_: The OSD id
+    :param fsid: The OSD fsid, also known as the OSD UUID
+    """
+    # a ``dict.get`` default would call ``create_key`` even when a secret was given
+    cephx_secret = secrets['cephx_secret'] if 'cephx_secret' in secrets else prepare_utils.create_key()
+    json_secrets = json.dumps(secrets)
+
+    # allow re-using an existing fsid and id, in case a previous prepare failed
+    fsid = fsid or system.generate_uuid()
+    osd_id = id_ or prepare_utils.create_id(fsid, json_secrets)
+    # create the directory
+    prepare_utils.create_path(osd_id)
+    # format the device
+    prepare_utils.format_device(device)
+    # mount the data device
+    prepare_utils.mount_osd(device, osd_id)
+    # symlink the journal
+    prepare_utils.link_journal(journal, osd_id)
+    # get the latest monmap
+    prepare_utils.get_monmap(osd_id)
+    # prepare the osd filesystem
+    prepare_utils.osd_mkfs(osd_id, fsid)
+    # write the OSD keyring if it doesn't exist already
+    prepare_utils.write_keyring(osd_id, cephx_secret)
+
+
+def prepare_bluestore(args=None):  # placeholder; ``Prepare.prepare`` passes its parsed args
+    raise NotImplementedError('bluestore is not supported by ceph-volume lvm yet')
+
+
+class Prepare(object):
+    """``ceph-volume lvm prepare``: resolves the data/journal volumes and calls ``prepare_filestore``"""
+    help = 'Format an LVM device and associate it with an OSD'
+
+    def __init__(self, argv):
+        self.argv = argv
+
+    @decorators.needs_root
+    def prepare(self, args):
+        # FIXME we don't allow re-using a keyring, we always generate one for the
+        # OSD, this needs to be fixed. This could either be a file (!) or a string
+        # (!!) or some flags that we would need to compound into a dict so that we
+        # can convert to JSON (!!!)
+        secrets = {'cephx_secret': prepare_utils.create_key()}
+
+        cluster_fsid = conf.ceph.get('global', 'fsid')
+        fsid = args.osd_fsid or system.generate_uuid()
+        # allow re-using an id (--osd-id), in case a prepare failed; otherwise one
+        # is created by ``prepare_utils.create_id`` from the fsid and the JSON secrets
+        osd_id = args.osd_id or prepare_utils.create_id(fsid, json.dumps(secrets))
+        journal_name = "journal_%s" % fsid
+        osd_name = "osd_%s" % fsid
+
+        if args.filestore:
+            data_vg = api.get_vg(vg_name=args.data)  # --data is tried as a volume group name...
+            data_lv = api.get_lv(lv_name=args.data)  # ...and as a logical volume name
+            journal_vg = api.get_vg(vg_name=args.journal)
+            journal_lv = api.get_lv(lv_name=args.journal)
+            journal_device = None
+            # it is possible to pass a device as a journal that is not
+            # an actual logical volume (or group)
+            if not args.journal:
+                if data_lv:
+                    raise RuntimeError('--journal is required when not using a vg for OSD data')
+                # collocated: carve out the journal from the data vg
+                if data_vg:
+                    journal_lv = api.create_lv(
+                        name=journal_name,
+                        group=data_vg.name,
+                        size=args.journal_size,
+                        osd_fsid=fsid,
+                        osd_id=osd_id,
+                        type='journal',
+                        cluster_fsid=cluster_fsid
+                    )
+
+            # if a volume group was defined for the journal create that first
+            if journal_vg:
+                journal_lv = api.create_lv(
+                    name=journal_name,
+                    group=args.journal,
+                    size=args.journal_size,
+                    osd_fsid=fsid,
+                    osd_id=osd_id,
+                    type='journal',
+                    cluster_fsid=cluster_fsid
+                )
+            if journal_lv:
+                journal_device = journal_lv.lv_path
+            # The journal is probably a device, not in LVM
+            elif args.journal:
+                journal_device = canonical_device_path(args.journal)
+            # At this point we must have a journal_lv or a journal device
+            # now create the osd from the group if that was found
+            if data_vg:
+                # XXX make sure that there aren't more OSDs than physical
+                # devices from this volume group
+                data_lv = api.create_lv(
+                    name=osd_name,
+                    group=args.data,
+                    osd_fsid=fsid,
+                    osd_id=osd_id,
+                    type='data',
+                    journal_device=journal_device,
+                    cluster_fsid=cluster_fsid
+                )
+            # we must have either an existing data_lv or a newly created, so lets make
+            # sure that the tags are correct
+            if not data_lv:
+                raise RuntimeError('no data logical volume found with: %s' % args.data)
+            data_lv.set_tags({
+                'ceph.type': 'data',
+                'ceph.osd_fsid': fsid,
+                'ceph.osd_id': osd_id,
+                'ceph.cluster_fsid': cluster_fsid,
+                'ceph.journal_device': journal_device,
+                'ceph.data_device': data_lv.lv_path,
+            })
+
+            prepare_filestore(
+                data_lv.lv_path,
+                journal_device,
+                secrets,
+                id_=osd_id,
+                fsid=fsid,
+            )
+        elif args.bluestore:  # NOTE(review): --filestore defaults to True in common.py, so this looks unreachable
+            prepare_bluestore(args)
+
+    def main(self):
+        sub_command_help = dedent("""
+        Prepare an OSD by assigning an ID and FSID, registering them with the
+        cluster with an ID and FSID, formatting and mounting the volume, and
+        finally by adding all the metadata to the logical volumes using LVM
+        tags, so that it can later be discovered.
+
+        Once the OSD is ready, an ad-hoc systemd unit will be enabled so that
+        it can later get activated and the OSD daemon can get started.
+
+        Most basic Usage looks like (journal will be collocated from the same volume group):
+
+            ceph-volume lvm prepare --data {volume group name}
+
+
+        Example calls for supported scenarios:
+
+        Dedicated volume group for Journal(s)
+        -------------------------------------
+
+          Existing logical volume (lv) or device:
+
+              ceph-volume lvm prepare --data {logical volume} --journal /path/to/{lv}|{device}
+
+          Or:
+
+              ceph-volume lvm prepare --data {data volume group} --journal {journal volume group}
+
+        Collocated (same group) for data and journal
+        --------------------------------------------
+
+              ceph-volume lvm prepare --data {volume group}
+
+        """)
+        parser = prepare_parser(
+            prog='ceph-volume lvm prepare',
+            description=sub_command_help,
+        )
+        if len(self.argv) == 0:  # nothing to parse: show the long help text instead
+            print(sub_command_help)
+            return
+        args = parser.parse_args(self.argv)
+        self.prepare(args)
diff --git a/ceph/src/ceph-volume/ceph_volume/devices/lvm/trigger.py b/ceph/src/ceph-volume/ceph_volume/devices/lvm/trigger.py
new file mode 100644 (file)
index 0000000..7486bfa
--- /dev/null
@@ -0,0 +1,70 @@
+from __future__ import print_function
+import argparse
+from textwrap import dedent
+from ceph_volume.exceptions import SuffixParsingError
+from ceph_volume import decorators
+from .activate import Activate
+
+
+def parse_osd_id(string):
+    osd_id = string.split('-', 1)[0]  # text before the first dash
+    # SuffixParsingError takes (suffix, part): the raw input goes first so the
+    # message reads "Unable to parse the OSD id from systemd suffix: <input>"
+    if not osd_id.isdigit():  # also rejects an empty id
+        raise SuffixParsingError(string, part='OSD id')
+    return osd_id
+
+
+def parse_osd_uuid(string):
+    osd_id = '%s-' % parse_osd_id(string)
+    # strip only the leading "<id>-": the uuid itself may contain that text
+    osd_uuid = string[len(osd_id):] if string.startswith(osd_id) else ''
+    if not osd_uuid:
+        raise SuffixParsingError(string, part='OSD uuid')  # (suffix, part) order
+    return osd_uuid
+
+
+class Trigger(object):
+    """``ceph-volume lvm trigger``: parses the systemd data and runs ``Activate`` with the id and uuid"""
+    help = 'systemd helper to activate an OSD'
+
+    def __init__(self, argv):
+        self.argv = argv
+
+    @decorators.needs_root
+    def main(self):
+        sub_command_help = dedent("""
+        ** DO NOT USE DIRECTLY **
+        This tool is meant to help the systemd unit that knows about OSDs.
+
+        Proxy OSD activation to ``ceph-volume lvm activate`` by parsing the
+        input from systemd, detecting the UUID and ID associated with an OSD::
+
+            ceph-volume lvm trigger {SYSTEMD-DATA}
+
+        The systemd "data" is expected to be in the format of::
+
+            {OSD ID}-{OSD UUID}
+
+        The lvs associated with the OSD need to have been prepared previously,
+        so that all needed tags and metadata exist.
+        """)
+        parser = argparse.ArgumentParser(
+            prog='ceph-volume lvm trigger',
+            formatter_class=argparse.RawDescriptionHelpFormatter,
+            description=sub_command_help,
+        )
+
+        parser.add_argument(
+            'systemd_data',
+            metavar='SYSTEMD_DATA',
+            nargs='?',
+            help='Data from a systemd unit containing ID and UUID of the OSD, like 0-asdf-lkjh'
+        )
+        if len(self.argv) == 0:
+            print(sub_command_help)
+            return
+        args = parser.parse_args(self.argv)
+        osd_id = parse_osd_id(args.systemd_data)  # the id comes first in the data...
+        osd_uuid = parse_osd_uuid(args.systemd_data)  # ...followed by the uuid
+        Activate([osd_id, osd_uuid]).main()
diff --git a/ceph/src/ceph-volume/ceph_volume/exceptions.py b/ceph/src/ceph-volume/ceph_volume/exceptions.py
new file mode 100644 (file)
index 0000000..75c6b6c
--- /dev/null
@@ -0,0 +1,71 @@
+import os
+
+
+class ConfigurationError(Exception):
+    """The Ceph configuration file expected at ``abspath`` could not be loaded"""
+    def __init__(self, cluster_name='ceph', path='/etc/ceph', abspath=None):
+        self.cluster_name, self.path = cluster_name, path
+        # an explicit ``abspath`` wins, otherwise it is <path>/<cluster_name>.conf
+        self.abspath = abspath or "%s.conf" % os.path.join(path, cluster_name)
+
+    def __str__(self):
+        return 'Unable to load expected Ceph config at: %s' % self.abspath
+
+
+class ConfigurationSectionError(Exception):
+    """An expected section could not be found in the configuration"""
+    def __init__(self, section):
+        self.section = section
+
+    def __str__(self):
+        return 'Unable to find expected configuration section: "%s"' % self.section
+
+
+class ConfigurationKeyError(Exception):
+    """An expected key could not be found in a configuration section"""
+    def __init__(self, section, key):
+        self.section = section
+        self.key = key
+
+    def __str__(self):
+        # the key is reported first, followed by the section it was expected in
+        message = 'Unable to find expected configuration key: "%s" from section "%s"'
+        details = (self.key, self.section)
+        return message % details
+
+
+class SuffixParsingError(Exception):
+    """``suffix`` is the raw systemd suffix, ``part`` names the piece that could not be parsed"""
+    def __init__(self, suffix, part=None):
+        self.suffix = suffix
+        self.part = part
+
+    def __str__(self):
+        return 'Unable to parse the %s from systemd suffix: %s' % (self.part, self.suffix)
+
+
+class SuperUserError(Exception):
+    """The command was run without the privileges it needs (sudo or root)"""
+    def __str__(self):
+        return 'This command needs to be executed with sudo or as root'
+
+
+class MultipleLVsError(Exception):
+    """More than one result came back for a lookup by ``lv_name`` and ``lv_path``"""
+    def __init__(self, lv_name, lv_path):
+        self.lv_name = lv_name
+        self.lv_path = lv_path
+
+    def __str__(self):
+        looked_up = (self.lv_name, self.lv_path)
+        return "Got more than 1 result looking for %s with path: %s" % looked_up
+
+
+class MultipleVGsError(Exception):
+    """More than one result came back for a lookup by volume group name"""
+    def __init__(self, vg_name):
+        self.vg_name = vg_name
+
+    def __str__(self):
+        # the message names the volume group that was being looked up
+        return "Got more than 1 result looking for volume group: %s" % self.vg_name
diff --git a/ceph/src/ceph-volume/ceph_volume/log.py b/ceph/src/ceph-volume/ceph_volume/log.py
new file mode 100644 (file)
index 0000000..890b6da
--- /dev/null
@@ -0,0 +1,33 @@
+import logging
+import os
+from ceph_volume import terminal
+from ceph_volume import conf
+
+BASE_FORMAT = "[%(name)s][%(levelname)-6s] %(message)s"
+FILE_FORMAT = "[%(asctime)s]" + BASE_FORMAT
+
+
+def setup(name='ceph-volume.log', log_path=None):
+    log_path = log_path or conf.log_path
+    # The default path is where all ceph log files are, and will get rotated by
+    # Ceph's logrotate rules. If a non-root user calls help or another
+    # no-sudo-required command the logger can fail to open that file, so a
+    # file called ``name`` under /tmp/ is used as a fallback
+    fallback_path = os.path.join('/tmp/', name)
+
+    # both the root logger and the file handler are set to DEBUG
+    root_logger = logging.getLogger()
+    root_logger.setLevel(logging.DEBUG)
+
+    try:
+        file_handler = logging.FileHandler(log_path)
+    except (OSError, IOError) as err:
+        terminal.warning("Falling back to /tmp/ for logging. Can't use %s" % log_path)
+        terminal.warning(str(err))
+        # record the path that is really in use
+        conf.log_path = fallback_path
+        file_handler = logging.FileHandler(fallback_path)
+
+    file_handler.setFormatter(logging.Formatter(FILE_FORMAT))
+    file_handler.setLevel(logging.DEBUG)
+    root_logger.addHandler(file_handler)
diff --git a/ceph/src/ceph-volume/ceph_volume/main.py b/ceph/src/ceph-volume/ceph_volume/main.py
new file mode 100644 (file)
index 0000000..d4bee15
--- /dev/null
@@ -0,0 +1,174 @@
+from __future__ import print_function
+import argparse
+import os
+import pkg_resources
+import sys
+import logging
+
+import ceph_volume
+from ceph_volume.decorators import catches
+from ceph_volume import log, devices, configuration, conf, exceptions, terminal
+
+
+class Volume(object):  # the top-level ``ceph-volume`` command
+    _help = """
+ceph-volume: Deploy Ceph OSDs using different device technologies like lvm or
+physical disks.
+
+Version: {version}
+
+Log Path: {log_path}
+Ceph Conf: {ceph_path}
+
+{sub_help}
+{plugins}
+{environ_vars}
+{warning}
+    """
+
+    def __init__(self, argv=None, parse=True):
+        self.mapper = {'lvm': devices.lvm.LVM}
+        self.plugin_help = "No plugins found/loaded"
+        if argv is None:
+            self.argv = sys.argv
+        else:
+            self.argv = argv
+        if parse:  # ``parse=False`` builds the object without running anything
+            self.main(self.argv)
+
+    def help(self, warning=False):
+        warning = 'See "ceph-volume --help" for full list of options.' if warning else ''
+        return self._help.format(
+            warning=warning,
+            version=ceph_volume.__version__,
+            log_path=conf.log_path,
+            ceph_path=self.stat_ceph_conf(),
+            plugins=self.plugin_help,
+            sub_help=terminal.subhelp(self.mapper),
+            environ_vars=self.get_environ_vars()
+        )
+
+    def get_environ_vars(self):
+        environ_vars = []
+        for key, value in os.environ.items():
+            if key.startswith('CEPH_'):
+                environ_vars.append("%s=%s" % (key, value))
+        if not environ_vars:
+            return ''
+        else:
+            environ_vars.insert(0, '\nEnviron Variables:')
+            return '\n'.join(environ_vars)
+
+    def enable_plugins(self):
+        """
+        Load all plugins available, add them to the mapper and extend the help
+        string with the information from each one
+        """
+        plugins = _load_library_extensions()
+        for plugin in plugins:
+            self.mapper[plugin._ceph_volume_name_] = plugin
+        self.plugin_help = '\n'.join(['%-19s %s\n' % (
+            getattr(plugin, 'name', plugin._ceph_volume_name_), getattr(plugin, 'help_menu', ''))
+            for plugin in plugins])  # ``name`` is optional: the entry point name is the fallback
+        if self.plugin_help:
+            self.plugin_help = '\nPlugins:\n' + self.plugin_help
+
+    def load_ceph_conf_path(self, cluster_name='ceph'):
+        abspath = '/etc/ceph/%s.conf' % cluster_name
+        conf.path = os.getenv('CEPH_CONF', abspath)
+        conf.cluster = cluster_name
+
+    def load_log_path(self):
+        conf.log_path = os.getenv('CEPH_VOLUME_LOG_PATH', '/var/log/ceph')
+
+    def stat_ceph_conf(self):
+        try:
+            configuration.load(conf.path)
+            return terminal.green(conf.path)
+        except exceptions.ConfigurationError as error:
+            return terminal.red(error)
+
+    def _get_split_args(self):
+        subcommands = self.mapper.keys()
+        slice_on_index = len(self.argv) + 1
+        pruned_args = self.argv[1:]  # drop the executable name
+        for count, arg in enumerate(pruned_args):
+            if arg in subcommands:
+                slice_on_index = count
+                break
+        return pruned_args[:slice_on_index], pruned_args[slice_on_index:]
+
+    @catches()
+    def main(self, argv):
+        # these need to be available for the help, which gets parsed super
+        # early
+        self.load_ceph_conf_path()
+        self.load_log_path()
+        self.enable_plugins()
+        main_args, subcommand_args = self._get_split_args()
+        # no flags where passed in, return the help menu instead of waiting for
+        # argparse which will end up complaning that there are no args
+        if len(argv) <= 1:
+            print(self.help(warning=True))
+            return
+        parser = argparse.ArgumentParser(
+            prog='ceph-volume',
+            formatter_class=argparse.RawDescriptionHelpFormatter,
+            description=self.help(),
+        )
+        parser.add_argument(
+            '--cluster',
+            default='ceph',
+            help='Cluster name (defaults to "ceph")',
+        )
+        parser.add_argument(
+            '--log-level',
+            default='debug',  # NOTE(review): parsed, but not read anywhere in this class
+            help='Change the file log level (defaults to debug)',
+        )
+        parser.add_argument(
+            '--log-path',
+            default=os.getenv('CEPH_VOLUME_LOG_PATH', '/var/log/ceph/'),  # env var is no longer overridden
+            help='Change the log path (defaults to /var/log/ceph)',
+        )
+        args = parser.parse_args(main_args)
+        conf.log_path = args.log_path
+        if os.path.isdir(conf.log_path):
+            conf.log_path = os.path.join(args.log_path, 'ceph-volume.log')
+        log.setup()
+        # set all variables from args and load everything needed according to
+        # them
+        self.load_ceph_conf_path(cluster_name=args.cluster)
+        conf.ceph = configuration.load(conf.path)
+        # dispatch to sub-commands
+        terminal.dispatch(self.mapper, subcommand_args)
+
+
+def _load_library_extensions():
+    """
+    Locate all setuptools entry points by the name 'ceph_volume_handlers'
+    and initialize them.
+    Any third-party library may register an entry point by adding the
+    following to their setup.py::
+
+        entry_points = {
+            'ceph_volume_handlers': [
+                'plugin_name = mylib.mymodule:Handler_Class',
+            ],
+        },
+
+    `plugin_name` will be used to load it as a sub command.
+    """
+    logger = logging.getLogger('ceph_volume.plugins')
+    group = 'ceph_volume_handlers'
+    plugins = []
+    for entry_point in pkg_resources.iter_entry_points(group=group):
+        try:
+            logger.debug('loading %s' % entry_point.name)
+            plugin = entry_point.load()
+            plugin._ceph_volume_name_ = entry_point.name
+        except Exception as error:
+            logger.exception("Error initializing plugin %s: %s" % (entry_point, error))
+        else:
+            plugins.append(plugin)
+    return plugins
diff --git a/ceph/src/ceph-volume/ceph_volume/process.py b/ceph/src/ceph-volume/ceph_volume/process.py
new file mode 100644 (file)
index 0000000..bc5047a
--- /dev/null
@@ -0,0 +1,156 @@
+from fcntl import fcntl, F_GETFL, F_SETFL
+from os import O_NONBLOCK, read
+import subprocess
+from select import select
+from ceph_volume import terminal
+
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+def log_output(descriptor, message, terminal_logging):
+    """
+    log output to both the logger and the terminal if terminal_logging is
+    enabled. Anything that is not a ``str`` (raw bytes) is decoded as utf-8
+    """
+    if not message:
+        return
+    message = (message if isinstance(message, str) else message.decode('utf-8', 'replace')).strip()
+    line = '%s %s' % (descriptor, message)
+    if terminal_logging:
+        getattr(terminal, descriptor)(message)
+    logger.info(line)
+
+
+def log_descriptors(reads, process, terminal_logging):
+    """
+    Log what can be read from the descriptors in ``reads`` (stdout/stderr of ``process``)
+    """
+    # these fcntl are set to O_NONBLOCK for the filedescriptors coming from
+    # subprocess so that the logging does not block. Without these a prompt in
+    # a subprocess output would hang and nothing would get printed. Note how
+    # these are just set when logging subprocess, not globally.
+    stdout_flags = fcntl(process.stdout, F_GETFL) # get current p.stdout flags
+    stderr_flags = fcntl(process.stderr, F_GETFL) # get current p.stderr flags
+    fcntl(process.stdout, F_SETFL, stdout_flags | O_NONBLOCK)
+    fcntl(process.stderr, F_SETFL, stderr_flags | O_NONBLOCK)
+    descriptor_names = {
+        process.stdout.fileno(): 'stdout',
+        process.stderr.fileno(): 'stderr'
+    }
+    for descriptor in reads:
+        descriptor_name = descriptor_names[descriptor]
+        try:
+            log_output(descriptor_name, read(descriptor, 1024), terminal_logging)  # at most 1024 bytes per call
+        except (IOError, OSError):
+            # nothing else to log
+            pass
+
+
+def run(command, **kw):
+    """
+    A real-time-logging implementation of a subprocess.Popen call where a
+    command is just executed locally and no other handling is done.
+
+    :param command: The command to pass in to subprocess.Popen as a list
+    :param stdin: Optional data written to the command's stdin, which is then closed
+    :param terminal_logging: Also send the output to the terminal, defaults to True
+    :param stop_on_error: If a nonzero exit status is returned, it raises a ``RuntimeError``
+    """
+    stop_on_error = kw.pop('stop_on_error', True)
+    command_msg = "Running command: %s" % ' '.join(command)
+    stdin = kw.pop('stdin', None)
+    logger.info(command_msg)
+    terminal.write(command_msg)
+    terminal_logging = kw.pop('terminal_logging', True)
+
+    process = subprocess.Popen(
+        command,
+        stdin=subprocess.PIPE,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        close_fds=True,
+        **kw
+    )
+
+    if stdin:
+        # feed the input directly: ``communicate`` would also consume and close
+        # stdout/stderr, which the polling loop below still has to read
+        process.stdin.write(stdin)
+        process.stdin.close()
+    while True:
+        reads, _, _ = select([process.stdout.fileno(), process.stderr.fileno()], [], [])
+        log_descriptors(reads, process, terminal_logging)
+
+        if process.poll() is not None:
+            # ensure we do not have anything pending in stdout or stderr
+            log_descriptors(reads, process, terminal_logging)
+            break
+
+    returncode = process.wait()
+    if returncode != 0:
+        msg = "command returned non-zero exit status: %s" % returncode
+        if stop_on_error:
+            raise RuntimeError(msg)
+        if terminal_logging:
+            terminal.warning(msg)
+        logger.warning(msg)
+
+
+def call(command, **kw):
+    """
+    Similar to ``subprocess.Popen`` with the following changes:
+
+    * returns stdout, stderr, and exit code (vs. just the exit code)
+    * logs the full contents of stderr and stdout (separately) to the file log
+
+    The command line is always passed to ``terminal.write``; the output of
+    the command only goes to the terminal in the cases described below.
+
+    Useful when system calls are needed to act on output, and that same output
+    shouldn't get displayed on the terminal.
+
+    :param terminal_verbose: Log command output to terminal, defaults to False, and
+                             it is forcefully set to True if a return code is non-zero
+    """
+    terminal_verbose = kw.pop('terminal_verbose', False)
+    command_msg = "Running command: %s" % ' '.join(command)
+    stdin = kw.pop('stdin', None)
+    logger.info(command_msg)
+    terminal.write(command_msg)
+
+    process = subprocess.Popen(
+        command,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        stdin=subprocess.PIPE,
+        close_fds=True,
+        **kw
+    )
+    # ``communicate`` sends ``stdin`` (if any), closes that pipe and drains
+    # stdout and stderr together. Reading one pipe to EOF before the other
+    # can block forever once the command fills the buffer of the pipe that
+    # is not being read yet
+    stdout_stream, stderr_stream = process.communicate(stdin or None)
+    returncode = process.wait()
+    if not isinstance(stdout_stream, str):
+        stdout_stream = stdout_stream.decode('utf-8')
+    if not isinstance(stderr_stream, str):
+        stderr_stream = stderr_stream.decode('utf-8')
+    stdout = stdout_stream.splitlines()
+    stderr = stderr_stream.splitlines()
+
+    if returncode != 0:
+        # set to true so that we can log the stderr/stdout that callers would
+        # do anyway
+        terminal_verbose = True
+
+    # the following can get a messed up order in the log if the system call
+    # returns output with both stderr and stdout intermingled. This separates
+    # that.
+    for line in stdout:
+        log_output('stdout', line, terminal_verbose)
+    for line in stderr:
+        log_output('stderr', line, terminal_verbose)
+    return stdout, stderr, returncode
diff --git a/ceph/src/ceph-volume/ceph_volume/systemd/__init__.py b/ceph/src/ceph-volume/ceph_volume/systemd/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/ceph/src/ceph-volume/ceph_volume/systemd/main.py b/ceph/src/ceph-volume/ceph_volume/systemd/main.py
new file mode 100644 (file)
index 0000000..69c0f38
--- /dev/null
@@ -0,0 +1,109 @@
+"""
+This file is used only by systemd units that are passing their instance suffix
+as arguments to this script so that it can parse the suffix into arguments that
+``ceph-volume <sub command>`` can consume
+"""
+import os
+import sys
+import time
+import logging
+from ceph_volume import log, process
+from ceph_volume.exceptions import SuffixParsingError
+
+
+def parse_subcommand(string):
+    subcommand = string.split('-', 1)[0]
+    if not subcommand:
+        raise SuffixParsingError('subcommand', string)
+    return subcommand
+
+
+def parse_extra_data(string):
+    # the sub command is the leading token; whatever follows it is the data
+    sub_command = parse_subcommand(string)
+
+    # split only once: the sub command text may show up again inside the data
+    data = string.split(sub_command, 1)[-1]
+    if not data:
+        raise SuffixParsingError('data', string)
+    return data.lstrip('-')  # the split leaves the separating dash(es) in front
+
+
+def parse_osd_id(string):
+    osd_id = string.split('-', 1)[0]
+    if not osd_id:
+        raise SuffixParsingError('OSD id', string)
+    if osd_id.isdigit():
+        return osd_id
+    raise SuffixParsingError('OSD id', string)
+
+
+def parse_osd_uuid(string):
+    osd_id = '%s-' % parse_osd_id(string)
+    osd_subcommand = '-%s' % parse_subcommand(string)
+    # remove only the leading id: "<id>-" can occur again inside the uuid itself
+    trimmed_suffix = string.split(osd_id, 1)[-1]
+    # cut at "-<sub command>"; NOTE(review): that token is the leading id here, confirm
+    osd_uuid = trimmed_suffix.split(osd_subcommand)[0]
+    if not osd_uuid:
+        raise SuffixParsingError('OSD uuid', string)
+    return osd_uuid
+
+
+def main(args=None):
+    """
+    Main entry point for the ``ceph-volume-systemd`` executable. ``args`` are
+    optional for easier testing of arguments.
+
+    Expected input is similar to::
+
+        ['/path/to/ceph-volume-systemd', '<osd id>-<osd uuid>-<device type>']
+        ['/path/to/ceph-volume-systemd', '<type>-<extra metadata>']
+
+    For example::
+
+        [
+            '/usr/bin/ceph-volume-systemd',
+            'lvm-0-8715BEB4-15C5-49DE-BA6F-401086EC7B41'
+        ]
+
+    The first part of the argument is the only interesting bit, which contains
+    the metadata needed to proxy the call to ``ceph-volume`` itself.
+
+    Reusing the example, the proxy call to ``ceph-volume`` would look like::
+
+        ceph-volume lvm trigger 0-8715BEB4-15C5-49DE-BA6F-401086EC7B41
+
+    That means that ``lvm`` is used as the subcommand and it is **expected**
+    that a ``trigger`` sub-command will be present to make sense of the extra
+    piece of the string. ``CEPH_VOLUME_SYSTEMD_TRIES`` (default 30) and
+    ``CEPH_VOLUME_SYSTEMD_INTERVAL`` (default 5, seconds) control the retries.
+    """
+    log.setup(name='ceph-volume-systemd.log', log_path='/var/log/ceph/ceph-volume-systemd.log')
+    logger = logging.getLogger('systemd')
+
+    args = args if args is not None else sys.argv
+    try:
+        suffix = args[-1]
+    except IndexError:
+        raise RuntimeError('no arguments supplied')
+    sub_command = parse_subcommand(suffix)
+    extra_data = parse_extra_data(suffix)
+    logger.info('raw systemd input received: %s', suffix)
+    logger.info('parsed sub-command: %s, extra data: %s', sub_command, extra_data)
+    command = ['ceph-volume', sub_command, 'trigger', extra_data]
+    # environment values are strings: convert so the countdown and sleep work
+    tries = int(os.environ.get('CEPH_VOLUME_SYSTEMD_TRIES', 30))
+    interval = int(os.environ.get('CEPH_VOLUME_SYSTEMD_INTERVAL', 5))
+    while tries > 0:
+        try:
+            # don't log any output to the terminal, just rely on stderr/stdout
+            # going to logging
+            process.run(command, terminal_logging=False)
+            logger.info('successfully trggered activation for: %s', extra_data)
+            break
+        except RuntimeError as error:
+            logger.warning(error)
+            logger.warning('failed activating OSD, retries left: %s', tries)
+            tries -= 1
+            time.sleep(interval)
diff --git a/ceph/src/ceph-volume/ceph_volume/systemd/systemctl.py b/ceph/src/ceph-volume/ceph_volume/systemd/systemctl.py
new file mode 100644 (file)
index 0000000..9bb4d7d
--- /dev/null
@@ -0,0 +1,48 @@
+"""
+Utilities to control systemd units
+"""
+from ceph_volume import process
+
+
+def start(unit):
+    process.run(['sudo', 'systemctl', 'start', unit])
+
+
+def stop(unit):
+    process.run(['sudo', 'systemctl', 'stop', unit])
+
+
+def enable(unit):
+    process.run(['sudo', 'systemctl', 'enable', unit])
+
+
+def disable(unit):
+    process.run(['sudo', 'systemctl', 'disable', unit])
+
+
+def start_osd(id_):
+    return start(osd_unit % id_)
+
+
+def stop_osd(id_):
+    return stop(osd_unit % id_)
+
+
+def enable_osd(id_):
+    return enable(osd_unit % id_)
+
+
+def disable_osd(id_):
+    return disable(osd_unit % id_)
+
+
+def enable_volume(id_, fsid, device_type='lvm'):
+    return enable(volume_unit % (device_type, id_, fsid))
+
+
+#
+# templates
+#
+
+osd_unit = "ceph-osd@%s"
+volume_unit = "ceph-volume@%s-%s-%s"
diff --git a/ceph/src/ceph-volume/ceph_volume/terminal.py b/ceph/src/ceph-volume/ceph_volume/terminal.py
new file mode 100644 (file)
index 0000000..55ce2d4
--- /dev/null
@@ -0,0 +1,151 @@
+import sys
+
+
+class colorize(str):
+    """
+    Pretty simple to use::
+
+        colorize.make('foo').bold
+        colorize.make('foo').green
+        colorize.make('foo').yellow
+        colorize.make('foo').red
+        colorize.make('foo').blue
+
+    Otherwise you could go the long way (for example if you are
+    testing this class)::
+
+        string = colorize('foo')
+        string._set_attributes()
+        string.red
+
+    """
+
+    def __init__(self, string):
+        self.stdout = sys.__stdout__
+        self.appends = ''
+        self.prepends = ''
+        self.isatty = self.stdout.isatty()
+
+    def _set_attributes(self):
+        """
+        Sets the attributes here because the str class does not
+        allow to pass in anything other than a string to the constructor
+        so we can't really mess with the other attributes.
+        """
+        for k, v in self.__colors__.items():
+            setattr(self, k, self.make_color(v))
+
+    def make_color(self, color):
+        if not self.isatty:
+            return self
+        return color + self + '\033[0m' + self.appends
+
+    @property
+    def __colors__(self):
+        return dict(
+            blue='\033[34m',
+            green='\033[92m',
+            yellow='\033[33m',
+            red='\033[91m',
+            bold='\033[1m',
+            ends='\033[0m'
+        )
+
+    @classmethod
+    def make(cls, string):
+        """
+        A helper method to return itself and workaround the fact that
+        the str object doesn't allow extra arguments passed in to the
+        constructor
+        """
+        obj = cls(string)
+        obj._set_attributes()
+        return obj
+
+#
+# Common string manipulations
+#
+yellow = lambda x: colorize.make(x).yellow  # noqa
+blue = lambda x: colorize.make(x).blue  # noqa
+green = lambda x: colorize.make(x).green  # noqa
+red = lambda x: colorize.make(x).red  # noqa
+bold = lambda x: colorize.make(x).bold  # noqa
+red_arrow = red('--> ')
+blue_arrow = blue('--> ')
+green_arrow = green('--> ')
+yellow_arrow = yellow('--> ')
+
+
+class _Write(object):
+
+    def __init__(self, _writer=None, prefix='', suffix='', flush=False):
+        self._writer = _writer or sys.stdout
+        self.suffix = suffix
+        self.prefix = prefix
+        self.flush = flush
+
+    def bold(self, string):
+        self.write(bold(string))
+
+    def raw(self, string):
+        if not string.endswith('\n'):
+            string = '%s\n' % string
+        self.write(string)
+
+    def write(self, line):
+        self._writer.write(self.prefix + line + self.suffix)
+        if self.flush:
+            self._writer.flush()
+
+
+def stdout(msg):
+    return _Write(prefix=blue(' stdout: ')).raw(msg)
+
+
+def stderr(msg):
+    return _Write(prefix=yellow(' stderr: ')).raw(msg)
+
+
+def write(msg):
+    return _Write().raw(msg)
+
+
+def error(msg):
+    return _Write(prefix=red_arrow).raw(msg)
+
+
+def warning(msg):
+    return _Write(prefix=yellow_arrow).raw(msg)
+
+
+def success(msg):
+    return _Write(prefix=green_arrow).raw(msg)
+
+
+def dispatch(mapper, argv=None):
+    argv = argv or sys.argv
+    for count, arg in enumerate(argv, 1):
+        if arg in mapper.keys():
+            instance = mapper.get(arg)(argv[count:])
+            if hasattr(instance, 'main'):
+                instance.main()
+                raise SystemExit(0)
+
+
+def subhelp(mapper):
+    """
+    Look at every value of every key in the mapper and will output any
+    ``class.help`` possible to return it as a string that will be sent to
+    stdout.
+    """
+    help_text_lines = []
+    for key, value in mapper.items():
+        try:
+            help_text = value.help
+        except AttributeError:
+            continue
+        help_text_lines.append("%-24s %s" % (key, help_text))
+
+    if help_text_lines:
+        return "Available subcommands:\n\n%s" % '\n'.join(help_text_lines)
+    return ''
diff --git a/ceph/src/ceph-volume/ceph_volume/tests/__init__.py b/ceph/src/ceph-volume/ceph_volume/tests/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/ceph/src/ceph-volume/ceph_volume/tests/conftest.py b/ceph/src/ceph-volume/ceph_volume/tests/conftest.py
new file mode 100644 (file)
index 0000000..869979e
--- /dev/null
@@ -0,0 +1,16 @@
+import pytest
+
+class Capture(object):
+
+    def __init__(self, *a, **kw):
+        self.a = a
+        self.kw = kw
+        self.calls = []
+
+    def __call__(self, *a, **kw):
+        self.calls.append({'args': a, 'kwargs': kw})
+
+
+@pytest.fixture
+def capture():
+    return Capture()
diff --git a/ceph/src/ceph-volume/ceph_volume/tests/devices/__init__.py b/ceph/src/ceph-volume/ceph_volume/tests/devices/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/ceph/src/ceph-volume/ceph_volume/tests/devices/lvm/__init__.py b/ceph/src/ceph-volume/ceph_volume/tests/devices/lvm/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/ceph/src/ceph-volume/ceph_volume/tests/devices/lvm/test_api.py b/ceph/src/ceph-volume/ceph_volume/tests/devices/lvm/test_api.py
new file mode 100644 (file)
index 0000000..c849fac
--- /dev/null
@@ -0,0 +1,240 @@
+import pytest
+from ceph_volume import process, exceptions
+from ceph_volume.devices.lvm import api
+
+
+class TestParseTags(object):
+
+    def test_no_tags_means_empty_dict(self):
+        result = api.parse_tags('')
+        assert result == {}
+
+    def test_single_tag_gets_parsed(self):
+        result = api.parse_tags('ceph.osd_something=1')
+        assert result == {'ceph.osd_something': '1'}
+
+    def test_multiple_csv_expands_in_dict(self):
+        result = api.parse_tags('ceph.osd_something=1,ceph.foo=2,ceph.fsid=0000')
+        # assert them piecemeal to avoid the un-ordered dict nature
+        assert result['ceph.osd_something'] == '1'
+        assert result['ceph.foo'] == '2'
+        assert result['ceph.fsid'] == '0000'
+
+
+class TestGetAPIVgs(object):
+
+    def test_report_is_emtpy(self, monkeypatch):
+        monkeypatch.setattr(api.process, 'call', lambda x: ('{}', '', 0))
+        assert api.get_api_vgs() == []
+
+    def test_report_has_stuff(self, monkeypatch):
+        report = '{"report":[{"vg":[{"vg_name":"VolGroup00"}]}]}'
+        monkeypatch.setattr(api.process, 'call', lambda x: (report, '', 0))
+        assert api.get_api_vgs() == [{'vg_name': 'VolGroup00'}]
+
+    def test_report_has_multiple_items(self, monkeypatch):
+        report = '{"report":[{"vg":[{"vg_name":"VolGroup00"},{"vg_name":"ceph_vg"}]}]}'
+        monkeypatch.setattr(api.process, 'call', lambda x: (report, '', 0))
+        assert api.get_api_vgs() == [{'vg_name': 'VolGroup00'}, {'vg_name': 'ceph_vg'}]
+
+    def test_does_not_get_poluted_with_non_vg_items(self, monkeypatch):
+        report = '{"report":[{"vg":[{"vg_name":"VolGroup00"}],"lv":[{"lv":"1"}]}]}'
+        monkeypatch.setattr(api.process, 'call', lambda x: (report, '', 0))
+        assert api.get_api_vgs() == [{'vg_name': 'VolGroup00'}]
+
+
+class TestGetAPILvs(object):
+
+    def test_report_is_emtpy(self, monkeypatch):
+        monkeypatch.setattr(api.process, 'call', lambda x: ('{}', '', 0))
+        assert api.get_api_lvs() == []
+
+    def test_report_has_stuff(self, monkeypatch):
+        report = '{"report":[{"lv":[{"lv_name":"VolGroup00"}]}]}'
+        monkeypatch.setattr(api.process, 'call', lambda x: (report, '', 0))
+        assert api.get_api_lvs() == [{'lv_name': 'VolGroup00'}]
+
+    def test_report_has_multiple_items(self, monkeypatch):
+        report = '{"report":[{"lv":[{"lv_name":"VolName"},{"lv_name":"ceph_lv"}]}]}'
+        monkeypatch.setattr(api.process, 'call', lambda x: (report, '', 0))
+        assert api.get_api_lvs() == [{'lv_name': 'VolName'}, {'lv_name': 'ceph_lv'}]
+
+    def test_does_not_get_poluted_with_non_lv_items(self, monkeypatch):
+        report = '{"report":[{"lv":[{"lv_name":"VolName"}],"vg":[{"vg":"1"}]}]}'
+        monkeypatch.setattr(api.process, 'call', lambda x: (report, '', 0))
+        assert api.get_api_lvs() == [{'lv_name': 'VolName'}]
+
+
+@pytest.fixture
+def volumes(monkeypatch):
+    monkeypatch.setattr(process, 'call', lambda x: ('{}', '', 0))
+    volumes = api.Volumes()
+    volumes._purge()
+    return volumes
+
+
+@pytest.fixture
+def volume_groups(monkeypatch):
+    monkeypatch.setattr(process, 'call', lambda x: ('{}', '', 0))
+    vgs = api.VolumeGroups()
+    vgs._purge()
+    return vgs
+
+
+class TestGetLV(object):
+
+    def test_nothing_is_passed_in(self):
+        # so we return a None
+        assert api.get_lv() is None
+
+    def test_single_lv_is_matched(self, volumes, monkeypatch):
+        FooVolume = api.Volume(lv_name='foo', lv_path='/dev/vg/foo', lv_tags="ceph.type=data")
+        volumes.append(FooVolume)
+        monkeypatch.setattr(api, 'Volumes', lambda: volumes)
+        assert api.get_lv(lv_name='foo') == FooVolume
+
+
+class TestGetVG(object):
+
+    def test_nothing_is_passed_in(self):
+        # so we return a None
+        assert api.get_vg() is None
+
+    def test_single_vg_is_matched(self, volume_groups, monkeypatch):
+        FooVG = api.VolumeGroup(vg_name='foo')
+        volume_groups.append(FooVG)
+        monkeypatch.setattr(api, 'VolumeGroups', lambda: volume_groups)
+        assert api.get_vg(vg_name='foo') == FooVG
+
+
+class TestVolumes(object):
+
+    def test_volume_get_has_no_volumes(self, volumes):
+        assert volumes.get() is None
+
+    def test_volume_get_filtered_has_no_volumes(self, volumes):
+        assert volumes.get(lv_name='ceph') is None
+
+    def test_volume_has_multiple_matches(self, volumes):
+        volume1 = volume2 = api.Volume(lv_name='foo', lv_path='/dev/vg/lv', lv_tags='')
+        volumes.append(volume1)
+        volumes.append(volume2)
+        with pytest.raises(exceptions.MultipleLVsError):
+            volumes.get(lv_name='foo')
+
+    def test_find_the_correct_one(self, volumes):
+        volume1 = api.Volume(lv_name='volume1', lv_path='/dev/vg/lv', lv_tags='')
+        volume2 = api.Volume(lv_name='volume2', lv_path='/dev/vg/lv', lv_tags='')
+        volumes.append(volume1)
+        volumes.append(volume2)
+        assert volumes.get(lv_name='volume1') == volume1
+
+    def test_filter_by_tag(self, volumes):
+        lv_tags = "ceph.type=data,ceph.fsid=000-aaa"
+        osd = api.Volume(lv_name='volume1', lv_path='/dev/vg/lv', lv_tags=lv_tags)
+        journal = api.Volume(lv_name='volume2', lv_path='/dev/vg/lv', lv_tags='ceph.type=journal')
+        volumes.append(osd)
+        volumes.append(journal)
+        volumes.filter(lv_tags={'ceph.type': 'data'})
+        assert len(volumes) == 1
+        assert volumes[0].lv_name == 'volume1'
+
+    def test_filter_by_vg_name(self, volumes):
+        lv_tags = "ceph.type=data,ceph.fsid=000-aaa"
+        osd = api.Volume(lv_name='volume1', vg_name='ceph_vg', lv_tags=lv_tags)
+        journal = api.Volume(lv_name='volume2', vg_name='system_vg', lv_tags='ceph.type=journal')
+        volumes.append(osd)
+        volumes.append(journal)
+        volumes.filter(vg_name='ceph_vg')
+        assert len(volumes) == 1
+        assert volumes[0].lv_name == 'volume1'
+
+    def test_filter_by_lv_path(self, volumes):
+        osd = api.Volume(lv_name='volume1', lv_path='/dev/volume1', lv_tags='')
+        journal = api.Volume(lv_name='volume2', lv_path='/dev/volume2', lv_tags='')
+        volumes.append(osd)
+        volumes.append(journal)
+        volumes.filter(lv_path='/dev/volume1')
+        assert len(volumes) == 1
+        assert volumes[0].lv_name == 'volume1'
+
+    def test_filter_requires_params(self, volumes):
+        with pytest.raises(TypeError):
+            volumes.filter()
+
+
+class TestVolumeGroups(object):
+
+    def test_volume_get_has_no_volume_groups(self, volume_groups):
+        assert volume_groups.get() is None
+
+    def test_volume_get_filtered_has_no_volumes(self, volume_groups):
+        assert volume_groups.get(vg_name='ceph') is None
+
+    def test_volume_has_multiple_matches(self, volume_groups):
+        volume1 = volume2 = api.VolumeGroup(vg_name='foo', lv_path='/dev/vg/lv', lv_tags='')
+        volume_groups.append(volume1)
+        volume_groups.append(volume2)
+        with pytest.raises(exceptions.MultipleVGsError):
+            volume_groups.get(vg_name='foo')
+
+    def test_find_the_correct_one(self, volume_groups):
+        volume1 = api.VolumeGroup(vg_name='volume1', lv_tags='')
+        volume2 = api.VolumeGroup(vg_name='volume2', lv_tags='')
+        volume_groups.append(volume1)
+        volume_groups.append(volume2)
+        assert volume_groups.get(vg_name='volume1') == volume1
+
+    def test_filter_by_tag(self, volume_groups):
+        vg_tags = "ceph.group=dmcache"
+        osd = api.VolumeGroup(vg_name='volume1', vg_tags=vg_tags)
+        journal = api.VolumeGroup(vg_name='volume2', vg_tags='ceph.group=plain')
+        volume_groups.append(osd)
+        volume_groups.append(journal)
+        volume_groups.filter(vg_tags={'ceph.group': 'dmcache'})
+        assert len(volume_groups) == 1
+        assert volume_groups[0].vg_name == 'volume1'
+
+    def test_filter_by_vg_name(self, volume_groups):
+        vg_tags = "ceph.type=data,ceph.fsid=000-aaa"
+        osd = api.VolumeGroup(vg_name='ceph_vg', vg_tags=vg_tags)
+        journal = api.VolumeGroup(vg_name='volume2', vg_tags='ceph.type=journal')
+        volume_groups.append(osd)
+        volume_groups.append(journal)
+        volume_groups.filter(vg_name='ceph_vg')
+        assert len(volume_groups) == 1
+        assert volume_groups[0].vg_name == 'ceph_vg'
+
+    def test_filter_requires_params(self, volume_groups):
+        with pytest.raises(TypeError):
+            volume_groups.filter()
+
+
+class TestCreateLV(object):
+
+    def setup(self):
+        self.foo_volume = api.Volume(lv_name='foo', lv_path='/path', vg_name='foo_group', lv_tags='')
+
+    def test_uses_size(self, monkeypatch, capture):
+        monkeypatch.setattr(process, 'run', capture)
+        monkeypatch.setattr(process, 'call', capture)
+        monkeypatch.setattr(api, 'get_lv', lambda *a, **kw: self.foo_volume)
+        api.create_lv('foo', 'foo_group', size=5, type='data')
+        expected = ['sudo', 'lvcreate', '--yes', '-L', '5G', '-n', 'foo', 'foo_group']
+        assert capture.calls[0]['args'][0] == expected
+
+    def test_calls_to_set_type_tag(self, monkeypatch, capture):
+        monkeypatch.setattr(process, 'run', capture)
+        monkeypatch.setattr(process, 'call', capture)
+        monkeypatch.setattr(api, 'get_lv', lambda *a, **kw: self.foo_volume)
+        api.create_lv('foo', 'foo_group', size=5, type='data')
+        ceph_tag = ['sudo', 'lvchange', '--addtag', 'ceph.type=data', '/path']
+        assert capture.calls[1]['args'][0] == ceph_tag
+
+    def test_calls_to_set_data_tag(self, monkeypatch, capture):
+        monkeypatch.setattr(process, 'run', capture)
+        monkeypatch.setattr(process, 'call', capture)
+        monkeypatch.setattr(api, 'get_lv', lambda *a, **kw: self.foo_volume)
+        api.create_lv('foo', 'foo_group', size=5, type='data')
+        data_tag = ['sudo', 'lvchange', '--addtag', 'ceph.data_device=/path', '/path']
+        assert capture.calls[2]['args'][0] == data_tag
diff --git a/ceph/src/ceph-volume/ceph_volume/tests/devices/lvm/test_prepare.py b/ceph/src/ceph-volume/ceph_volume/tests/devices/lvm/test_prepare.py
new file mode 100644 (file)
index 0000000..b8402a7
--- /dev/null
@@ -0,0 +1,53 @@
+import pytest
+from ceph_volume.devices import lvm
+
+
+class TestLVM(object):
+
+    def test_main_spits_help_with_no_arguments(self, capsys):
+        lvm.main.LVM([]).main()
+        stdout, stderr = capsys.readouterr()
+        assert 'Use LVM and LVM-based technologies like dmcache to deploy' in stdout
+
+    def test_main_shows_activate_subcommands(self, capsys):
+        lvm.main.LVM([]).main()
+        stdout, stderr = capsys.readouterr()
+        assert 'activate ' in stdout
+        assert 'Discover and mount' in stdout
+
+    def test_main_shows_prepare_subcommands(self, capsys):
+        lvm.main.LVM([]).main()
+        stdout, stderr = capsys.readouterr()
+        assert 'prepare ' in stdout
+        assert 'Format an LVM device' in stdout
+
+
+class TestPrepare(object):
+
+    def test_main_spits_help_with_no_arguments(self, capsys):
+        lvm.prepare.Prepare([]).main()
+        stdout, stderr = capsys.readouterr()
+        assert 'Prepare an OSD by assigning an ID and FSID' in stdout
+
+    def test_main_shows_full_help(self, capsys):
+        with pytest.raises(SystemExit):
+            lvm.prepare.Prepare(argv=['--help']).main()
+        stdout, stderr = capsys.readouterr()
+        assert 'required arguments:' in stdout
+        assert 'A logical group name or a path' in stdout
+
+
+class TestActivate(object):
+
+    def test_main_spits_help_with_no_arguments(self, capsys):
+        lvm.activate.Activate([]).main()
+        stdout, stderr = capsys.readouterr()
+        assert 'Activate OSDs by discovering them with' in stdout
+
+    def test_main_shows_full_help(self, capsys):
+        with pytest.raises(SystemExit):
+            lvm.activate.Activate(argv=['--help']).main()
+        stdout, stderr = capsys.readouterr()
+        assert 'optional arguments' in stdout
+        assert 'positional arguments' in stdout
+
diff --git a/ceph/src/ceph-volume/ceph_volume/tests/devices/lvm/test_trigger.py b/ceph/src/ceph-volume/ceph_volume/tests/devices/lvm/test_trigger.py
new file mode 100644 (file)
index 0000000..f1dff2d
--- /dev/null
@@ -0,0 +1,39 @@
+import pytest
+from ceph_volume import exceptions
+from ceph_volume.devices.lvm import trigger
+
+
+class TestParseOSDid(object):
+
+    def test_no_id_found_if_no_digit(self):
+        with pytest.raises(exceptions.SuffixParsingError):
+            trigger.parse_osd_id('asdlj-ljahsdfaslkjhdfa')
+
+    def test_no_id_found(self):
+        with pytest.raises(exceptions.SuffixParsingError):
+            trigger.parse_osd_id('ljahsdfaslkjhdfa')
+
+    def test_id_found(self):
+        result = trigger.parse_osd_id('1-ljahsdfaslkjhdfa')
+        assert result == '1'
+
+
+class TestParseOSDUUID(object):
+
+    def test_uuid_is_parsed(self):
+        result = trigger.parse_osd_uuid('1-asdf-ljkh-asdf-ljkh-asdf')
+        assert result == 'asdf-ljkh-asdf-ljkh-asdf'
+
+    def test_uuid_is_parsed_longer_sha1(self):
+        result = trigger.parse_osd_uuid('1-foo-bar-asdf-ljkh-asdf-ljkh-asdf')
+        assert result == 'foo-bar-asdf-ljkh-asdf-ljkh-asdf'
+
+    def test_uuid_is_not_found(self):
+        with pytest.raises(exceptions.SuffixParsingError):
+            trigger.parse_osd_uuid('ljahsdfaslkjhdfa')
+
+    def test_uuid_is_not_found_missing_id(self):
+        with pytest.raises(exceptions.SuffixParsingError):
+            trigger.parse_osd_uuid('ljahs-dfa-slkjhdfa-foo')
+
+
diff --git a/ceph/src/ceph-volume/ceph_volume/tests/functional/Vagrantfile b/ceph/src/ceph-volume/ceph_volume/tests/functional/Vagrantfile
new file mode 100644 (file)
index 0000000..4a79495
--- /dev/null
@@ -0,0 +1,398 @@
+# -*- mode: ruby -*-
+# vi: set ft=ruby :
+
+require 'yaml'
+require 'time'
+VAGRANTFILE_API_VERSION = '2'
+
+DEBUG = false
+
+config_file=File.expand_path(File.join(File.dirname(__FILE__), 'vagrant_variables.yml'))
+settings=YAML.load_file(config_file)
+
+LABEL_PREFIX   = settings['label_prefix'] ? settings['label_prefix'] + "-" : ""
+NMONS          = settings['mon_vms']
+NOSDS          = settings['osd_vms']
+NMDSS          = settings['mds_vms']
+NRGWS          = settings['rgw_vms']
+NNFSS          = settings['nfs_vms']
+RESTAPI        = settings['restapi']
+NRBD_MIRRORS   = settings['rbd_mirror_vms']
+CLIENTS        = settings['client_vms']
+NISCSI_GWS     = settings['iscsi_gw_vms']
+PUBLIC_SUBNET  = settings['public_subnet']
+CLUSTER_SUBNET = settings['cluster_subnet']
+BOX            = settings['vagrant_box']
+CLIENT_BOX     = settings['client_vagrant_box']
+BOX_URL        = settings['vagrant_box_url']
+SYNC_DIR       = settings['vagrant_sync_dir']
+MEMORY         = settings['memory']
+ETH            = settings['eth']
+USER           = settings['ssh_username']
+
+ASSIGN_STATIC_IP = settings.fetch('assign_static_ip', true)
+DISABLE_SYNCED_FOLDER = settings.fetch('vagrant_disable_synced_folder', false)
+DISK_UUID = Time.now.utc.to_i
+
+def create_vmdk(name, size)
+  dir = Pathname.new(__FILE__).expand_path.dirname
+  path = File.join(dir, '.vagrant', name + '.vmdk')
+  `vmware-vdiskmanager -c -s #{size} -t 0 -a scsi #{path} \
+   2>&1 > /dev/null` unless File.exist?(path)
+end
+
+Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
+  config.ssh.insert_key = false # workaround for https://github.com/mitchellh/vagrant/issues/5048
+  config.ssh.private_key_path = settings['ssh_private_key_path']
+  config.ssh.username = USER
+
+  # When using libvirt, avoid errors like:
+  # "CPU feature cmt not found"
+  config.vm.provider :libvirt do |lv|
+    lv.cpu_mode = 'host-passthrough'
+  end
+
+  # Faster bootup. Disables mounting the sync folder for libvirt and virtualbox
+  if DISABLE_SYNCED_FOLDER
+    config.vm.provider :virtualbox do |v,override|
+      override.vm.synced_folder '.', SYNC_DIR, disabled: true
+    end
+    config.vm.provider :libvirt do |v,override|
+      override.vm.synced_folder '.', SYNC_DIR, disabled: true
+    end
+  end
+
+  (0..CLIENTS - 1).each do |i|
+    config.vm.define "#{LABEL_PREFIX}client#{i}" do |client|
+      client.vm.box = CLIENT_BOX
+      client.vm.hostname = "#{LABEL_PREFIX}ceph-client#{i}"
+      if ASSIGN_STATIC_IP
+        client.vm.network :private_network,
+          ip: "#{PUBLIC_SUBNET}.4#{i}"
+      end
+      # Virtualbox
+      client.vm.provider :virtualbox do |vb|
+        vb.customize ['modifyvm', :id, '--memory', "#{MEMORY}"]
+      end
+
+      # VMware
+      client.vm.provider :vmware_fusion do |v|
+        v.vmx['memsize'] = "#{MEMORY}"
+      end
+
+      # Libvirt
+      client.vm.provider :libvirt do |lv|
+        lv.memory = MEMORY
+        lv.random_hostname = true
+      end
+
+      # Parallels
+      client.vm.provider "parallels" do |prl|
+        prl.name = "ceph-client#{i}"
+        prl.memory = "#{MEMORY}"
+      end
+
+      client.vm.provider :linode do |provider|
+        provider.label = client.vm.hostname
+      end
+    end
+  end
+
+  (0..NRGWS - 1).each do |i|
+    config.vm.define "#{LABEL_PREFIX}rgw#{i}" do |rgw|
+      rgw.vm.box = BOX
+      rgw.vm.box_url = BOX_URL
+      rgw.vm.hostname = "#{LABEL_PREFIX}ceph-rgw#{i}"
+      if ASSIGN_STATIC_IP
+        rgw.vm.network :private_network,
+          ip: "#{PUBLIC_SUBNET}.5#{i}"
+      end
+
+      # Virtualbox
+      rgw.vm.provider :virtualbox do |vb|
+        vb.customize ['modifyvm', :id, '--memory', "#{MEMORY}"]
+      end
+
+      # VMware
+      rgw.vm.provider :vmware_fusion do |v|
+        v.vmx['memsize'] = "#{MEMORY}"
+      end
+
+      # Libvirt
+      rgw.vm.provider :libvirt do |lv|
+        lv.memory = MEMORY
+        lv.random_hostname = true
+      end
+
+      # Parallels
+      rgw.vm.provider "parallels" do |prl|
+        prl.name = "ceph-rgw#{i}"
+        prl.memory = "#{MEMORY}"
+      end
+
+      rgw.vm.provider :linode do |provider|
+        provider.label = rgw.vm.hostname
+      end
+    end
+  end
+
+  (0..NNFSS - 1).each do |i|
+    config.vm.define "nfs#{i}" do |nfs|
+      nfs.vm.box = BOX
+      nfs.vm.box_url = BOX_URL
+      nfs.vm.hostname = "ceph-nfs#{i}"
+      if ASSIGN_STATIC_IP
+        nfs.vm.network :private_network,
+          ip: "#{PUBLIC_SUBNET}.6#{i}"
+      end
+
+      # Virtualbox
+      nfs.vm.provider :virtualbox do |vb|
+        vb.customize ['modifyvm', :id, '--memory', "#{MEMORY}"]
+      end
+
+      # VMware
+      nfs.vm.provider :vmware_fusion do |v|
+        v.vmx['memsize'] = "#{MEMORY}"
+      end
+
+      # Libvirt
+      nfs.vm.provider :libvirt do |lv|
+        lv.memory = MEMORY
+        lv.random_hostname = true
+      end
+
+      # Parallels
+      nfs.vm.provider "parallels" do |prl|
+        prl.name = "ceph-nfs#{i}"
+        prl.memory = "#{MEMORY}"
+      end
+
+      nfs.vm.provider :linode do |provider|
+        provider.label = nfs.vm.hostname
+      end
+    end
+  end
+
+  (0..NMDSS - 1).each do |i|
+    config.vm.define "#{LABEL_PREFIX}mds#{i}" do |mds|
+      mds.vm.box = BOX
+      mds.vm.box_url = BOX_URL
+      mds.vm.hostname = "#{LABEL_PREFIX}ceph-mds#{i}"
+      if ASSIGN_STATIC_IP
+        mds.vm.network :private_network,
+          ip: "#{PUBLIC_SUBNET}.7#{i}"
+      end
+      # Virtualbox
+      mds.vm.provider :virtualbox do |vb|
+        vb.customize ['modifyvm', :id, '--memory', "#{MEMORY}"]
+      end
+
+      # VMware
+      mds.vm.provider :vmware_fusion do |v|
+        v.vmx['memsize'] = "#{MEMORY}"
+      end
+
+      # Libvirt
+      mds.vm.provider :libvirt do |lv|
+        lv.memory = MEMORY
+        lv.random_hostname = true
+      end
+      # Parallels
+      mds.vm.provider "parallels" do |prl|
+        prl.name = "ceph-mds#{i}"
+        prl.memory = "#{MEMORY}"
+      end
+
+      mds.vm.provider :linode do |provider|
+        provider.label = mds.vm.hostname
+      end
+    end
+  end
+
+  (0..NRBD_MIRRORS - 1).each do |i|
+    config.vm.define "#{LABEL_PREFIX}rbd_mirror#{i}" do |rbd_mirror|
+      rbd_mirror.vm.box = BOX
+      rbd_mirror.vm.box_url = BOX_URL
+      rbd_mirror.vm.hostname = "#{LABEL_PREFIX}ceph-rbd-mirror#{i}"
+      if ASSIGN_STATIC_IP
+        rbd_mirror.vm.network :private_network,
+          ip: "#{PUBLIC_SUBNET}.8#{i}"
+      end
+      # Virtualbox
+      rbd_mirror.vm.provider :virtualbox do |vb|
+        vb.customize ['modifyvm', :id, '--memory', "#{MEMORY}"]
+      end
+
+      # VMware
+      rbd_mirror.vm.provider :vmware_fusion do |v|
+        v.vmx['memsize'] = "#{MEMORY}"
+      end
+
+      # Libvirt
+      rbd_mirror.vm.provider :libvirt do |lv|
+        lv.memory = MEMORY
+        lv.random_hostname = true
+      end
+      # Parallels
+      rbd_mirror.vm.provider "parallels" do |prl|
+        prl.name = "ceph-rbd-mirror#{i}"
+        prl.memory = "#{MEMORY}"
+      end
+
+      rbd_mirror.vm.provider :linode do |provider|
+        provider.label = rbd_mirror.vm.hostname
+      end
+    end
+  end
+
+  (0..NISCSI_GWS - 1).each do |i|
+    config.vm.define "#{LABEL_PREFIX}iscsi_gw#{i}" do |iscsi_gw|
+      iscsi_gw.vm.box = BOX
+      iscsi_gw.vm.box_url = BOX_URL
+      iscsi_gw.vm.hostname = "#{LABEL_PREFIX}ceph-iscsi-gw#{i}"
+      if ASSIGN_STATIC_IP
+        iscsi_gw.vm.network :private_network,
+          ip: "#{PUBLIC_SUBNET}.9#{i}"
+      end
+      # Virtualbox
+      iscsi_gw.vm.provider :virtualbox do |vb|
+        vb.customize ['modifyvm', :id, '--memory', "#{MEMORY}"]
+      end
+
+      # VMware
+      iscsi_gw.vm.provider :vmware_fusion do |v|
+        v.vmx['memsize'] = "#{MEMORY}"
+      end
+
+      # Libvirt
+      iscsi_gw.vm.provider :libvirt do |lv|
+        lv.memory = MEMORY
+        lv.random_hostname = true
+      end
+      # Parallels
+      iscsi_gw.vm.provider "parallels" do |prl|
+        prl.name = "ceph-iscsi-gw#{i}"
+        prl.memory = "#{MEMORY}"
+      end
+
+      iscsi_gw.vm.provider :linode do |provider|
+        provider.label = iscsi_gw.vm.hostname
+      end
+    end
+  end
+
+  (0..NMONS - 1).each do |i|
+    config.vm.define "#{LABEL_PREFIX}mon#{i}" do |mon|
+      mon.vm.box = BOX
+      mon.vm.box_url = BOX_URL
+      mon.vm.hostname = "#{LABEL_PREFIX}ceph-mon#{i}"
+      if ASSIGN_STATIC_IP
+        mon.vm.network :private_network,
+          ip: "#{PUBLIC_SUBNET}.1#{i}"
+      end
+      # Virtualbox
+      mon.vm.provider :virtualbox do |vb|
+        vb.customize ['modifyvm', :id, '--memory', "#{MEMORY}"]
+      end
+
+      # VMware
+      mon.vm.provider :vmware_fusion do |v|
+        v.vmx['memsize'] = "#{MEMORY}"
+      end
+
+      # Libvirt
+      mon.vm.provider :libvirt do |lv|
+        lv.memory = MEMORY
+        lv.random_hostname = true
+      end
+
+      # Parallels
+      mon.vm.provider "parallels" do |prl|
+        prl.name = "ceph-mon#{i}"
+        prl.memory = "#{MEMORY}"
+      end
+
+      mon.vm.provider :linode do |provider|
+        provider.label = mon.vm.hostname
+      end
+    end
+  end
+
+  (0..NOSDS - 1).each do |i|
+    config.vm.define "#{LABEL_PREFIX}osd#{i}" do |osd|
+      osd.vm.box = BOX
+      osd.vm.box_url = BOX_URL
+      osd.vm.hostname = "#{LABEL_PREFIX}ceph-osd#{i}"
+      if ASSIGN_STATIC_IP
+        osd.vm.network :private_network,
+          ip: "#{PUBLIC_SUBNET}.10#{i}"
+        osd.vm.network :private_network,
+          ip: "#{CLUSTER_SUBNET}.20#{i}"
+      end
+      # Virtualbox
+      osd.vm.provider :virtualbox do |vb|
+        # Create our own controller for consistency and to remove VM dependency
+        # but only do it once, otherwise it would fail when rebooting machines.
+        # We assume this has run if one disk was created before
+        unless File.exist?("disk-#{i}-0.vdi")
+            vb.customize ['storagectl', :id,
+                          '--name', 'OSD Controller',
+                          '--add', 'scsi']
+        end
+        (0..2).each do |d|
+          vb.customize ['createhd',
+                        '--filename', "disk-#{i}-#{d}",
+                        '--size', '11000'] unless File.exist?("disk-#{i}-#{d}.vdi")
+          vb.customize ['storageattach', :id,
+                        '--storagectl', 'OSD Controller',
+                        '--port', 3 + d,
+                        '--device', 0,
+                        '--type', 'hdd',
+                        '--medium', "disk-#{i}-#{d}.vdi"]
+        end
+        vb.customize ['modifyvm', :id, '--memory', "#{MEMORY}"]
+      end
+
+      # VMware
+      osd.vm.provider :vmware_fusion do |v|
+        (0..1).each do |d|
+          v.vmx["scsi0:#{d + 1}.present"] = 'TRUE'
+          v.vmx["scsi0:#{d + 1}.fileName"] =
+            create_vmdk("disk-#{i}-#{d}", '11000MB')
+        end
+        v.vmx['memsize'] = "#{MEMORY}"
+      end
+
+      # Libvirt
+      driverletters = ('a'..'z').to_a
+      osd.vm.provider :libvirt do |lv|
+        # always make /dev/sd{a/b/c/d} so that CI can ensure that
+        # virtualbox and libvirt will have the same devices to use for OSDs
+        (0..3).each do |d|
+          lv.storage :file, :device => "hd#{driverletters[d]}", :path => "disk-#{i}-#{d}-#{DISK_UUID}.disk", :size => '12G', :bus => "ide"
+        end
+        lv.memory = MEMORY
+        lv.random_hostname = true
+      end
+
+      # Parallels
+      osd.vm.provider "parallels" do |prl|
+        prl.name = "ceph-osd#{i}"
+        prl.memory = "#{MEMORY}"
+        (0..1).each do |d|
+          prl.customize ["set", :id,
+                         "--device-add",
+                         "hdd",
+                         "--iface",
+                         "sata"]
+        end
+      end
+
+      osd.vm.provider :linode do |provider|
+        provider.label = osd.vm.hostname
+      end
+
+    end
+  end
+end
diff --git a/ceph/src/ceph-volume/ceph_volume/tests/functional/centos7/create/Vagrantfile b/ceph/src/ceph-volume/ceph_volume/tests/functional/centos7/create/Vagrantfile
new file mode 120000 (symlink)
index 0000000..2572fa2
--- /dev/null
@@ -0,0 +1 @@
+../../Vagrantfile
\ No newline at end of file
diff --git a/ceph/src/ceph-volume/ceph_volume/tests/functional/centos7/create/group_vars/all b/ceph/src/ceph-volume/ceph_volume/tests/functional/centos7/create/group_vars/all
new file mode 100644 (file)
index 0000000..971c63d
--- /dev/null
@@ -0,0 +1,21 @@
+---
+
+ceph_dev: True
+cluster: ceph
+public_network: "192.168.3.0/24"
+cluster_network: "192.168.4.0/24"
+monitor_interface: eth1
+journal_size: 100
+osd_objectstore: "filestore"
+osd_scenario: lvm
+copy_admin_key: true
+# test_volume is created by tests/functional/lvm_setup.yml from /dev/sda
+lvm_volumes:
+  test_volume: /dev/sdc
+os_tuning_params:
+  - { name: kernel.pid_max, value: 4194303 }
+  - { name: fs.file-max, value: 26234859 }
+ceph_conf_overrides:
+  global:
+    osd_pool_default_pg_num: 8
+    osd_pool_default_size: 1
diff --git a/ceph/src/ceph-volume/ceph_volume/tests/functional/centos7/create/hosts b/ceph/src/ceph-volume/ceph_volume/tests/functional/centos7/create/hosts
new file mode 100644 (file)
index 0000000..f6a265a
--- /dev/null
@@ -0,0 +1,5 @@
+[mons]
+mon0
+
+[osds]
+osd0
diff --git a/ceph/src/ceph-volume/ceph_volume/tests/functional/centos7/create/vagrant_variables.yml b/ceph/src/ceph-volume/ceph_volume/tests/functional/centos7/create/vagrant_variables.yml
new file mode 100644 (file)
index 0000000..7d1a444
--- /dev/null
@@ -0,0 +1,56 @@
+---
+
+# DEFINE THE NUMBER OF VMS TO RUN
+mon_vms: 1
+osd_vms: 1
+mds_vms: 0
+rgw_vms: 0
+nfs_vms: 0
+rbd_mirror_vms: 0
+client_vms: 0
+iscsi_gw_vms: 0
+mgr_vms: 0
+
+# SUBNETS TO USE FOR THE VMS
+public_subnet: 192.168.3
+cluster_subnet: 192.168.4
+
+# MEMORY
+# set 1024 for CentOS
+memory: 512
+
+# Ethernet interface name
+# use eth1 for libvirt and ubuntu precise, enp0s8 for CentOS and ubuntu xenial
+eth: 'eth1'
+
+
+# VAGRANT BOX
+# Ceph boxes are *strongly* suggested. They are under better control and will
+# not get updated frequently unless required for build systems. These are (for
+# now):
+#
+# * ceph/ubuntu-xenial
+#
+# Ubuntu: ceph/ubuntu-xenial bento/ubuntu-16.04 or ubuntu/trusty64 or ubuntu/wily64
+# CentOS: bento/centos-7.1 or puppetlabs/centos-7.0-64-puppet
+# libvirt CentOS: centos/7
+# parallels Ubuntu: parallels/ubuntu-14.04
+# Debian: deb/jessie-amd64 - be careful the storage controller is named 'SATA Controller'
+# For more boxes have a look at:
+#   - https://atlas.hashicorp.com/boxes/search?utf8=✓&sort=&provider=virtualbox&q=
+#   - https://download.gluster.org/pub/gluster/purpleidea/vagrant/
+vagrant_box: centos/7
+#ssh_private_key_path: "~/.ssh/id_rsa"
+# The sync directory changes based on vagrant box
+# Set to /home/vagrant/sync for Centos/7, /home/{ user }/vagrant for openstack and defaults to /vagrant
+#vagrant_sync_dir: /home/vagrant/sync
+#vagrant_sync_dir: /
+# Disables synced folder creation. Not needed for testing, will skip mounting
+# the vagrant directory on the remote box regardless of the provider.
+vagrant_disable_synced_folder: true
+# VAGRANT URL
+# This is a URL to download an image from an alternate location.  vagrant_box
+# above should be set to the filename of the image.
+# Fedora virtualbox: https://download.fedoraproject.org/pub/fedora/linux/releases/22/Cloud/x86_64/Images/Fedora-Cloud-Base-Vagrant-22-20150521.x86_64.vagrant-virtualbox.box
+# Fedora libvirt: https://download.fedoraproject.org/pub/fedora/linux/releases/22/Cloud/x86_64/Images/Fedora-Cloud-Base-Vagrant-22-20150521.x86_64.vagrant-libvirt.box
+# vagrant_box_url: https://download.fedoraproject.org/pub/fedora/linux/releases/22/Cloud/x86_64/Images/Fedora-Cloud-Base-Vagrant-22-20150521.x86_64.vagrant-virtualbox.box
diff --git a/ceph/src/ceph-volume/ceph_volume/tests/functional/scripts/generate_ssh_config.sh b/ceph/src/ceph-volume/ceph_volume/tests/functional/scripts/generate_ssh_config.sh
new file mode 100644 (file)
index 0000000..43e64a6
--- /dev/null
@@ -0,0 +1,14 @@
+#!/bin/bash
+# Generate a custom ssh config from Vagrant so that it can then be used by
+# ansible.cfg
+
+if [ $# -eq 0 ]
+  then
+    echo "A path to the scenario is required as an argument and it wasn't provided" >&2
+    exit 1
+fi
+
+path=$1
+# abort rather than write vagrant_ssh_config into whatever directory we are in
+cd "$path" || exit 1
+vagrant ssh-config > vagrant_ssh_config
diff --git a/ceph/src/ceph-volume/ceph_volume/tests/functional/tox.ini b/ceph/src/ceph-volume/ceph_volume/tests/functional/tox.ini
new file mode 100644 (file)
index 0000000..6fe16a5
--- /dev/null
@@ -0,0 +1,52 @@
+[tox]
+envlist = {centos7,xenial}-{create,prepare_activate}
+skipsdist = True
+
+[testenv]
+whitelist_externals =
+    vagrant
+    bash
+    git
+passenv=*
+setenv=
+  ANSIBLE_SSH_ARGS = -F {changedir}/vagrant_ssh_config
+  ANSIBLE_STDOUT_CALLBACK = debug
+  ANSIBLE_RETRY_FILES_ENABLED = False
+  VAGRANT_CWD = {changedir}
+  CEPH_VOLUME_DEBUG = 1
+deps=
+  ansible==2.2.3
+  testinfra==1.6.0
+  pytest-xdist
+changedir=
+  centos7-create: {toxinidir}/centos7/create
+  xenial-create: {toxinidir}/xenial/create
+  # TODO: these are placeholders for now, eventually we want to
+  # test the prepare/activate workflow of ceph-volume as well
+  xenial-prepare_activate: {toxinidir}/xenial/prepare_activate
+  centos7-prepare_activate: {toxinidir}/centos7/prepare_activate
+commands=
+  git clone -b {env:CEPH_ANSIBLE_BRANCH:master} --single-branch https://github.com/ceph/ceph-ansible.git {envdir}/tmp/ceph-ansible
+
+  vagrant up --no-provision {posargs:--provider=virtualbox}
+  bash {toxinidir}/scripts/generate_ssh_config.sh {changedir}
+
+  # create logical volumes to test with on the vms
+  ansible-playbook -vv -i {changedir}/hosts {envdir}/tmp/ceph-ansible/tests/functional/lvm_setup.yml
+
+  # use ceph-ansible to deploy a ceph cluster on the vms
+  ansible-playbook -vv -i {changedir}/hosts {envdir}/tmp/ceph-ansible/site.yml.sample --extra-vars "fetch_directory={changedir}/fetch ceph_dev_branch={env:CEPH_DEV_BRANCH:master} ceph_dev_sha1={env:CEPH_DEV_SHA1:latest}"
+
+  # prepare nodes for testing with testinfra
+  ansible-playbook -vv -i {changedir}/hosts {envdir}/tmp/ceph-ansible/tests/functional/setup.yml
+
+  # test cluster state using ceph-ansible tests
+  testinfra -n 4 --sudo -v --connection=ansible --ansible-inventory={changedir}/hosts {envdir}/tmp/ceph-ansible/tests/functional/tests
+
+  # reboot all vms
+  vagrant reload --no-provision
+
+  # retest to ensure cluster came back up correctly after rebooting
+  testinfra -n 4 --sudo -v --connection=ansible --ansible-inventory={changedir}/hosts {envdir}/tmp/ceph-ansible/tests/functional/tests
+
+  vagrant destroy --force
diff --git a/ceph/src/ceph-volume/ceph_volume/tests/functional/xenial/create/Vagrantfile b/ceph/src/ceph-volume/ceph_volume/tests/functional/xenial/create/Vagrantfile
new file mode 120000 (symlink)
index 0000000..2572fa2
--- /dev/null
@@ -0,0 +1 @@
+../../Vagrantfile
\ No newline at end of file
diff --git a/ceph/src/ceph-volume/ceph_volume/tests/functional/xenial/create/group_vars/all b/ceph/src/ceph-volume/ceph_volume/tests/functional/xenial/create/group_vars/all
new file mode 100644 (file)
index 0000000..971c63d
--- /dev/null
@@ -0,0 +1,21 @@
+---
+
+ceph_dev: True
+cluster: ceph
+public_network: "192.168.3.0/24"
+cluster_network: "192.168.4.0/24"
+monitor_interface: eth1
+journal_size: 100
+osd_objectstore: "filestore"
+osd_scenario: lvm
+copy_admin_key: true
+# test_volume is created by tests/functional/lvm_setup.yml from /dev/sda
+lvm_volumes:
+  test_volume: /dev/sdc
+os_tuning_params:
+  - { name: kernel.pid_max, value: 4194303 }
+  - { name: fs.file-max, value: 26234859 }
+ceph_conf_overrides:
+  global:
+    osd_pool_default_pg_num: 8
+    osd_pool_default_size: 1
diff --git a/ceph/src/ceph-volume/ceph_volume/tests/functional/xenial/create/hosts b/ceph/src/ceph-volume/ceph_volume/tests/functional/xenial/create/hosts
new file mode 100644 (file)
index 0000000..f6a265a
--- /dev/null
@@ -0,0 +1,5 @@
+[mons]
+mon0
+
+[osds]
+osd0
diff --git a/ceph/src/ceph-volume/ceph_volume/tests/functional/xenial/create/vagrant_variables.yml b/ceph/src/ceph-volume/ceph_volume/tests/functional/xenial/create/vagrant_variables.yml
new file mode 100644 (file)
index 0000000..82b330e
--- /dev/null
@@ -0,0 +1,54 @@
+---
+# DEFINE THE NUMBER OF VMS TO RUN
+mon_vms: 1
+osd_vms: 1
+mds_vms: 0
+rgw_vms: 0
+nfs_vms: 0
+rbd_mirror_vms: 0
+client_vms: 0
+iscsi_gw_vms: 0
+mgr_vms: 0
+
+# SUBNETS TO USE FOR THE VMS
+public_subnet: 192.168.3
+cluster_subnet: 192.168.4
+
+# MEMORY
+# set 1024 for CentOS
+memory: 512
+
+# Ethernet interface name
+# use eth1 for libvirt and ubuntu precise, enp0s8 for CentOS and ubuntu xenial
+eth: 'eth1'
+
+# VAGRANT BOX
+# Ceph boxes are *strongly* suggested. They are under better control and will
+# not get updated frequently unless required for build systems. These are (for
+# now):
+#
+# * ceph/ubuntu-xenial
+#
+# Ubuntu: ceph/ubuntu-xenial bento/ubuntu-16.04 or ubuntu/trusty64 or ubuntu/wily64
+# CentOS: bento/centos-7.1 or puppetlabs/centos-7.0-64-puppet
+# libvirt CentOS: centos/7
+# parallels Ubuntu: parallels/ubuntu-14.04
+# Debian: deb/jessie-amd64 - be careful the storage controller is named 'SATA Controller'
+# For more boxes have a look at:
+#   - https://atlas.hashicorp.com/boxes/search?utf8=✓&sort=&provider=virtualbox&q=
+#   - https://download.gluster.org/pub/gluster/purpleidea/vagrant/
+vagrant_box: ceph/ubuntu-xenial
+#ssh_private_key_path: "~/.ssh/id_rsa"
+# The sync directory changes based on vagrant box
+# Set to /home/vagrant/sync for Centos/7, /home/{ user }/vagrant for openstack and defaults to /vagrant
+#vagrant_sync_dir: /home/vagrant/sync
+#vagrant_sync_dir: /
+# Disables synced folder creation. Not needed for testing, will skip mounting
+# the vagrant directory on the remote box regardless of the provider.
+vagrant_disable_synced_folder: true
+# VAGRANT URL
+# This is a URL to download an image from an alternate location.  vagrant_box
+# above should be set to the filename of the image.
+# Fedora virtualbox: https://download.fedoraproject.org/pub/fedora/linux/releases/22/Cloud/x86_64/Images/Fedora-Cloud-Base-Vagrant-22-20150521.x86_64.vagrant-virtualbox.box
+# Fedora libvirt: https://download.fedoraproject.org/pub/fedora/linux/releases/22/Cloud/x86_64/Images/Fedora-Cloud-Base-Vagrant-22-20150521.x86_64.vagrant-libvirt.box
+# vagrant_box_url: https://download.fedoraproject.org/pub/fedora/linux/releases/22/Cloud/x86_64/Images/Fedora-Cloud-Base-Vagrant-22-20150521.x86_64.vagrant-virtualbox.box
diff --git a/ceph/src/ceph-volume/ceph_volume/tests/systemd/test_main.py b/ceph/src/ceph-volume/ceph_volume/tests/systemd/test_main.py
new file mode 100644 (file)
index 0000000..86a685a
--- /dev/null
@@ -0,0 +1,51 @@
+import pytest
+from ceph_volume import exceptions, conf
+from ceph_volume.systemd import main
+
+
+class TestParseSubcommand(object):
+
+    def test_no_subcommand_found(self):
+        with pytest.raises(exceptions.SuffixParsingError):
+            main.parse_subcommand('')
+
+    def test_sub_command_is_found(self):
+        result = main.parse_subcommand('lvm-1-sha-1-something-0')
+        assert result == 'lvm'
+
+
+class Capture(object):
+
+    def __init__(self, *a, **kw):
+        self.a = a
+        self.kw = kw
+        self.calls = []
+
+    def __call__(self, *a, **kw):
+        self.calls.append(a)
+        self.calls.append(kw)
+
+
+class TestMain(object):
+
+    def setup(self):
+        conf.log_path = '/tmp/'
+
+    def test_no_arguments_parsing_error(self):
+        with pytest.raises(RuntimeError):
+            main.main(args=[])
+
+    def test_parsing_suffix_error(self):
+        with pytest.raises(exceptions.SuffixParsingError):
+            main.main(args=['asdf'])
+
+    def test_correct_command(self, monkeypatch):
+        run = Capture()
+        monkeypatch.setattr(main.process, 'run', run)
+        main.main(args=['ceph-volume-systemd', 'lvm-8715BEB4-15C5-49DE-BA6F-401086EC7B41-0' ])
+        command = run.calls[0][0]
+        assert command == [
+            'ceph-volume',
+            'lvm', 'trigger',
+            '8715BEB4-15C5-49DE-BA6F-401086EC7B41-0'
+        ]
diff --git a/ceph/src/ceph-volume/ceph_volume/tests/test_configuration.py b/ceph/src/ceph-volume/ceph_volume/tests/test_configuration.py
new file mode 100644 (file)
index 0000000..5ef7fae
--- /dev/null
@@ -0,0 +1,79 @@
+import os
+try:
+    from cStringIO import StringIO
+except ImportError: # pragma: no cover
+    from io import StringIO # pragma: no cover
+from textwrap import dedent
+import pytest
+from ceph_volume import configuration, exceptions
+
+
+class TestConf(object):
+
+    def setup(self):
+        self.conf_file = StringIO(dedent("""
+        [foo]
+        default = 0
+        """))
+
+    def test_get_non_existing_list(self):
+        cfg = configuration.Conf()
+        cfg.is_valid = lambda: True
+        cfg.readfp(self.conf_file)
+        assert cfg.get_list('global', 'key') == []
+
+    def test_get_non_existing_list_get_default(self):
+        cfg = configuration.Conf()
+        cfg.is_valid = lambda: True
+        cfg.readfp(self.conf_file)
+        assert cfg.get_list('global', 'key', ['a']) == ['a']
+
+    def test_get_rid_of_comments(self):
+        cfg = configuration.Conf()
+        cfg.is_valid = lambda: True
+        conf_file = StringIO(dedent("""
+        [foo]
+        default = 0  # this is a comment
+        """))
+
+        cfg.readfp(conf_file)
+        assert cfg.get_list('foo', 'default') == ['0']
+
+    def test_gets_split_on_commas(self):
+        cfg = configuration.Conf()
+        cfg.is_valid = lambda: True
+        conf_file = StringIO(dedent("""
+        [foo]
+        default = 0,1,2,3  # this is a comment
+        """))
+
+        cfg.readfp(conf_file)
+        assert cfg.get_list('foo', 'default') == ['0', '1', '2', '3']
+
+    def test_spaces_and_tabs_are_ignored(self):
+        cfg = configuration.Conf()
+        cfg.is_valid = lambda: True
+        conf_file = StringIO(dedent("""
+        [foo]
+        default = 0,        1,  2 ,3  # this is a comment
+        """))
+
+        cfg.readfp(conf_file)
+        assert cfg.get_list('foo', 'default') == ['0', '1', '2', '3']
+
+
+class TestLoad(object):
+
+    def test_path_does_not_exist(self):
+        with pytest.raises(exceptions.ConfigurationError):
+            conf = configuration.load('/path/does/not/exist/ceph.con')
+            conf.is_valid()
+
+    def test_unable_to_read_configuration(self, tmpdir, capsys):
+        ceph_conf = os.path.join(str(tmpdir), 'ceph.conf')
+        with open(ceph_conf, 'w') as config:
+            config.write(']broken] config\n[[')
+        configuration.load(ceph_conf)
+        stdout, stderr = capsys.readouterr()
+        assert 'Unable to read configuration file' in stdout
+        assert 'File contains no section headers' in stdout
diff --git a/ceph/src/ceph-volume/ceph_volume/tests/test_decorators.py b/ceph/src/ceph-volume/ceph_volume/tests/test_decorators.py
new file mode 100644 (file)
index 0000000..8df8914
--- /dev/null
@@ -0,0 +1,71 @@
+import os
+import pytest
+from ceph_volume import exceptions, decorators, terminal
+
+
+class TestNeedsRoot(object):
+
+    def test_is_root(self, monkeypatch):
+        def func():
+            return True
+        monkeypatch.setattr(decorators.os, 'getuid', lambda: 0)
+        assert decorators.needs_root(func)() is True
+
+    def test_is_not_root(self, monkeypatch):
+        def func():
+            return True # pragma: no cover
+        monkeypatch.setattr(decorators.os, 'getuid', lambda: 20)
+        with pytest.raises(exceptions.SuperUserError) as error:
+            decorators.needs_root(func)()
+
+        msg = 'This command needs to be executed with sudo or as root'
+        assert str(error.value) == msg
+
+
+class TestExceptionMessage(object):
+
+    def test_has_str_method(self):
+        result = decorators.make_exception_message(RuntimeError('an error'))
+        expected = "%s %s\n" % (terminal.red_arrow, 'RuntimeError: an error')
+        assert result == expected
+
+    def test_has_no_str_method(self):
+        class Error(Exception):
+            pass
+        result = decorators.make_exception_message(Error())
+        expected = "%s %s\n" % (terminal.red_arrow, 'Error')
+        assert result == expected
+
+
+class TestCatches(object):
+
+    def teardown(self):
+        try:
+            del(os.environ['CEPH_VOLUME_DEBUG'])
+        except KeyError:
+            pass
+
+    def test_ceph_volume_debug_enabled(self):
+        os.environ['CEPH_VOLUME_DEBUG'] = '1'
+        @decorators.catches() # noqa
+        def func():
+            raise RuntimeError()
+        with pytest.raises(RuntimeError):
+            func()
+
+    def test_ceph_volume_debug_disabled_no_exit(self, capsys):
+        @decorators.catches(exit=False)
+        def func():
+            raise RuntimeError()
+        func()
+        stdout, stderr = capsys.readouterr()
+        assert 'RuntimeError\n' in stderr
+
+    def test_ceph_volume_debug_exits(self, capsys):
+        @decorators.catches()
+        def func():
+            raise RuntimeError()
+        with pytest.raises(SystemExit):
+            func()
+        stdout, stderr = capsys.readouterr()
+        assert 'RuntimeError\n' in stderr
diff --git a/ceph/src/ceph-volume/ceph_volume/tests/test_main.py b/ceph/src/ceph-volume/ceph_volume/tests/test_main.py
new file mode 100644 (file)
index 0000000..c617294
--- /dev/null
@@ -0,0 +1,30 @@
+import os
+import pytest
+from ceph_volume import main
+
+
+class TestVolume(object):
+
+    def test_main_spits_help_with_no_arguments(self, capsys):
+        main.Volume(argv=[])
+        stdout, stderr = capsys.readouterr()
+        assert 'Log Path' in stdout
+
+    def test_warn_about_using_help_for_full_options(self, capsys):
+        main.Volume(argv=[])
+        stdout, stderr = capsys.readouterr()
+        assert 'See "ceph-volume --help" for full list' in stdout
+
+    def test_environ_vars_show_up(self, capsys):
+        os.environ['CEPH_CONF'] = '/opt/ceph.conf'
+        main.Volume(argv=[])
+        stdout, stderr = capsys.readouterr()
+        assert 'CEPH_CONF' in stdout
+        assert '/opt/ceph.conf' in stdout
+
+    def test_flags_are_parsed_with_help(self, capsys):
+        with pytest.raises(SystemExit):
+            main.Volume(argv=['ceph-volume', '--help'])
+        stdout, stderr = capsys.readouterr()
+        assert '--cluster' in stdout
+        assert '--log-path' in stdout
diff --git a/ceph/src/ceph-volume/ceph_volume/tests/test_terminal.py b/ceph/src/ceph-volume/ceph_volume/tests/test_terminal.py
new file mode 100644 (file)
index 0000000..9435dbb
--- /dev/null
@@ -0,0 +1,68 @@
+import pytest
+from ceph_volume import terminal
+
+
+class SubCommand(object):
+
+    help = "this is the subcommand help"
+
+    def __init__(self, argv):
+        self.argv = argv
+
+    def main(self):
+        pass
+
+
+class BadSubCommand(object):
+
+    def __init__(self, argv):
+        self.argv = argv
+
+    def main(self):
+        raise SystemExit(100)
+
+
+class TestSubhelp(object):
+
+    def test_no_sub_command_help(self):
+        assert terminal.subhelp({}) == ''
+
+    def test_single_level_help(self):
+        result = terminal.subhelp({'sub': SubCommand})
+
+        assert 'this is the subcommand help' in result
+
+    def test_has_title_header(self):
+        result = terminal.subhelp({'sub': SubCommand})
+        assert 'Available subcommands:' in result
+
+    def test_command_with_no_help(self):
+        class SubCommandNoHelp(object):
+            pass
+        result = terminal.subhelp({'sub': SubCommandNoHelp})
+        assert result == ''
+
+
+class TestDispatch(object):
+
+    def test_no_subcommand_found(self):
+        result = terminal.dispatch({'sub': SubCommand}, argv=[])
+        assert result is None
+
+    def test_no_main_found(self):
+        class NoMain(object):
+
+            def __init__(self, argv):
+                pass
+        result = terminal.dispatch({'sub': NoMain}, argv=['sub'])
+        assert result is None
+
+    def test_subcommand_found_and_dispatched(self):
+        with pytest.raises(SystemExit) as error:
+            terminal.dispatch({'sub': SubCommand}, argv=['sub'])
+        assert str(error.value) == '0'
+
+    def test_subcommand_found_and_dispatched_with_errors(self):
+        with pytest.raises(SystemExit) as error:
+            terminal.dispatch({'sub': BadSubCommand}, argv=['sub'])
+        assert str(error.value) == '100'
diff --git a/ceph/src/ceph-volume/ceph_volume/tests/util/test_system.py b/ceph/src/ceph-volume/ceph_volume/tests/util/test_system.py
new file mode 100644 (file)
index 0000000..7cb6a1f
--- /dev/null
@@ -0,0 +1,89 @@
+import os
+import pwd
+import getpass
+from textwrap import dedent
+from ceph_volume.util import system
+
+
+class TestMkdirP(object):
+
+    def test_existing_dir_does_not_raise_w_chown(self, monkeypatch, tmpdir):
+        user = pwd.getpwnam(getpass.getuser())
+        uid, gid = user[2], user[3]
+        monkeypatch.setattr(system, 'get_ceph_user_ids', lambda: (uid, gid,))
+        path = str(tmpdir)
+        system.mkdir_p(path)
+        assert os.path.isdir(path)
+
+    def test_new_dir_w_chown(self, monkeypatch, tmpdir):
+        user = pwd.getpwnam(getpass.getuser())
+        uid, gid = user[2], user[3]
+        monkeypatch.setattr(system, 'get_ceph_user_ids', lambda: (uid, gid,))
+        path = os.path.join(str(tmpdir), 'new')
+        system.mkdir_p(path)
+        assert os.path.isdir(path)
+
+    def test_existing_dir_does_not_raise_no_chown(self, tmpdir):
+        path = str(tmpdir)
+        system.mkdir_p(path, chown=False)
+        assert os.path.isdir(path)
+
+    def test_new_dir_no_chown(self, tmpdir):
+        path = os.path.join(str(tmpdir), 'new')
+        system.mkdir_p(path, chown=False)
+        assert os.path.isdir(path)
+
+
+class TestIsMounted(object):
+
+    def test_not_mounted(self, tmpdir, monkeypatch):
+        PROCDIR = str(tmpdir)
+        proc_path = os.path.join(PROCDIR, 'mounts')
+        with open(proc_path, 'w') as f:
+            f.write('')
+        monkeypatch.setattr(system, 'PROCDIR', PROCDIR)
+        assert system.is_mounted('sdb') is False
+
+    def test_is_mounted_(self, tmpdir, monkeypatch):
+        PROCDIR = str(tmpdir)
+        proc_path = os.path.join(PROCDIR, 'mounts')
+        with open(proc_path, 'w') as f:
+            f.write(dedent("""nfsd /proc/fs/nfsd nfsd rw,relatime 0 0
+                    /dev/sdc2 /boot xfs rw,seclabel,relatime,attr2,inode64,noquota 0 0
+                    tmpfs /run/user/1000 tmpfs rw,seclabel,mode=700,uid=1000,gid=1000 0 0"""))
+        monkeypatch.setattr(system, 'PROCDIR', PROCDIR)
+        monkeypatch.setattr(os.path, 'exists', lambda x: True)
+        assert system.is_mounted('/dev/sdc2') is True
+
+    def test_ignores_two_fields(self, tmpdir, monkeypatch):
+        PROCDIR = str(tmpdir)
+        proc_path = os.path.join(PROCDIR, 'mounts')
+        with open(proc_path, 'w') as f:
+            f.write(dedent("""nfsd /proc/fs/nfsd nfsd rw,relatime 0 0
+                    /dev/sdc2 /boot
+                    tmpfs /run/user/1000 tmpfs rw,seclabel,mode=700,uid=1000,gid=1000 0 0"""))
+        monkeypatch.setattr(system, 'PROCDIR', PROCDIR)
+        monkeypatch.setattr(os.path, 'exists', lambda x: True)
+        assert system.is_mounted('/dev/sdc2') is False
+
+    def test_not_mounted_at_destination(self, tmpdir, monkeypatch):
+        PROCDIR = str(tmpdir)
+        proc_path = os.path.join(PROCDIR, 'mounts')
+        with open(proc_path, 'w') as f:
+            f.write(dedent("""nfsd /proc/fs/nfsd nfsd rw,relatime 0 0
+                    /dev/sdc2 /var/lib/ceph/osd/ceph-9 xfs rw,attr2,inode64,noquota 0 0
+                    tmpfs /run/user/1000 tmpfs rw,seclabel,mode=700,uid=1000,gid=1000 0 0"""))
+        monkeypatch.setattr(system, 'PROCDIR', PROCDIR)
+        monkeypatch.setattr(os.path, 'exists', lambda x: True)
+        assert system.is_mounted('/dev/sdc2', '/var/lib/ceph/osd/ceph-0') is False
+
+    def test_is_mounted_at_destination(self, tmpdir, monkeypatch):
+        PROCDIR = str(tmpdir)
+        proc_path = os.path.join(PROCDIR, 'mounts')
+        with open(proc_path, 'w') as f:
+            f.write(dedent("""nfsd /proc/fs/nfsd nfsd rw,relatime 0 0
+                    /dev/sdc2 /var/lib/ceph/osd/ceph-0 xfs rw,attr2,inode64,noquota 0 0
+                    tmpfs /run/user/1000 tmpfs rw,seclabel,mode=700,uid=1000,gid=1000 0 0"""))
+        monkeypatch.setattr(system, 'PROCDIR', PROCDIR)
+        monkeypatch.setattr(os.path, 'exists', lambda x: True)
+        assert system.is_mounted('/dev/sdc2', '/var/lib/ceph/osd/ceph-0') is True
diff --git a/ceph/src/ceph-volume/ceph_volume/util/__init__.py b/ceph/src/ceph-volume/ceph_volume/util/__init__.py
new file mode 100644 (file)
index 0000000..3b8c309
--- /dev/null
@@ -0,0 +1,10 @@
+
+def as_string(string):
+    """
+    Ensure that whatever type of string is incoming, it is returned as an
+    actual string, versus 'bytes' which Python 3 likes to use.
+    """
+    if isinstance(string, bytes):
+        # we really ignore here if we can't properly decode with utf-8
+        return string.decode('utf-8', 'ignore')
+    return string
diff --git a/ceph/src/ceph-volume/ceph_volume/util/constants.py b/ceph/src/ceph-volume/ceph_volume/util/constants.py
new file mode 100644 (file)
index 0000000..07df5a4
--- /dev/null
@@ -0,0 +1,17 @@
+
+# mount flags
+mount = dict(
+    xfs='noatime,inode64',
+)
+
+
+# format flags
+mkfs = dict(
+    xfs=[
+        # force overwriting previous fs
+        '-f',
+        # set the inode size to 2kb
+        '-i', 'size=2048',
+    ],
+)
+
diff --git a/ceph/src/ceph-volume/ceph_volume/util/prepare.py b/ceph/src/ceph-volume/ceph_volume/util/prepare.py
new file mode 100644 (file)
index 0000000..eefa0ad
--- /dev/null
@@ -0,0 +1,165 @@
+"""
+These utilities for prepare provide all the pieces needed to prepare a device
+but also a compounded ("single call") helper to do them in order. Some plugins
+may want to change some part of the process, while others might want to consume
+the single-call helper
+"""
+import os
+import logging
+from ceph_volume import process, conf
+from ceph_volume.util import system, constants
+
+logger = logging.getLogger(__name__)
+
+
+def create_key():
+    """
+    Run ``ceph-authtool --gen-print-key`` and return what it printed, joined
+    with single spaces and stripped of surrounding whitespace.
+
+    :raises RuntimeError: when the command exits with a non-zero code
+    """
+    command = ['ceph-authtool', '--gen-print-key']
+    out, err, code = process.call(command)
+    if code != 0:
+        raise RuntimeError('Unable to generate a new auth key')
+    return ' '.join(out).strip()
+
+
+def write_keyring(osd_id, secret):
+    """
+    Create the ``keyring`` file inside the OSD directory with ``secret`` stored
+    under the ``osd.<osd_id>`` name, then chown it to the ceph user.
+    """
+    # FIXME this only works for cephx, but there will be other types of secrets
+    # later
+    keyring_path = '/var/lib/ceph/osd/%s-%s/keyring' % (conf.cluster, osd_id)
+    entity = 'osd.%s' % str(osd_id)
+    command = [
+        'ceph-authtool', keyring_path,
+        '--create-keyring',
+        '--name', entity,
+        '--add-key', secret,
+    ]
+    process.run(command)
+    system.chown(keyring_path)
+    # TODO: do the restorecon dance on the osd_keyring path
+
+
+def create_id(fsid, json_secrets):
+    """
+    Call ``ceph osd new <fsid>`` with the bootstrap-osd credentials and return
+    the command output (joined with single spaces, stripped) -- presumably the
+    id assigned to the new OSD.
+
+    :param fsid: The osd fsid to create, always required
+    :param json_secrets: a json-ready object with whatever secrets are wanted
+                         to be passed to the monitor, fed to the command on
+                         stdin (``-i -``)
+    :raises RuntimeError: when the command exits with a non-zero code
+    """
+    keyring = '/var/lib/ceph/bootstrap-osd/%s.keyring' % conf.cluster
+    command = [
+        'ceph',
+        '--cluster', conf.cluster,
+        '--name', 'client.bootstrap-osd',
+        '--keyring', keyring,
+        '-i', '-',
+        'osd', 'new', fsid,
+    ]
+    out, err, code = process.call(command, stdin=json_secrets)
+    if code != 0:
+        raise RuntimeError('Unable to create a new OSD id')
+    return ' '.join(out).strip()
+
+
+def create_path(osd_id):
+    """
+    Create the data directory for ``osd_id`` (``/var/lib/ceph/osd/<cluster>-<id>``)
+    through ``system.mkdir_p``.
+    """
+    osd_dir = '/var/lib/ceph/osd/%s-%s' % (conf.cluster, osd_id)
+    system.mkdir_p(osd_dir)
+
+
+def format_device(device):
+    """
+    Format ``device`` as xfs with ``sudo mkfs -t xfs``.
+
+    The mkfs flags are read from ``osd_mkfs_options_xfs`` in the ``osd``
+    section of the ceph configuration, falling back to ``constants.mkfs`` when
+    nothing is configured. ``-f`` is always passed.
+    """
+    # only supports xfs
+    command = ['sudo', 'mkfs', '-t', 'xfs']
+
+    # get the mkfs options if any for xfs,
+    # fallback to the default options defined in constants.mkfs
+    configured = conf.ceph.get_list(
+        'osd',
+        'osd_mkfs_options_xfs',
+        default=constants.mkfs.get('xfs'),
+        split=' ',
+    )
+    # work on a copy: the returned list may be the very list stored in
+    # constants.mkfs, and inserting into it would alter that shared default
+    flags = list(configured)
+
+    # always force
+    if '-f' not in flags:
+        flags.insert(0, '-f')
+
+    command.extend(flags)
+    command.append(device)
+    process.run(command)
+
+
+def mount_osd(device, osd_id):
+    """
+    Mount ``device`` (xfs) on the data directory of ``osd_id``.
+
+    Mount options are read from ``osd_mount_options_xfs`` in the ``osd``
+    section of the ceph configuration, falling back to ``constants.mount``.
+    """
+    destination = '/var/lib/ceph/osd/%s-%s' % (conf.cluster, osd_id)
+    flags = conf.ceph.get_list(
+        'osd',
+        'osd_mount_options_xfs',
+        default=constants.mount.get('xfs'),
+        split=' ',
+    )
+    # ``mount -o`` takes ONE comma separated argument. The default from
+    # constants is already such a string, but a configured value may come back
+    # as a list of pieces; appending that list as-is would nest a list inside
+    # the command, so collapse it into a single string first.
+    if isinstance(flags, (list, tuple)):
+        pieces = [flag.strip(',') for flag in flags]
+        flags = ','.join(piece for piece in pieces if piece)
+    command = ['sudo', 'mount', '-t', 'xfs', '-o', flags, device, destination]
+    process.run(command)
+
+
+def link_journal(journal_device, osd_id):
+    """
+    Symlink ``journal_device`` as the ``journal`` entry inside the data
+    directory of ``osd_id`` (``sudo ln -s``).
+    """
+    osd_dir = '/var/lib/ceph/osd/%s-%s' % (conf.cluster, osd_id)
+    process.run(['sudo', 'ln', '-s', journal_device, osd_dir + '/journal'])
+
+
+def get_monmap(osd_id):
+    r"""
+    Fetch the current monmap with the bootstrap-osd credentials and store it as
+    ``activate.monmap`` inside the data directory of ``osd_id``. The command
+    that gets run (through sudo) looks like::
+
+        ceph --cluster ceph --name client.bootstrap-osd \
+             --keyring /var/lib/ceph/bootstrap-osd/ceph.keyring \
+             mon getmap -o /var/lib/ceph/osd/ceph-0/activate.monmap
+    """
+    osd_dir = '/var/lib/ceph/osd/%s-%s/' % (conf.cluster, osd_id)
+    keyring = '/var/lib/ceph/bootstrap-osd/%s.keyring' % conf.cluster
+    command = [
+        'sudo',
+        'ceph',
+        '--cluster', conf.cluster,
+        '--name', 'client.bootstrap-osd',
+        '--keyring', keyring,
+        'mon', 'getmap', '-o', os.path.join(osd_dir, 'activate.monmap'),
+    ]
+    process.run(command)
+
+
+def osd_mkfs(osd_id, fsid):
+    r"""
+    Run ``ceph-osd --mkfs`` for ``osd_id``. The journal and the data directory
+    are chowned to the ceph user first. The command that gets run (through
+    sudo) looks like::
+
+          ceph-osd --cluster ceph --mkfs -i 0 \
+                   --monmap /var/lib/ceph/osd/ceph-0/activate.monmap \
+                   --osd-data /var/lib/ceph/osd/ceph-0/ \
+                   --osd-journal /var/lib/ceph/osd/ceph-0/journal \
+                   --osd-uuid 8d208665-89ae-4733-8888-5d3bfbeeec6c \
+                   --setuser ceph --setgroup ceph
+
+    """
+    osd_dir = '/var/lib/ceph/osd/%s-%s/' % (conf.cluster, osd_id)
+    monmap_path = os.path.join(osd_dir, 'activate.monmap')
+    journal_path = os.path.join(osd_dir, 'journal')
+
+    # ownership is fixed before ceph-osd runs with --setuser/--setgroup ceph
+    for owned in (journal_path, osd_dir):
+        system.chown(owned)
+
+    command = [
+        'sudo',
+        'ceph-osd',
+        '--cluster', conf.cluster,
+        '--mkfs',
+        '-i', osd_id,
+        '--monmap', monmap_path,
+        '--osd-data', osd_dir,
+        '--osd-journal', journal_path,
+        '--osd-uuid', fsid,
+        '--setuser', 'ceph',
+        '--setgroup', 'ceph',
+    ]
+    process.run(command)
diff --git a/ceph/src/ceph-volume/ceph_volume/util/system.py b/ceph/src/ceph-volume/ceph_volume/util/system.py
new file mode 100644 (file)
index 0000000..084a0e0
--- /dev/null
@@ -0,0 +1,104 @@
+import errno
+import os
+import pwd
+import platform
+import uuid
+from ceph_volume import process
+from . import as_string
+
+
+# TODO: get these out of here and into a common area for others to consume
+# Platform dependent defaults, decided once at import time
+FREEBSD = platform.system() == 'FreeBSD'
+if FREEBSD:
+    DEFAULT_FS_TYPE = 'zfs'
+    PROCDIR = '/compat/linux/proc'
+    # FreeBSD does not have blockdevices any more
+    BLOCKDIR = '/dev'
+    ROOTGROUP = 'wheel'
+else:
+    DEFAULT_FS_TYPE = 'xfs'
+    PROCDIR = '/proc'
+    BLOCKDIR = '/sys/block'
+    ROOTGROUP = 'root'
+
+
+def generate_uuid():
+    """
+    Return a freshly generated random (version 4) UUID in its string form
+    """
+    new_uuid = uuid.uuid4()
+    return str(new_uuid)
+
+
+def get_ceph_user_ids():
+    """
+    Look up the ``ceph`` user in the password database and return its
+    ``(uid, gid)`` pair.
+
+    :raises RuntimeError: when there is no ``ceph`` user
+    """
+    try:
+        entry = pwd.getpwnam('ceph')
+    except KeyError:
+        # is this even possible?
+        raise RuntimeError('"ceph" user is not available in the current system')
+    return entry.pw_uid, entry.pw_gid
+
+
+def mkdir_p(path, chown=True):
+    """
+    A `mkdir -p` that defaults to chown the path to the ceph user
+
+    Missing parent directories are created as well, and an already existing
+    ``path`` is not an error. Only ``path`` itself is chowned, not any parent
+    that had to be created along the way.
+
+    :param path: directory to create
+    :param chown: when true (the default) give ``path`` to the ceph user
+    :raises OSError: for any failure other than the path already existing
+    """
+    try:
+        # os.mkdir would fail on a missing parent, which is not what
+        # `mkdir -p` does
+        os.makedirs(path)
+    except OSError as e:
+        if e.errno != errno.EEXIST:
+            raise
+    if chown:
+        uid, gid = get_ceph_user_ids()
+        os.chown(path, uid, gid)
+
+
+def chown(path, recursive=True):
+    """
+    Give ``path`` to the ceph user. When ``path`` is a symlink it is resolved
+    first and the resolved path is used instead.
+
+    With ``recursive`` (the default) this runs ``chown -R ceph:ceph``,
+    otherwise ``os.chown`` is called with the ids of the ceph user.
+    """
+    # looked up before anything else, so a missing "ceph" user fails in both
+    # modes
+    ceph_uid, ceph_gid = get_ceph_user_ids()
+    target = os.path.realpath(path) if os.path.islink(path) else path
+    if not recursive:
+        os.chown(target, ceph_uid, ceph_gid)
+        return
+    process.run(['chown', '-R', 'ceph:ceph', target])
+
+
+def is_mounted(source, destination=None):
+    """
+    Check if the given device is mounted, optionally validating destination.
+    This relies on absolute path devices, it will ignore non-absolute
+    entries like::
+
+        tmpfs /run tmpfs rw,seclabel,nosuid,nodev,mode=755 0 0
+
+    But will parse paths that are absolute like::
+
+        /dev/sdc2 /boot xfs rw,attr2,inode64,noquota 0 0
+
+    When destination is passed in, it will check that the entry where the
+    source appears is mounted to where destination defines. This is useful so
+    that an error message can report that a source is not mounted at an
+    expected destination.
+
+    Every entry for the device is considered: if the device is listed more
+    than once, a match on any of its mount points is enough.
+
+    :param source: path to the device, resolved with ``os.path.realpath``
+    :param destination: optional mount point the device is expected at
+    :returns: ``True`` when a matching entry exists, ``False`` otherwise
+    """
+    dev = os.path.realpath(source)
+    if destination:
+        # resolved once up front, it does not change between entries
+        destination = os.path.realpath(destination)
+    # the table is read as bytes, ``as_string`` converts before comparing
+    with open(PROCDIR + '/mounts', 'rb') as proc_mounts:
+        for line in proc_mounts:
+            fields = line.split()
+            if len(fields) < 3:
+                continue
+            mounted_device = fields[0]
+            mounted_path = fields[1]
+            if not (os.path.isabs(mounted_device) and os.path.exists(mounted_device)):
+                continue
+            if as_string(os.path.realpath(mounted_device)) != dev:
+                continue
+            if not destination:
+                return True
+            # a mismatch only rules out this entry: the same device may show
+            # up again further down mounted somewhere else
+            if as_string(os.path.realpath(mounted_path)) == destination:
+                return True
+    return False
diff --git a/ceph/src/ceph-volume/setup.py b/ceph/src/ceph-volume/setup.py
new file mode 100644 (file)
index 0000000..cfdb1e1
--- /dev/null
@@ -0,0 +1,33 @@
+from setuptools import setup, find_packages
+
+
+# Packaging metadata for ceph-volume
+setup(
+    name='ceph-volume',
+    version='1.0.0',
+    description='Deploy Ceph OSDs using different device technologies like lvm or physical disks',
+    keywords='ceph volume disk devices lvm',
+    url="https://github.com/ceph/ceph",
+    author='',
+    author_email='contact@redhat.com',
+    license='LGPLv2+',
+
+    packages=find_packages(),
+    # the command line entry points are shipped as plain scripts
+    scripts=['bin/ceph-volume', 'bin/ceph-volume-systemd'],
+    zip_safe=False,
+    tests_require=[
+        'pytest >=2.1.3',
+        'tox',
+    ],
+
+    classifiers=[
+        'Environment :: Console',
+        'Intended Audience :: Information Technology',
+        'Intended Audience :: System Administrators',
+        'Operating System :: POSIX :: Linux',
+        'License :: OSI Approved :: GNU Lesser General Public License v2 or later (LGPLv2+)',
+        'Programming Language :: Python',
+        'Programming Language :: Python :: 2.7',
+        'Programming Language :: Python :: 3.5',
+        'Programming Language :: Python :: 3.6',
+    ],
+)
diff --git a/ceph/src/ceph-volume/tox.ini b/ceph/src/ceph-volume/tox.ini
new file mode 100644 (file)
index 0000000..514d208
--- /dev/null
@@ -0,0 +1,14 @@
+[tox]
+envlist = py27, py35, py36, flake8
+
+[testenv]
+deps=
+  pytest
+commands=py.test -v {posargs:ceph_volume/tests}
+
+[testenv:flake8]
+deps=flake8
+commands=flake8 --select=F,E9 {posargs:ceph_volume}
+
+# pytest reads its settings from a plain [pytest] section when they live in
+# tox.ini; the [tool:pytest] spelling is only honoured in setup.cfg
+[pytest]
+norecursedirs = .* _* virtualenv
index e34c468a7e92dbfa1f68de9f09bb718f59d2a6dc..bb2516c788f9953cccba5d71498f3c288dce4432 100755 (executable)
@@ -355,6 +355,9 @@ def do_extended_help(parser, args, target, partial):
                          partial=partial))
 
     def help_for_target(target, partial=None):
+        # wait for osdmap because we know this is sent after the mgrmap
+        # and monmap (it's alphabetical).
+        cluster_handle.wait_for_latest_osdmap()
         ret, outbuf, outs = json_command(cluster_handle, target=target,
                                          prefix='get_command_descriptions',
                                          timeout=10)
index f3d585b47759a4d18b0c2600987edca7424d8ac8..d7e54a3a3faf5d8938d9444c4ee89c74bbe5a6f3 100644 (file)
@@ -246,6 +246,7 @@ int main(int argc, const char **argv)
       bl.read_fd(fd, 64);
       if (bl.length()) {
        store_type = string(bl.c_str(), bl.length() - 1);  // drop \n
+       g_conf->set_val("osd_objectstore", store_type);
        dout(5) << "object store type is " << store_type << dendl;
       }
       ::close(fd);
@@ -333,28 +334,28 @@ int main(int argc, const char **argv)
   }
   if (check_wants_journal) {
     if (store->wants_journal()) {
-      cout << "yes" << std::endl;
+      cout << "wants journal: yes" << std::endl;
       exit(0);
     } else {
-      cout << "no" << std::endl;
+      cout << "wants journal: no" << std::endl;
       exit(1);
     }
   }
   if (check_allows_journal) {
     if (store->allows_journal()) {
-      cout << "yes" << std::endl;
+      cout << "allows journal: yes" << std::endl;
       exit(0);
     } else {
-      cout << "no" << std::endl;
+      cout << "allows journal: no" << std::endl;
       exit(1);
     }
   }
   if (check_needs_journal) {
     if (store->needs_journal()) {
-      cout << "yes" << std::endl;
+      cout << "needs journal: yes" << std::endl;
       exit(0);
     } else {
-      cout << "no" << std::endl;
+      cout << "needs journal: no" << std::endl;
       exit(1);
     }
   }
index 6b34e4a330e43df4983fc48be64fd78045675a49..e461ab4a59eb2157f20e0db8c7441858d511e84a 100644 (file)
@@ -9430,7 +9430,13 @@ int Client::statfs(const char *path, struct statvfs *stbuf,
 
   ceph_statfs stats;
   C_SaferCond cond;
-  objecter->get_fs_stats(stats, &cond);
+
+  const vector<int64_t> &data_pools = mdsmap->get_data_pools();
+  if (data_pools.size() == 1) {
+    objecter->get_fs_stats(stats, data_pools[0], &cond);
+  } else {
+    objecter->get_fs_stats(stats, boost::optional<int64_t>(), &cond);
+  }
 
   client_lock.Unlock();
   int rval = cond.wait();
@@ -9501,7 +9507,7 @@ int Client::statfs(const char *path, struct statvfs *stbuf,
     stbuf->f_bfree = free;
     stbuf->f_bavail = free;
   } else {
-    // General case: report the overall RADOS cluster's statistics.  Because
+    // General case: report the cluster statistics returned from RADOS. Because
     // multiple pools may be used without one filesystem namespace via
     // layouts, this is the most correct thing we can do.
     stbuf->f_blocks = stats.kb >> (CEPH_BLOCK_SHIFT - 10);
@@ -12708,6 +12714,12 @@ int Client::fdescribe_layout(int fd, file_layout_t *lp)
   return 0;
 }
 
+// Return the id of the first data pool listed in the MDS map, which is
+// treated as the default pool. client_lock is held for the lookup.
+int64_t Client::get_default_pool_id()
+{
+  Mutex::Locker lock(client_lock);
+  /* first data pool is the default */ 
+  return mdsmap->get_first_data_pool(); 
+}
 
 // expose osdmap
 
@@ -12900,6 +12912,7 @@ void Client::ms_handle_remote_reset(Connection *con)
        }
       }
       if (mds >= 0) {
+       assert (s != NULL);
        switch (s->state) {
        case MetaSession::STATE_CLOSING:
          ldout(cct, 1) << "reset from mds we were closing; we'll call that closed" << dendl;
index b6369a29363e13da06ff54751d53a15f8a935dd1..beefa1eba5c8aa2db27777c7cb2e2aa5419cf531 100644 (file)
@@ -1101,6 +1101,9 @@ public:
   int get_file_stripe_address(int fd, loff_t offset, vector<entity_addr_t>& address);
   int get_file_extent_osds(int fd, loff_t off, loff_t *len, vector<int>& osds);
   int get_osd_addr(int osd, entity_addr_t& addr);
+  
+  // expose mdsmap
+  int64_t get_default_pool_id();
 
   // expose osdmap
   int get_local_osd();
index 5cb5bc80ebb3e24d5f7a4bb7f191704a32ee84bc..3c84570c5dfbf4b3108161d898473aed8bffadca 100644 (file)
@@ -643,7 +643,7 @@ static const luaL_Reg clslua_lib[] = {
 };
 
 /*
- * Set int const in table at top of stack
+ * Set const int in table at top of stack
  */
 #define SET_INT_CONST(var) do { \
   lua_pushinteger(L, var); \
index d56e24c3e512f821f32f3fda7edaf2210371a0dd..3e6505d468d31fdfc6afe6da0f75b49ec88b1058 100644 (file)
@@ -27,6 +27,8 @@ template<typename T>
 using remove_reference_t = typename std::remove_reference<T>::type;
 template<typename T>
 using result_of_t = typename std::result_of<T>::type;
+// shorthand for std::decay<T>::type
+template<typename T>
+using decay_t = typename std::decay<T>::type;
 
 namespace _backport14 {
 template<typename T>
@@ -75,10 +77,52 @@ template <typename T, std::size_t N>
 constexpr std::size_t size(const T (&array)[N]) noexcept {
   return N;
 }
+
+/// http://en.cppreference.com/w/cpp/utility/functional/not_fn
+// this implementation uses c++14's result_of_t (above) instead of the c++17
+// invoke_result_t, and so may not behave correctly when SFINAE is required
+//
+// Wrapper that stores a decayed copy of the callable F and whose call
+// operators return the negation (operator!) of calling it with the forwarded
+// arguments. There is one call operator per combination of const and
+// lvalue/rvalue qualification of the wrapper itself.
+template <typename F>
+class not_fn_result {
+  using DecayF = decay_t<F>;
+  // the wrapped callable, held by value
+  DecayF fn;
+ public:
+  explicit not_fn_result(F&& f) : fn(std::forward<F>(f)) {}
+  not_fn_result(not_fn_result&& f) = default;
+  not_fn_result(const not_fn_result& f) = default;
+
+  // lvalue wrapper: the stored callable is invoked as an lvalue
+  template<class... Args>
+  auto operator()(Args&&... args) &
+  -> decltype(!std::declval<result_of_t<DecayF&(Args...)>>()) {
+    return !fn(std::forward<Args>(args)...);
+  }
+  template<class... Args>
+  auto operator()(Args&&... args) const&
+  -> decltype(!std::declval<result_of_t<DecayF const&(Args...)>>()) {
+    return !fn(std::forward<Args>(args)...);
+  }
+
+  // rvalue wrapper: the stored callable is invoked through std::move
+  template<class... Args>
+  auto operator()(Args&&... args) &&
+  -> decltype(!std::declval<result_of_t<DecayF(Args...)>>()) {
+    return !std::move(fn)(std::forward<Args>(args)...);
+  }
+  template<class... Args>
+  auto operator()(Args&&... args) const&&
+  -> decltype(!std::declval<result_of_t<DecayF const(Args...)>>()) {
+    return !std::move(fn)(std::forward<Args>(args)...);
+  }
+};
+
+// Build a not_fn_result<F> around fn (perfectly forwarded); calling the
+// result yields the negation of what fn returns for the same arguments.
+template <typename F>
+not_fn_result<F> not_fn(F&& fn) {
+  return not_fn_result<F>(std::forward<F>(fn));
+}
+
 } // namespace _backport17
 using _backport14::make_unique;
 using _backport17::size;
 using _backport14::max;
+using _backport17::not_fn;
 } // namespace ceph
 
 #endif // CEPH_COMMON_BACKPORT14_H
index 4b75b9b631791c5e1e8f01cf393bd292516a1f74..b8e87d1eea3dc16b9908cc163f937d0ffd4ffe3b 100644 (file)
@@ -2382,7 +2382,7 @@ int buffer::list::write_fd(int fd) const
     }
     ++p;
 
-    if (iovlen == IOV_MAX-1 ||
+    if (iovlen == IOV_MAX ||
        p == _buffers.end()) {
       iovec *start = iov;
       int num = iovlen;
index 592b889b4597f83ab3decc9a162f75da1aba5c47..9a873f5144216e8cde871a4aa6c98e46eda246b7 100644 (file)
@@ -55,12 +55,10 @@ dump_cmd_to_json(Formatter *f, const string& cmd)
   // elements are: "name", meaning "the typeless name that means a literal"
   // an object {} with key:value pairs representing an argument
 
-  int argnum = 0;
   stringstream ss(cmd);
   std::string word;
 
   while (std::getline(ss, word, ' ')) {
-    argnum++;
     // if no , or =, must be a plain word to put out
     if (word.find_first_of(",=") == string::npos) {
       f->dump_string("arg", word);
index b651cf08f4e85474f866c82b80a2fffb5a97ec88..7f7b7c35b5c8c1fdebdfd3417ad21a71b0c331b0 100644 (file)
@@ -298,7 +298,7 @@ namespace ceph {
 
     void enqueue(K cl, unsigned priority, unsigned cost, T item) override final {
       // priority is ignored
-      queue.add_request(item, cl, cost);
+      queue.add_request(std::move(item), cl, cost);
     }
 
     void enqueue_front(K cl,
index f97d619ed923ee5e8ccd0422ce91325a98423e78..cfb518d774d1aeab67be52c4d2238eb1e85987a2 100644 (file)
@@ -88,7 +88,6 @@ void Option::dump(Formatter *f) const
   f->dump_string("name", name);
 
   f->dump_string("type", type_to_str(type));
-  std::string level_str;
 
   f->dump_string("level", level_to_str(level));
 
@@ -131,5530 +130,5806 @@ void Option::dump(Formatter *f) const
 }
 
 
-std::vector<Option> global_options = {
-  Option("host", Option::TYPE_STR, Option::LEVEL_BASIC)
-  .set_description("local hostname")
-  .set_long_description("if blank, ceph assumes the short hostname (hostname -s)")
-  .add_service("common")
-  .add_tag("network"),
-
-  Option("fsid", Option::TYPE_UUID, Option::LEVEL_BASIC)
-  .set_description("cluster fsid (uuid)")
-  .add_service("common")
-  .add_tag("service"),
-
-  Option("public_addr", Option::TYPE_ADDR, Option::LEVEL_BASIC)
-  .set_description("public-facing address to bind to")
-  .add_service({"mon", "mds", "osd", "mgr"}),
-
-  Option("public_bind_addr", Option::TYPE_ADDR, Option::LEVEL_ADVANCED)
-  .set_default(entity_addr_t())
-  .add_service("mon")
-  .set_description(""),
-
-  Option("cluster_addr", Option::TYPE_ADDR, Option::LEVEL_BASIC)
-  .set_description("cluster-facing address to bind to")
-  .add_service("osd")
-  .add_tag("network"),
-
-  Option("public_network", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .add_service({"mon", "mds", "osd", "mgr"})
-  .add_tag("network")
-  .set_description(""),
-
-  Option("cluster_network", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .add_service("osd")
-  .add_tag("network")
-  .set_description(""),
-
-  Option("monmap", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_description("path to MonMap file")
-  .set_long_description("This option is normally used during mkfs, but can also "
-                       "be used to identify which monitors to connect to.")
-  .add_service("mon")
-  .add_tag("mkfs"),
-
-  Option("mon_host", Option::TYPE_STR, Option::LEVEL_BASIC)
-  .set_description("list of hosts or addresses to search for a monitor")
-  .set_long_description("This is a comma, whitespace, or semicolon separated "
-                       "list of IP addresses or hostnames. Hostnames are "
-                       "resolved via DNS and all A or AAAA records are "
-                       "included in the search list.")
-  .add_service("common"),
-
-  Option("mon_dns_srv_name", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_description("name of DNS SRV record to check for monitor addresses")
-  .add_service("common")
-  .add_tag("network")
-  .add_see_also("mon_host"),
-
-  // lockdep
-  Option("lockdep", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_description("enable lockdep lock dependency analyzer")
-  .add_service("common"),
-
-  Option("lockdep_force_backtrace", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_description("always gather current backtrace at every lock")
-  .add_service("common")
-  .add_see_also("lockdep"),
-
-  Option("run_dir", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("/var/run/ceph")
-  .set_description("path for the 'run' directory for storing pid and socket files")
-  .add_service("common")
-  .add_see_also("admin_socket"),
-
-  Option("admin_socket", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_daemon_default("$run_dir/$cluster-$name.asok")
-  .set_description("path for the runtime control socket file, used by the 'ceph daemon' command")
-  .add_service("common"),
-
-  Option("admin_socket_mode", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_description("file mode to set for the admin socket file, e.g, '0755'")
-  .add_service("common")
-  .add_see_also("admin_socket"),
-
-  Option("crushtool", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_description("name of the 'crushtool' utility")
-  .add_service("mon"),
-
-  // daemon
-  Option("daemonize", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_daemon_default(true)
-  .set_description("whether to daemonize (background) after startup")
-  .add_service({"mon", "mgr", "osd", "mds"})
-  .add_tag("service")
-  .add_see_also({"pid_file", "chdir"}),
-
-  Option("setuser", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_description("uid or user name to switch to on startup")
-  .set_long_description("This is normally specified by the systemd unit file.")
-  .add_service({"mon", "mgr", "osd", "mds"})
-  .add_tag("service")
-  .add_see_also("setgroup"),
-
-  Option("setgroup", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_description("gid or group name to switch to on startup")
-  .set_long_description("This is normally specified by the systemd unit file.")
-  .add_service({"mon", "mgr", "osd", "mds"})
-  .add_tag("service")
-  .add_see_also("setuser"),
-
-  Option("setuser_match_path", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_description("if set, setuser/setgroup is condition on this path matching ownership")
-  .set_long_description("If setuser or setgroup are specified, and this option is non-empty, then the uid/gid of the daemon will only be changed if the file or directory specified by this option has a matching uid and/or gid.  This exists primarily to allow switching to user ceph for OSDs to be conditional on whether the osd data contents have also been chowned after an upgrade.  This is normally specified by the systemd unit file.")
-  .add_service({"mon", "mgr", "osd", "mds"})
-  .add_tag("service")
-  .add_see_also({"setuser", "setgroup"}),
-
-  Option("pid_file", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_description("path to write a pid file (if any)")
-  .add_service({"mon", "mgr", "osd", "mds"})
-  .add_tag("service"),
-
-  Option("chdir", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_description("path to chdir(2) to after daemonizing")
-  .add_service({"mon", "mgr", "osd", "mds"})
-  .add_tag("service")
-  .add_see_also("daemonize"),
-
-  Option("fatal_signal_handlers", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description("whether to register signal handlers for SIGABRT etc that dump a stack trace")
-  .set_long_description("This is normally true for daemons and values for libraries.")
-  .add_service({"mon", "mgr", "osd", "mds"})
-  .add_tag("service"),
-
-  // restapi
-  Option("restapi_log_level", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_description("default set by python code"),
-
-  Option("restapi_base_url", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_description("default set by python code"),
-
-  Option("erasure_code_dir", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default(CEPH_PKGLIBDIR"/erasure-code")
-  .set_description("directory where erasure-code plugins can be found")
-  .add_service({"mon", "osd"})
-  .set_safe(),
-
-  // logging
-  Option("log_file", Option::TYPE_STR, Option::LEVEL_BASIC)
-  .set_default("")
-  .set_daemon_default("/var/log/ceph/$cluster-$name.log")
-  .set_description("path to log file")
-  .add_see_also({"log_to_stderr",
-                 "err_to_stderr",
-                 "log_to_syslog",
-                 "err_to_syslog"}),
-
-  Option("log_max_new", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(1000)
-  .set_description("max unwritten log entries to allow before waiting to flush to the log")
-  .add_see_also("log_max_recent"),
-
-  Option("log_max_recent", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(500)
-  .set_daemon_default(10000)
-  .set_description("recent log entries to keep in memory to dump in the event of a crash")
-  .set_long_description("The purpose of this option is to log at a higher debug level only to the in-memory buffer, and write out the detailed log messages only if there is a crash.  Only log entries below the lower log level will be written unconditionally to the log.  For example, debug_osd=1/5 will write everything <= 1 to the log unconditionally but keep entries at levels 2-5 in memory.  If there is a seg fault or assertion failure, all entries will be dumped to the log."),
-
-  Option("log_to_stderr", Option::TYPE_BOOL, Option::LEVEL_BASIC)
-  .set_default(true)
-  .set_daemon_default(false)
-  .set_description("send log lines to stderr"),
-
-  Option("err_to_stderr", Option::TYPE_BOOL, Option::LEVEL_BASIC)
-  .set_default(false)
-  .set_daemon_default(true)
-  .set_description("send critical error log lines to stderr"),
-
-  Option("log_to_syslog", Option::TYPE_BOOL, Option::LEVEL_BASIC)
-  .set_default(false)
-  .set_description("send log lines to syslog facility"),
-
-  Option("err_to_syslog", Option::TYPE_BOOL, Option::LEVEL_BASIC)
-  .set_default(false)
-  .set_description("send critical error log lines to syslog facility"),
-
-  Option("log_flush_on_exit", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description("set a process exit handler to ensure the log is flushed on exit"),
-
-  Option("log_stop_at_utilization", Option::TYPE_FLOAT, Option::LEVEL_BASIC)
-  .set_default(.97)
-  .set_min_max(0.0, 1.0)
-  .set_description("stop writing to the log file when device utilization reaches this ratio")
-  .add_see_also("log_file"),
-
-  Option("log_to_graylog", Option::TYPE_BOOL, Option::LEVEL_BASIC)
-  .set_default(false)
-  .set_description("send log lines to remote graylog server")
-  .add_see_also({"err_to_graylog",
-                 "log_graylog_host",
-                 "log_graylog_port"}),
-
-  Option("err_to_graylog", Option::TYPE_BOOL, Option::LEVEL_BASIC)
-  .set_default(false)
-  .set_description("send critical error log lines to remote graylog server")
-  .add_see_also({"log_to_graylog",
-                 "log_graylog_host",
-                 "log_graylog_port"}),
-
-  Option("log_graylog_host", Option::TYPE_STR, Option::LEVEL_BASIC)
-  .set_default("127.0.0.1")
-  .set_description("address or hostname of graylog server to log to")
-  .add_see_also({"log_to_graylog",
-                 "err_to_graylog",
-                 "log_graylog_port"}),
-
-  Option("log_graylog_port", Option::TYPE_INT, Option::LEVEL_BASIC)
-  .set_default(12201)
-  .set_description("port number for the remote graylog server")
-  .add_see_also("log_graylog_host"),
-
-
-
-  // unmodified
-  Option("clog_to_monitors", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("default=true")
-  .set_description(""),
-
-  Option("clog_to_syslog", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("false")
-  .set_description(""),
-
-  Option("clog_to_syslog_level", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("info")
-  .set_description(""),
-
-  Option("clog_to_syslog_facility", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("default=daemon audit=local0")
-  .set_description(""),
-
-  Option("clog_to_graylog", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("false")
-  .set_description(""),
-
-  Option("clog_to_graylog_host", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("127.0.0.1")
-  .set_description(""),
-
-  Option("clog_to_graylog_port", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("12201")
-  .set_description(""),
-
-  Option("mon_cluster_log_to_syslog", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("default=false")
-  .set_description(""),
-
-  Option("mon_cluster_log_to_syslog_level", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("info")
-  .set_description(""),
-
-  Option("mon_cluster_log_to_syslog_facility", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("daemon")
-  .set_description(""),
-
-  Option("mon_cluster_log_file", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("default=/var/log/ceph/$cluster.$channel.log cluster=/var/log/ceph/$cluster.log")
-  .set_description(""),
-
-  Option("mon_cluster_log_file_level", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("info")
-  .set_description(""),
-
-  Option("mon_cluster_log_to_graylog", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("false")
-  .set_description(""),
-
-  Option("mon_cluster_log_to_graylog_host", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("127.0.0.1")
-  .set_description(""),
-
-  Option("mon_cluster_log_to_graylog_port", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("12201")
-  .set_description(""),
-
-  Option("enable_experimental_unrecoverable_data_corrupting_features", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
-
-  Option("plugin_dir", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default(CEPH_PKGLIBDIR)
-  .set_description("")
-  .set_safe(),
-
-  Option("xio_trace_mempool", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
-
-  Option("xio_trace_msgcnt", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
-
-  Option("xio_trace_xcon", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
-
-  Option("xio_queue_depth", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(128)
-  .set_description(""),
-
-  Option("xio_mp_min", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(128)
-  .set_description(""),
-
-  Option("xio_mp_max_64", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(65536)
-  .set_description(""),
-
-  Option("xio_mp_max_256", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(8192)
-  .set_description(""),
+std::vector<Option> get_global_options() {
+  return std::vector<Option>({
+    Option("host", Option::TYPE_STR, Option::LEVEL_BASIC)
+    .set_description("local hostname")
+    .set_long_description("if blank, ceph assumes the short hostname (hostname -s)")
+    .add_service("common")
+    .add_tag("network"),
+
+    Option("fsid", Option::TYPE_UUID, Option::LEVEL_BASIC)
+    .set_description("cluster fsid (uuid)")
+    .add_service("common")
+    .add_tag("service"),
+
+    Option("public_addr", Option::TYPE_ADDR, Option::LEVEL_BASIC)
+    .set_description("public-facing address to bind to")
+    .add_service({"mon", "mds", "osd", "mgr"}),
+
+    Option("public_bind_addr", Option::TYPE_ADDR, Option::LEVEL_ADVANCED)
+    .set_default(entity_addr_t())
+    .add_service("mon")
+    .set_description(""),
+
+    Option("cluster_addr", Option::TYPE_ADDR, Option::LEVEL_BASIC)
+    .set_description("cluster-facing address to bind to")
+    .add_service("osd")
+    .add_tag("network"),
+
+    Option("public_network", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .add_service({"mon", "mds", "osd", "mgr"})
+    .add_tag("network")
+    .set_description(""),
+
+    Option("cluster_network", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .add_service("osd")
+    .add_tag("network")
+    .set_description(""),
+
+    Option("monmap", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_description("path to MonMap file")
+    .set_long_description("This option is normally used during mkfs, but can also "
+                       "be used to identify which monitors to connect to.")
+    .add_service("mon")
+    .add_tag("mkfs"),
+
+    Option("mon_host", Option::TYPE_STR, Option::LEVEL_BASIC)
+    .set_description("list of hosts or addresses to search for a monitor")
+    .set_long_description("This is a comma, whitespace, or semicolon separated "
+                       "list of IP addresses or hostnames. Hostnames are "
+                       "resolved via DNS and all A or AAAA records are "
+                       "included in the search list.")
+    .add_service("common"),
+
+    Option("mon_dns_srv_name", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_description("name of DNS SRV record to check for monitor addresses")
+    .add_service("common")
+    .add_tag("network")
+    .add_see_also("mon_host"),
+
+    // lockdep
+    Option("lockdep", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_description("enable lockdep lock dependency analyzer")
+    .add_service("common"),
+
+    Option("lockdep_force_backtrace", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_description("always gather current backtrace at every lock")
+    .add_service("common")
+    .add_see_also("lockdep"),
+
+    Option("run_dir", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("/var/run/ceph")
+    .set_description("path for the 'run' directory for storing pid and socket files")
+    .add_service("common")
+    .add_see_also("admin_socket"),
+
+    Option("admin_socket", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_daemon_default("$run_dir/$cluster-$name.asok")
+    .set_description("path for the runtime control socket file, used by the 'ceph daemon' command")
+    .add_service("common"),
+
+    Option("admin_socket_mode", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_description("file mode to set for the admin socket file, e.g, '0755'")
+    .add_service("common")
+    .add_see_also("admin_socket"),
+
+    Option("crushtool", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_description("name of the 'crushtool' utility")
+    .add_service("mon"),
+
+    // daemon
+    Option("daemonize", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_daemon_default(true)
+    .set_description("whether to daemonize (background) after startup")
+    .add_service({"mon", "mgr", "osd", "mds"})
+    .add_tag("service")
+    .add_see_also({"pid_file", "chdir"}),
+
+    Option("setuser", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_description("uid or user name to switch to on startup")
+    .set_long_description("This is normally specified by the systemd unit file.")
+    .add_service({"mon", "mgr", "osd", "mds"})
+    .add_tag("service")
+    .add_see_also("setgroup"),
+
+    Option("setgroup", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_description("gid or group name to switch to on startup")
+    .set_long_description("This is normally specified by the systemd unit file.")
+    .add_service({"mon", "mgr", "osd", "mds"})
+    .add_tag("service")
+    .add_see_also("setuser"),
+
+    Option("setuser_match_path", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_description("if set, setuser/setgroup is condition on this path matching ownership")
+    .set_long_description("If setuser or setgroup are specified, and this option is non-empty, then the uid/gid of the daemon will only be changed if the file or directory specified by this option has a matching uid and/or gid.  This exists primarily to allow switching to user ceph for OSDs to be conditional on whether the osd data contents have also been chowned after an upgrade.  This is normally specified by the systemd unit file.")
+    .add_service({"mon", "mgr", "osd", "mds"})
+    .add_tag("service")
+    .add_see_also({"setuser", "setgroup"}),
+
+    Option("pid_file", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_description("path to write a pid file (if any)")
+    .add_service({"mon", "mgr", "osd", "mds"})
+    .add_tag("service"),
+
+    Option("chdir", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_description("path to chdir(2) to after daemonizing")
+    .add_service({"mon", "mgr", "osd", "mds"})
+    .add_tag("service")
+    .add_see_also("daemonize"),
+
+    Option("fatal_signal_handlers", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description("whether to register signal handlers for SIGABRT etc that dump a stack trace")
+    .set_long_description("This is normally true for daemons and values for libraries.")
+    .add_service({"mon", "mgr", "osd", "mds"})
+    .add_tag("service"),
+
+    // restapi
+    Option("restapi_log_level", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_description("default set by python code"),
+
+    Option("restapi_base_url", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_description("default set by python code"),
+
+    Option("erasure_code_dir", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default(CEPH_PKGLIBDIR"/erasure-code")
+    .set_description("directory where erasure-code plugins can be found")
+    .add_service({"mon", "osd"})
+    .set_safe(),
+
+    // logging
+    Option("log_file", Option::TYPE_STR, Option::LEVEL_BASIC)
+    .set_default("")
+    .set_daemon_default("/var/log/ceph/$cluster-$name.log")
+    .set_description("path to log file")
+    .add_see_also({"log_to_stderr",
+                   "err_to_stderr",
+                   "log_to_syslog",
+                   "err_to_syslog"}),
+
+    Option("log_max_new", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(1000)
+    .set_description("max unwritten log entries to allow before waiting to flush to the log")
+    .add_see_also("log_max_recent"),
+
+    Option("log_max_recent", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(500)
+    .set_daemon_default(10000)
+    .set_description("recent log entries to keep in memory to dump in the event of a crash")
+    .set_long_description("The purpose of this option is to log at a higher debug level only to the in-memory buffer, and write out the detailed log messages only if there is a crash.  Only log entries below the lower log level will be written unconditionally to the log.  For example, debug_osd=1/5 will write everything <= 1 to the log unconditionally but keep entries at levels 2-5 in memory.  If there is a seg fault or assertion failure, all entries will be dumped to the log."),
+
+    Option("log_to_stderr", Option::TYPE_BOOL, Option::LEVEL_BASIC)
+    .set_default(true)
+    .set_daemon_default(false)
+    .set_description("send log lines to stderr"),
+
+    Option("err_to_stderr", Option::TYPE_BOOL, Option::LEVEL_BASIC)
+    .set_default(false)
+    .set_daemon_default(true)
+    .set_description("send critical error log lines to stderr"),
+
+    Option("log_to_syslog", Option::TYPE_BOOL, Option::LEVEL_BASIC)
+    .set_default(false)
+    .set_description("send log lines to syslog facility"),
+
+    Option("err_to_syslog", Option::TYPE_BOOL, Option::LEVEL_BASIC)
+    .set_default(false)
+    .set_description("send critical error log lines to syslog facility"),
+
+    Option("log_flush_on_exit", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description("set a process exit handler to ensure the log is flushed on exit"),
+
+    Option("log_stop_at_utilization", Option::TYPE_FLOAT, Option::LEVEL_BASIC)
+    .set_default(.97)
+    .set_min_max(0.0, 1.0)
+    .set_description("stop writing to the log file when device utilization reaches this ratio")
+    .add_see_also("log_file"),
+
+    Option("log_to_graylog", Option::TYPE_BOOL, Option::LEVEL_BASIC)
+    .set_default(false)
+    .set_description("send log lines to remote graylog server")
+    .add_see_also({"err_to_graylog",
+                   "log_graylog_host",
+                   "log_graylog_port"}),
+
+    Option("err_to_graylog", Option::TYPE_BOOL, Option::LEVEL_BASIC)
+    .set_default(false)
+    .set_description("send critical error log lines to remote graylog server")
+    .add_see_also({"log_to_graylog",
+                   "log_graylog_host",
+                   "log_graylog_port"}),
+
+    Option("log_graylog_host", Option::TYPE_STR, Option::LEVEL_BASIC)
+    .set_default("127.0.0.1")
+    .set_description("address or hostname of graylog server to log to")
+    .add_see_also({"log_to_graylog",
+                   "err_to_graylog",
+                   "log_graylog_port"}),
+
+    Option("log_graylog_port", Option::TYPE_INT, Option::LEVEL_BASIC)
+    .set_default(12201)
+    .set_description("port number for the remote graylog server")
+    .add_see_also("log_graylog_host"),
+
+
+
+    // unmodified
+    Option("clog_to_monitors", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("default=true")
+    .set_description(""),
+
+    Option("clog_to_syslog", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("false")
+    .set_description(""),
+
+    Option("clog_to_syslog_level", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("info")
+    .set_description(""),
+
+    Option("clog_to_syslog_facility", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("default=daemon audit=local0")
+    .set_description(""),
+
+    Option("clog_to_graylog", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("false")
+    .set_description(""),
+
+    Option("clog_to_graylog_host", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("127.0.0.1")
+    .set_description(""),
+
+    Option("clog_to_graylog_port", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("12201")
+    .set_description(""),
+
+    Option("mon_cluster_log_to_syslog", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("default=false")
+    .set_description(""),
+
+    Option("mon_cluster_log_to_syslog_level", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("info")
+    .set_description(""),
+
+    Option("mon_cluster_log_to_syslog_facility", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("daemon")
+    .set_description(""),
+
+    Option("mon_cluster_log_file", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("default=/var/log/ceph/$cluster.$channel.log cluster=/var/log/ceph/$cluster.log")
+    .set_description(""),
+
+    Option("mon_cluster_log_file_level", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("info")
+    .set_description(""),
+
+    Option("mon_cluster_log_to_graylog", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("false")
+    .set_description(""),
+
+    Option("mon_cluster_log_to_graylog_host", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("127.0.0.1")
+    .set_description(""),
+
+    Option("mon_cluster_log_to_graylog_port", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("12201")
+    .set_description(""),
+
+    Option("enable_experimental_unrecoverable_data_corrupting_features", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
+
+    Option("plugin_dir", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default(CEPH_PKGLIBDIR)
+    .set_description("")
+    .set_safe(),
+
+    Option("xio_trace_mempool", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
+
+    Option("xio_trace_msgcnt", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
+
+    Option("xio_trace_xcon", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
+
+    Option("xio_queue_depth", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(128)
+    .set_description(""),
+
+    Option("xio_mp_min", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(128)
+    .set_description(""),
+
+    Option("xio_mp_max_64", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(65536)
+    .set_description(""),
+
+    Option("xio_mp_max_256", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(8192)
+    .set_description(""),
 
-  Option("xio_mp_max_1k", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(8192)
-  .set_description(""),
-
-  Option("xio_mp_max_page", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(4096)
-  .set_description(""),
-
-  Option("xio_mp_max_hint", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(4096)
-  .set_description(""),
-
-  Option("xio_portal_threads", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(2)
-  .set_description(""),
-
-  Option("xio_max_conns_per_portal", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(32)
-  .set_description(""),
-
-  Option("xio_transport_type", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("rdma")
-  .set_description(""),
-
-  Option("xio_max_send_inline", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(512)
-  .set_description(""),
-
-  Option("compressor_zlib_isal", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
-
-  Option("compressor_zlib_level", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(5)
-  .set_description(""),
-
-  Option("async_compressor_enabled", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
-
-  Option("async_compressor_type", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("snappy")
-  .set_description(""),
-
-  Option("async_compressor_threads", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(2)
-  .set_description(""),
-
-  Option("async_compressor_thread_timeout", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(5)
-  .set_description(""),
-
-  Option("async_compressor_thread_suicide_timeout", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(30)
-  .set_description(""),
-
-  Option("plugin_crypto_accelerator", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("crypto_isal")
-  .set_description(""),
-
-  Option("mempool_debug", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
-
-  Option("key", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
-
-  Option("keyfile", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
-
-  Option("keyring", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default(
-    "/etc/ceph/$cluster.$name.keyring,/etc/ceph/$cluster.keyring,"
-    "/etc/ceph/keyring,/etc/ceph/keyring.bin," 
-#if defined(__FreeBSD)
-    "/usr/local/etc/ceph/$cluster.$name.keyring,"
-    "/usr/local/etc/ceph/$cluster.keyring,"
-    "/usr/local/etc/ceph/keyring,/usr/local/etc/ceph/keyring.bin," 
-#endif
-  )
-  .set_description(""),
-
-  Option("heartbeat_interval", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(5)
-  .set_description(""),
-
-  Option("heartbeat_file", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
-
-  Option("heartbeat_inject_failure", Option::TYPE_INT, Option::LEVEL_DEV)
-  .set_default(0)
-  .set_description(""),
-
-  Option("perf", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
-
-  Option("ms_type", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("async+posix")
-  .set_description("")
-  .set_safe(),
-
-  Option("ms_public_type", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
-
-  Option("ms_cluster_type", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
-
-  Option("ms_tcp_nodelay", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
-
-  Option("ms_tcp_rcvbuf", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
-
-  Option("ms_tcp_prefetch_max_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(4096)
-  .set_description(""),
-
-  Option("ms_initial_backoff", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(.2)
-  .set_description(""),
-
-  Option("ms_max_backoff", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(15.0)
-  .set_description(""),
-
-  Option("ms_crc_data", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
-
-  Option("ms_crc_header", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
-
-  Option("ms_die_on_bad_msg", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
-
-  Option("ms_die_on_unhandled_msg", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
-
-  Option("ms_die_on_old_message", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
-
-  Option("ms_die_on_skipped_message", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
-
-  Option("ms_dispatch_throttle_bytes", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(100 << 20)
-  .set_description(""),
-
-  Option("ms_bind_ipv6", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
-
-  Option("ms_bind_port_min", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(6800)
-  .set_description(""),
-
-  Option("ms_bind_port_max", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(7300)
-  .set_description(""),
-
-  Option("ms_bind_retry_count", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-#if !defined(__FreeBSD__)
-  .set_default(3)
-#else
-  // FreeBSD does not use SO_REAUSEADDR so allow for a bit more time per default
-  .set_default(6)
-#endif
-  .set_description(""),
-
-  Option("ms_bind_retry_delay", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-#if !defined(__FreeBSD__)
-  .set_default(5)
-#else
-  // FreeBSD does not use SO_REAUSEADDR so allow for a bit more time per default
-  .set_default(6)
-#endif
-  .set_description(""),
-
-  Option("ms_bind_before_connect", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
-
-  Option("ms_tcp_listen_backlog", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(512)
-  .set_description(""),
-
-  Option("ms_rwthread_stack_bytes", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(1024 << 10)
-  .set_description(""),
-
-  Option("ms_tcp_read_timeout", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(900)
-  .set_description(""),
-
-  Option("ms_pq_max_tokens_per_priority", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(16777216)
-  .set_description(""),
-
-  Option("ms_pq_min_cost", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(65536)
-  .set_description(""),
-
-  Option("ms_inject_socket_failures", Option::TYPE_UINT, Option::LEVEL_DEV)
-  .set_default(0)
-  .set_description(""),
-
-  Option("ms_inject_delay_type", Option::TYPE_STR, Option::LEVEL_DEV)
-  .set_default("")
-  .set_description("")
-  .set_safe(),
-
-  Option("ms_inject_delay_msg_type", Option::TYPE_STR, Option::LEVEL_DEV)
-  .set_default("")
-  .set_description(""),
-
-  Option("ms_inject_delay_max", Option::TYPE_FLOAT, Option::LEVEL_DEV)
-  .set_default(1)
-  .set_description(""),
-
-  Option("ms_inject_delay_probability", Option::TYPE_FLOAT, Option::LEVEL_DEV)
-  .set_default(0)
-  .set_description(""),
-
-  Option("ms_inject_internal_delays", Option::TYPE_FLOAT, Option::LEVEL_DEV)
-  .set_default(0)
-  .set_description(""),
-
-  Option("ms_dump_on_send", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
-
-  Option("ms_dump_corrupt_message_level", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(1)
-  .set_description(""),
-
-  Option("ms_async_op_threads", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(3)
-  .set_description(""),
-
-  Option("ms_async_max_op_threads", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(5)
-  .set_description(""),
-
-  Option("ms_async_set_affinity", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
-
-  Option("ms_async_affinity_cores", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
-
-  Option("ms_async_rdma_device_name", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
-
-  Option("ms_async_rdma_enable_hugepage", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
-
-  Option("ms_async_rdma_buffer_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(128 << 10)
-  .set_description(""),
-
-  Option("ms_async_rdma_send_buffers", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(1024)
-  .set_description(""),
-
-  Option("ms_async_rdma_receive_buffers", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(1024)
-  .set_description(""),
-
-  Option("ms_async_rdma_port_num", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(1)
-  .set_description(""),
-
-  Option("ms_async_rdma_polling_us", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(1000)
-  .set_description(""),
-
-  Option("ms_async_rdma_local_gid", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
-
-  Option("ms_async_rdma_roce_ver", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(1)
-  .set_description(""),
-
-  Option("ms_async_rdma_sl", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(3)
-  .set_description(""),
-
-  Option("ms_async_rdma_dscp", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(96)
-  .set_description(""),
-
-  Option("ms_dpdk_port_id", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
-
-  Option("ms_dpdk_coremask", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("1")
-  .set_description("")
-  .set_safe(),
-
-  Option("ms_dpdk_memory_channel", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("4")
-  .set_description(""),
-
-  Option("ms_dpdk_hugepages", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
-
-  Option("ms_dpdk_pmd", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
-
-  Option("ms_dpdk_host_ipv4_addr", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description("")
-  .set_safe(),
-
-  Option("ms_dpdk_gateway_ipv4_addr", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description("")
-  .set_safe(),
-
-  Option("ms_dpdk_netmask_ipv4_addr", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description("")
-  .set_safe(),
+    Option("xio_mp_max_1k", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(8192)
+    .set_description(""),
+
+    Option("xio_mp_max_page", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(4096)
+    .set_description(""),
+
+    Option("xio_mp_max_hint", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(4096)
+    .set_description(""),
+
+    Option("xio_portal_threads", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(2)
+    .set_description(""),
+
+    Option("xio_max_conns_per_portal", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(32)
+    .set_description(""),
+
+    Option("xio_transport_type", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("rdma")
+    .set_description(""),
+
+    Option("xio_max_send_inline", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(512)
+    .set_description(""),
+
+    Option("compressor_zlib_isal", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
+
+    Option("compressor_zlib_level", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(5)
+    .set_description(""),
+
+    Option("async_compressor_enabled", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
+
+    Option("async_compressor_type", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("snappy")
+    .set_description(""),
+
+    Option("async_compressor_threads", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(2)
+    .set_description(""),
+
+    Option("async_compressor_thread_timeout", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(5)
+    .set_description(""),
+
+    Option("async_compressor_thread_suicide_timeout", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(30)
+    .set_description(""),
+
+    Option("plugin_crypto_accelerator", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("crypto_isal")
+    .set_description(""),
+
+    Option("mempool_debug", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
+
+    Option("key", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
+
+    Option("keyfile", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
+
+    Option("keyring", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default(
+      "/etc/ceph/$cluster.$name.keyring,/etc/ceph/$cluster.keyring,"
+      "/etc/ceph/keyring,/etc/ceph/keyring.bin," 
+  #if defined(__FreeBSD)
+      "/usr/local/etc/ceph/$cluster.$name.keyring,"
+      "/usr/local/etc/ceph/$cluster.keyring,"
+      "/usr/local/etc/ceph/keyring,/usr/local/etc/ceph/keyring.bin," 
+  #endif
+    )
+    .set_description(""),
+
+    Option("heartbeat_interval", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(5)
+    .set_description(""),
+
+    Option("heartbeat_file", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
+
+    Option("heartbeat_inject_failure", Option::TYPE_INT, Option::LEVEL_DEV)
+    .set_default(0)
+    .set_description(""),
+
+    Option("perf", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
+
+    Option("ms_type", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("async+posix")
+    .set_description("")
+    .set_safe(),
+
+    Option("ms_public_type", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
+
+    Option("ms_cluster_type", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
+
+    Option("ms_tcp_nodelay", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
+
+    Option("ms_tcp_rcvbuf", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
+
+    Option("ms_tcp_prefetch_max_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(4096)
+    .set_description(""),
+
+    Option("ms_initial_backoff", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(.2)
+    .set_description(""),
+
+    Option("ms_max_backoff", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(15.0)
+    .set_description(""),
+
+    Option("ms_crc_data", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
+
+    Option("ms_crc_header", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
+
+    Option("ms_die_on_bad_msg", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
+
+    Option("ms_die_on_unhandled_msg", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
+
+    Option("ms_die_on_old_message", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
+
+    Option("ms_die_on_skipped_message", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
+
+    Option("ms_dispatch_throttle_bytes", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(100 << 20)
+    .set_description(""),
+
+    Option("ms_bind_ipv6", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
+
+    Option("ms_bind_port_min", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(6800)
+    .set_description(""),
+
+    Option("ms_bind_port_max", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(7300)
+    .set_description(""),
+
+    Option("ms_bind_retry_count", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+  #if !defined(__FreeBSD__)
+    .set_default(3)
+  #else
+    // FreeBSD does not use SO_REUSEADDR so allow for a bit more time per default
+    .set_default(6)
+  #endif
+    .set_description(""),
+
+    Option("ms_bind_retry_delay", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+  #if !defined(__FreeBSD__)
+    .set_default(5)
+  #else
+    // FreeBSD does not use SO_REUSEADDR so allow for a bit more time per default
+    .set_default(6)
+  #endif
+    .set_description(""),
+
+    Option("ms_bind_before_connect", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
+
+    Option("ms_tcp_listen_backlog", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(512)
+    .set_description(""),
+
+    Option("ms_rwthread_stack_bytes", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(1024 << 10)
+    .set_description(""),
+
+    Option("ms_tcp_read_timeout", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(900)
+    .set_description(""),
+
+    Option("ms_pq_max_tokens_per_priority", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(16777216)
+    .set_description(""),
+
+    Option("ms_pq_min_cost", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(65536)
+    .set_description(""),
+
+    Option("ms_inject_socket_failures", Option::TYPE_UINT, Option::LEVEL_DEV)
+    .set_default(0)
+    .set_description(""),
+
+    Option("ms_inject_delay_type", Option::TYPE_STR, Option::LEVEL_DEV)
+    .set_default("")
+    .set_description("")
+    .set_safe(),
+
+    Option("ms_inject_delay_msg_type", Option::TYPE_STR, Option::LEVEL_DEV)
+    .set_default("")
+    .set_description(""),
+
+    Option("ms_inject_delay_max", Option::TYPE_FLOAT, Option::LEVEL_DEV)
+    .set_default(1)
+    .set_description(""),
+
+    Option("ms_inject_delay_probability", Option::TYPE_FLOAT, Option::LEVEL_DEV)
+    .set_default(0)
+    .set_description(""),
+
+    Option("ms_inject_internal_delays", Option::TYPE_FLOAT, Option::LEVEL_DEV)
+    .set_default(0)
+    .set_description(""),
+
+    Option("ms_dump_on_send", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
+
+    Option("ms_dump_corrupt_message_level", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(1)
+    .set_description(""),
+
+    Option("ms_async_op_threads", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(3)
+    .set_description(""),
+
+    Option("ms_async_max_op_threads", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(5)
+    .set_description(""),
+
+    Option("ms_async_set_affinity", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
+
+    Option("ms_async_affinity_cores", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
+
+    Option("ms_async_rdma_device_name", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
+
+    Option("ms_async_rdma_enable_hugepage", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
+
+    Option("ms_async_rdma_buffer_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(128 << 10)
+    .set_description(""),
+
+    Option("ms_async_rdma_send_buffers", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(1024)
+    .set_description(""),
+
+    Option("ms_async_rdma_receive_buffers", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(1024)
+    .set_description(""),
+
+    Option("ms_async_rdma_port_num", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(1)
+    .set_description(""),
+
+    Option("ms_async_rdma_polling_us", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(1000)
+    .set_description(""),
+
+    Option("ms_async_rdma_local_gid", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
+
+    Option("ms_async_rdma_roce_ver", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(1)
+    .set_description(""),
+
+    Option("ms_async_rdma_sl", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(3)
+    .set_description(""),
+
+    Option("ms_async_rdma_dscp", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(96)
+    .set_description(""),
+
+    Option("ms_dpdk_port_id", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
+
+    Option("ms_dpdk_coremask", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("1")
+    .set_description("")
+    .set_safe(),
+
+    Option("ms_dpdk_memory_channel", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("4")
+    .set_description(""),
+
+    Option("ms_dpdk_hugepages", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
+
+    Option("ms_dpdk_pmd", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
+
+    Option("ms_dpdk_host_ipv4_addr", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description("")
+    .set_safe(),
+
+    Option("ms_dpdk_gateway_ipv4_addr", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description("")
+    .set_safe(),
+
+    Option("ms_dpdk_netmask_ipv4_addr", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description("")
+    .set_safe(),
 
-  Option("ms_dpdk_lro", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("ms_dpdk_lro", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("ms_dpdk_hw_flow_control", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("ms_dpdk_hw_flow_control", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("ms_dpdk_hw_queue_weight", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(1)
-  .set_description(""),
+    Option("ms_dpdk_hw_queue_weight", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(1)
+    .set_description(""),
 
-  Option("ms_dpdk_debug_allow_loopback", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .set_description(""),
+    Option("ms_dpdk_debug_allow_loopback", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description(""),
 
-  Option("ms_dpdk_rx_buffer_count_per_core", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(8192)
-  .set_description(""),
+    Option("ms_dpdk_rx_buffer_count_per_core", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(8192)
+    .set_description(""),
 
-  Option("inject_early_sigterm", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .set_description(""),
+    Option("inject_early_sigterm", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description(""),
 
-  Option("mon_data", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("/var/lib/ceph/mon/$cluster-$id")
-  .set_description(""),
+    Option("mon_data", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("/var/lib/ceph/mon/$cluster-$id")
+    .set_description(""),
 
-  Option("mon_initial_members", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
+    Option("mon_initial_members", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
 
-  Option("mon_compact_on_start", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("mon_compact_on_start", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("mon_compact_on_bootstrap", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("mon_compact_on_bootstrap", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("mon_compact_on_trim", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("mon_compact_on_trim", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("mon_osd_cache_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(10)
-  .set_description(""),
+    Option("mon_osd_cache_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(10)
+    .set_description(""),
 
-  Option("mon_cpu_threads", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(4)
-  .set_description(""),
+    Option("mon_cpu_threads", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(4)
+    .set_description(""),
 
-  Option("mon_osd_mapping_pgs_per_chunk", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(4096)
-  .set_description(""),
+    Option("mon_osd_mapping_pgs_per_chunk", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(4096)
+    .set_description(""),
 
-  Option("mon_osd_max_creating_pgs", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(1024)
-  .set_description(""),
+    Option("mon_osd_max_creating_pgs", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(1024)
+    .set_description(""),
 
-  Option("mon_tick_interval", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(5)
-  .set_description(""),
+    Option("mon_tick_interval", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(5)
+    .set_description(""),
 
-  Option("mon_session_timeout", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(300)
-  .set_description(""),
+    Option("mon_session_timeout", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(300)
+    .set_description(""),
 
-  Option("mon_subscribe_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(24*3600)
-  .set_description(""),
+    Option("mon_subscribe_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(24*3600)
+    .set_description(""),
 
-  Option("mon_delta_reset_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(10)
-  .set_description(""),
+    Option("mon_delta_reset_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(10)
+    .set_description(""),
 
-  Option("mon_osd_laggy_halflife", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(60*60)
-  .set_description(""),
+    Option("mon_osd_laggy_halflife", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(60*60)
+    .set_description(""),
 
-  Option("mon_osd_laggy_weight", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(.3)
-  .set_description(""),
+    Option("mon_osd_laggy_weight", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(.3)
+    .set_description(""),
 
-  Option("mon_osd_laggy_max_interval", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(300)
-  .set_description(""),
+    Option("mon_osd_laggy_max_interval", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(300)
+    .set_description(""),
 
-  Option("mon_osd_adjust_heartbeat_grace", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("mon_osd_adjust_heartbeat_grace", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("mon_osd_adjust_down_out_interval", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("mon_osd_adjust_down_out_interval", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("mon_osd_auto_mark_in", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("mon_osd_auto_mark_in", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("mon_osd_auto_mark_auto_out_in", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("mon_osd_auto_mark_auto_out_in", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("mon_osd_auto_mark_new_in", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("mon_osd_auto_mark_new_in", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("mon_osd_destroyed_out_interval", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(600)
-  .set_description(""),
+    Option("mon_osd_destroyed_out_interval", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(600)
+    .set_description(""),
 
-  Option("mon_osd_down_out_interval", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(600)
-  .set_description(""),
+    Option("mon_osd_down_out_interval", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(600)
+    .set_description(""),
 
-  Option("mon_osd_down_out_subtree_limit", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("rack")
-  .set_description(""),
+    Option("mon_osd_down_out_subtree_limit", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("rack")
+    .set_description(""),
 
-  Option("mon_osd_min_up_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(.3)
-  .set_description(""),
+    Option("mon_osd_min_up_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(.3)
+    .set_description(""),
 
-  Option("mon_osd_min_in_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(.75)
-  .set_description(""),
+    Option("mon_osd_min_in_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(.75)
+    .set_description(""),
 
-  Option("mon_osd_warn_op_age", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(32)
-  .set_description(""),
+    Option("mon_osd_warn_op_age", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(32)
+    .set_description(""),
 
-  Option("mon_osd_err_op_age_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(128)
-  .set_description(""),
+    Option("mon_osd_err_op_age_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(128)
+    .set_description(""),
 
-  Option("mon_osd_max_split_count", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(32)
-  .set_description(""),
+    Option("mon_osd_max_split_count", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(32)
+    .set_description(""),
 
-  Option("mon_osd_allow_primary_temp", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("mon_osd_allow_primary_temp", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("mon_osd_allow_primary_affinity", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("mon_osd_allow_primary_affinity", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("mon_osd_prime_pg_temp", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("mon_osd_prime_pg_temp", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("mon_osd_prime_pg_temp_max_time", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(.5)
-  .set_description(""),
+    Option("mon_osd_prime_pg_temp_max_time", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(.5)
+    .set_description(""),
 
-  Option("mon_osd_prime_pg_temp_max_estimate", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(.25)
-  .set_description(""),
+    Option("mon_osd_prime_pg_temp_max_estimate", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(.25)
+    .set_description(""),
 
-  Option("mon_osd_pool_ec_fast_read", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("mon_osd_pool_ec_fast_read", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("mon_stat_smooth_intervals", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(6)
-  .set_description(""),
+    Option("mon_stat_smooth_intervals", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(6)
+    .set_description(""),
 
-  Option("mon_election_timeout", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(5)
-  .set_description(""),
+    Option("mon_election_timeout", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(5)
+    .set_description(""),
 
-  Option("mon_lease", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(5)
-  .set_description(""),
+    Option("mon_lease", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(5)
+    .set_description(""),
 
-  Option("mon_lease_renew_interval_factor", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(.6)
-  .set_description(""),
+    Option("mon_lease_renew_interval_factor", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(.6)
+    .set_description(""),
 
-  Option("mon_lease_ack_timeout_factor", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(2.0)
-  .set_description(""),
+    Option("mon_lease_ack_timeout_factor", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(2.0)
+    .set_description(""),
 
-  Option("mon_accept_timeout_factor", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(2.0)
-  .set_description(""),
+    Option("mon_accept_timeout_factor", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(2.0)
+    .set_description(""),
 
-  Option("mon_clock_drift_allowed", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(.050)
-  .set_description(""),
+    Option("mon_clock_drift_allowed", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(.050)
+    .set_description(""),
 
-  Option("mon_clock_drift_warn_backoff", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(5)
-  .set_description(""),
+    Option("mon_clock_drift_warn_backoff", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(5)
+    .set_description(""),
 
-  Option("mon_timecheck_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(300.0)
-  .set_description(""),
+    Option("mon_timecheck_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(300.0)
+    .set_description(""),
 
-  Option("mon_timecheck_skew_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(30.0)
-  .set_description(""),
+    Option("mon_timecheck_skew_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(30.0)
+    .set_description(""),
 
-  Option("mon_pg_stuck_threshold", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(60)
-  .set_description(""),
+    Option("mon_pg_stuck_threshold", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(60)
+    .set_description(""),
 
-  Option("mon_pg_min_inactive", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(1)
-  .set_description(""),
+    Option("mon_pg_min_inactive", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(1)
+    .set_description(""),
 
-  Option("mon_pg_warn_min_per_osd", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(30)
-  .set_description(""),
+    Option("mon_pg_warn_min_per_osd", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(30)
+    .set_description(""),
 
-  Option("mon_pg_warn_max_per_osd", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(300)
-  .set_description(""),
+    Option("mon_pg_warn_max_per_osd", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(300)
+    .set_description(""),
 
-  Option("mon_pg_warn_max_object_skew", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(10.0)
-  .set_description(""),
+    Option("mon_pg_warn_max_object_skew", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(10.0)
+    .set_description(""),
 
-  Option("mon_pg_warn_min_objects", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(10000)
-  .set_description(""),
+    Option("mon_pg_warn_min_objects", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(10000)
+    .set_description(""),
 
-  Option("mon_pg_warn_min_pool_objects", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(1000)
-  .set_description(""),
+    Option("mon_pg_warn_min_pool_objects", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(1000)
+    .set_description(""),
 
-  Option("mon_pg_check_down_all_threshold", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(.5)
-  .set_description(""),
+    Option("mon_pg_check_down_all_threshold", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(.5)
+    .set_description(""),
 
-  Option("mon_cache_target_full_warn_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(.66)
-  .set_description(""),
+    Option("mon_cache_target_full_warn_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(.66)
+    .set_description(""),
 
-  Option("mon_osd_full_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(.95)
-  .set_description(""),
+    Option("mon_osd_full_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(.95)
+    .set_description(""),
 
-  Option("mon_osd_backfillfull_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(.90)
-  .set_description(""),
+    Option("mon_osd_backfillfull_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(.90)
+    .set_description(""),
 
-  Option("mon_osd_nearfull_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(.85)
-  .set_description(""),
+    Option("mon_osd_nearfull_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(.85)
+    .set_description(""),
 
-  Option("mon_osd_initial_require_min_compat_client", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("jewel")
-  .set_description(""),
+    Option("mon_osd_initial_require_min_compat_client", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("jewel")
+    .set_description(""),
 
-  Option("mon_allow_pool_delete", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("mon_allow_pool_delete", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("mon_fake_pool_delete", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("mon_fake_pool_delete", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("mon_globalid_prealloc", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(10000)
-  .set_description(""),
+    Option("mon_globalid_prealloc", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(10000)
+    .set_description(""),
 
-  Option("mon_osd_report_timeout", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(900)
-  .set_description(""),
+    Option("mon_osd_report_timeout", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(900)
+    .set_description(""),
 
-  Option("mon_force_standby_active", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("mon_force_standby_active", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("mon_warn_on_legacy_crush_tunables", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("mon_warn_on_legacy_crush_tunables", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("mon_crush_min_required_version", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("firefly")
-  .set_description(""),
+    Option("mon_crush_min_required_version", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("firefly")
+    .set_description(""),
 
-  Option("mon_warn_on_crush_straw_calc_version_zero", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("mon_warn_on_crush_straw_calc_version_zero", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("mon_warn_on_osd_down_out_interval_zero", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("mon_warn_on_osd_down_out_interval_zero", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("mon_warn_on_cache_pools_without_hit_sets", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("mon_warn_on_cache_pools_without_hit_sets", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("mon_min_osdmap_epochs", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(500)
-  .set_description(""),
+    Option("mon_warn_on_pool_no_app", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(true)
+    .set_description("Enable POOL_APP_NOT_ENABLED health check"),
 
-  Option("mon_max_pgmap_epochs", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(500)
-  .set_description(""),
+    Option("mon_min_osdmap_epochs", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(500)
+    .set_description(""),
 
-  Option("mon_max_log_epochs", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(500)
-  .set_description(""),
+    Option("mon_max_pgmap_epochs", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(500)
+    .set_description(""),
 
-  Option("mon_max_mdsmap_epochs", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(500)
-  .set_description(""),
+    Option("mon_max_log_epochs", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(500)
+    .set_description(""),
 
-  Option("mon_max_osd", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(10000)
-  .set_description(""),
+    Option("mon_max_mdsmap_epochs", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(500)
+    .set_description(""),
 
-  Option("mon_probe_timeout", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(2.0)
-  .set_description(""),
+    Option("mon_max_osd", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(10000)
+    .set_description(""),
 
-  Option("mon_client_bytes", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(100ul << 20)
-  .set_description(""),
+    Option("mon_probe_timeout", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(2.0)
+    .set_description(""),
 
-  Option("mon_mgr_proxy_client_bytes_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(.3)
-  .set_description(""),
+    Option("mon_client_bytes", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(100ul << 20)
+    .set_description(""),
 
-  Option("mon_log_max_summary", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(50)
-  .set_description(""),
+    Option("mon_mgr_proxy_client_bytes_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(.3)
+    .set_description(""),
 
-  Option("mon_daemon_bytes", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(400ul << 20)
-  .set_description(""),
+    Option("mon_log_max_summary", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(50)
+    .set_description(""),
 
-  Option("mon_max_log_entries_per_event", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(4096)
-  .set_description(""),
+    Option("mon_daemon_bytes", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(400ul << 20)
+    .set_description(""),
 
-  Option("mon_reweight_min_pgs_per_osd", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(10)
-  .set_description(""),
+    Option("mon_max_log_entries_per_event", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(4096)
+    .set_description(""),
 
-  Option("mon_reweight_min_bytes_per_osd", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(100*1024*1024)
-  .set_description(""),
+    Option("mon_reweight_min_pgs_per_osd", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(10)
+    .set_description(""),
 
-  Option("mon_reweight_max_osds", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(4)
-  .set_description(""),
+    Option("mon_reweight_min_bytes_per_osd", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(100*1024*1024)
+    .set_description(""),
 
-  Option("mon_reweight_max_change", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(0.05)
-  .set_description(""),
+    Option("mon_reweight_max_osds", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(4)
+    .set_description(""),
 
-  Option("mon_health_data_update_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(60.0)
-  .set_description(""),
+    Option("mon_reweight_max_change", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(0.05)
+    .set_description(""),
 
-  Option("mon_health_to_clog", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("mon_health_data_update_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(60.0)
+    .set_description(""),
 
-  Option("mon_health_to_clog_interval", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(3600)
-  .set_description(""),
+    Option("mon_health_to_clog", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("mon_health_to_clog_tick_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(60.0)
-  .set_description(""),
+    Option("mon_health_to_clog_interval", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(3600)
+    .set_description(""),
 
-  Option("mon_health_preluminous_compat", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("mon_health_to_clog_tick_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(60.0)
+    .set_description(""),
 
-  Option("mon_health_max_detail", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(50)
-  .set_description(""),
+    Option("mon_health_preluminous_compat", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description("Include health warnings in preluminous JSON fields"),
 
-  Option("mon_data_avail_crit", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(5)
-  .set_description(""),
+    Option("mon_health_preluminous_compat_warning", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description("Warn about the health JSON format change in preluminous JSON fields"),
 
-  Option("mon_data_avail_warn", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(30)
-  .set_description(""),
+    Option("mon_health_max_detail", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(50)
+    .set_description(""),
 
-  Option("mon_data_size_warn", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(15ull*1024*1024*1024)
-  .set_description(""),
+    Option("mon_data_avail_crit", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(5)
+    .set_description(""),
 
-  Option("mon_warn_not_scrubbed", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("mon_data_avail_warn", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(30)
+    .set_description(""),
 
-  Option("mon_warn_not_deep_scrubbed", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("mon_data_size_warn", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(15ull*1024*1024*1024)
+    .set_description(""),
 
-  Option("mon_scrub_interval", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(3600*24)
-  .set_description(""),
+    Option("mon_warn_not_scrubbed", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
 
-  Option("mon_scrub_timeout", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(60*5)
-  .set_description(""),
+    Option("mon_warn_not_deep_scrubbed", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
 
-  Option("mon_scrub_max_keys", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(100)
-  .set_description(""),
+    Option("mon_scrub_interval", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(3600*24)
+    .set_description(""),
 
-  Option("mon_scrub_inject_crc_mismatch", Option::TYPE_FLOAT, Option::LEVEL_DEV)
-  .set_default(0.0)
-  .set_description(""),
+    Option("mon_scrub_timeout", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(60*5)
+    .set_description(""),
 
-  Option("mon_scrub_inject_missing_keys", Option::TYPE_FLOAT, Option::LEVEL_DEV)
-  .set_default(0.0)
-  .set_description(""),
+    Option("mon_scrub_max_keys", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(100)
+    .set_description(""),
 
-  Option("mon_config_key_max_entry_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(4096)
-  .set_description(""),
+    Option("mon_scrub_inject_crc_mismatch", Option::TYPE_FLOAT, Option::LEVEL_DEV)
+    .set_default(0.0)
+    .set_description(""),
 
-  Option("mon_sync_timeout", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(60.0)
-  .set_description(""),
+    Option("mon_scrub_inject_missing_keys", Option::TYPE_FLOAT, Option::LEVEL_DEV)
+    .set_default(0.0)
+    .set_description(""),
 
-  Option("mon_sync_max_payload_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(1048576)
-  .set_description(""),
+    Option("mon_config_key_max_entry_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(4096)
+    .set_description(""),
 
-  Option("mon_sync_debug", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("mon_sync_timeout", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(60.0)
+    .set_description(""),
 
-  Option("mon_inject_sync_get_chunk_delay", Option::TYPE_FLOAT, Option::LEVEL_DEV)
-  .set_default(0)
-  .set_description(""),
+    Option("mon_sync_max_payload_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(1048576)
+    .set_description(""),
 
-  Option("mon_osd_min_down_reporters", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(2)
-  .set_description(""),
+    Option("mon_sync_debug", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("mon_osd_reporter_subtree_level", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("host")
-  .set_description(""),
+    Option("mon_inject_sync_get_chunk_delay", Option::TYPE_FLOAT, Option::LEVEL_DEV)
+    .set_default(0)
+    .set_description(""),
 
-  Option("mon_osd_force_trim_to", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("mon_osd_min_down_reporters", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(2)
+    .set_description(""),
 
-  Option("mon_mds_force_trim_to", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("mon_osd_reporter_subtree_level", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("host")
+    .set_description(""),
 
-  Option("mon_mds_skip_sanity", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("mon_osd_force_trim_to", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
 
-  Option("mon_debug_deprecated_as_obsolete", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .set_description(""),
+    Option("mon_mds_force_trim_to", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
 
-  Option("mon_debug_dump_transactions", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .set_description(""),
+    Option("mon_mds_skip_sanity", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("mon_debug_dump_json", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .set_description(""),
+    Option("mon_debug_deprecated_as_obsolete", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description(""),
 
-  Option("mon_debug_dump_location", Option::TYPE_STR, Option::LEVEL_DEV)
-  .set_default("/var/log/ceph/$cluster-$name.tdump")
-  .set_description(""),
+    Option("mon_debug_dump_transactions", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description(""),
 
-  Option("mon_debug_no_require_luminous", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .set_description(""),
+    Option("mon_debug_dump_json", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description(""),
 
-  Option("mon_debug_no_require_bluestore_for_ec_overwrites", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .set_description(""),
+    Option("mon_debug_dump_location", Option::TYPE_STR, Option::LEVEL_DEV)
+    .set_default("/var/log/ceph/$cluster-$name.tdump")
+    .set_description(""),
 
-  Option("mon_debug_no_initial_persistent_features", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .set_description(""),
+    Option("mon_debug_no_require_luminous", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description(""),
 
-  Option("mon_inject_transaction_delay_max", Option::TYPE_FLOAT, Option::LEVEL_DEV)
-  .set_default(10.0)
-  .set_description(""),
+    Option("mon_debug_no_require_bluestore_for_ec_overwrites", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description(""),
 
-  Option("mon_inject_transaction_delay_probability", Option::TYPE_FLOAT, Option::LEVEL_DEV)
-  .set_default(0)
-  .set_description(""),
+    Option("mon_debug_no_initial_persistent_features", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description(""),
 
-  Option("mon_sync_provider_kill_at", Option::TYPE_INT, Option::LEVEL_DEV)
-  .set_default(0)
-  .set_description(""),
+    Option("mon_inject_transaction_delay_max", Option::TYPE_FLOAT, Option::LEVEL_DEV)
+    .set_default(10.0)
+    .set_description(""),
 
-  Option("mon_sync_requester_kill_at", Option::TYPE_INT, Option::LEVEL_DEV)
-  .set_default(0)
-  .set_description(""),
+    Option("mon_inject_transaction_delay_probability", Option::TYPE_FLOAT, Option::LEVEL_DEV)
+    .set_default(0)
+    .set_description(""),
 
-  Option("mon_force_quorum_join", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("mon_sync_provider_kill_at", Option::TYPE_INT, Option::LEVEL_DEV)
+    .set_default(0)
+    .set_description(""),
 
-  Option("mon_keyvaluedb", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("rocksdb")
-  .set_description(""),
+    Option("mon_sync_requester_kill_at", Option::TYPE_INT, Option::LEVEL_DEV)
+    .set_default(0)
+    .set_description(""),
 
-  Option("mon_debug_unsafe_allow_tier_with_nonempty_snaps", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .set_description(""),
+    Option("mon_force_quorum_join", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("mon_osd_blacklist_default_expire", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(60*60)
-  .set_description(""),
+    Option("mon_keyvaluedb", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("rocksdb")
+    .set_description(""),
 
-  Option("mon_osd_crush_smoke_test", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("mon_debug_unsafe_allow_tier_with_nonempty_snaps", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description(""),
 
-  Option("paxos_stash_full_interval", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(25)
-  .set_description(""),
+    Option("mon_osd_blacklist_default_expire", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(60*60)
+    .set_description(""),
 
-  Option("paxos_max_join_drift", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(10)
-  .set_description(""),
+    Option("mon_osd_crush_smoke_test", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("paxos_propose_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(1.0)
-  .set_description(""),
+    Option("paxos_stash_full_interval", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(25)
+    .set_description(""),
 
-  Option("paxos_min_wait", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(0.05)
-  .set_description(""),
+    Option("paxos_max_join_drift", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(10)
+    .set_description(""),
 
-  Option("paxos_min", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(500)
-  .set_description(""),
+    Option("paxos_propose_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(1.0)
+    .set_description(""),
 
-  Option("paxos_trim_min", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(250)
-  .set_description(""),
+    Option("paxos_min_wait", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(0.05)
+    .set_description(""),
 
-  Option("paxos_trim_max", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(500)
-  .set_description(""),
+    Option("paxos_min", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(500)
+    .set_description(""),
 
-  Option("paxos_service_trim_min", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(250)
-  .set_description(""),
+    Option("paxos_trim_min", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(250)
+    .set_description(""),
 
-  Option("paxos_service_trim_max", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(500)
-  .set_description(""),
+    Option("paxos_trim_max", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(500)
+    .set_description(""),
 
-  Option("paxos_kill_at", Option::TYPE_INT, Option::LEVEL_DEV)
-  .set_default(0)
-  .set_description(""),
+    Option("paxos_service_trim_min", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(250)
+    .set_description(""),
 
-  Option("auth_cluster_required", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("cephx")
-  .set_description(""),
+    Option("paxos_service_trim_max", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(500)
+    .set_description(""),
 
-  Option("auth_service_required", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("cephx")
-  .set_description(""),
+    Option("paxos_kill_at", Option::TYPE_INT, Option::LEVEL_DEV)
+    .set_default(0)
+    .set_description(""),
 
-  Option("auth_client_required", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("cephx, none")
-  .set_description(""),
+    Option("auth_cluster_required", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("cephx")
+    .set_description(""),
 
-  Option("auth_supported", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
+    Option("auth_service_required", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("cephx")
+    .set_description(""),
 
-  Option("max_rotating_auth_attempts", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(10)
-  .set_description(""),
+    Option("auth_client_required", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("cephx, none")
+    .set_description(""),
 
-  Option("cephx_require_signatures", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("auth_supported", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
 
-  Option("cephx_cluster_require_signatures", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("max_rotating_auth_attempts", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(10)
+    .set_description(""),
 
-  Option("cephx_service_require_signatures", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("cephx_require_signatures", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("cephx_sign_messages", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("cephx_cluster_require_signatures", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("auth_mon_ticket_ttl", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(60*60*12)
-  .set_description(""),
+    Option("cephx_service_require_signatures", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("auth_service_ticket_ttl", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(60*60)
-  .set_description(""),
+    Option("cephx_sign_messages", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("auth_debug", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .set_description(""),
+    Option("auth_mon_ticket_ttl", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(60*60*12)
+    .set_description(""),
 
-  Option("mon_client_hunt_parallel", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(2)
-  .set_description(""),
+    Option("auth_service_ticket_ttl", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(60*60)
+    .set_description(""),
 
-  Option("mon_client_hunt_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(3.0)
-  .set_description(""),
+    Option("auth_debug", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description(""),
 
-  Option("mon_client_ping_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(10.0)
-  .set_description(""),
+    Option("mon_client_hunt_parallel", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(2)
+    .set_description(""),
 
-  Option("mon_client_ping_timeout", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(30.0)
-  .set_description(""),
+    Option("mon_client_hunt_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(3.0)
+    .set_description(""),
 
-  Option("mon_client_hunt_interval_backoff", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(2.0)
-  .set_description(""),
+    Option("mon_client_ping_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(10.0)
+    .set_description(""),
 
-  Option("mon_client_hunt_interval_min_multiple", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(1.0)
-  .set_description(""),
+    Option("mon_client_ping_timeout", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(30.0)
+    .set_description(""),
 
-  Option("mon_client_hunt_interval_max_multiple", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(10.0)
-  .set_description(""),
+    Option("mon_client_hunt_interval_backoff", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(2.0)
+    .set_description(""),
 
-  Option("mon_client_max_log_entries_per_message", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(1000)
-  .set_description(""),
+    Option("mon_client_hunt_interval_min_multiple", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(1.0)
+    .set_description(""),
 
-  Option("mon_max_pool_pg_num", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(65536)
-  .set_description(""),
+    Option("mon_client_hunt_interval_max_multiple", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(10.0)
+    .set_description(""),
 
-  Option("mon_pool_quota_warn_threshold", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("mon_client_max_log_entries_per_message", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(1000)
+    .set_description(""),
 
-  Option("mon_pool_quota_crit_threshold", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("mon_max_pool_pg_num", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(65536)
+    .set_description(""),
 
-  Option("crush_location", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
+    Option("mon_pool_quota_warn_threshold", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
 
-  Option("crush_location_hook", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
+    Option("mon_pool_quota_crit_threshold", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
 
-  Option("crush_location_hook_timeout", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(10)
-  .set_description(""),
+    Option("crush_location", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
 
-  Option("objecter_tick_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(5.0)
-  .set_description(""),
+    Option("crush_location_hook", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
 
-  Option("objecter_timeout", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(10.0)
-  .set_description(""),
+    Option("crush_location_hook_timeout", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(10)
+    .set_description(""),
 
-  Option("objecter_inflight_op_bytes", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(1024*1024*100)
-  .set_description(""),
+    Option("objecter_tick_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(5.0)
+    .set_description(""),
 
-  Option("objecter_inflight_ops", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(1024)
-  .set_description(""),
+    Option("objecter_timeout", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(10.0)
+    .set_description(""),
 
-  Option("objecter_completion_locks_per_session", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(32)
-  .set_description(""),
+    Option("objecter_inflight_op_bytes", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(1024*1024*100)
+    .set_description(""),
 
-  Option("objecter_inject_no_watch_ping", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .set_description(""),
+    Option("objecter_inflight_ops", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(1024)
+    .set_description(""),
 
-  Option("objecter_retry_writes_after_first_reply", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("objecter_completion_locks_per_session", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(32)
+    .set_description(""),
 
-  Option("objecter_debug_inject_relock_delay", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .set_description(""),
+    Option("objecter_inject_no_watch_ping", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description(""),
 
-  Option("filer_max_purge_ops", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(10)
-  .set_description(""),
+    Option("objecter_retry_writes_after_first_reply", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("filer_max_truncate_ops", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(128)
-  .set_description(""),
+    Option("objecter_debug_inject_relock_delay", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description(""),
 
-  Option("journaler_write_head_interval", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(15)
-  .set_description(""),
+    Option("filer_max_purge_ops", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(10)
+    .set_description(""),
 
-  Option("journaler_prefetch_periods", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(10)
-  .set_description(""),
+    Option("filer_max_truncate_ops", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(128)
+    .set_description(""),
 
-  Option("journaler_prezero_periods", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(5)
-  .set_description(""),
+    Option("journaler_write_head_interval", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(15)
+    .set_description(""),
 
-  Option("osd_check_max_object_name_len_on_startup", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("journaler_prefetch_periods", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(10)
+    .set_description(""),
 
-  Option("osd_max_backfills", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(1)
-  .set_description(""),
+    Option("journaler_prezero_periods", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(5)
+    .set_description(""),
 
-  Option("osd_min_recovery_priority", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("osd_check_max_object_name_len_on_startup", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("osd_backfill_retry_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(30.0)
-  .set_description(""),
+    Option("osd_max_backfills", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(1)
+    .set_description(""),
 
-  Option("osd_recovery_retry_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(30.0)
-  .set_description(""),
+    Option("osd_min_recovery_priority", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
 
-  Option("osd_agent_max_ops", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(4)
-  .set_description(""),
+    Option("osd_backfill_retry_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(30.0)
+    .set_description(""),
 
-  Option("osd_agent_max_low_ops", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(2)
-  .set_description(""),
+    Option("osd_recovery_retry_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(30.0)
+    .set_description(""),
 
-  Option("osd_agent_min_evict_effort", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(.1)
-  .set_description(""),
+    Option("osd_agent_max_ops", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(4)
+    .set_description(""),
 
-  Option("osd_agent_quantize_effort", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(.1)
-  .set_description(""),
+    Option("osd_agent_max_low_ops", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(2)
+    .set_description(""),
 
-  Option("osd_agent_delay_time", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(5.0)
-  .set_description(""),
+    Option("osd_agent_min_evict_effort", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(.1)
+    .set_description(""),
 
-  Option("osd_find_best_info_ignore_history_les", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("osd_agent_quantize_effort", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(.1)
+    .set_description(""),
 
-  Option("osd_agent_hist_halflife", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(1000)
-  .set_description(""),
+    Option("osd_agent_delay_time", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(5.0)
+    .set_description(""),
 
-  Option("osd_agent_slop", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(.02)
-  .set_description(""),
+    Option("osd_find_best_info_ignore_history_les", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("osd_uuid", Option::TYPE_UUID, Option::LEVEL_ADVANCED)
-  .set_default(uuid_d())
-  .set_description(""),
+    Option("osd_agent_hist_halflife", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(1000)
+    .set_description(""),
 
-  Option("osd_data", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("/var/lib/ceph/osd/$cluster-$id")
-  .set_description(""),
+    Option("osd_agent_slop", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(.02)
+    .set_description(""),
 
-  Option("osd_journal", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("/var/lib/ceph/osd/$cluster-$id/journal")
-  .set_description(""),
+    Option("osd_uuid", Option::TYPE_UUID, Option::LEVEL_ADVANCED)
+    .set_default(uuid_d())
+    .set_description(""),
 
-  Option("osd_journal_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(5120)
-  .set_description(""),
+    Option("osd_data", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("/var/lib/ceph/osd/$cluster-$id")
+    .set_description(""),
 
-  Option("osd_journal_flush_on_shutdown", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("osd_journal", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("/var/lib/ceph/osd/$cluster-$id/journal")
+    .set_description(""),
 
-  Option("osd_os_flags", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("osd_journal_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(5120)
+    .set_description(""),
 
-  Option("osd_max_write_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(90)
-  .set_description(""),
+    Option("osd_journal_flush_on_shutdown", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("osd_max_pgls", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(1024)
-  .set_description(""),
+    Option("osd_os_flags", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
 
-  Option("osd_client_message_size_cap", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(500*1024L*1024L)
-  .set_description(""),
+    Option("osd_max_write_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(90)
+    .set_description(""),
 
-  Option("osd_client_message_cap", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(100)
-  .set_description(""),
+    Option("osd_max_pgls", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(1024)
+    .set_description(""),
 
-  Option("osd_pg_bits", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(6)
-  .set_description(""),
+    Option("osd_client_message_size_cap", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(500*1024L*1024L)
+    .set_description(""),
 
-  Option("osd_pgp_bits", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(6)
-  .set_description(""),
+    Option("osd_client_message_cap", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(100)
+    .set_description(""),
 
-  Option("osd_crush_update_weight_set", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("osd_pg_bits", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(6)
+    .set_description(""),
 
-  Option("osd_crush_chooseleaf_type", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(1)
-  .set_description(""),
+    Option("osd_pgp_bits", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(6)
+    .set_description(""),
 
-  Option("osd_pool_use_gmt_hitset", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("osd_crush_update_weight_set", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("osd_crush_update_on_start", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("osd_crush_chooseleaf_type", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(1)
+    .set_description(""),
 
-  Option("osd_class_update_on_start", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("osd_pool_use_gmt_hitset", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("osd_crush_initial_weight", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(-1)
-  .set_description(""),
+    Option("osd_crush_update_on_start", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("osd_pool_default_crush_rule", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(-1)
-  .set_description(""),
+    Option("osd_class_update_on_start", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("osd_pool_erasure_code_stripe_unit", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(4096)
-  .set_description(""),
+    Option("osd_crush_initial_weight", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(-1)
+    .set_description(""),
 
-  Option("osd_pool_default_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(3)
-  .set_description(""),
+    Option("osd_pool_default_crush_rule", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(-1)
+    .set_description(""),
 
-  Option("osd_pool_default_min_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("osd_pool_erasure_code_stripe_unit", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(4096)
+    .set_description(""),
 
-  Option("osd_pool_default_pg_num", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(8)
-  .set_description(""),
+    Option("osd_pool_default_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(3)
+    .set_description(""),
 
-  Option("osd_pool_default_pgp_num", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(8)
-  .set_description(""),
+    Option("osd_pool_default_min_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
 
-  Option("osd_pool_default_type", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("replicated")
-  .set_description(""),
+    Option("osd_pool_default_pg_num", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(8)
+    .set_description(""),
 
-  Option("osd_pool_default_erasure_code_profile", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("plugin=jerasure technique=reed_sol_van k=2 m=1")
-  .set_description(""),
+    Option("osd_pool_default_pgp_num", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(8)
+    .set_description(""),
 
-  Option("osd_erasure_code_plugins", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("jerasure lrc"
-#ifdef HAVE_BETTER_YASM_ELF64
-       " isa"
-#endif
-      )
-  .set_description(""),
+    Option("osd_pool_default_type", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("replicated")
+    .set_description(""),
 
-  Option("osd_allow_recovery_below_min_size", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("osd_pool_default_erasure_code_profile", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("plugin=jerasure technique=reed_sol_van k=2 m=1")
+    .set_description(""),
 
-  Option("osd_pool_default_flags", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("osd_erasure_code_plugins", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("jerasure lrc"
+  #ifdef HAVE_BETTER_YASM_ELF64
+         " isa"
+  #endif
+        )
+    .set_description(""),
 
-  Option("osd_pool_default_flag_hashpspool", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("osd_allow_recovery_below_min_size", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("osd_pool_default_flag_nodelete", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("osd_pool_default_flags", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
 
-  Option("osd_pool_default_flag_nopgchange", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("osd_pool_default_flag_hashpspool", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("osd_pool_default_flag_nosizechange", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("osd_pool_default_flag_nodelete", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("osd_pool_default_hit_set_bloom_fpp", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(.05)
-  .set_description(""),
+    Option("osd_pool_default_flag_nopgchange", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("osd_pool_default_cache_target_dirty_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(.4)
-  .set_description(""),
+    Option("osd_pool_default_flag_nosizechange", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("osd_pool_default_cache_target_dirty_high_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(.6)
-  .set_description(""),
+    Option("osd_pool_default_hit_set_bloom_fpp", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(.05)
+    .set_description(""),
 
-  Option("osd_pool_default_cache_target_full_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(.8)
-  .set_description(""),
+    Option("osd_pool_default_cache_target_dirty_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(.4)
+    .set_description(""),
 
-  Option("osd_pool_default_cache_min_flush_age", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("osd_pool_default_cache_target_dirty_high_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(.6)
+    .set_description(""),
 
-  Option("osd_pool_default_cache_min_evict_age", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("osd_pool_default_cache_target_full_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(.8)
+    .set_description(""),
 
-  Option("osd_pool_default_cache_max_evict_check_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(10)
-  .set_description(""),
+    Option("osd_pool_default_cache_min_flush_age", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
 
-  Option("osd_hit_set_min_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(1000)
-  .set_description(""),
+    Option("osd_pool_default_cache_min_evict_age", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
 
-  Option("osd_hit_set_max_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(100000)
-  .set_description(""),
+    Option("osd_pool_default_cache_max_evict_check_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(10)
+    .set_description(""),
 
-  Option("osd_hit_set_namespace", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default(".ceph-internal")
-  .set_description(""),
+    Option("osd_hit_set_min_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(1000)
+    .set_description(""),
 
-  Option("osd_tier_promote_max_objects_sec", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(25)
-  .set_description(""),
+    Option("osd_hit_set_max_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(100000)
+    .set_description(""),
 
-  Option("osd_tier_promote_max_bytes_sec", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(5 * 1024*1024)
-  .set_description(""),
+    Option("osd_hit_set_namespace", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default(".ceph-internal")
+    .set_description(""),
 
-  Option("osd_tier_default_cache_mode", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("writeback")
-  .set_description(""),
+    Option("osd_tier_promote_max_objects_sec", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(25)
+    .set_description(""),
 
-  Option("osd_tier_default_cache_hit_set_count", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(4)
-  .set_description(""),
+    Option("osd_tier_promote_max_bytes_sec", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(5 * 1024*1024)
+    .set_description(""),
 
-  Option("osd_tier_default_cache_hit_set_period", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(1200)
-  .set_description(""),
+    Option("osd_tier_default_cache_mode", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("writeback")
+    .set_description(""),
 
-  Option("osd_tier_default_cache_hit_set_type", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("bloom")
-  .set_description(""),
+    Option("osd_tier_default_cache_hit_set_count", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(4)
+    .set_description(""),
 
-  Option("osd_tier_default_cache_min_read_recency_for_promote", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(1)
-  .set_description(""),
+    Option("osd_tier_default_cache_hit_set_period", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(1200)
+    .set_description(""),
 
-  Option("osd_tier_default_cache_min_write_recency_for_promote", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(1)
-  .set_description(""),
+    Option("osd_tier_default_cache_hit_set_type", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("bloom")
+    .set_description(""),
 
-  Option("osd_tier_default_cache_hit_set_grade_decay_rate", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(20)
-  .set_description(""),
+    Option("osd_tier_default_cache_min_read_recency_for_promote", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(1)
+    .set_description(""),
 
-  Option("osd_tier_default_cache_hit_set_search_last_n", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(1)
-  .set_description(""),
+    Option("osd_tier_default_cache_min_write_recency_for_promote", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(1)
+    .set_description(""),
 
-  Option("osd_map_dedup", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("osd_tier_default_cache_hit_set_grade_decay_rate", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(20)
+    .set_description(""),
 
-  Option("osd_map_max_advance", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(40)
-  .set_description(""),
+    Option("osd_tier_default_cache_hit_set_search_last_n", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(1)
+    .set_description(""),
 
-  Option("osd_map_cache_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(50)
-  .set_description(""),
+    Option("osd_map_dedup", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
+
+    Option("osd_map_max_advance", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(40)
+    .set_description(""),
+
+    Option("osd_map_cache_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(50)
+    .set_description(""),
+
+    Option("osd_map_message_max", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(40)
+    .set_description(""),
+
+    Option("osd_map_share_max_epochs", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(40)
+    .set_description(""),
+
+    Option("osd_inject_bad_map_crc_probability", Option::TYPE_FLOAT, Option::LEVEL_DEV)
+    .set_default(0)
+    .set_description(""),
+
+    Option("osd_inject_failure_on_pg_removal", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description(""),
+
+    Option("osd_max_markdown_period", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(600)
+    .set_description(""),
+
+    Option("osd_max_markdown_count", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(5)
+    .set_description(""),
+
+    Option("osd_peering_wq_threads", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(2)
+    .set_description(""),
+
+    Option("osd_peering_wq_batch_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(20)
+    .set_description(""),
+
+    Option("osd_op_pq_max_tokens_per_priority", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(4194304)
+    .set_description(""),
+
+    Option("osd_op_pq_min_cost", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(65536)
+    .set_description(""),
+
+    Option("osd_disk_threads", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(1)
+    .set_description(""),
+
+    Option("osd_disk_thread_ioprio_class", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
+
+    Option("osd_disk_thread_ioprio_priority", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(-1)
+    .set_description(""),
+
+    Option("osd_recover_clone_overlap", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
+
+    Option("osd_op_num_threads_per_shard", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
+
+    Option("osd_op_num_threads_per_shard_hdd", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(1)
+    .set_description(""),
+
+    Option("osd_op_num_threads_per_shard_ssd", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(2)
+    .set_description(""),
+
+    Option("osd_op_num_shards", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
+
+    Option("osd_op_num_shards_hdd", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(5)
+    .set_description(""),
+
+    Option("osd_op_num_shards_ssd", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(8)
+    .set_description(""),
+
+    Option("osd_op_queue", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("wpq")
+    .set_enum_allowed( { "wpq", "prioritized", "mclock_opclass", "mclock_client", "debug_random" } )
+    .set_description("which operation queue algorithm to use")
+    .set_long_description("which operation queue algorithm to use; mclock_opclass and mclock_client are currently experimental")
+    .add_see_also("osd_op_queue_cut_off"),
+
+    Option("osd_op_queue_cut_off", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("low")
+    .set_enum_allowed( { "low", "high", "debug_random" } )
+    .set_description("the threshold between high priority ops and low priority ops")
+    .set_long_description("the threshold between high priority ops that use strict priority ordering and low priority ops that use a fairness algorithm that may or may not incorporate priority")
+    .add_see_also("osd_op_queue"),
+
+    Option("osd_op_queue_mclock_client_op_res", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(1000.0)
+    .set_description("mclock reservation of client operator requests")
+    .set_long_description("mclock reservation of client operator requests when osd_op_queue is either 'mclock_opclass' or 'mclock_client'; higher values increase the reservation")
+    .add_see_also("osd_op_queue")
+    .add_see_also("osd_op_queue_mclock_client_op_wgt")
+    .add_see_also("osd_op_queue_mclock_client_op_lim")
+    .add_see_also("osd_op_queue_mclock_osd_subop_res")
+    .add_see_also("osd_op_queue_mclock_osd_subop_wgt")
+    .add_see_also("osd_op_queue_mclock_osd_subop_lim")
+    .add_see_also("osd_op_queue_mclock_snap_res")
+    .add_see_also("osd_op_queue_mclock_snap_wgt")
+    .add_see_also("osd_op_queue_mclock_snap_lim")
+    .add_see_also("osd_op_queue_mclock_recov_res")
+    .add_see_also("osd_op_queue_mclock_recov_wgt")
+    .add_see_also("osd_op_queue_mclock_recov_lim")
+    .add_see_also("osd_op_queue_mclock_scrub_res")
+    .add_see_also("osd_op_queue_mclock_scrub_wgt")
+    .add_see_also("osd_op_queue_mclock_scrub_lim"),
+
+    Option("osd_op_queue_mclock_client_op_wgt", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(500.0)
+    .set_description("mclock weight of client operator requests")
+    .set_long_description("mclock weight of client operator requests when osd_op_queue is either 'mclock_opclass' or 'mclock_client'; higher values increase the weight")
+    .add_see_also("osd_op_queue")
+    .add_see_also("osd_op_queue_mclock_client_op_res")
+    .add_see_also("osd_op_queue_mclock_client_op_lim")
+    .add_see_also("osd_op_queue_mclock_osd_subop_res")
+    .add_see_also("osd_op_queue_mclock_osd_subop_wgt")
+    .add_see_also("osd_op_queue_mclock_osd_subop_lim")
+    .add_see_also("osd_op_queue_mclock_snap_res")
+    .add_see_also("osd_op_queue_mclock_snap_wgt")
+    .add_see_also("osd_op_queue_mclock_snap_lim")
+    .add_see_also("osd_op_queue_mclock_recov_res")
+    .add_see_also("osd_op_queue_mclock_recov_wgt")
+    .add_see_also("osd_op_queue_mclock_recov_lim")
+    .add_see_also("osd_op_queue_mclock_scrub_res")
+    .add_see_also("osd_op_queue_mclock_scrub_wgt")
+    .add_see_also("osd_op_queue_mclock_scrub_lim"),
+
+    Option("osd_op_queue_mclock_client_op_lim", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(0.0)
+    .set_description("mclock limit of client operator requests")
+    .set_long_description("mclock limit of client operator requests when osd_op_queue is either 'mclock_opclass' or 'mclock_client'; higher values increase the limit")
+    .add_see_also("osd_op_queue")
+    .add_see_also("osd_op_queue_mclock_client_op_res")
+    .add_see_also("osd_op_queue_mclock_client_op_wgt")
+    .add_see_also("osd_op_queue_mclock_osd_subop_res")
+    .add_see_also("osd_op_queue_mclock_osd_subop_wgt")
+    .add_see_also("osd_op_queue_mclock_osd_subop_lim")
+    .add_see_also("osd_op_queue_mclock_snap_res")
+    .add_see_also("osd_op_queue_mclock_snap_wgt")
+    .add_see_also("osd_op_queue_mclock_snap_lim")
+    .add_see_also("osd_op_queue_mclock_recov_res")
+    .add_see_also("osd_op_queue_mclock_recov_wgt")
+    .add_see_also("osd_op_queue_mclock_recov_lim")
+    .add_see_also("osd_op_queue_mclock_scrub_res")
+    .add_see_also("osd_op_queue_mclock_scrub_wgt")
+    .add_see_also("osd_op_queue_mclock_scrub_lim"),
+
+    Option("osd_op_queue_mclock_osd_subop_res", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(1000.0)
+    .set_description("mclock reservation of osd sub-operation requests")
+    .set_long_description("mclock reservation of osd sub-operation requests when osd_op_queue is either 'mclock_opclass' or 'mclock_client'; higher values increase the reservation")
+    .add_see_also("osd_op_queue")
+    .add_see_also("osd_op_queue_mclock_client_op_res")
+    .add_see_also("osd_op_queue_mclock_client_op_wgt")
+    .add_see_also("osd_op_queue_mclock_client_op_lim")
+    .add_see_also("osd_op_queue_mclock_osd_subop_wgt")
+    .add_see_also("osd_op_queue_mclock_osd_subop_lim")
+    .add_see_also("osd_op_queue_mclock_snap_res")
+    .add_see_also("osd_op_queue_mclock_snap_wgt")
+    .add_see_also("osd_op_queue_mclock_snap_lim")
+    .add_see_also("osd_op_queue_mclock_recov_res")
+    .add_see_also("osd_op_queue_mclock_recov_wgt")
+    .add_see_also("osd_op_queue_mclock_recov_lim")
+    .add_see_also("osd_op_queue_mclock_scrub_res")
+    .add_see_also("osd_op_queue_mclock_scrub_wgt")
+    .add_see_also("osd_op_queue_mclock_scrub_lim"),
+
+    Option("osd_op_queue_mclock_osd_subop_wgt", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(500.0)
+    .set_description("mclock weight of osd sub-operation requests")
+    .set_long_description("mclock weight of osd sub-operation requests when osd_op_queue is either 'mclock_opclass' or 'mclock_client'; higher values increase the weight")
+    .add_see_also("osd_op_queue")
+    .add_see_also("osd_op_queue_mclock_client_op_res")
+    .add_see_also("osd_op_queue_mclock_client_op_wgt")
+    .add_see_also("osd_op_queue_mclock_client_op_lim")
+    .add_see_also("osd_op_queue_mclock_osd_subop_res")
+    .add_see_also("osd_op_queue_mclock_osd_subop_lim")
+    .add_see_also("osd_op_queue_mclock_snap_res")
+    .add_see_also("osd_op_queue_mclock_snap_wgt")
+    .add_see_also("osd_op_queue_mclock_snap_lim")
+    .add_see_also("osd_op_queue_mclock_recov_res")
+    .add_see_also("osd_op_queue_mclock_recov_wgt")
+    .add_see_also("osd_op_queue_mclock_recov_lim")
+    .add_see_also("osd_op_queue_mclock_scrub_res")
+    .add_see_also("osd_op_queue_mclock_scrub_wgt")
+    .add_see_also("osd_op_queue_mclock_scrub_lim"),
+
+    Option("osd_op_queue_mclock_osd_subop_lim", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(0.0)
+    .set_description("mclock limit of osd sub-operation requests")
+    .set_long_description("mclock limit of osd sub-operation requests when osd_op_queue is either 'mclock_opclass' or 'mclock_client'; higher values increase the limit")
+    .add_see_also("osd_op_queue")
+    .add_see_also("osd_op_queue_mclock_client_op_res")
+    .add_see_also("osd_op_queue_mclock_client_op_wgt")
+    .add_see_also("osd_op_queue_mclock_client_op_lim")
+    .add_see_also("osd_op_queue_mclock_osd_subop_res")
+    .add_see_also("osd_op_queue_mclock_osd_subop_wgt")
+    .add_see_also("osd_op_queue_mclock_snap_res")
+    .add_see_also("osd_op_queue_mclock_snap_wgt")
+    .add_see_also("osd_op_queue_mclock_snap_lim")
+    .add_see_also("osd_op_queue_mclock_recov_res")
+    .add_see_also("osd_op_queue_mclock_recov_wgt")
+    .add_see_also("osd_op_queue_mclock_recov_lim")
+    .add_see_also("osd_op_queue_mclock_scrub_res")
+    .add_see_also("osd_op_queue_mclock_scrub_wgt")
+    .add_see_also("osd_op_queue_mclock_scrub_lim"),
+
+    Option("osd_op_queue_mclock_snap_res", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(0.0)
+    .set_description("mclock reservation of snaptrim requests")
+    .set_long_description("mclock reservation of snaptrim requests when osd_op_queue is either 'mclock_opclass' or 'mclock_client'; higher values increase the reservation")
+    .add_see_also("osd_op_queue")
+    .add_see_also("osd_op_queue_mclock_client_op_res")
+    .add_see_also("osd_op_queue_mclock_client_op_wgt")
+    .add_see_also("osd_op_queue_mclock_client_op_lim")
+    .add_see_also("osd_op_queue_mclock_osd_subop_res")
+    .add_see_also("osd_op_queue_mclock_osd_subop_wgt")
+    .add_see_also("osd_op_queue_mclock_osd_subop_lim")
+    .add_see_also("osd_op_queue_mclock_snap_wgt")
+    .add_see_also("osd_op_queue_mclock_snap_lim")
+    .add_see_also("osd_op_queue_mclock_recov_res")
+    .add_see_also("osd_op_queue_mclock_recov_wgt")
+    .add_see_also("osd_op_queue_mclock_recov_lim")
+    .add_see_also("osd_op_queue_mclock_scrub_res")
+    .add_see_also("osd_op_queue_mclock_scrub_wgt")
+    .add_see_also("osd_op_queue_mclock_scrub_lim"),
+
+    Option("osd_op_queue_mclock_snap_wgt", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(1.0)
+    .set_description("mclock weight of snaptrim requests")
+    .set_long_description("mclock weight of snaptrim requests when osd_op_queue is either 'mclock_opclass' or 'mclock_client'; higher values increase the weight")
+    .add_see_also("osd_op_queue")
+    .add_see_also("osd_op_queue_mclock_client_op_res")
+    .add_see_also("osd_op_queue_mclock_client_op_wgt")
+    .add_see_also("osd_op_queue_mclock_client_op_lim")
+    .add_see_also("osd_op_queue_mclock_osd_subop_res")
+    .add_see_also("osd_op_queue_mclock_osd_subop_wgt")
+    .add_see_also("osd_op_queue_mclock_osd_subop_lim")
+    .add_see_also("osd_op_queue_mclock_snap_res")
+    .add_see_also("osd_op_queue_mclock_snap_lim")
+    .add_see_also("osd_op_queue_mclock_recov_res")
+    .add_see_also("osd_op_queue_mclock_recov_wgt")
+    .add_see_also("osd_op_queue_mclock_recov_lim")
+    .add_see_also("osd_op_queue_mclock_scrub_res")
+    .add_see_also("osd_op_queue_mclock_scrub_wgt")
+    .add_see_also("osd_op_queue_mclock_scrub_lim"),
+
+    Option("osd_op_queue_mclock_snap_lim", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(0.001)
+    .set_description("mclock limit of snaptrim requests")
+    .set_long_description("mclock limit of snaptrim requests when osd_op_queue is either 'mclock_opclass' or 'mclock_client'; higher values increase the limit")
+    .add_see_also("osd_op_queue")
+    .add_see_also("osd_op_queue_mclock_client_op_res")
+    .add_see_also("osd_op_queue_mclock_client_op_wgt")
+    .add_see_also("osd_op_queue_mclock_client_op_lim")
+    .add_see_also("osd_op_queue_mclock_osd_subop_res")
+    .add_see_also("osd_op_queue_mclock_osd_subop_wgt")
+    .add_see_also("osd_op_queue_mclock_osd_subop_lim")
+    .add_see_also("osd_op_queue_mclock_snap_res")
+    .add_see_also("osd_op_queue_mclock_snap_wgt")
+    .add_see_also("osd_op_queue_mclock_recov_res")
+    .add_see_also("osd_op_queue_mclock_recov_wgt")
+    .add_see_also("osd_op_queue_mclock_recov_lim")
+    .add_see_also("osd_op_queue_mclock_scrub_res")
+    .add_see_also("osd_op_queue_mclock_scrub_wgt")
+    .add_see_also("osd_op_queue_mclock_scrub_lim"),
+
+    Option("osd_op_queue_mclock_recov_res", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(0.0)
+    .set_description("mclock reservation of recovery requests")
+    .set_long_description("mclock reservation of recovery requests when osd_op_queue is either 'mclock_opclass' or 'mclock_client'; higher values increase the reservation")
+    .add_see_also("osd_op_queue")
+    .add_see_also("osd_op_queue_mclock_client_op_res")
+    .add_see_also("osd_op_queue_mclock_client_op_wgt")
+    .add_see_also("osd_op_queue_mclock_client_op_lim")
+    .add_see_also("osd_op_queue_mclock_osd_subop_res")
+    .add_see_also("osd_op_queue_mclock_osd_subop_wgt")
+    .add_see_also("osd_op_queue_mclock_osd_subop_lim")
+    .add_see_also("osd_op_queue_mclock_snap_res")
+    .add_see_also("osd_op_queue_mclock_snap_wgt")
+    .add_see_also("osd_op_queue_mclock_snap_lim")
+    .add_see_also("osd_op_queue_mclock_recov_wgt")
+    .add_see_also("osd_op_queue_mclock_recov_lim")
+    .add_see_also("osd_op_queue_mclock_scrub_res")
+    .add_see_also("osd_op_queue_mclock_scrub_wgt")
+    .add_see_also("osd_op_queue_mclock_scrub_lim"),
+
+    Option("osd_op_queue_mclock_recov_wgt", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(1.0)
+    .set_description("mclock weight of recovery requests")
+    .set_long_description("mclock weight of recovery requests when osd_op_queue is either 'mclock_opclass' or 'mclock_client'; higher values increase the weight")
+    .add_see_also("osd_op_queue")
+    .add_see_also("osd_op_queue_mclock_client_op_res")
+    .add_see_also("osd_op_queue_mclock_client_op_wgt")
+    .add_see_also("osd_op_queue_mclock_client_op_lim")
+    .add_see_also("osd_op_queue_mclock_osd_subop_res")
+    .add_see_also("osd_op_queue_mclock_osd_subop_wgt")
+    .add_see_also("osd_op_queue_mclock_osd_subop_lim")
+    .add_see_also("osd_op_queue_mclock_snap_res")
+    .add_see_also("osd_op_queue_mclock_snap_wgt")
+    .add_see_also("osd_op_queue_mclock_snap_lim")
+    .add_see_also("osd_op_queue_mclock_recov_res")
+    .add_see_also("osd_op_queue_mclock_recov_lim")
+    .add_see_also("osd_op_queue_mclock_scrub_res")
+    .add_see_also("osd_op_queue_mclock_scrub_wgt")
+    .add_see_also("osd_op_queue_mclock_scrub_lim"),
+
+    Option("osd_op_queue_mclock_recov_lim", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(0.001)
+    .set_description("mclock limit of recovery requests")
+    .set_long_description("mclock limit of recovery requests when osd_op_queue is either 'mclock_opclass' or 'mclock_client'; higher values increase the limit")
+    .add_see_also("osd_op_queue")
+    .add_see_also("osd_op_queue_mclock_client_op_res")
+    .add_see_also("osd_op_queue_mclock_client_op_wgt")
+    .add_see_also("osd_op_queue_mclock_client_op_lim")
+    .add_see_also("osd_op_queue_mclock_osd_subop_res")
+    .add_see_also("osd_op_queue_mclock_osd_subop_wgt")
+    .add_see_also("osd_op_queue_mclock_osd_subop_lim")
+    .add_see_also("osd_op_queue_mclock_snap_res")
+    .add_see_also("osd_op_queue_mclock_snap_wgt")
+    .add_see_also("osd_op_queue_mclock_snap_lim")
+    .add_see_also("osd_op_queue_mclock_recov_res")
+    .add_see_also("osd_op_queue_mclock_recov_wgt")
+    .add_see_also("osd_op_queue_mclock_scrub_res")
+    .add_see_also("osd_op_queue_mclock_scrub_wgt")
+    .add_see_also("osd_op_queue_mclock_scrub_lim"),
+
+    Option("osd_op_queue_mclock_scrub_res", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(0.0)
+    .set_description("mclock reservation of scrub requests")
+    .set_long_description("mclock reservation of scrub requests when osd_op_queue is either 'mclock_opclass' or 'mclock_client'; higher values increase the reservation")
+    .add_see_also("osd_op_queue")
+    .add_see_also("osd_op_queue_mclock_client_op_res")
+    .add_see_also("osd_op_queue_mclock_client_op_wgt")
+    .add_see_also("osd_op_queue_mclock_client_op_lim")
+    .add_see_also("osd_op_queue_mclock_osd_subop_res")
+    .add_see_also("osd_op_queue_mclock_osd_subop_wgt")
+    .add_see_also("osd_op_queue_mclock_osd_subop_lim")
+    .add_see_also("osd_op_queue_mclock_snap_res")
+    .add_see_also("osd_op_queue_mclock_snap_wgt")
+    .add_see_also("osd_op_queue_mclock_snap_lim")
+    .add_see_also("osd_op_queue_mclock_recov_res")
+    .add_see_also("osd_op_queue_mclock_recov_wgt")
+    .add_see_also("osd_op_queue_mclock_recov_lim")
+    .add_see_also("osd_op_queue_mclock_scrub_wgt")
+    .add_see_also("osd_op_queue_mclock_scrub_lim"),
+
+    Option("osd_op_queue_mclock_scrub_wgt", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(1.0)
+    .set_description("mclock weight of scrub requests")
+    .set_long_description("mclock weight of scrub requests when osd_op_queue is either 'mclock_opclass' or 'mclock_client'; higher values increase the weight")
+    .add_see_also("osd_op_queue")
+    .add_see_also("osd_op_queue_mclock_client_op_res")
+    .add_see_also("osd_op_queue_mclock_client_op_wgt")
+    .add_see_also("osd_op_queue_mclock_client_op_lim")
+    .add_see_also("osd_op_queue_mclock_osd_subop_res")
+    .add_see_also("osd_op_queue_mclock_osd_subop_wgt")
+    .add_see_also("osd_op_queue_mclock_osd_subop_lim")
+    .add_see_also("osd_op_queue_mclock_snap_res")
+    .add_see_also("osd_op_queue_mclock_snap_wgt")
+    .add_see_also("osd_op_queue_mclock_snap_lim")
+    .add_see_also("osd_op_queue_mclock_recov_res")
+    .add_see_also("osd_op_queue_mclock_recov_wgt")
+    .add_see_also("osd_op_queue_mclock_recov_lim")
+    .add_see_also("osd_op_queue_mclock_scrub_res")
+    .add_see_also("osd_op_queue_mclock_scrub_lim"),
+
+    Option("osd_op_queue_mclock_scrub_lim", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(0.001)
+    .set_description("mclock limit of scrub requests")
+    .set_long_description("mclock limit of scrub requests when osd_op_queue is either 'mclock_opclass' or 'mclock_client'; higher values increase the limit")
+    .add_see_also("osd_op_queue")
+    .add_see_also("osd_op_queue_mclock_client_op_res")
+    .add_see_also("osd_op_queue_mclock_client_op_wgt")
+    .add_see_also("osd_op_queue_mclock_client_op_lim")
+    .add_see_also("osd_op_queue_mclock_osd_subop_res")
+    .add_see_also("osd_op_queue_mclock_osd_subop_wgt")
+    .add_see_also("osd_op_queue_mclock_osd_subop_lim")
+    .add_see_also("osd_op_queue_mclock_snap_res")
+    .add_see_also("osd_op_queue_mclock_snap_wgt")
+    .add_see_also("osd_op_queue_mclock_snap_lim")
+    .add_see_also("osd_op_queue_mclock_recov_res")
+    .add_see_also("osd_op_queue_mclock_recov_wgt")
+    .add_see_also("osd_op_queue_mclock_recov_lim")
+    .add_see_also("osd_op_queue_mclock_scrub_res")
+    .add_see_also("osd_op_queue_mclock_scrub_wgt"),
+
+    Option("osd_ignore_stale_divergent_priors", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
+
+    Option("osd_read_ec_check_for_errors", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
+
+    Option("osd_recover_clone_overlap_limit", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(10)
+    .set_description(""),
+
+    Option("osd_backfill_scan_min", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(64)
+    .set_description(""),
+
+    Option("osd_backfill_scan_max", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(512)
+    .set_description(""),
+
+    Option("osd_op_thread_timeout", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(15)
+    .set_description(""),
+
+    Option("osd_op_thread_suicide_timeout", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(150)
+    .set_description(""),
+
+    Option("osd_recovery_thread_timeout", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(30)
+    .set_description(""),
+
+    Option("osd_recovery_thread_suicide_timeout", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(300)
+    .set_description(""),
+
+    Option("osd_recovery_sleep", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description("Time in seconds to sleep before next recovery or backfill op"),
+
+    Option("osd_recovery_sleep_hdd", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(0.1)
+    .set_description("Time in seconds to sleep before next recovery or backfill op for HDDs"),
+
+    Option("osd_recovery_sleep_ssd", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description("Time in seconds to sleep before next recovery or backfill op for SSDs"),
+
+    Option("osd_recovery_sleep_hybrid", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(0.025)
+    .set_description("Time in seconds to sleep before next recovery or backfill op when data is on HDD and journal is on SSD"),
+
+    Option("osd_snap_trim_sleep", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
+
+    Option("osd_scrub_invalid_stats", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
+
+    Option("osd_remove_thread_timeout", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(60*60)
+    .set_description(""),
+
+    Option("osd_remove_thread_suicide_timeout", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(10*60*60)
+    .set_description(""),
+
+    Option("osd_command_thread_timeout", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(10*60)
+    .set_description(""),
+
+    Option("osd_command_thread_suicide_timeout", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(15*60)
+    .set_description(""),
+
+    Option("osd_heartbeat_addr", Option::TYPE_ADDR, Option::LEVEL_ADVANCED)
+    .set_default(entity_addr_t())
+    .set_description(""),
+
+    Option("osd_heartbeat_interval", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(6)
+    .set_description(""),
+
+    Option("osd_heartbeat_grace", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(20)
+    .set_description(""),
+
+    Option("osd_heartbeat_min_peers", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(10)
+    .set_description(""),
+
+    Option("osd_heartbeat_use_min_delay_socket", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("osd_map_message_max", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(40)
-  .set_description(""),
+    Option("osd_heartbeat_min_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(2000)
+    .set_description(""),
 
-  Option("osd_map_share_max_epochs", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(40)
-  .set_description(""),
+    Option("osd_pg_max_concurrent_snap_trims", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(2)
+    .set_description(""),
 
-  Option("osd_inject_bad_map_crc_probability", Option::TYPE_FLOAT, Option::LEVEL_DEV)
-  .set_default(0)
-  .set_description(""),
+    Option("osd_max_trimming_pgs", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(2)
+    .set_description(""),
 
-  Option("osd_inject_failure_on_pg_removal", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .set_description(""),
+    Option("osd_heartbeat_min_healthy_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(.33)
+    .set_description(""),
 
-  Option("osd_max_markdown_period", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(600)
-  .set_description(""),
+    Option("osd_mon_heartbeat_interval", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(30)
+    .set_description(""),
 
-  Option("osd_max_markdown_count", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(5)
-  .set_description(""),
+    Option("osd_mon_report_interval_max", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(600)
+    .set_description(""),
 
-  Option("osd_peering_wq_threads", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(2)
-  .set_description(""),
+    Option("osd_mon_report_interval_min", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(5)
+    .set_description(""),
 
-  Option("osd_peering_wq_batch_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(20)
-  .set_description(""),
+    Option("osd_mon_report_max_in_flight", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(2)
+    .set_description(""),
 
-  Option("osd_op_pq_max_tokens_per_priority", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(4194304)
-  .set_description(""),
+    Option("osd_beacon_report_interval", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(300)
+    .set_description(""),
 
-  Option("osd_op_pq_min_cost", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(65536)
-  .set_description(""),
+    Option("osd_pg_stat_report_interval_max", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(500)
+    .set_description(""),
 
-  Option("osd_disk_threads", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(1)
-  .set_description(""),
+    Option("osd_mon_ack_timeout", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(30.0)
+    .set_description(""),
 
-  Option("osd_disk_thread_ioprio_class", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
+    Option("osd_stats_ack_timeout_factor", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(2.0)
+    .set_description(""),
 
-  Option("osd_disk_thread_ioprio_priority", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(-1)
-  .set_description(""),
+    Option("osd_stats_ack_timeout_decay", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(.9)
+    .set_description(""),
 
-  Option("osd_recover_clone_overlap", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("osd_default_data_pool_replay_window", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(45)
+    .set_description(""),
 
-  Option("osd_op_num_threads_per_shard", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("osd_auto_mark_unfound_lost", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("osd_op_num_threads_per_shard_hdd", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(1)
-  .set_description(""),
+    Option("osd_recovery_delay_start", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
 
-  Option("osd_op_num_threads_per_shard_ssd", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(2)
-  .set_description(""),
+    Option("osd_recovery_max_active", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(3)
+    .set_description(""),
 
-  Option("osd_op_num_shards", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("osd_recovery_max_single_start", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(1)
+    .set_description(""),
 
-  Option("osd_op_num_shards_hdd", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(5)
-  .set_description(""),
+    Option("osd_recovery_max_chunk", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(8<<20)
+    .set_description(""),
 
-  Option("osd_op_num_shards_ssd", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(8)
-  .set_description(""),
+    Option("osd_recovery_max_omap_entries_per_chunk", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(64000)
+    .set_description(""),
 
-  Option("osd_op_queue", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("wpq")
-  .set_description(""),
+    Option("osd_copyfrom_max_chunk", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(8<<20)
+    .set_description(""),
 
-  Option("osd_op_queue_cut_off", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("low")
-  .set_description(""),
+    Option("osd_push_per_object_cost", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(1000)
+    .set_description(""),
 
-  Option("osd_op_queue_mclock_client_op_res", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(1000.0)
-  .set_description(""),
-
-  Option("osd_op_queue_mclock_client_op_wgt", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(500.0)
-  .set_description(""),
-
-  Option("osd_op_queue_mclock_client_op_lim", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(0.0)
-  .set_description(""),
-
-  Option("osd_op_queue_mclock_osd_subop_res", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(1000.0)
-  .set_description(""),
-
-  Option("osd_op_queue_mclock_osd_subop_wgt", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(500.0)
-  .set_description(""),
-
-  Option("osd_op_queue_mclock_osd_subop_lim", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(0.0)
-  .set_description(""),
-
-  Option("osd_op_queue_mclock_snap_res", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(0.0)
-  .set_description(""),
-
-  Option("osd_op_queue_mclock_snap_wgt", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(1.0)
-  .set_description(""),
-
-  Option("osd_op_queue_mclock_snap_lim", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(0.001)
-  .set_description(""),
-
-  Option("osd_op_queue_mclock_recov_res", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(0.0)
-  .set_description(""),
-
-  Option("osd_op_queue_mclock_recov_wgt", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(1.0)
-  .set_description(""),
-
-  Option("osd_op_queue_mclock_recov_lim", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(0.001)
-  .set_description(""),
-
-  Option("osd_op_queue_mclock_scrub_res", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(0.0)
-  .set_description(""),
-
-  Option("osd_op_queue_mclock_scrub_wgt", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(1.0)
-  .set_description(""),
-
-  Option("osd_op_queue_mclock_scrub_lim", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(0.001)
-  .set_description(""),
-
-  Option("osd_ignore_stale_divergent_priors", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
-
-  Option("osd_read_ec_check_for_errors", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
-
-  Option("osd_recover_clone_overlap_limit", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(10)
-  .set_description(""),
-
-  Option("osd_backfill_scan_min", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(64)
-  .set_description(""),
-
-  Option("osd_backfill_scan_max", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(512)
-  .set_description(""),
-
-  Option("osd_op_thread_timeout", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(15)
-  .set_description(""),
-
-  Option("osd_op_thread_suicide_timeout", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(150)
-  .set_description(""),
-
-  Option("osd_recovery_thread_timeout", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(30)
-  .set_description(""),
-
-  Option("osd_recovery_thread_suicide_timeout", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(300)
-  .set_description(""),
-
-  Option("osd_recovery_sleep", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(0)                                                              
-  .set_description(""),                                                        
-                                                                               
-  Option("osd_recovery_sleep_hdd", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED) 
-  .set_default(0.1)                                                            
-  .set_description(""),                                                        
-                                                                               
-  Option("osd_recovery_sleep_ssd", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED) 
-  .set_default(0)   
-  .set_description(""),
-
-  Option("osd_snap_trim_sleep", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
-
-  Option("osd_scrub_invalid_stats", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
-
-  Option("osd_remove_thread_timeout", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(60*60)
-  .set_description(""),
-
-  Option("osd_remove_thread_suicide_timeout", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(10*60*60)
-  .set_description(""),
-
-  Option("osd_command_thread_timeout", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(10*60)
-  .set_description(""),
-
-  Option("osd_command_thread_suicide_timeout", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(15*60)
-  .set_description(""),
-
-  Option("osd_heartbeat_addr", Option::TYPE_ADDR, Option::LEVEL_ADVANCED)
-  .set_default(entity_addr_t())
-  .set_description(""),
-
-  Option("osd_heartbeat_interval", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(6)
-  .set_description(""),
-
-  Option("osd_heartbeat_grace", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(20)
-  .set_description(""),
-
-  Option("osd_heartbeat_min_peers", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(10)
-  .set_description(""),
-
-  Option("osd_heartbeat_use_min_delay_socket", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
-
-  Option("osd_heartbeat_min_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(2000)
-  .set_description(""),
-
-  Option("osd_pg_max_concurrent_snap_trims", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(2)
-  .set_description(""),
-
-  Option("osd_max_trimming_pgs", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(2)
-  .set_description(""),
-
-  Option("osd_heartbeat_min_healthy_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(.33)
-  .set_description(""),
-
-  Option("osd_mon_heartbeat_interval", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(30)
-  .set_description(""),
-
-  Option("osd_mon_report_interval_max", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(600)
-  .set_description(""),
-
-  Option("osd_mon_report_interval_min", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(5)
-  .set_description(""),
-
-  Option("osd_mon_report_max_in_flight", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(2)
-  .set_description(""),
-
-  Option("osd_beacon_report_interval", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(300)
-  .set_description(""),
-
-  Option("osd_pg_stat_report_interval_max", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(500)
-  .set_description(""),
-
-  Option("osd_mon_ack_timeout", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(30.0)
-  .set_description(""),
-
-  Option("osd_stats_ack_timeout_factor", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(2.0)
-  .set_description(""),
-
-  Option("osd_stats_ack_timeout_decay", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(.9)
-  .set_description(""),
+    Option("osd_max_push_cost", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(8<<20)
+    .set_description(""),
 
-  Option("osd_default_data_pool_replay_window", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(45)
-  .set_description(""),
+    Option("osd_max_push_objects", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(10)
+    .set_description(""),
 
-  Option("osd_auto_mark_unfound_lost", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("osd_recovery_forget_lost_objects", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("osd_recovery_delay_start", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("osd_max_scrubs", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(1)
+    .set_description(""),
 
-  Option("osd_recovery_max_active", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(3)
-  .set_description(""),
+    Option("osd_scrub_during_recovery", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("osd_recovery_max_single_start", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(1)
-  .set_description(""),
+    Option("osd_scrub_begin_hour", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
 
-  Option("osd_recovery_max_chunk", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(8<<20)
-  .set_description(""),
+    Option("osd_scrub_end_hour", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(24)
+    .set_description(""),
 
-  Option("osd_recovery_max_omap_entries_per_chunk", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(64000)
-  .set_description(""),
+    Option("osd_scrub_load_threshold", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(0.5)
+    .set_description(""),
 
-  Option("osd_copyfrom_max_chunk", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(8<<20)
-  .set_description(""),
+    Option("osd_scrub_min_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(60*60*24)
+    .set_description(""),
+
+    Option("osd_scrub_max_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(7*60*60*24)
+    .set_description(""),
+
+    Option("osd_scrub_interval_randomize_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(0.5)
+    .set_description(""),
+
+    Option("osd_scrub_backoff_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(.66)
+    .set_description(""),
+
+    Option("osd_scrub_chunk_min", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(5)
+    .set_description(""),
+
+    Option("osd_scrub_chunk_max", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(25)
+    .set_description(""),
+
+    Option("osd_scrub_sleep", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
+
+    Option("osd_scrub_auto_repair", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
+
+    Option("osd_scrub_auto_repair_num_errors", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(5)
+    .set_description(""),
+
+    Option("osd_deep_scrub_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(60*60*24*7)
+    .set_description(""),
+
+    Option("osd_deep_scrub_randomize_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(0.15)
+    .set_description(""),
+
+    Option("osd_deep_scrub_stride", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(524288)
+    .set_description(""),
+
+    Option("osd_deep_scrub_update_digest_min_age", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(2*60*60)
+    .set_description(""),
+
+    Option("osd_class_dir", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default(CEPH_LIBDIR "/rados-classes")
+    .set_description(""),
+
+    Option("osd_open_classes_on_start", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
+
+    Option("osd_class_load_list", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("cephfs hello journal lock log numops " "rbd refcount replica_log rgw statelog timeindex user version")
+    .set_description(""),
+
+    Option("osd_class_default_list", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("cephfs hello journal lock log numops " "rbd refcount replica_log rgw statelog timeindex user version")
+    .set_description(""),
+
+    Option("osd_check_for_log_corruption", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
+
+    Option("osd_use_stale_snap", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
+
+    Option("osd_rollback_to_cluster_snap", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
+
+    Option("osd_default_notify_timeout", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(30)
+    .set_description(""),
+
+    Option("osd_kill_backfill_at", Option::TYPE_INT, Option::LEVEL_DEV)
+    .set_default(0)
+    .set_description(""),
+
+    Option("osd_pg_epoch_persisted_max_stale", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(40)
+    .set_description(""),
+
+    Option("osd_min_pg_log_entries", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(1500)
+    .set_description("minimum number of entries to maintain in the PG log")
+    .add_service("osd")
+    .add_see_also("osd_max_pg_log_entries")
+    .add_see_also("osd_pg_log_dups_tracked"),
+
+    Option("osd_max_pg_log_entries", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(10000)
+    .set_description("maximum number of entries to maintain in the PG log when degraded before we trim")
+    .add_service("osd")
+    .add_see_also("osd_min_pg_log_entries")
+    .add_see_also("osd_pg_log_dups_tracked"),
+
+    Option("osd_pg_log_dups_tracked", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(3000)
+    .set_description("how many versions back to track in order to detect duplicate ops; this is combined with both the regular pg log entries and additional minimal dup detection entries")
+    .add_service("osd")
+    .add_see_also("osd_min_pg_log_entries")
+    .add_see_also("osd_max_pg_log_entries"),
+
+    Option("osd_force_recovery_pg_log_entries_factor", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(1.3)
+    .set_description(""),
+
+    Option("osd_pg_log_trim_min", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(100)
+    .set_description(""),
+
+    Option("osd_op_complaint_time", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(30)
+    .set_description(""),
+
+    Option("osd_command_max_records", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(256)
+    .set_description(""),
+
+    Option("osd_max_pg_blocked_by", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(16)
+    .set_description(""),
+
+    Option("osd_op_log_threshold", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(5)
+    .set_description(""),
+
+    Option("osd_verify_sparse_read_holes", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
+
+    Option("osd_backoff_on_unfound", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
+
+    Option("osd_backoff_on_degraded", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
+
+    Option("osd_backoff_on_down", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
+
+    Option("osd_backoff_on_peering", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
+
+    Option("osd_debug_crash_on_ignored_backoff", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description(""),
+
+    Option("osd_debug_inject_dispatch_delay_probability", Option::TYPE_FLOAT, Option::LEVEL_DEV)
+    .set_default(0)
+    .set_description(""),
+
+    Option("osd_debug_inject_dispatch_delay_duration", Option::TYPE_FLOAT, Option::LEVEL_DEV)
+    .set_default(.1)
+    .set_description(""),
+
+    Option("osd_debug_drop_ping_probability", Option::TYPE_FLOAT, Option::LEVEL_DEV)
+    .set_default(0)
+    .set_description(""),
+
+    Option("osd_debug_drop_ping_duration", Option::TYPE_INT, Option::LEVEL_DEV)
+    .set_default(0)
+    .set_description(""),
+
+    Option("osd_debug_op_order", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description(""),
+
+    Option("osd_debug_verify_missing_on_start", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description(""),
+
+    Option("osd_debug_scrub_chance_rewrite_digest", Option::TYPE_UINT, Option::LEVEL_DEV)
+    .set_default(0)
+    .set_description(""),
+
+    Option("osd_debug_verify_snaps_on_info", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description(""),
+
+    Option("osd_debug_verify_stray_on_activate", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description(""),
+
+    Option("osd_debug_skip_full_check_in_backfill_reservation", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description(""),
+
+    Option("osd_debug_reject_backfill_probability", Option::TYPE_FLOAT, Option::LEVEL_DEV)
+    .set_default(0)
+    .set_description(""),
 
-  Option("osd_push_per_object_cost", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(1000)
-  .set_description(""),
+    Option("osd_debug_inject_copyfrom_error", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description(""),
 
-  Option("osd_max_push_cost", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(8<<20)
-  .set_description(""),
+    Option("osd_debug_misdirected_ops", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description(""),
 
-  Option("osd_max_push_objects", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(10)
-  .set_description(""),
+    Option("osd_debug_skip_full_check_in_recovery", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description(""),
 
-  Option("osd_recovery_forget_lost_objects", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("osd_debug_random_push_read_error", Option::TYPE_FLOAT, Option::LEVEL_DEV)
+    .set_default(0)
+    .set_description(""),
 
-  Option("osd_max_scrubs", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(1)
-  .set_description(""),
+    Option("osd_debug_verify_cached_snaps", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description(""),
 
-  Option("osd_scrub_during_recovery", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("osd_enable_op_tracker", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("osd_scrub_begin_hour", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("osd_num_op_tracker_shard", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(32)
+    .set_description(""),
 
-  Option("osd_scrub_end_hour", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(24)
-  .set_description(""),
+    Option("osd_op_history_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(20)
+    .set_description(""),
 
-  Option("osd_scrub_load_threshold", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(0.5)
-  .set_description(""),
+    Option("osd_op_history_duration", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(600)
+    .set_description(""),
 
-  Option("osd_scrub_min_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(60*60*24)
-  .set_description(""),
-
-  Option("osd_scrub_max_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(7*60*60*24)
-  .set_description(""),
-
-  Option("osd_scrub_interval_randomize_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(0.5)
-  .set_description(""),
-
-  Option("osd_scrub_backoff_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(.66)
-  .set_description(""),
-
-  Option("osd_scrub_chunk_min", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(5)
-  .set_description(""),
-
-  Option("osd_scrub_chunk_max", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(25)
-  .set_description(""),
-
-  Option("osd_scrub_sleep", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
-
-  Option("osd_scrub_auto_repair", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
-
-  Option("osd_scrub_auto_repair_num_errors", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(5)
-  .set_description(""),
-
-  Option("osd_deep_scrub_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(60*60*24*7)
-  .set_description(""),
-
-  Option("osd_deep_scrub_randomize_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(0.15)
-  .set_description(""),
-
-  Option("osd_deep_scrub_stride", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(524288)
-  .set_description(""),
-
-  Option("osd_deep_scrub_update_digest_min_age", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(2*60*60)
-  .set_description(""),
-
-  Option("osd_class_dir", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default(CEPH_LIBDIR "/rados-classes")
-  .set_description(""),
-
-  Option("osd_open_classes_on_start", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
-
-  Option("osd_class_load_list", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("cephfs hello journal lock log numops " "rbd refcount replica_log rgw statelog timeindex user version")
-  .set_description(""),
-
-  Option("osd_class_default_list", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("cephfs hello journal lock log numops " "rbd refcount replica_log rgw statelog timeindex user version")
-  .set_description(""),
-
-  Option("osd_check_for_log_corruption", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
-
-  Option("osd_use_stale_snap", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
-
-  Option("osd_rollback_to_cluster_snap", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
-
-  Option("osd_default_notify_timeout", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(30)
-  .set_description(""),
-
-  Option("osd_kill_backfill_at", Option::TYPE_INT, Option::LEVEL_DEV)
-  .set_default(0)
-  .set_description(""),
-
-  Option("osd_pg_epoch_persisted_max_stale", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(40)
-  .set_description(""),
-
-  Option("osd_min_pg_log_entries", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(1500)
-  .set_description("minimum number of entries to maintain in the PG log")
-  .add_service("osd")
-  .add_see_also("osd_max_pg_log_entries")
-  .add_see_also("osd_pg_log_dups_tracked"),
-
-  Option("osd_max_pg_log_entries", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(10000)
-  .set_description("maximum number of entries to maintain in the PG log when degraded before we trim")
-  .add_service("osd")
-  .add_see_also("osd_min_pg_log_entries")
-  .add_see_also("osd_pg_log_dups_tracked"),
-
-  Option("osd_pg_log_dups_tracked", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(3000)
-  .set_description("how many versions back to track in order to detect duplicate ops; this is combined with both the regular pg log entries and additional minimal dup detection entries")
-  .add_service("osd")
-  .add_see_also("osd_min_pg_log_entries")
-  .add_see_also("osd_max_pg_log_entries"),
-
-  Option("osd_force_recovery_pg_log_entries_factor", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(1.3)
-  .set_description(""),
-
-  Option("osd_pg_log_trim_min", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(100)
-  .set_description(""),
-
-  Option("osd_op_complaint_time", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(30)
-  .set_description(""),
-
-  Option("osd_command_max_records", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(256)
-  .set_description(""),
-
-  Option("osd_max_pg_blocked_by", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(16)
-  .set_description(""),
-
-  Option("osd_op_log_threshold", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(5)
-  .set_description(""),
-
-  Option("osd_verify_sparse_read_holes", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
-
-  Option("osd_backoff_on_unfound", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
-
-  Option("osd_backoff_on_degraded", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
-
-  Option("osd_backoff_on_down", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
-
-  Option("osd_backoff_on_peering", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
-
-  Option("osd_debug_crash_on_ignored_backoff", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .set_description(""),
-
-  Option("osd_debug_inject_dispatch_delay_probability", Option::TYPE_FLOAT, Option::LEVEL_DEV)
-  .set_default(0)
-  .set_description(""),
-
-  Option("osd_debug_inject_dispatch_delay_duration", Option::TYPE_FLOAT, Option::LEVEL_DEV)
-  .set_default(.1)
-  .set_description(""),
-
-  Option("osd_debug_drop_ping_probability", Option::TYPE_FLOAT, Option::LEVEL_DEV)
-  .set_default(0)
-  .set_description(""),
-
-  Option("osd_debug_drop_ping_duration", Option::TYPE_INT, Option::LEVEL_DEV)
-  .set_default(0)
-  .set_description(""),
-
-  Option("osd_debug_op_order", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .set_description(""),
-
-  Option("osd_debug_verify_missing_on_start", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .set_description(""),
-
-  Option("osd_debug_scrub_chance_rewrite_digest", Option::TYPE_UINT, Option::LEVEL_DEV)
-  .set_default(0)
-  .set_description(""),
-
-  Option("osd_debug_verify_snaps_on_info", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .set_description(""),
-
-  Option("osd_debug_verify_stray_on_activate", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .set_description(""),
-
-  Option("osd_debug_skip_full_check_in_backfill_reservation", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .set_description(""),
-
-  Option("osd_debug_reject_backfill_probability", Option::TYPE_FLOAT, Option::LEVEL_DEV)
-  .set_default(0)
-  .set_description(""),
+    Option("osd_op_history_slow_op_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(20)
+    .set_description(""),
 
-  Option("osd_debug_inject_copyfrom_error", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .set_description(""),
+    Option("osd_op_history_slow_op_threshold", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(10.0)
+    .set_description(""),
 
-  Option("osd_debug_misdirected_ops", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .set_description(""),
+    Option("osd_target_transaction_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(30)
+    .set_description(""),
 
-  Option("osd_debug_skip_full_check_in_recovery", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .set_description(""),
+    Option("osd_failsafe_full_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(.97)
+    .set_description(""),
 
-  Option("osd_debug_random_push_read_error", Option::TYPE_FLOAT, Option::LEVEL_DEV)
-  .set_default(0)
-  .set_description(""),
+    Option("osd_fast_fail_on_connection_refused", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("osd_debug_verify_cached_snaps", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .set_description(""),
+    Option("osd_pg_object_context_cache_count", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(64)
+    .set_description(""),
 
-  Option("osd_enable_op_tracker", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("osd_tracing", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("osd_num_op_tracker_shard", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(32)
-  .set_description(""),
+    Option("osd_function_tracing", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("osd_op_history_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(20)
-  .set_description(""),
+    Option("osd_fast_info", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("osd_op_history_duration", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(600)
-  .set_description(""),
+    Option("osd_debug_pg_log_writeout", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description(""),
 
-  Option("osd_op_history_slow_op_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(20)
-  .set_description(""),
+    Option("osd_loop_before_reset_tphandle", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(64)
+    .set_description(""),
 
-  Option("osd_op_history_slow_op_threshold", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(10.0)
-  .set_description(""),
+    Option("threadpool_default_timeout", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(60)
+    .set_description(""),
 
-  Option("osd_target_transaction_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(30)
-  .set_description(""),
+    Option("threadpool_empty_queue_max_wait", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(2)
+    .set_description(""),
 
-  Option("osd_failsafe_full_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(.97)
-  .set_description(""),
+    Option("leveldb_log_to_ceph_log", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("osd_fast_fail_on_connection_refused", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("leveldb_write_buffer_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(8 *1024*1024)
+    .set_description(""),
 
-  Option("osd_pg_object_context_cache_count", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(64)
-  .set_description(""),
+    Option("leveldb_cache_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(128 *1024*1024)
+    .set_description(""),
 
-  Option("osd_tracing", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("leveldb_block_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
 
-  Option("osd_function_tracing", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("leveldb_bloom_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
 
-  Option("osd_fast_info", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("leveldb_max_open_files", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
 
-  Option("osd_debug_pg_log_writeout", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .set_description(""),
+    Option("leveldb_compression", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("osd_loop_before_reset_tphandle", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(64)
-  .set_description(""),
+    Option("leveldb_paranoid", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
+
+    Option("leveldb_log", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("/dev/null")
+    .set_description(""),
+
+    Option("leveldb_compact_on_mount", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
+
+    Option("kinetic_host", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
+
+    Option("kinetic_port", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(8123)
+    .set_description(""),
+
+    Option("kinetic_user_id", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(1)
+    .set_description(""),
+
+    Option("kinetic_hmac_key", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("asdfasdf")
+    .set_description(""),
+
+    Option("kinetic_use_ssl", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
+
+    Option("rocksdb_separate_wal_dir", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
+
+    Option("rocksdb_db_paths", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description("")
+    .set_safe(),
+
+    Option("rocksdb_log_to_ceph_log", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
+
+    Option("rocksdb_cache_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(128*1024*1024)
+    .set_description(""),
+
+    Option("rocksdb_cache_row_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
+
+    Option("rocksdb_cache_shard_bits", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(4)
+    .set_description(""),
+
+    Option("rocksdb_cache_type", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("lru")
+    .set_description(""),
+
+    Option("rocksdb_block_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(4*1024)
+    .set_description(""),
+
+    Option("rocksdb_perf", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
+
+    Option("rocksdb_collect_compaction_stats", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
+
+    Option("rocksdb_collect_extended_stats", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
+
+    Option("rocksdb_collect_memory_stats", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
+
+    Option("rocksdb_enable_rmrange", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
+
+    Option("rocksdb_bloom_bits_per_key", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(20)
+    .set_description("Number of bits per key to use for RocksDB's bloom filters.")
+    .set_long_description("RocksDB bloom filters can be used to quickly answer the question of whether or not a key may exist or definitely does not exist in a given RocksDB SST file without having to read all keys into memory.  Using a higher bit value decreases the likelihood of false positives at the expense of additional disk space and memory consumption when the filter is loaded into RAM.  The current default value of 20 was found to provide significant performance gains when getattr calls are made (such as during new object creation in bluestore) without significant memory overhead or cache pollution when combined with rocksdb partitioned index filters.  See: https://github.com/facebook/rocksdb/wiki/Partitioned-Index-Filters for more information."),
+
+    Option("rocksdb_cache_index_and_filter_blocks", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(true)
+    .set_description("Whether to cache indices and filters in block cache")
+    .set_long_description("By default RocksDB will load an SST file's index and bloom filters into memory when it is opened and remove them from memory when an SST file is closed.  Thus, memory consumption by indices and bloom filters is directly tied to the number of concurrent SST files allowed to be kept open.  This option instead stores cached indicies and filters in the block cache where they directly compete with other cached data.  By default we set this option to true to better account for and bound rocksdb memory usage and keep filters in memory even when an SST file is closed."),
+
+    Option("rocksdb_cache_index_and_filter_blocks_with_high_priority", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(true)
+    .set_description("Whether to cache indices and filters in the block cache with high priority")
+    .set_long_description("A downside of setting rocksdb_cache_index_and_filter_blocks to true is that regular data can push indices and filters out of memory.  Setting this option to true means they are cached with higher priority than other data and should typically stay in the block cache."),
+
+    Option("rocksdb_pin_l0_filter_and_index_blocks_in_cache", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(true)
+    .set_description("Whether to pin Level 0 indices and bloom filters in the block cache")
+    .set_long_description("A downside of setting rocksdb_cache_index_and_filter_blocks to true is that regular data can push indices and filters out of memory.  Setting this option to true means that level 0 SST files will always have their indices and filters pinned in the block cache."),
+
+    Option("rocksdb_index_type", Option::TYPE_STR, Option::LEVEL_DEV)
+    .set_default("binary_search")
+    .set_description("Type of index for SST files: binary_search, hash_search, two_level")
+    .set_long_description("This option controls the table index type.  binary_search is a space efficient index block that is optimized for block-search-based index. hash_search may improve prefix lookup performance at the expense of higher disk and memory usage and potentially slower compactions.  two_level is an experimental index type that uses two binary search indexes and works in conjunction with partition filters.  See: http://rocksdb.org/blog/2017/05/12/partitioned-index-filter.html"),
+
+    Option("rocksdb_partition_filters", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description("(experimental) partition SST index/filters into smaller blocks")
+    .set_long_description("This is an experimental option for rocksdb that works in conjunction with two_level indices to avoid having to keep the entire filter/index in cache when cache_index_and_filter_blocks is true.  The idea is to keep a much smaller top-level index in heap/cache and then opportunistically cache the lower level indices.  See: https://github.com/facebook/rocksdb/wiki/Partitioned-Index-Filters"),
+
+    Option("rocksdb_metadata_block_size", Option::TYPE_UINT, Option::LEVEL_DEV)
+    .set_default(4096)
+    .set_description("The block size for index partitions. (0 = rocksdb default)"),
+
+    Option("mon_rocksdb_options", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("write_buffer_size=33554432,compression=kNoCompression")
+    .set_description(""),
+
+    Option("osd_client_op_priority", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(63)
+    .set_description(""),
+
+    Option("osd_recovery_op_priority", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(3)
+    .set_description(""),
+
+    Option("osd_snap_trim_priority", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(5)
+    .set_description(""),
+
+    Option("osd_snap_trim_cost", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(1<<20)
+    .set_description(""),
+
+    Option("osd_scrub_priority", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(5)
+    .set_description(""),
+
+    Option("osd_scrub_cost", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(50<<20)
+    .set_description(""),
+
+    Option("osd_requested_scrub_priority", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(120)
+    .set_description(""),
+
+    Option("osd_recovery_priority", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(5)
+    .set_description(""),
+
+    Option("osd_recovery_cost", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(20<<20)
+    .set_description(""),
+
+    Option("osd_recovery_op_warn_multiple", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(16)
+    .set_description(""),
+
+    Option("osd_mon_shutdown_timeout", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(5)
+    .set_description(""),
+
+    Option("osd_shutdown_pgref_assert", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
+
+    Option("osd_max_object_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(128*1024L*1024L)
+    .set_description(""),
+
+    Option("osd_max_object_name_len", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(2048)
+    .set_description(""),
+
+    Option("osd_max_object_namespace_len", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(256)
+    .set_description(""),
+
+    Option("osd_max_attr_name_len", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(100)
+    .set_description(""),
+
+    Option("osd_max_attr_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
+
+    Option("osd_max_omap_entries_per_request", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(131072)
+    .set_description(""),
 
-  Option("threadpool_default_timeout", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(60)
-  .set_description(""),
+    Option("osd_max_omap_bytes_per_request", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(1<<30)
+    .set_description(""),
 
-  Option("threadpool_empty_queue_max_wait", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(2)
-  .set_description(""),
+    Option("osd_objectstore", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("filestore")
+    .set_description(""),
 
-  Option("leveldb_log_to_ceph_log", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("osd_objectstore_tracing", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("leveldb_write_buffer_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(8 *1024*1024)
-  .set_description(""),
+    Option("osd_objectstore_fuse", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("leveldb_cache_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(128 *1024*1024)
-  .set_description(""),
+    Option("osd_bench_small_size_max_iops", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(100)
+    .set_description(""),
 
-  Option("leveldb_block_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("osd_bench_large_size_max_throughput", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(100 << 20)
+    .set_description(""),
 
-  Option("leveldb_bloom_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("osd_bench_max_block_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(64 << 20)
+    .set_description(""),
 
-  Option("leveldb_max_open_files", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("osd_bench_duration", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(30)
+    .set_description(""),
 
-  Option("leveldb_compression", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("osd_blkin_trace_all", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("leveldb_paranoid", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
-
-  Option("leveldb_log", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("/dev/null")
-  .set_description(""),
-
-  Option("leveldb_compact_on_mount", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
-
-  Option("kinetic_host", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
-
-  Option("kinetic_port", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(8123)
-  .set_description(""),
-
-  Option("kinetic_user_id", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(1)
-  .set_description(""),
-
-  Option("kinetic_hmac_key", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("asdfasdf")
-  .set_description(""),
-
-  Option("kinetic_use_ssl", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
-
-  Option("rocksdb_separate_wal_dir", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
-
-  Option("rocksdb_db_paths", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description("")
-  .set_safe(),
-
-  Option("rocksdb_log_to_ceph_log", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
-
-  Option("rocksdb_cache_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(128*1024*1024)
-  .set_description(""),
-
-  Option("rocksdb_cache_row_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
-
-  Option("rocksdb_cache_shard_bits", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(4)
-  .set_description(""),
-
-  Option("rocksdb_cache_type", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("lru")
-  .set_description(""),
-
-  Option("rocksdb_block_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(4*1024)
-  .set_description(""),
-
-  Option("rocksdb_perf", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
-
-  Option("rocksdb_collect_compaction_stats", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
-
-  Option("rocksdb_collect_extended_stats", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
-
-  Option("rocksdb_collect_memory_stats", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
-
-  Option("rocksdb_enable_rmrange", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
-
-  Option("rocksdb_bloom_bits_per_key", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(20)
-  .set_description("Number of bits per key to use for RocksDB's bloom filters.")
-  .set_long_description("RocksDB bloom filters can be used to quickly answer the question of whether or not a key may exist or definitely does not exist in a given RocksDB SST file without having to read all keys into memory.  Using a higher bit value decreases the likelihood of false positives at the expense of additional disk space and memory consumption when the filter is loaded into RAM.  The current default value of 20 was found to provide significant performance gains when getattr calls are made (such as during new object creation in bluestore) without significant memory overhead or cache pollution when combined with rocksdb partitioned index filters.  See: https://github.com/facebook/rocksdb/wiki/Partitioned-Index-Filters for more information."),
-
-  Option("rocksdb_cache_index_and_filter_blocks", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(true)
-  .set_description("Whether to cache indices and filters in block cache")
-  .set_long_description("By default RocksDB will load an SST file's index and bloom filters into memory when it is opened and remove them from memory when an SST file is closed.  Thus, memory consumption by indices and bloom filters is directly tied to the number of concurrent SST files allowed to be kept open.  This option instead stores cached indicies and filters in the block cache where they directly compete with other cached data.  By default we set this option to true to better account for and bound rocksdb memory usage and keep filters in memory even when an SST file is closed."),
-
-  Option("rocksdb_cache_index_and_filter_blocks_with_high_priority", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(true)
-  .set_description("Whether to cache indices and filters in the block cache with high priority")
-  .set_long_description("A downside of setting rocksdb_cache_index_and_filter_blocks to true is that regular data can push indices and filters out of memory.  Setting this option to true means they are cached with higher priority than other data and should typically stay in the block cache."),
-
-  Option("rocksdb_pin_l0_filter_and_index_blocks_in_cache", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(true)
-  .set_description("Whether to pin Level 0 indices and bloom filters in the block cache")
-  .set_long_description("A downside of setting rocksdb_cache_index_and_filter_blocks to true is that regular data can push indices and filters out of memory.  Setting this option to true means that level 0 SST files will always have their indices and filters pinned in the block cache."),
-
-  Option("rocksdb_index_type", Option::TYPE_STR, Option::LEVEL_DEV)
-  .set_default("binary_search")
-  .set_description("Type of index for SST files: binary_search, hash_search, two_level")
-  .set_long_description("This option controls the table index type.  binary_search is a space efficient index block that is optimized for block-search-based index. hash_search may improve prefix lookup performance at the expense of higher disk and memory usage and potentially slower compactions.  two_level is an experimental index type that uses two binary search indexes and works in conjunction with partition filters.  See: http://rocksdb.org/blog/2017/05/12/partitioned-index-filter.html"),
-
-  Option("rocksdb_partition_filters", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .set_description("(experimental) partition SST index/filters into smaller blocks")
-  .set_long_description("This is an experimental option for rocksdb that works in conjunction with two_level indices to avoid having to keep the entire filter/index in cache when cache_index_and_filter_blocks is true.  The idea is to keep a much smaller top-level index in heap/cache and then opportunistically cache the lower level indices.  See: https://github.com/facebook/rocksdb/wiki/Partitioned-Index-Filters"),
-
-  Option("rocksdb_metadata_block_size", Option::TYPE_UINT, Option::LEVEL_DEV)
-  .set_default(4096)
-  .set_description("The block size for index partitions. (0 = rocksdb default)"),
-
-  Option("mon_rocksdb_options", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("write_buffer_size=33554432,compression=kNoCompression")
-  .set_description(""),
-
-  Option("osd_client_op_priority", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(63)
-  .set_description(""),
-
-  Option("osd_recovery_op_priority", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(3)
-  .set_description(""),
-
-  Option("osd_snap_trim_priority", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(5)
-  .set_description(""),
-
-  Option("osd_snap_trim_cost", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(1<<20)
-  .set_description(""),
-
-  Option("osd_scrub_priority", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(5)
-  .set_description(""),
-
-  Option("osd_scrub_cost", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(50<<20)
-  .set_description(""),
-
-  Option("osd_requested_scrub_priority", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(120)
-  .set_description(""),
-
-  Option("osd_recovery_priority", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(5)
-  .set_description(""),
-
-  Option("osd_recovery_cost", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(20<<20)
-  .set_description(""),
-
-  Option("osd_recovery_op_warn_multiple", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(16)
-  .set_description(""),
-
-  Option("osd_mon_shutdown_timeout", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(5)
-  .set_description(""),
-
-  Option("osd_shutdown_pgref_assert", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
-
-  Option("osd_max_object_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(128*1024L*1024L)
-  .set_description(""),
-
-  Option("osd_max_object_name_len", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(2048)
-  .set_description(""),
-
-  Option("osd_max_object_namespace_len", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(256)
-  .set_description(""),
-
-  Option("osd_max_attr_name_len", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(100)
-  .set_description(""),
-
-  Option("osd_max_attr_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
-
-  Option("osd_max_omap_entries_per_request", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(131072)
-  .set_description(""),
+    Option("osdc_blkin_trace_all", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("osd_max_omap_bytes_per_request", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(1<<30)
-  .set_description(""),
+    Option("osd_discard_disconnected_ops", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("osd_objectstore", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("filestore")
-  .set_description(""),
+    Option("memstore_device_bytes", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(1024*1024*1024)
+    .set_description(""),
 
-  Option("osd_objectstore_tracing", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("memstore_page_set", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("osd_objectstore_fuse", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("memstore_page_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(64 << 10)
+    .set_description(""),
 
-  Option("osd_bench_small_size_max_iops", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(100)
-  .set_description(""),
+    Option("objectstore_blackhole", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("osd_bench_large_size_max_throughput", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(100 << 20)
-  .set_description(""),
+    // --------------------------
+    // bluestore
 
-  Option("osd_bench_max_block_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(64 << 20)
-  .set_description(""),
+    Option("bdev_debug_inflight_ios", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description(""),
 
-  Option("osd_bench_duration", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(30)
-  .set_description(""),
+    Option("bdev_inject_crash", Option::TYPE_INT, Option::LEVEL_DEV)
+    .set_default(0)
+    .set_description(""),
 
-  Option("osd_blkin_trace_all", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("bdev_inject_crash_flush_delay", Option::TYPE_INT, Option::LEVEL_DEV)
+    .set_default(2)
+    .set_description(""),
 
-  Option("osdc_blkin_trace_all", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("bdev_aio", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("osd_discard_disconnected_ops", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("bdev_aio_poll_ms", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(250)
+    .set_description(""),
 
-  Option("memstore_device_bytes", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(1024*1024*1024)
-  .set_description(""),
+    Option("bdev_aio_max_queue_depth", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(1024)
+    .set_description(""),
 
-  Option("memstore_page_set", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("bdev_aio_reap_max", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(16)
+    .set_description(""),
 
-  Option("memstore_page_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(64 << 10)
-  .set_description(""),
+    Option("bdev_block_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(4096)
+    .set_description(""),
 
-  Option("objectstore_blackhole", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("bdev_debug_aio", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description(""),
 
-  // --------------------------
-  // bluestore
+    Option("bdev_debug_aio_suicide_timeout", Option::TYPE_FLOAT, Option::LEVEL_DEV)
+    .set_default(60.0)
+    .set_description(""),
 
-  Option("bdev_debug_inflight_ios", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .set_description(""),
+    Option("bdev_nvme_unbind_from_kernel", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("bdev_inject_crash", Option::TYPE_INT, Option::LEVEL_DEV)
-  .set_default(0)
-  .set_description(""),
+    Option("bdev_nvme_retry_count", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(-1)
+    .set_description(""),
 
-  Option("bdev_inject_crash_flush_delay", Option::TYPE_INT, Option::LEVEL_DEV)
-  .set_default(2)
-  .set_description(""),
+    Option("bluefs_alloc_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(1048576)
+    .set_description(""),
+
+    Option("bluefs_max_prefetch", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(1048576)
+    .set_description(""),
+
+    Option("bluefs_min_log_runway", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(1048576)
+    .set_description(""),
+
+    Option("bluefs_max_log_runway", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(4194304)
+    .set_description(""),
+
+    Option("bluefs_log_compact_min_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(5.0)
+    .set_description(""),
+
+    Option("bluefs_log_compact_min_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(16*1048576)
+    .set_description(""),
+
+    Option("bluefs_min_flush_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(524288)
+    .set_description(""),
+
+    Option("bluefs_compact_log_sync", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
+
+    Option("bluefs_buffered_io", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
+
+    Option("bluefs_sync_write", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
+
+    Option("bluefs_allocator", Option::TYPE_STR, Option::LEVEL_DEV)
+    .set_default("bitmap")
+    .set_description(""),
+
+    Option("bluefs_preextend_wal_files", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
+
+    Option("bluestore_bluefs", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(true)
+    .add_tag("mkfs")
+    .set_description("Use BlueFS to back rocksdb")
+    .set_long_description("BlueFS allows rocksdb to share the same physical device(s) as the rest of BlueStore.  It should be used in all cases unless testing/developing an alternative metadata database for BlueStore."),
+
+    Option("bluestore_bluefs_env_mirror", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .add_tag("mkfs")
+    .set_description("Mirror bluefs data to file system for testing/validation"),
+
+    Option("bluestore_bluefs_min", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(1*1024*1024*1024)
+    .set_description("minimum disk space allocated to BlueFS (e.g., at mkfs)"),
+
+    Option("bluestore_bluefs_min_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(.02)
+    .set_description("Minimum fraction of free space devoted to BlueFS"),
+
+    Option("bluestore_bluefs_max_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(.90)
+    .set_description("Maximum fraction of free storage devoted to BlueFS"),
+
+    Option("bluestore_bluefs_gift_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(.02)
+    .set_description("Maximum fraction of free space to give to BlueFS at once"),
+
+    Option("bluestore_bluefs_reclaim_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(.20)
+    .set_description("Maximum fraction of free space to reclaim from BlueFS at once"),
+
+    Option("bluestore_bluefs_balance_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(1)
+    .set_description("How frequently (in seconds) to balance free space between BlueFS and BlueStore"),
+
+    Option("bluestore_spdk_mem", Option::TYPE_UINT, Option::LEVEL_DEV)
+    .set_default(512)
+    .set_description(""),
+
+    Option("bluestore_spdk_coremask", Option::TYPE_STR, Option::LEVEL_DEV)
+    .set_default("0x3")
+    .set_description(""),
+
+    Option("bluestore_spdk_max_io_completion", Option::TYPE_UINT, Option::LEVEL_DEV)
+    .set_default(0)
+    .set_description(""),
+
+    Option("bluestore_block_path", Option::TYPE_STR, Option::LEVEL_DEV)
+    .set_default("")
+    .add_tag("mkfs")
+    .set_description("Path to block device/file"),
+
+    Option("bluestore_block_size", Option::TYPE_UINT, Option::LEVEL_DEV)
+    .set_default(10ull * 1024*1024*1024)
+    .add_tag("mkfs")
+    .set_description("Size of file to create for backing bluestore"),
+
+    Option("bluestore_block_create", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(true)
+    .add_tag("mkfs")
+    .set_description("Create bluestore_block_path if it doesn't exist")
+    .add_see_also("bluestore_block_path").add_see_also("bluestore_block_size"),
+
+    Option("bluestore_block_db_path", Option::TYPE_STR, Option::LEVEL_DEV)
+    .set_default("")
+    .add_tag("mkfs")
+    .set_description("Path for db block device"),
+
+    Option("bluestore_block_db_size", Option::TYPE_UINT, Option::LEVEL_DEV)
+    .set_default(0)
+    .add_tag("mkfs")
+    .set_description("Size of file to create for bluestore_block_db_path"),
+
+    Option("bluestore_block_db_create", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .add_tag("mkfs")
+    .set_description("Create bluestore_block_db_path if it doesn't exist")
+    .add_see_also("bluestore_block_db_path")
+    .add_see_also("bluestore_block_db_size"),
+
+    Option("bluestore_block_wal_path", Option::TYPE_STR, Option::LEVEL_DEV)
+    .set_default("")
+    .add_tag("mkfs")
+    .set_description("Path to block device/file backing bluefs wal"),
+
+    Option("bluestore_block_wal_size", Option::TYPE_UINT, Option::LEVEL_DEV)
+    .set_default(96 * 1024*1024)
+    .add_tag("mkfs")
+    .set_description("Size of file to create for bluestore_block_wal_path"),
+
+    Option("bluestore_block_wal_create", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .add_tag("mkfs")
+    .set_description("Create bluestore_block_wal_path if it doesn't exist")
+    .add_see_also("bluestore_block_wal_path")
+    .add_see_also("bluestore_block_wal_size"),
+
+    Option("bluestore_block_preallocate_file", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .add_tag("mkfs")
+    .set_description("Preallocate file created via bluestore_block*_create"),
+
+    Option("bluestore_csum_type", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("crc32c")
+    .set_enum_allowed({"none", "crc32c", "crc32c_16", "crc32c_8", "xxhash32", "xxhash64"})
+    .set_safe()
+    .set_description("Default checksum algorithm to use")
+    .set_long_description("crc32c, xxhash32, and xxhash64 are available.  The _16 and _8 variants use only a subset of the bits for more compact (but less reliable) checksumming."),
+
+    Option("bluestore_csum_min_block", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(4096)
+    .set_safe()
+    .set_description("Minimum block size to checksum")
+    .set_long_description("A larger checksum block means less checksum metadata to store, but results in read amplification when doing a read smaller than this size (because the entire block must be read to verify the checksum).")
+    .add_see_also("bluestore_csum_max_block"),
+
+    Option("bluestore_csum_max_block", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(64*1024)
+    .set_safe()
+    .set_description("Maximum block size to checksum")
+    .add_see_also("bluestore_csum_min_block"),
+
+    Option("bluestore_min_alloc_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .add_tag("mkfs")
+    .set_description("Minimum allocation size to allocate for an object")
+    .set_long_description("A smaller allocation size generally means less data is read and then rewritten when a copy-on-write operation is triggered (e.g., when writing to something that was recently snapshotted).  Similarly, less data is journaled before performing an overwrite (writes smaller than min_alloc_size must first pass through the BlueStore journal).  Larger values of min_alloc_size reduce the amount of metadata required to describe the on-disk layout and reduce overall fragmentation."),
+
+    Option("bluestore_min_alloc_size_hdd", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(64*1024)
+    .add_tag("mkfs")
+    .set_description("Default min_alloc_size value for rotational media"),
+
+    Option("bluestore_min_alloc_size_ssd", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(16*1024)
+    .add_tag("mkfs")
+    .set_description("Default min_alloc_size value for non-rotational (solid state) media"),
+
+    Option("bluestore_max_alloc_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .add_tag("mkfs")
+    .set_description("Maximum size of a single allocation (0 for no max)"),
+
+    Option("bluestore_prefer_deferred_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_safe()
+    .set_description("Writes smaller than this size will be written to the journal and then asynchronously written to the device.  This can be beneficial when using rotational media where seeks are expensive, and is helpful both with and without solid state journal/wal devices."),
+
+    Option("bluestore_prefer_deferred_size_hdd", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(32768)
+    .set_safe()
+    .set_description("Default bluestore_prefer_deferred_size for rotational media"),
+
+    Option("bluestore_prefer_deferred_size_ssd", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_safe()
+    .set_description("Default bluestore_prefer_deferred_size for non-rotational (solid state) media"),
+
+    Option("bluestore_compression_mode", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("none")
+    .set_enum_allowed({"none", "passive", "aggressive", "force"})
+    .set_safe()
+    .set_description("Default policy for using compression when pool does not specify")
+    .set_long_description("'none' means never use compression.  'passive' means use compression when clients hint that data is compressible.  'aggressive' means use compression unless clients hint that data is not compressible.  'force' means always use compression regardless of client hints.  This option is used when the per-pool property for the compression mode is not present."),
+
+    Option("bluestore_compression_algorithm", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("snappy")
+    .set_enum_allowed({"", "snappy", "zlib", "zstd", "lz4"})
+    .set_safe()
+    .set_description("Default compression algorithm to use when writing object data")
+    .set_long_description("This controls the default compressor to use (if any) if the per-pool property is not set.  Note that zstd is *not* recommended for bluestore due to high CPU overhead when compressing small amounts of data."),
+
+    Option("bluestore_compression_min_blob_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_safe()
+    .set_description("Chunks smaller than this are never compressed"),
+
+    Option("bluestore_compression_min_blob_size_hdd", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(128*1024)
+    .set_safe()
+    .set_description("Default value of bluestore_compression_min_blob_size for rotational media"),
+
+    Option("bluestore_compression_min_blob_size_ssd", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(8*1024)
+    .set_safe()
+    .set_description("Default value of bluestore_compression_min_blob_size for non-rotational (solid state) media"),
+
+    Option("bluestore_compression_max_blob_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_safe()
+    .set_description("Chunks larger than this are broken into smaller chunks before being compressed"),
+
+    Option("bluestore_compression_max_blob_size_hdd", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(512*1024)
+    .set_safe()
+    .set_description("Default value of bluestore_compression_max_blob_size for rotational media"),
+
+    Option("bluestore_compression_max_blob_size_ssd", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(64*1024)
+    .set_safe()
+    .set_description("Default value of bluestore_compression_max_blob_size for non-rotational (solid state) media"),
+
+    Option("bluestore_gc_enable_blob_threshold", Option::TYPE_INT, Option::LEVEL_DEV)
+    .set_default(0)
+    .set_safe()
+    .set_description(""),
+
+    Option("bluestore_gc_enable_total_threshold", Option::TYPE_INT, Option::LEVEL_DEV)
+    .set_default(0)
+    .set_safe()
+    .set_description(""),
+
+    Option("bluestore_max_blob_size", Option::TYPE_UINT, Option::LEVEL_DEV)
+    .set_default(0)
+    .set_safe()
+    .set_description(""),
+
+    Option("bluestore_max_blob_size_hdd", Option::TYPE_UINT, Option::LEVEL_DEV)
+    .set_default(512*1024)
+    .set_safe()
+    .set_description(""),
+
+    Option("bluestore_max_blob_size_ssd", Option::TYPE_UINT, Option::LEVEL_DEV)
+    .set_default(64*1024)
+    .set_safe()
+    .set_description(""),
+
+    Option("bluestore_compression_required_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(.875)
+    .set_safe()
+    .set_description("Compression ratio required to store compressed data")
+    .set_long_description("If we compress data and get less than this we discard the result and store the original uncompressed data."),
+
+    Option("bluestore_extent_map_shard_max_size", Option::TYPE_UINT, Option::LEVEL_DEV)
+    .set_default(1200)
+    .set_description("Max size (bytes) for a single extent map shard before splitting"),
+
+    Option("bluestore_extent_map_shard_target_size", Option::TYPE_UINT, Option::LEVEL_DEV)
+    .set_default(500)
+    .set_description("Target size (bytes) for a single extent map shard"),
+
+    Option("bluestore_extent_map_shard_min_size", Option::TYPE_UINT, Option::LEVEL_DEV)
+    .set_default(150)
+    .set_description("Min size (bytes) for a single extent map shard before merging"),
+
+    Option("bluestore_extent_map_shard_target_size_slop", Option::TYPE_FLOAT, Option::LEVEL_DEV)
+    .set_default(.2)
+    .set_description("Ratio above/below target for a shard when trying to align to an existing extent or blob boundary"),
+
+    Option("bluestore_extent_map_inline_shard_prealloc_size", Option::TYPE_UINT, Option::LEVEL_DEV)
+    .set_default(256)
+    .set_description("Preallocated buffer for inline shards"),
+
+    Option("bluestore_cache_trim_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(.2)
+    .set_description("How frequently we trim the bluestore cache"),
+
+    Option("bluestore_cache_trim_max_skip_pinned", Option::TYPE_UINT, Option::LEVEL_DEV)
+    .set_default(64)
+    .set_description("Max pinned cache entries we consider before giving up"),
+
+    Option("bluestore_cache_type", Option::TYPE_STR, Option::LEVEL_DEV)
+    .set_default("2q")
+    .set_enum_allowed({"2q", "lru"})
+    .set_description("Cache replacement algorithm"),
+
+    Option("bluestore_2q_cache_kin_ratio", Option::TYPE_FLOAT, Option::LEVEL_DEV)
+    .set_default(.5)
+    .set_description("2Q paper suggests .5"),
+
+    Option("bluestore_2q_cache_kout_ratio", Option::TYPE_FLOAT, Option::LEVEL_DEV)
+    .set_default(.5)
+    .set_description("2Q paper suggests .5"),
+
+    Option("bluestore_cache_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description("Cache size (in bytes) for BlueStore")
+    .set_long_description("This includes data and metadata cached by BlueStore as well as memory devoted to rocksdb's cache(s)."),
+
+    Option("bluestore_cache_size_hdd", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(1ull*1024*1024*1024)
+    .set_description("Default bluestore_cache_size for rotational media"),
+
+    Option("bluestore_cache_size_ssd", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(3ull*1024*1024*1024)
+    .set_description("Default bluestore_cache_size for non-rotational (solid state) media"),
+
+    Option("bluestore_cache_meta_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(.01)
+    .set_description("Ratio of bluestore cache to devote to metadata"),
+
+    Option("bluestore_cache_kv_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(.99)
+    .set_description("Ratio of bluestore cache to devote to kv database (rocksdb)"),
+
+    Option("bluestore_cache_kv_max", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(512*1024*1024)
+    .set_description("Max memory (bytes) to devote to kv database (rocksdb)"),
+
+    Option("bluestore_kvbackend", Option::TYPE_STR, Option::LEVEL_DEV)
+    .set_default("rocksdb")
+    .add_tag("mkfs")
+    .set_description("Key value database to use for bluestore"),
+
+    Option("bluestore_allocator", Option::TYPE_STR, Option::LEVEL_DEV)
+    .set_default("bitmap")
+    .add_tag("mkfs")
+    .set_description(""),
+
+    Option("bluestore_freelist_blocks_per_key", Option::TYPE_INT, Option::LEVEL_DEV)
+    .set_default(128)
+    .set_description("Block (and bits) per database key"),
+
+    Option("bluestore_bitmapallocator_blocks_per_zone", Option::TYPE_INT, Option::LEVEL_DEV)
+    .set_default(1024)
+    .set_description(""),
+
+    Option("bluestore_bitmapallocator_span_size", Option::TYPE_INT, Option::LEVEL_DEV)
+    .set_default(1024)
+    .set_description(""),
+
+    Option("bluestore_max_deferred_txc", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(32)
+    .set_description("Max transactions with deferred writes that can accumulate before we force flush deferred writes"),
+
+    Option("bluestore_rocksdb_options", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("compression=kNoCompression,max_write_buffer_number=4,min_write_buffer_number_to_merge=1,recycle_log_file_num=4,write_buffer_size=268435456,writable_file_max_buffer_size=0,compaction_readahead_size=2097152")
+    .set_description("Rocksdb options"),
+
+    Option("bluestore_fsck_on_mount", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description("Run fsck at mount"),
+
+    Option("bluestore_fsck_on_mount_deep", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(true)
+    .set_description("Run deep fsck at mount"),
+
+    Option("bluestore_fsck_on_umount", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description("Run fsck at umount"),
+
+    Option("bluestore_fsck_on_umount_deep", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(true)
+    .set_description("Run deep fsck at umount"),
+
+    Option("bluestore_fsck_on_mkfs", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(true)
+    .set_description("Run fsck after mkfs"),
+
+    Option("bluestore_fsck_on_mkfs_deep", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description("Run deep fsck after mkfs"),
+
+    Option("bluestore_sync_submit_transaction", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description("Try to submit metadata transaction to rocksdb in queuing thread context"),
+
+    Option("bluestore_throttle_bytes", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(64*1024*1024)
+    .set_safe()
+    .set_description("Maximum bytes in flight before we throttle IO submission"),
+
+    Option("bluestore_throttle_deferred_bytes", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(128*1024*1024)
+    .set_safe()
+    .set_description("Maximum bytes for deferred writes before we throttle IO submission"),
+
+    Option("bluestore_throttle_cost_per_io", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_safe()
+    .set_description("Overhead added to transaction cost (in bytes) for each IO"),
+
+  Option("bluestore_throttle_cost_per_io_hdd", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(670000)
+    .set_safe()
+    .set_description("Default bluestore_throttle_cost_per_io for rotational media"),
+
+    Option("bluestore_throttle_cost_per_io_ssd", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(4000)
+    .set_safe()
+    .set_description("Default bluestore_throttle_cost_per_io for non-rotational (solid state) media"),
+
+
+    Option("bluestore_deferred_batch_ops", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_safe()
+    .set_description("Max number of deferred writes before we flush the deferred write queue"),
+
+    Option("bluestore_deferred_batch_ops_hdd", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(64)
+    .set_safe()
+    .set_description("Default bluestore_deferred_batch_ops for rotational media"),
+
+    Option("bluestore_deferred_batch_ops_ssd", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(16)
+    .set_safe()
+    .set_description("Default bluestore_deferred_batch_ops for non-rotational (solid state) media"),
+
+    Option("bluestore_nid_prealloc", Option::TYPE_INT, Option::LEVEL_DEV)
+    .set_default(1024)
+    .set_description("Number of unique object ids to preallocate at a time"),
+
+    Option("bluestore_blobid_prealloc", Option::TYPE_UINT, Option::LEVEL_DEV)
+    .set_default(10240)
+    .set_description("Number of unique blob ids to preallocate at a time"),
+
+    Option("bluestore_clone_cow", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_safe()
+    .set_description("Use copy-on-write when cloning objects (versus reading and rewriting them at clone time)"),
+
+    Option("bluestore_default_buffered_read", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_safe()
+    .set_description("Cache read results by default (unless hinted NOCACHE or WONTNEED)"),
+
+    Option("bluestore_default_buffered_write", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_safe()
+    .set_description("Cache writes by default (unless hinted NOCACHE or WONTNEED)"),
+
+    Option("bluestore_debug_misc", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description(""),
+
+    Option("bluestore_debug_no_reuse_blocks", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description(""),
+
+    Option("bluestore_debug_small_allocations", Option::TYPE_INT, Option::LEVEL_DEV)
+    .set_default(0)
+    .set_description(""),
+
+    Option("bluestore_debug_freelist", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description(""),
+
+    Option("bluestore_debug_prefill", Option::TYPE_FLOAT, Option::LEVEL_DEV)
+    .set_default(0)
+    .set_description("simulate fragmentation"),
+
+    Option("bluestore_debug_prefragment_max", Option::TYPE_INT, Option::LEVEL_DEV)
+    .set_default(1048576)
+    .set_description(""),
+
+    Option("bluestore_debug_inject_read_err", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description(""),
+
+    Option("bluestore_debug_randomize_serial_transaction", Option::TYPE_INT, Option::LEVEL_DEV)
+    .set_default(0)
+    .set_description(""),
+
+    Option("bluestore_debug_omit_block_device_write", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description(""),
+
+    Option("bluestore_debug_fsck_abort", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description(""),
+
+    Option("bluestore_debug_omit_kv_commit", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description(""),
+
+    Option("bluestore_debug_permit_any_bdev_label", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description(""),
+
+    Option("bluestore_shard_finishers", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description(""),
+
+    Option("bluestore_debug_random_read_err", Option::TYPE_FLOAT, Option::LEVEL_DEV)
+    .set_default(0)
+    .set_description(""),
 
-  Option("bdev_aio", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    // -----------------------------------------
+    // kstore
 
-  Option("bdev_aio_poll_ms", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(250)
-  .set_description(""),
+    Option("kstore_max_ops", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(512)
+    .set_description(""),
+
+    Option("kstore_max_bytes", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(64*1024*1024)
+    .set_description(""),
 
-  Option("bdev_aio_max_queue_depth", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(1024)
-  .set_description(""),
+    Option("kstore_backend", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("rocksdb")
+    .set_description(""),
 
-  Option("bdev_aio_reap_max", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(16)
-  .set_description(""),
+    Option("kstore_rocksdb_options", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("compression=kNoCompression")
+    .set_description(""),
 
-  Option("bdev_block_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(4096)
-  .set_description(""),
+    Option("kstore_fsck_on_mount", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("bdev_debug_aio", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .set_description(""),
+    Option("kstore_fsck_on_mount_deep", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("bdev_debug_aio_suicide_timeout", Option::TYPE_FLOAT, Option::LEVEL_DEV)
-  .set_default(60.0)
-  .set_description(""),
+    Option("kstore_nid_prealloc", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(1024)
+    .set_description(""),
 
-  Option("bdev_nvme_unbind_from_kernel", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("kstore_sync_transaction", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("bdev_nvme_retry_count", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(-1)
-  .set_description(""),
+    Option("kstore_sync_submit_transaction", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("bluefs_alloc_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(1048576)
-  .set_description(""),
-
-  Option("bluefs_max_prefetch", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(1048576)
-  .set_description(""),
-
-  Option("bluefs_min_log_runway", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(1048576)
-  .set_description(""),
-
-  Option("bluefs_max_log_runway", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(4194304)
-  .set_description(""),
-
-  Option("bluefs_log_compact_min_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(5.0)
-  .set_description(""),
-
-  Option("bluefs_log_compact_min_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(16*1048576)
-  .set_description(""),
-
-  Option("bluefs_min_flush_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(524288)
-  .set_description(""),
-
-  Option("bluefs_compact_log_sync", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
-
-  Option("bluefs_buffered_io", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
-
-  Option("bluefs_sync_write", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
-
-  Option("bluefs_allocator", Option::TYPE_STR, Option::LEVEL_DEV)
-  .set_default("bitmap")
-  .set_description(""),
-
-  Option("bluefs_preextend_wal_files", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
-
-  Option("bluestore_bluefs", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(true)
-  .add_tag("mkfs")
-  .set_description("Use BlueFS to back rocksdb")
-  .set_long_description("BlueFS allows rocksdb to share the same physical device(s) as the rest of BlueStore.  It should be used in all cases unless testing/developing an alternative metadata database for BlueStore."),
-
-  Option("bluestore_bluefs_env_mirror", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .add_tag("mkfs")
-  .set_description("Mirror bluefs data to file system for testing/validation"),
-
-  Option("bluestore_bluefs_min", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(1*1024*1024*1024)
-  .set_description("minimum disk space allocated to BlueFS (e.g., at mkfs)"),
-
-  Option("bluestore_bluefs_min_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(.02)
-  .set_description("Minimum fraction of free space devoted to BlueFS"),
-
-  Option("bluestore_bluefs_max_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(.90)
-  .set_description("Maximum fraction of free storage devoted to BlueFS"),
-
-  Option("bluestore_bluefs_gift_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(.02)
-  .set_description("Maximum fraction of free space to give to BlueFS at once"),
-
-  Option("bluestore_bluefs_reclaim_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(.20)
-  .set_description("Maximum fraction of free space to reclaim from BlueFS at once"),
-
-  Option("bluestore_bluefs_balance_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(1)
-  .set_description("How frequently (in seconds) to balance free space between BlueFS and BlueStore"),
-
-  Option("bluestore_spdk_mem", Option::TYPE_UINT, Option::LEVEL_DEV)
-  .set_default(512)
-  .set_description(""),
-
-  Option("bluestore_spdk_coremask", Option::TYPE_STR, Option::LEVEL_DEV)
-  .set_default("0x3")
-  .set_description(""),
-
-  Option("bluestore_spdk_max_io_completion", Option::TYPE_UINT, Option::LEVEL_DEV)
-  .set_default(0)
-  .set_description(""),
-
-  Option("bluestore_block_path", Option::TYPE_STR, Option::LEVEL_DEV)
-  .set_default("")
-  .add_tag("mkfs")
-  .set_description("Path to block device/file"),
-
-  Option("bluestore_block_size", Option::TYPE_UINT, Option::LEVEL_DEV)
-  .set_default(10ull * 1024*1024*1024)
-  .add_tag("mkfs")
-  .set_description("Size of file to create for backing bluestore"),
-
-  Option("bluestore_block_create", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(true)
-  .add_tag("mkfs")
-  .set_description("Create bluestore_block_path if it doesn't exist")
-  .add_see_also("bluestore_block_path").add_see_also("bluestore_block_size"),
-
-  Option("bluestore_block_db_path", Option::TYPE_STR, Option::LEVEL_DEV)
-  .set_default("")
-  .add_tag("mkfs")
-  .set_description("Path for db block device"),
-
-  Option("bluestore_block_db_size", Option::TYPE_UINT, Option::LEVEL_DEV)
-  .set_default(0)
-  .add_tag("mkfs")
-  .set_description("Size of file to create for bluestore_block_db_path"),
-
-  Option("bluestore_block_db_create", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .add_tag("mkfs")
-  .set_description("Create bluestore_block_db_path if it doesn't exist")
-  .add_see_also("bluestore_block_db_path")
-  .add_see_also("bluestore_block_db_size"),
-
-  Option("bluestore_block_wal_path", Option::TYPE_STR, Option::LEVEL_DEV)
-  .set_default("")
-  .add_tag("mkfs")
-  .set_description("Path to block device/file backing bluefs wal"),
-
-  Option("bluestore_block_wal_size", Option::TYPE_UINT, Option::LEVEL_DEV)
-  .set_default(96 * 1024*1024)
-  .add_tag("mkfs")
-  .set_description("Size of file to create for bluestore_block_wal_path"),
-
-  Option("bluestore_block_wal_create", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .add_tag("mkfs")
-  .set_description("Create bluestore_block_wal_path if it doesn't exist")
-  .add_see_also("bluestore_block_wal_path")
-  .add_see_also("bluestore_block_wal_size"),
-
-  Option("bluestore_block_preallocate_file", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .add_tag("mkfs")
-  .set_description("Preallocate file created via bluestore_block*_create"),
-
-  Option("bluestore_csum_type", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("crc32c")
-  .set_enum_allowed({"none", "crc32c", "crc32c_16", "crc32c_8", "xxhash32", "xxhash64"})
-  .set_safe()
-  .set_description("Default checksum algorithm to use")
-  .set_long_description("crc32c, xxhash32, and xxhash64 are available.  The _16 and _8 variants use only a subset of the bits for more compact (but less reliable) checksumming."),
-
-  Option("bluestore_csum_min_block", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(4096)
-  .set_safe()
-  .set_description("Minimum block size to checksum")
-  .set_long_description("A larger checksum block means less checksum metadata to store, but results in read amplification when doing a read smaller than this size (because the entire block must be read to verify the checksum).")
-  .add_see_also("bluestore_csum_max_block"),
-
-  Option("bluestore_csum_max_block", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(64*1024)
-  .set_safe()
-  .set_description("Maximum block size to checksum")
-  .add_see_also("bluestore_csum_min_block"),
-
-  Option("bluestore_min_alloc_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .add_tag("mkfs")
-  .set_description("Minimum allocation size to allocate for an object")
-  .set_long_description("A smaller allocation size generally means less data is read and then rewritten when a copy-on-write operation is triggered (e.g., when writing to something that was recently snapshotted).  Similarly, less data is journaled before performing an overwrite (writes smaller than min_alloc_size must first pass through the BlueStore journal).  Larger values of min_alloc_size reduce the amount of metadata required to describe the on-disk layout and reduce overall fragmentation."),
-
-  Option("bluestore_min_alloc_size_hdd", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(64*1024)
-  .add_tag("mkfs")
-  .set_description("Default min_alloc_size value for rotational media"),
-
-  Option("bluestore_min_alloc_size_ssd", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(16*1024)
-  .add_tag("mkfs")
-  .set_description("Default min_alloc_size value for non-rotational (solid state)  media"),
-
-  Option("bluestore_max_alloc_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .add_tag("mkfs")
-  .set_description("Maximum size of a single allocation (0 for no max)"),
-
-  Option("bluestore_prefer_deferred_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_safe()
-  .set_description("Writes smaller than this size will be written to the journal and then asynchronously written to the device.  This can be beneficial when using rotational media where seeks are expensive, and is helpful both with and without solid state journal/wal devices."),
-
-  Option("bluestore_prefer_deferred_size_hdd", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(32768)
-  .set_safe()
-  .set_description("Default bluestore_prefer_deferred_size for rotational media"),
-
-  Option("bluestore_prefer_deferred_size_ssd", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_safe()
-  .set_description("Default bluestore_prefer_deferred_size for non-rotational (solid state) media"),
-
-  Option("bluestore_compression_mode", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("none")
-  .set_enum_allowed({"none", "passive", "aggressive", "force"})
-  .set_safe()
-  .set_description("Default policy for using compression when pool does not specify")
-  .set_long_description("'none' means never use compression.  'passive' means use compression when clients hint that data is compressible.  'aggressive' means use compression unless clients hint that data is not compressible.  This option is used when the per-pool property for the compression mode is not present."),
-
-  Option("bluestore_compression_algorithm", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("snappy")
-  .set_enum_allowed({"", "snappy", "zlib", "zstd", "lz4"})
-  .set_safe()
-  .set_description("Default compression algorithm to use when writing object data")
-  .set_long_description("This controls the default compressor to use (if any) if the per-pool property is not set.  Note that zstd is *not* recommended for bluestore due to high CPU overhead when compressing small amounts of data."),
-
-  Option("bluestore_compression_min_blob_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_safe()
-  .set_description("Chunks smaller than this are never compressed"),
-
-  Option("bluestore_compression_min_blob_size_hdd", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(128*1024)
-  .set_safe()
-  .set_description("Default value of bluestore_compression_min_blob_size for rotational media"),
-
-  Option("bluestore_compression_min_blob_size_ssd", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(8*1024)
-  .set_safe()
-  .set_description("Default value of bluestore_compression_min_blob_size for non-rotational (solid state) media"),
-
-  Option("bluestore_compression_max_blob_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_safe()
-  .set_description("Chunks larger than this are broken into smaller chunks before being compressed"),
-
-  Option("bluestore_compression_max_blob_size_hdd", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(512*1024)
-  .set_safe()
-  .set_description("Default value of bluestore_compression_max_blob_size for rotational media"),
-
-  Option("bluestore_compression_max_blob_size_ssd", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(64*1024)
-  .set_safe()
-  .set_description("Default value of bluestore_compression_max_blob_size for non-rotational (solid state) media"),
-
-  Option("bluestore_gc_enable_blob_threshold", Option::TYPE_INT, Option::LEVEL_DEV)
-  .set_default(0)
-  .set_safe()
-  .set_description(""),
-
-  Option("bluestore_gc_enable_total_threshold", Option::TYPE_INT, Option::LEVEL_DEV)
-  .set_default(0)
-  .set_safe()
-  .set_description(""),
-
-  Option("bluestore_max_blob_size", Option::TYPE_UINT, Option::LEVEL_DEV)
-  .set_default(0)
-  .set_safe()
-  .set_description(""),
-
-  Option("bluestore_max_blob_size_hdd", Option::TYPE_UINT, Option::LEVEL_DEV)
-  .set_default(512*1024)
-  .set_safe()
-  .set_description(""),
-
-  Option("bluestore_max_blob_size_ssd", Option::TYPE_UINT, Option::LEVEL_DEV)
-  .set_default(64*1024)
-  .set_safe()
-  .set_description(""),
-
-  Option("bluestore_compression_required_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(.875)
-  .set_safe()
-  .set_description("Compression ratio required to store compressed data")
-  .set_long_description("If we compress data and get less than this we discard the result and store the original uncompressed data."),
-
-  Option("bluestore_extent_map_shard_max_size", Option::TYPE_UINT, Option::LEVEL_DEV)
-  .set_default(1200)
-  .set_description("Max size (bytes) for a single extent map shard before splitting"),
-
-  Option("bluestore_extent_map_shard_target_size", Option::TYPE_UINT, Option::LEVEL_DEV)
-  .set_default(500)
-  .set_description("Target size (bytes) for a single extent map shard"),
-
-  Option("bluestore_extent_map_shard_min_size", Option::TYPE_UINT, Option::LEVEL_DEV)
-  .set_default(150)
-  .set_description("Min size (bytes) for a single extent map shard before merging"),
-
-  Option("bluestore_extent_map_shard_target_size_slop", Option::TYPE_FLOAT, Option::LEVEL_DEV)
-  .set_default(.2)
-  .set_description("Ratio above/below target for a shard when trying to align to an existing extent or blob boundary"),
-
-  Option("bluestore_extent_map_inline_shard_prealloc_size", Option::TYPE_UINT, Option::LEVEL_DEV)
-  .set_default(256)
-  .set_description("Preallocated buffer for inline shards"),
-
-  Option("bluestore_cache_trim_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(.2)
-  .set_description("How frequently we trim the bluestore cache"),
-
-  Option("bluestore_cache_trim_max_skip_pinned", Option::TYPE_UINT, Option::LEVEL_DEV)
-  .set_default(64)
-  .set_description("Max pinned cache entries we consider before giving up"),
-
-  Option("bluestore_cache_type", Option::TYPE_STR, Option::LEVEL_DEV)
-  .set_default("2q")
-  .set_enum_allowed({"2q", "lru"})
-  .set_description("Cache replacement algorithm"),
-
-  Option("bluestore_2q_cache_kin_ratio", Option::TYPE_FLOAT, Option::LEVEL_DEV)
-  .set_default(.5)
-  .set_description("2Q paper suggests .5"),
-
-  Option("bluestore_2q_cache_kout_ratio", Option::TYPE_FLOAT, Option::LEVEL_DEV)
-  .set_default(.5)
-  .set_description("2Q paper suggests .5"),
-
-  Option("bluestore_cache_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description("Cache size (in bytes) for BlueStore")
-  .set_long_description("This includes data and metadata cached by BlueStore as well as memory devoted to rocksdb's cache(s)."),
-
-  Option("bluestore_cache_size_hdd", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(1ull*1024*1024*1024)
-  .set_description("Default bluestore_cache_size for rotational media"),
-
-  Option("bluestore_cache_size_ssd", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(3ull*1024*1024*1024)
-  .set_description("Default bluestore_cache_size for non-rotational (solid state) media"),
-
-  Option("bluestore_cache_meta_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(.01)
-  .set_description("Ratio of bluestore cache to devote to metadata"),
-
-  Option("bluestore_cache_kv_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(.99)
-  .set_description("Ratio of bluestore cache to devote to kv database (rocksdb)"),
-
-  Option("bluestore_cache_kv_max", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(512*1024*1024)
-  .set_description("Max memory (bytes) to devote to kv database (rocksdb)"),
-
-  Option("bluestore_kvbackend", Option::TYPE_STR, Option::LEVEL_DEV)
-  .set_default("rocksdb")
-  .add_tag("mkfs")
-  .set_description("Key value database to use for bluestore"),
-
-  Option("bluestore_allocator", Option::TYPE_STR, Option::LEVEL_DEV)
-  .set_default("bitmap")
-  .add_tag("mkfs")
-  .set_description(""),
-
-  Option("bluestore_freelist_blocks_per_key", Option::TYPE_INT, Option::LEVEL_DEV)
-  .set_default(128)
-  .set_description("Block (and bits) per database key"),
-
-  Option("bluestore_bitmapallocator_blocks_per_zone", Option::TYPE_INT, Option::LEVEL_DEV)
-  .set_default(1024)
-  .set_description(""),
-
-  Option("bluestore_bitmapallocator_span_size", Option::TYPE_INT, Option::LEVEL_DEV)
-  .set_default(1024)
-  .set_description(""),
-
-  Option("bluestore_max_deferred_txc", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(32)
-  .set_description("Max transactions with deferred writes that can accumulate before we force flush deferred writes"),
-
-  Option("bluestore_rocksdb_options", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("compression=kNoCompression,max_write_buffer_number=4,min_write_buffer_number_to_merge=1,recycle_log_file_num=4,write_buffer_size=268435456,writable_file_max_buffer_size=0,compaction_readahead_size=2097152")
-  .set_description("Rocksdb options"),
-
-  Option("bluestore_fsck_on_mount", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .set_description("Run fsck at mount"),
-
-  Option("bluestore_fsck_on_mount_deep", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(true)
-  .set_description("Run deep fsck at mount"),
-
-  Option("bluestore_fsck_on_umount", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .set_description("Run fsck at umount"),
-
-  Option("bluestore_fsck_on_umount_deep", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(true)
-  .set_description("Run deep fsck at umount"),
-
-  Option("bluestore_fsck_on_mkfs", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(true)
-  .set_description("Run fsck after mkfs"),
-
-  Option("bluestore_fsck_on_mkfs_deep", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .set_description("Run deep fsck after mkfs"),
-
-  Option("bluestore_sync_submit_transaction", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .set_description("Try to submit metadata transaction to rocksdb in queuing thread context"),
-
-  Option("bluestore_throttle_bytes", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(64*1024*1024)
-  .set_safe()
-  .set_description("Maximum bytes in flight before we throttle IO submission"),
-
-  Option("bluestore_throttle_deferred_bytes", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(128*1024*1024)
-  .set_safe()
-  .set_description("Maximum bytes for deferred writes before we throttle IO submission"),
-
-  Option("bluestore_throttle_cost_per_io", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_safe()
-  .set_description("Overhead added to transaction cost (in bytes) for each IO"),
-
-Option("bluestore_throttle_cost_per_io_hdd", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(670000)
-  .set_safe()
-  .set_description("Default bluestore_throttle_cost_per_io for rotational media"),
-
-  Option("bluestore_throttle_cost_per_io_ssd", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(4000)
-  .set_safe()
-  .set_description("Default bluestore_throttle_cost_per_io for non-rotation (solid state) media"),
-
-
-  Option("bluestore_deferred_batch_ops", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_safe()
-  .set_description("Max number of deferred writes before we flush the deferred write queue"),
-
-  Option("bluestore_deferred_batch_ops_hdd", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(64)
-  .set_safe()
-  .set_description("Default bluestore_deferred_batch_ops for rotational media"),
-
-  Option("bluestore_deferred_batch_ops_ssd", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(16)
-  .set_safe()
-  .set_description("Default bluestore_deferred_batch_ops for non-rotational (solid state) media"),
-
-  Option("bluestore_nid_prealloc", Option::TYPE_INT, Option::LEVEL_DEV)
-  .set_default(1024)
-  .set_description("Number of unique object ids to preallocate at a time"),
-
-  Option("bluestore_blobid_prealloc", Option::TYPE_UINT, Option::LEVEL_DEV)
-  .set_default(10240)
-  .set_description("Number of unique blob ids to preallocate at a time"),
-
-  Option("bluestore_clone_cow", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_safe()
-  .set_description("Use copy-on-write when cloning objects (versus reading and rewriting them at clone time)"),
-
-  Option("bluestore_default_buffered_read", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_safe()
-  .set_description("Cache read results by default (unless hinted NOCACHE or WONTNEED)"),
-
-  Option("bluestore_default_buffered_write", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_safe()
-  .set_description("Cache writes by default (unless hinted NOCACHE or WONTNEED)"),
-
-  Option("bluestore_debug_misc", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .set_description(""),
-
-  Option("bluestore_debug_no_reuse_blocks", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .set_description(""),
-
-  Option("bluestore_debug_small_allocations", Option::TYPE_INT, Option::LEVEL_DEV)
-  .set_default(0)
-  .set_description(""),
-
-  Option("bluestore_debug_freelist", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .set_description(""),
-
-  Option("bluestore_debug_prefill", Option::TYPE_FLOAT, Option::LEVEL_DEV)
-  .set_default(0)
-  .set_description("simulate fragmentation"),
-
-  Option("bluestore_debug_prefragment_max", Option::TYPE_INT, Option::LEVEL_DEV)
-  .set_default(1048576)
-  .set_description(""),
-
-  Option("bluestore_debug_inject_read_err", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .set_description(""),
-
-  Option("bluestore_debug_randomize_serial_transaction", Option::TYPE_INT, Option::LEVEL_DEV)
-  .set_default(0)
-  .set_description(""),
-
-  Option("bluestore_debug_omit_block_device_write", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .set_description(""),
-
-  Option("bluestore_debug_fsck_abort", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .set_description(""),
-
-  Option("bluestore_debug_omit_kv_commit", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .set_description(""),
-
-  Option("bluestore_debug_permit_any_bdev_label", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .set_description(""),
-
-  Option("bluestore_shard_finishers", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .set_description(""),
-
-  Option("bluestore_debug_random_read_err", Option::TYPE_FLOAT, Option::LEVEL_DEV)
-  .set_default(0)
-  .set_description(""),
+    Option("kstore_onode_map_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(1024)
+    .set_description(""),
 
-  // -----------------------------------------
-  // kstore
+    Option("kstore_default_stripe_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(65536)
+    .set_description(""),
 
-  Option("kstore_max_ops", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(512)
-  .set_description(""),
-
-  Option("kstore_max_bytes", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(64*1024*1024)
-  .set_description(""),
+    // ---------------------
+    // filestore
 
-  Option("kstore_backend", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("rocksdb")
-  .set_description(""),
+    Option("filestore_rocksdb_options", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
 
-  Option("kstore_rocksdb_options", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("compression=kNoCompression")
-  .set_description(""),
+    Option("filestore_omap_backend", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("rocksdb")
+    .set_description(""),
 
-  Option("kstore_fsck_on_mount", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("filestore_omap_backend_path", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
 
-  Option("kstore_fsck_on_mount_deep", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("filestore_wbthrottle_enable", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("kstore_nid_prealloc", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(1024)
-  .set_description(""),
+    Option("filestore_wbthrottle_btrfs_bytes_start_flusher", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(41943040)
+    .set_description(""),
 
-  Option("kstore_sync_transaction", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("filestore_wbthrottle_btrfs_bytes_hard_limit", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(419430400)
+    .set_description(""),
 
-  Option("kstore_sync_submit_transaction", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("filestore_wbthrottle_btrfs_ios_start_flusher", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(500)
+    .set_description(""),
 
-  Option("kstore_onode_map_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(1024)
-  .set_description(""),
+    Option("filestore_wbthrottle_btrfs_ios_hard_limit", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(5000)
+    .set_description(""),
 
-  Option("kstore_default_stripe_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(65536)
-  .set_description(""),
+    Option("filestore_wbthrottle_btrfs_inodes_start_flusher", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(500)
+    .set_description(""),
 
-  // ---------------------
-  // filestore
+    Option("filestore_wbthrottle_xfs_bytes_start_flusher", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(41943040)
+    .set_description(""),
 
-  Option("filestore_rocksdb_options", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
+    Option("filestore_wbthrottle_xfs_bytes_hard_limit", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(419430400)
+    .set_description(""),
 
-  Option("filestore_omap_backend", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("rocksdb")
-  .set_description(""),
+    Option("filestore_wbthrottle_xfs_ios_start_flusher", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(500)
+    .set_description(""),
 
-  Option("filestore_omap_backend_path", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
+    Option("filestore_wbthrottle_xfs_ios_hard_limit", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(5000)
+    .set_description(""),
 
-  Option("filestore_wbthrottle_enable", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("filestore_wbthrottle_xfs_inodes_start_flusher", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(500)
+    .set_description(""),
 
-  Option("filestore_wbthrottle_btrfs_bytes_start_flusher", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(41943040)
-  .set_description(""),
+    Option("filestore_wbthrottle_btrfs_inodes_hard_limit", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(5000)
+    .set_description(""),
 
-  Option("filestore_wbthrottle_btrfs_bytes_hard_limit", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(419430400)
-  .set_description(""),
+    Option("filestore_wbthrottle_xfs_inodes_hard_limit", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(5000)
+    .set_description(""),
 
-  Option("filestore_wbthrottle_btrfs_ios_start_flusher", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(500)
-  .set_description(""),
+    Option("filestore_odsync_write", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("filestore_wbthrottle_btrfs_ios_hard_limit", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(5000)
-  .set_description(""),
+    Option("filestore_index_retry_probability", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
 
-  Option("filestore_wbthrottle_btrfs_inodes_start_flusher", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(500)
-  .set_description(""),
+    Option("filestore_debug_inject_read_err", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description(""),
 
-  Option("filestore_wbthrottle_xfs_bytes_start_flusher", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(41943040)
-  .set_description(""),
+    Option("filestore_debug_random_read_err", Option::TYPE_FLOAT, Option::LEVEL_DEV)
+    .set_default(0)
+    .set_description(""),
 
-  Option("filestore_wbthrottle_xfs_bytes_hard_limit", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(419430400)
-  .set_description(""),
+    Option("filestore_debug_omap_check", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description(""),
 
-  Option("filestore_wbthrottle_xfs_ios_start_flusher", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(500)
-  .set_description(""),
+    Option("filestore_omap_header_cache_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(1024)
+    .set_description(""),
 
-  Option("filestore_wbthrottle_xfs_ios_hard_limit", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(5000)
-  .set_description(""),
+    Option("filestore_max_inline_xattr_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
 
-  Option("filestore_wbthrottle_xfs_inodes_start_flusher", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(500)
-  .set_description(""),
+    Option("filestore_max_inline_xattr_size_xfs", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(65536)
+    .set_description(""),
 
-  Option("filestore_wbthrottle_btrfs_inodes_hard_limit", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(5000)
-  .set_description(""),
+    Option("filestore_max_inline_xattr_size_btrfs", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(2048)
+    .set_description(""),
 
-  Option("filestore_wbthrottle_xfs_inodes_hard_limit", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(5000)
-  .set_description(""),
+    Option("filestore_max_inline_xattr_size_other", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(512)
+    .set_description(""),
 
-  Option("filestore_odsync_write", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("filestore_max_inline_xattrs", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
 
-  Option("filestore_index_retry_probability", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("filestore_max_inline_xattrs_xfs", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(10)
+    .set_description(""),
 
-  Option("filestore_debug_inject_read_err", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .set_description(""),
+    Option("filestore_max_inline_xattrs_btrfs", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(10)
+    .set_description(""),
 
-  Option("filestore_debug_random_read_err", Option::TYPE_FLOAT, Option::LEVEL_DEV)
-  .set_default(0)
-  .set_description(""),
+    Option("filestore_max_inline_xattrs_other", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(2)
+    .set_description(""),
 
-  Option("filestore_debug_omap_check", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .set_description(""),
+    Option("filestore_max_xattr_value_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
 
-  Option("filestore_omap_header_cache_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(1024)
-  .set_description(""),
+    Option("filestore_max_xattr_value_size_xfs", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(64<<10)
+    .set_description(""),
 
-  Option("filestore_max_inline_xattr_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("filestore_max_xattr_value_size_btrfs", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(64<<10)
+    .set_description(""),
 
-  Option("filestore_max_inline_xattr_size_xfs", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(65536)
-  .set_description(""),
+    Option("filestore_max_xattr_value_size_other", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(1<<10)
+    .set_description(""),
 
-  Option("filestore_max_inline_xattr_size_btrfs", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(2048)
-  .set_description(""),
+    Option("filestore_sloppy_crc", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("filestore_max_inline_xattr_size_other", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(512)
-  .set_description(""),
+    Option("filestore_sloppy_crc_block_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(65536)
+    .set_description(""),
 
-  Option("filestore_max_inline_xattrs", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("filestore_max_alloc_hint_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(1ULL << 20)
+    .set_description(""),
 
-  Option("filestore_max_inline_xattrs_xfs", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(10)
-  .set_description(""),
+    Option("filestore_max_sync_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(5)
+    .set_description(""),
 
-  Option("filestore_max_inline_xattrs_btrfs", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(10)
-  .set_description(""),
+    Option("filestore_min_sync_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(.01)
+    .set_description(""),
 
-  Option("filestore_max_inline_xattrs_other", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(2)
-  .set_description(""),
+    Option("filestore_btrfs_snap", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("filestore_max_xattr_value_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("filestore_btrfs_clone_range", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("filestore_max_xattr_value_size_xfs", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(64<<10)
-  .set_description(""),
+    Option("filestore_zfs_snap", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("filestore_max_xattr_value_size_btrfs", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(64<<10)
-  .set_description(""),
+    Option("filestore_fsync_flushes_journal_data", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("filestore_max_xattr_value_size_other", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(1<<10)
-  .set_description(""),
+    Option("filestore_fiemap", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("filestore_sloppy_crc", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("filestore_punch_hole", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("filestore_sloppy_crc_block_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(65536)
-  .set_description(""),
+    Option("filestore_seek_data_hole", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("filestore_max_alloc_hint_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(1ULL << 20)
-  .set_description(""),
+    Option("filestore_splice", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("filestore_max_sync_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(5)
-  .set_description(""),
+    Option("filestore_fadvise", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("filestore_min_sync_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(.01)
-  .set_description(""),
+    Option("filestore_collect_device_partition_information", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("filestore_btrfs_snap", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("filestore_xfs_extsize", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("filestore_btrfs_clone_range", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("filestore_journal_parallel", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("filestore_zfs_snap", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("filestore_journal_writeahead", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("filestore_fsync_flushes_journal_data", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("filestore_journal_trailing", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("filestore_fiemap", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("filestore_queue_max_ops", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(50)
+    .set_description(""),
 
-  Option("filestore_punch_hole", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("filestore_queue_max_bytes", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(100 << 20)
+    .set_description(""),
 
-  Option("filestore_seek_data_hole", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("filestore_caller_concurrency", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(10)
+    .set_description(""),
 
-  Option("filestore_splice", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("filestore_expected_throughput_bytes", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(200 << 20)
+    .set_description(""),
 
-  Option("filestore_fadvise", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("filestore_expected_throughput_ops", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(200)
+    .set_description(""),
 
-  Option("filestore_collect_device_partition_information", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("filestore_queue_max_delay_multiple", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
 
-  Option("filestore_xfs_extsize", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("filestore_queue_high_delay_multiple", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
 
-  Option("filestore_journal_parallel", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("filestore_queue_low_threshhold", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(0.3)
+    .set_description(""),
 
-  Option("filestore_journal_writeahead", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("filestore_queue_high_threshhold", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(0.9)
+    .set_description(""),
 
-  Option("filestore_journal_trailing", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("filestore_op_threads", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(2)
+    .set_description(""),
 
-  Option("filestore_queue_max_ops", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(50)
-  .set_description(""),
+    Option("filestore_op_thread_timeout", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(60)
+    .set_description(""),
 
-  Option("filestore_queue_max_bytes", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(100 << 20)
-  .set_description(""),
+    Option("filestore_op_thread_suicide_timeout", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(180)
+    .set_description(""),
 
-  Option("filestore_caller_concurrency", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(10)
-  .set_description(""),
+    Option("filestore_commit_timeout", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(600)
+    .set_description(""),
 
-  Option("filestore_expected_throughput_bytes", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(200 << 20)
-  .set_description(""),
+    Option("filestore_fiemap_threshold", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(4096)
+    .set_description(""),
 
-  Option("filestore_expected_throughput_ops", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(200)
-  .set_description(""),
+    Option("filestore_merge_threshold", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(10)
+    .set_description(""),
 
-  Option("filestore_queue_max_delay_multiple", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("filestore_split_multiple", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(2)
+    .set_description(""),
 
-  Option("filestore_queue_high_delay_multiple", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("filestore_split_rand_factor", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(20)
+    .set_description(""),
 
-  Option("filestore_queue_low_threshhold", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(0.3)
-  .set_description(""),
+    Option("filestore_update_to", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(1000)
+    .set_description(""),
 
-  Option("filestore_queue_high_threshhold", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(0.9)
-  .set_description(""),
+    Option("filestore_blackhole", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("filestore_op_threads", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(2)
-  .set_description(""),
+    Option("filestore_fd_cache_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(128)
+    .set_description(""),
 
-  Option("filestore_op_thread_timeout", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(60)
-  .set_description(""),
+    Option("filestore_fd_cache_shards", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(16)
+    .set_description(""),
 
-  Option("filestore_op_thread_suicide_timeout", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(180)
-  .set_description(""),
+    Option("filestore_ondisk_finisher_threads", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(1)
+    .set_description(""),
 
-  Option("filestore_commit_timeout", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(600)
-  .set_description(""),
+    Option("filestore_apply_finisher_threads", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(1)
+    .set_description(""),
 
-  Option("filestore_fiemap_threshold", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(4096)
-  .set_description(""),
+    Option("filestore_dump_file", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
 
-  Option("filestore_merge_threshold", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(10)
-  .set_description(""),
+    Option("filestore_kill_at", Option::TYPE_INT, Option::LEVEL_DEV)
+    .set_default(0)
+    .set_description(""),
 
-  Option("filestore_split_multiple", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(2)
-  .set_description(""),
+    Option("filestore_inject_stall", Option::TYPE_INT, Option::LEVEL_DEV)
+    .set_default(0)
+    .set_description(""),
 
-  Option("filestore_split_rand_factor", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(20)
-  .set_description(""),
+    Option("filestore_fail_eio", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("filestore_update_to", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(1000)
-  .set_description(""),
+    Option("filestore_debug_verify_split", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description(""),
 
-  Option("filestore_blackhole", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("journal_dio", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("filestore_fd_cache_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(128)
-  .set_description(""),
+    Option("journal_aio", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("filestore_fd_cache_shards", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(16)
-  .set_description(""),
+    Option("journal_force_aio", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("filestore_ondisk_finisher_threads", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(1)
-  .set_description(""),
+    Option("journal_block_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(4096)
+    .set_description(""),
 
-  Option("filestore_apply_finisher_threads", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(1)
-  .set_description(""),
+    Option("journal_max_corrupt_search", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(10<<20)
+    .set_description(""),
 
-  Option("filestore_dump_file", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
+    Option("journal_block_align", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("filestore_kill_at", Option::TYPE_INT, Option::LEVEL_DEV)
-  .set_default(0)
-  .set_description(""),
+    Option("journal_write_header_frequency", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
 
-  Option("filestore_inject_stall", Option::TYPE_INT, Option::LEVEL_DEV)
-  .set_default(0)
-  .set_description(""),
+    Option("journal_max_write_bytes", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(10 << 20)
+    .set_description(""),
 
-  Option("filestore_fail_eio", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("journal_max_write_entries", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(100)
+    .set_description(""),
 
-  Option("filestore_debug_verify_split", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .set_description(""),
+    Option("journal_throttle_low_threshhold", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(0.6)
+    .set_description(""),
 
-  Option("journal_dio", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("journal_throttle_high_threshhold", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(0.9)
+    .set_description(""),
 
-  Option("journal_aio", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("journal_throttle_high_multiple", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
 
-  Option("journal_force_aio", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("journal_throttle_max_multiple", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
 
-  Option("journal_block_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(4096)
-  .set_description(""),
+    Option("journal_align_min_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(64 << 10)
+    .set_description(""),
 
-  Option("journal_max_corrupt_search", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(10<<20)
-  .set_description(""),
+    Option("journal_replay_from", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
 
-  Option("journal_block_align", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("journal_zero_on_create", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("journal_write_header_frequency", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("journal_ignore_corruption", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("journal_max_write_bytes", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(10 << 20)
-  .set_description(""),
+    Option("journal_discard", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("journal_max_write_entries", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(100)
-  .set_description(""),
+    Option("fio_dir", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("/tmp/fio")
+    .set_description(""),
 
-  Option("journal_throttle_low_threshhold", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(0.6)
-  .set_description(""),
+    Option("rados_mon_op_timeout", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
 
-  Option("journal_throttle_high_threshhold", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(0.9)
-  .set_description(""),
+    Option("rados_osd_op_timeout", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
 
-  Option("journal_throttle_high_multiple", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("rados_tracing", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("journal_throttle_max_multiple", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("nss_db_path", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
 
-  Option("journal_align_min_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(64 << 10)
-  .set_description(""),
+    Option("mgr_module_path", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default(CEPH_PKGLIBDIR "/mgr")
+    .set_description(""),
 
-  Option("journal_replay_from", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("mgr_initial_modules", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("restful status")
+    .set_description(""),
 
-  Option("journal_zero_on_create", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("mgr_data", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("/var/lib/ceph/mgr/$cluster-$id")
+    .set_description(""),
 
-  Option("journal_ignore_corruption", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("mgr_tick_period", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(2)
+    .set_description(""),
 
-  Option("journal_discard", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("mgr_stats_period", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(5)
+    .set_description(""),
 
-  Option("fio_dir", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("/tmp/fio")
-  .set_description(""),
+    Option("mgr_client_bytes", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(128*1048576)
+    .set_description(""),
 
-  Option("rados_mon_op_timeout", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("mgr_client_messages", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(512)
+    .set_description(""),
 
-  Option("rados_osd_op_timeout", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("mgr_osd_bytes", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(512*1048576)
+    .set_description(""),
 
-  Option("rados_tracing", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("mgr_osd_messages", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(8192)
+    .set_description(""),
 
-  Option("nss_db_path", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
+    Option("mgr_mds_bytes", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(128*1048576)
+    .set_description(""),
 
-  Option("mgr_module_path", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default(CEPH_PKGLIBDIR "/mgr")
-  .set_description(""),
+    Option("mgr_mds_messages", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(128)
+    .set_description(""),
 
-  Option("mgr_initial_modules", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("restful status")
-  .set_description(""),
+    Option("mgr_mon_bytes", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(128*1048576)
+    .set_description(""),
 
-  Option("mgr_data", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("/var/lib/ceph/mgr/$cluster-$id")
-  .set_description(""),
+    Option("mgr_mon_messages", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(128)
+    .set_description(""),
 
-  Option("mgr_tick_period", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(2)
-  .set_description(""),
+    Option("mgr_connect_retry_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(1.0)
+    .set_description(""),
 
-  Option("mgr_stats_period", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(5)
-  .set_description(""),
+    Option("mgr_service_beacon_grace", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(60.0)
+    .set_description(""),
 
-  Option("mgr_client_bytes", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(128*1048576)
-  .set_description(""),
+    Option("mon_mgr_digest_period", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(5)
+    .set_description(""),
 
-  Option("mgr_client_messages", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(512)
-  .set_description(""),
+    Option("mon_mgr_beacon_grace", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(30)
+    .set_description(""),
 
-  Option("mgr_osd_bytes", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(512*1048576)
-  .set_description(""),
+    Option("mon_mgr_inactive_grace", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(60)
+    .set_description(""),
 
-  Option("mgr_osd_messages", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(8192)
-  .set_description(""),
+    Option("mon_mgr_mkfs_grace", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(60)
+    .set_description(""),
 
-  Option("mgr_mds_bytes", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(128*1048576)
-  .set_description(""),
+    Option("mutex_perf_counter", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("mgr_mds_messages", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(128)
-  .set_description(""),
+    Option("throttler_perf_counter", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("mgr_mon_bytes", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(128*1048576)
-  .set_description(""),
+    Option("event_tracing", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("mgr_mon_messages", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(128)
-  .set_description(""),
+    Option("internal_safe_to_start_threads", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("mgr_connect_retry_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(1.0)
-  .set_description(""),
+    Option("debug_deliberately_leak_memory", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description(""),
+  });
+}
 
-  Option("mgr_service_beacon_grace", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(60.0)
-  .set_description(""),
+std::vector<Option> get_rgw_options() {
+  return std::vector<Option>({
+    Option("rgw_acl_grants_max_num", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(100)
+    .set_description(""),
 
-  Option("mon_mgr_digest_period", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(5)
-  .set_description(""),
+    Option("rgw_max_chunk_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(4 * 1024 * 1024)
+    .set_description(""),
 
-  Option("mon_mgr_beacon_grace", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(30)
-  .set_description(""),
+    Option("rgw_put_obj_min_window_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(16 * 1024 * 1024)
+    .set_description(""),
 
-  Option("mon_mgr_inactive_grace", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(60)
-  .set_description(""),
+    Option("rgw_put_obj_max_window_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(64 * 1024 * 1024)
+    .set_description(""),
 
-  Option("mon_mgr_mkfs_grace", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(60)
-  .set_description(""),
+    Option("rgw_max_put_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(5ULL*1024*1024*1024)
+    .set_description(""),
 
-  Option("mutex_perf_counter", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("rgw_max_put_param_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(1 * 1024 * 1024)
+    .set_description(""),
 
-  Option("throttler_perf_counter", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("rgw_override_bucket_index_max_shards", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
 
-  Option("event_tracing", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("rgw_bucket_index_max_aio", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(8)
+    .set_description(""),
 
-  Option("internal_safe_to_start_threads", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("rgw_enable_quota_threads", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("debug_deliberately_leak_memory", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .set_description(""),
-};
-
-std::vector<Option> rgw_options = {
-  Option("rgw_acl_grants_max_num", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(100)
-  .set_description(""),
+    Option("rgw_enable_gc_threads", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("rgw_max_chunk_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(4 * 1024 * 1024)
-  .set_description(""),
+    Option("rgw_enable_lc_threads", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("rgw_put_obj_min_window_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(16 * 1024 * 1024)
-  .set_description(""),
+    Option("rgw_data", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("/var/lib/ceph/radosgw/$cluster-$id")
+    .set_description(""),
 
-  Option("rgw_put_obj_max_window_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(64 * 1024 * 1024)
-  .set_description(""),
+    Option("rgw_enable_apis", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("s3, s3website, swift, swift_auth, admin")
+    .set_description(""),
 
-  Option("rgw_max_put_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(5ULL*1024*1024*1024)
-  .set_description(""),
+    Option("rgw_cache_enabled", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("rgw_max_put_param_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(1 * 1024 * 1024)
-  .set_description(""),
+    Option("rgw_cache_lru_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(10000)
+    .set_description(""),
 
-  Option("rgw_override_bucket_index_max_shards", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("rgw_socket_path", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
 
-  Option("rgw_bucket_index_max_aio", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(8)
-  .set_description(""),
+    Option("rgw_host", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
 
-  Option("rgw_enable_quota_threads", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("rgw_port", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
 
-  Option("rgw_enable_gc_threads", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("rgw_dns_name", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
 
-  Option("rgw_enable_lc_threads", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("rgw_dns_s3website_name", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
 
-  Option("rgw_data", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("/var/lib/ceph/radosgw/$cluster-$id")
-  .set_description(""),
+    Option("rgw_content_length_compat", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("rgw_enable_apis", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("s3, s3website, swift, swift_auth, admin")
-  .set_description(""),
+    Option("rgw_lifecycle_work_time", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("00:00-06:00")
+    .set_description(""),
 
-  Option("rgw_cache_enabled", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("rgw_lc_lock_max_time", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(60)
+    .set_description(""),
 
-  Option("rgw_cache_lru_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(10000)
-  .set_description(""),
+    Option("rgw_lc_max_objs", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(32)
+    .set_description(""),
 
-  Option("rgw_socket_path", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
+    Option("rgw_lc_debug_interval", Option::TYPE_INT, Option::LEVEL_DEV)
+    .set_default(-1)
+    .set_description(""),
 
-  Option("rgw_host", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
+    Option("rgw_mp_lock_max_time", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(600)
+    .set_description(""),
 
-  Option("rgw_port", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
+    Option("rgw_script_uri", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
 
-  Option("rgw_dns_name", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
+    Option("rgw_request_uri", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
 
-  Option("rgw_dns_s3website_name", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
+    Option("rgw_swift_url", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
 
-  Option("rgw_content_length_compat", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("rgw_swift_url_prefix", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("swift")
+    .set_description(""),
 
-  Option("rgw_lifecycle_work_time", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("00:00-06:00")
-  .set_description(""),
+    Option("rgw_swift_auth_url", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
 
-  Option("rgw_lc_lock_max_time", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(60)
-  .set_description(""),
+    Option("rgw_swift_auth_entry", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("auth")
+    .set_description(""),
 
-  Option("rgw_lc_max_objs", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(32)
-  .set_description(""),
+    Option("rgw_swift_tenant_name", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
 
-  Option("rgw_lc_debug_interval", Option::TYPE_INT, Option::LEVEL_DEV)
-  .set_default(-1)
-  .set_description(""),
+    Option("rgw_swift_account_in_url", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("rgw_script_uri", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
+    Option("rgw_swift_enforce_content_length", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("rgw_request_uri", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
+    Option("rgw_keystone_url", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
 
-  Option("rgw_swift_url", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
+    Option("rgw_keystone_admin_token", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
 
-  Option("rgw_swift_url_prefix", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("swift")
-  .set_description(""),
+    Option("rgw_keystone_admin_user", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
 
-  Option("rgw_swift_auth_url", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
+    Option("rgw_keystone_admin_password", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
 
-  Option("rgw_swift_auth_entry", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("auth")
-  .set_description(""),
+    Option("rgw_keystone_admin_tenant", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
 
-  Option("rgw_swift_tenant_name", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
+    Option("rgw_keystone_admin_project", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
 
-  Option("rgw_swift_account_in_url", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("rgw_keystone_admin_domain", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
 
-  Option("rgw_swift_enforce_content_length", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("rgw_keystone_barbican_user", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
 
-  Option("rgw_keystone_url", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
+    Option("rgw_keystone_barbican_password", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
 
-  Option("rgw_keystone_admin_token", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
+    Option("rgw_keystone_barbican_tenant", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
 
-  Option("rgw_keystone_admin_user", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
+    Option("rgw_keystone_barbican_project", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
 
-  Option("rgw_keystone_admin_password", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
+    Option("rgw_keystone_barbican_domain", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
 
-  Option("rgw_keystone_admin_tenant", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
+    Option("rgw_keystone_api_version", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(2)
+    .set_description(""),
 
-  Option("rgw_keystone_admin_project", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
+    Option("rgw_keystone_accepted_roles", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("Member, admin")
+    .set_description(""),
 
-  Option("rgw_keystone_admin_domain", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
+    Option("rgw_keystone_accepted_admin_roles", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
 
-  Option("rgw_keystone_barbican_user", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
+    Option("rgw_keystone_token_cache_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(10000)
+    .set_description(""),
 
-  Option("rgw_keystone_barbican_password", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
+    Option("rgw_keystone_revocation_interval", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(15 * 60)
+    .set_description(""),
 
-  Option("rgw_keystone_barbican_tenant", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
+    Option("rgw_keystone_verify_ssl", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("rgw_keystone_barbican_project", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
+    Option("rgw_keystone_implicit_tenants", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("rgw_keystone_barbican_domain", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
+    Option("rgw_cross_domain_policy", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("<allow-access-from domain=\"*\" secure=\"false\" />")
+    .set_description(""),
 
-  Option("rgw_keystone_api_version", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(2)
-  .set_description(""),
+    Option("rgw_healthcheck_disabling_path", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
 
-  Option("rgw_keystone_accepted_roles", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("Member, admin")
-  .set_description(""),
+    Option("rgw_s3_auth_use_rados", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("rgw_keystone_accepted_admin_roles", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
+    Option("rgw_s3_auth_use_keystone", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("rgw_keystone_token_cache_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(10000)
-  .set_description(""),
+    Option("rgw_s3_auth_aws4_force_boto2_compat", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("rgw_keystone_revocation_interval", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(15 * 60)
-  .set_description(""),
+    Option("rgw_barbican_url", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
 
-  Option("rgw_keystone_verify_ssl", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("rgw_ldap_uri", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("ldaps://<ldap.your.domain>")
+    .set_description(""),
 
-  Option("rgw_keystone_implicit_tenants", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("rgw_ldap_binddn", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("uid=admin,cn=users,dc=example,dc=com")
+    .set_description(""),
 
-  Option("rgw_cross_domain_policy", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("<allow-access-from domain=\"*\" secure=\"false\" />")
-  .set_description(""),
+    Option("rgw_ldap_searchdn", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("cn=users,cn=accounts,dc=example,dc=com")
+    .set_description(""),
 
-  Option("rgw_healthcheck_disabling_path", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
+    Option("rgw_ldap_dnattr", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("uid")
+    .set_description(""),
 
-  Option("rgw_s3_auth_use_rados", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("rgw_ldap_secret", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("/etc/openldap/secret")
+    .set_description(""),
 
-  Option("rgw_s3_auth_use_keystone", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("rgw_s3_auth_use_ldap", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("rgw_s3_auth_aws4_force_boto2_compat", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("rgw_ldap_searchfilter", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
 
-  Option("rgw_barbican_url", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
+    Option("rgw_admin_entry", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("admin")
+    .set_description(""),
 
-  Option("rgw_ldap_uri", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("ldaps://<ldap.your.domain>")
-  .set_description(""),
+    Option("rgw_enforce_swift_acls", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("rgw_ldap_binddn", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("uid=admin,cn=users,dc=example,dc=com")
-  .set_description(""),
+    Option("rgw_swift_token_expiration", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(24 * 3600)
+    .set_description(""),
 
-  Option("rgw_ldap_searchdn", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("cn=users,cn=accounts,dc=example,dc=com")
-  .set_description(""),
+    Option("rgw_print_continue", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("rgw_ldap_dnattr", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("uid")
-  .set_description(""),
+    Option("rgw_print_prohibited_content_length", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("rgw_ldap_secret", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("/etc/openldap/secret")
-  .set_description(""),
+    Option("rgw_remote_addr_param", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("REMOTE_ADDR")
+    .set_description(""),
 
-  Option("rgw_s3_auth_use_ldap", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("rgw_op_thread_timeout", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(10*60)
+    .set_description(""),
 
-  Option("rgw_ldap_searchfilter", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
+    Option("rgw_op_thread_suicide_timeout", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
 
-  Option("rgw_admin_entry", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("admin")
-  .set_description(""),
+    Option("rgw_thread_pool_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(100)
+    .set_description(""),
 
-  Option("rgw_enforce_swift_acls", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("rgw_num_control_oids", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(8)
+    .set_description(""),
 
-  Option("rgw_swift_token_expiration", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(24 * 3600)
-  .set_description(""),
+    Option("rgw_num_rados_handles", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(1)
+    .set_description(""),
 
-  Option("rgw_print_continue", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("rgw_verify_ssl", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("rgw_print_prohibited_content_length", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("rgw_nfs_lru_lanes", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(5)
+    .set_description(""),
 
-  Option("rgw_remote_addr_param", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("REMOTE_ADDR")
-  .set_description(""),
+    Option("rgw_nfs_lru_lane_hiwat", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(911)
+    .set_description(""),
 
-  Option("rgw_op_thread_timeout", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(10*60)
-  .set_description(""),
+    Option("rgw_nfs_fhcache_partitions", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(3)
+    .set_description(""),
 
-  Option("rgw_op_thread_suicide_timeout", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("rgw_nfs_fhcache_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(2017)
+    .set_description(""),
 
-  Option("rgw_thread_pool_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(100)
-  .set_description(""),
+    Option("rgw_nfs_namespace_expire_secs", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(300)
+    .set_min(1)
+    .set_description(""),
+
+    Option("rgw_nfs_max_gc", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(300)
+    .set_min(1)
+    .set_description(""),
+
+    Option("rgw_nfs_write_completion_interval_s", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(10)
+    .set_description(""),
+
+    Option("rgw_zone", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
+
+    Option("rgw_zone_root_pool", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default(".rgw.root")
+    .set_description(""),
+
+    Option("rgw_default_zone_info_oid", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("default.zone")
+    .set_description(""),
+
+    Option("rgw_region", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
+
+    Option("rgw_region_root_pool", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default(".rgw.root")
+    .set_description(""),
+
+    Option("rgw_default_region_info_oid", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("default.region")
+    .set_description(""),
+
+    Option("rgw_zonegroup", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
+
+    Option("rgw_zonegroup_root_pool", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default(".rgw.root")
+    .set_description(""),
+
+    Option("rgw_default_zonegroup_info_oid", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("default.zonegroup")
+    .set_description(""),
+
+    Option("rgw_realm", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
+
+    Option("rgw_realm_root_pool", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default(".rgw.root")
+    .set_description(""),
+
+    Option("rgw_default_realm_info_oid", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("default.realm")
+    .set_description(""),
+
+    Option("rgw_period_root_pool", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default(".rgw.root")
+    .set_description(""),
+
+    Option("rgw_period_latest_epoch_info_oid", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default(".latest_epoch")
+    .set_description(""),
+
+    Option("rgw_log_nonexistent_bucket", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
+
+    Option("rgw_log_object_name", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("%Y-%m-%d-%H-%i-%n")
+    .set_description(""),
+
+    Option("rgw_log_object_name_utc", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
+
+    Option("rgw_usage_max_shards", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(32)
+    .set_description(""),
+
+    Option("rgw_usage_max_user_shards", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(1)
+    .set_min(1)
+    .set_description(""),
 
-  Option("rgw_num_control_oids", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(8)
-  .set_description(""),
+    Option("rgw_enable_ops_log", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("rgw_num_rados_handles", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(1)
-  .set_description(""),
+    Option("rgw_enable_usage_log", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("rgw_verify_ssl", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("rgw_ops_log_rados", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("rgw_nfs_lru_lanes", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(5)
-  .set_description(""),
+    Option("rgw_ops_log_socket_path", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
 
-  Option("rgw_nfs_lru_lane_hiwat", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(911)
-  .set_description(""),
+    Option("rgw_ops_log_data_backlog", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(5 << 20)
+    .set_description(""),
 
-  Option("rgw_nfs_fhcache_partitions", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(3)
-  .set_description(""),
+    Option("rgw_fcgi_socket_backlog", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(1024)
+    .set_description(""),
 
-  Option("rgw_nfs_fhcache_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(2017)
-  .set_description(""),
+    Option("rgw_usage_log_flush_threshold", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(1024)
+    .set_description(""),
 
-  Option("rgw_nfs_namespace_expire_secs", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(300)
-  .set_min(1)
-  .set_description(""),
-
-  Option("rgw_nfs_max_gc", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(300)
-  .set_min(1)
-  .set_description(""),
-
-  Option("rgw_nfs_write_completion_interval_s", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(10)
-  .set_description(""),
-
-  Option("rgw_zone", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
-
-  Option("rgw_zone_root_pool", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default(".rgw.root")
-  .set_description(""),
-
-  Option("rgw_default_zone_info_oid", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("default.zone")
-  .set_description(""),
-
-  Option("rgw_region", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
-
-  Option("rgw_region_root_pool", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default(".rgw.root")
-  .set_description(""),
-
-  Option("rgw_default_region_info_oid", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("default.region")
-  .set_description(""),
-
-  Option("rgw_zonegroup", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
-
-  Option("rgw_zonegroup_root_pool", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default(".rgw.root")
-  .set_description(""),
-
-  Option("rgw_default_zonegroup_info_oid", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("default.zonegroup")
-  .set_description(""),
-
-  Option("rgw_realm", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
-
-  Option("rgw_realm_root_pool", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default(".rgw.root")
-  .set_description(""),
-
-  Option("rgw_default_realm_info_oid", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("default.realm")
-  .set_description(""),
-
-  Option("rgw_period_root_pool", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default(".rgw.root")
-  .set_description(""),
-
-  Option("rgw_period_latest_epoch_info_oid", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default(".latest_epoch")
-  .set_description(""),
-
-  Option("rgw_log_nonexistent_bucket", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
-
-  Option("rgw_log_object_name", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("%Y-%m-%d-%H-%i-%n")
-  .set_description(""),
-
-  Option("rgw_log_object_name_utc", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
-
-  Option("rgw_usage_max_shards", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(32)
-  .set_description(""),
-
-  Option("rgw_usage_max_user_shards", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(1)
-  .set_min(1)
-  .set_description(""),
+    Option("rgw_usage_log_tick_interval", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(30)
+    .set_description(""),
 
-  Option("rgw_enable_ops_log", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("rgw_intent_log_object_name", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("%Y-%m-%d-%i-%n")
+    .set_description(""),
 
-  Option("rgw_enable_usage_log", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("rgw_intent_log_object_name_utc", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("rgw_ops_log_rados", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("rgw_init_timeout", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(300)
+    .set_description(""),
 
-  Option("rgw_ops_log_socket_path", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
+    Option("rgw_mime_types_file", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("/etc/mime.types")
+    .set_description(""),
 
-  Option("rgw_ops_log_data_backlog", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(5 << 20)
-  .set_description(""),
+    Option("rgw_gc_max_objs", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(32)
+    .set_description(""),
 
-  Option("rgw_fcgi_socket_backlog", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(1024)
-  .set_description(""),
+    Option("rgw_gc_obj_min_wait", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(2 * 3600)
+    .set_description(""),
 
-  Option("rgw_usage_log_flush_threshold", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(1024)
-  .set_description(""),
+    Option("rgw_gc_processor_max_time", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(3600)
+    .set_description(""),
 
-  Option("rgw_usage_log_tick_interval", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(30)
-  .set_description(""),
+    Option("rgw_gc_processor_period", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(3600)
+    .set_description(""),
 
-  Option("rgw_intent_log_object_name", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("%Y-%m-%d-%i-%n")
-  .set_description(""),
+    Option("rgw_s3_success_create_obj_status", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
 
-  Option("rgw_intent_log_object_name_utc", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("rgw_resolve_cname", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("rgw_init_timeout", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(300)
-  .set_description(""),
+    Option("rgw_obj_stripe_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(4 << 20)
+    .set_description(""),
 
-  Option("rgw_mime_types_file", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("/etc/mime.types")
-  .set_description(""),
+    Option("rgw_extended_http_attrs", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
 
-  Option("rgw_gc_max_objs", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(32)
-  .set_description(""),
+    Option("rgw_exit_timeout_secs", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(120)
+    .set_description(""),
 
-  Option("rgw_gc_obj_min_wait", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(2 * 3600)
-  .set_description(""),
+    Option("rgw_get_obj_window_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(16 << 20)
+    .set_description(""),
 
-  Option("rgw_gc_processor_max_time", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(3600)
-  .set_description(""),
+    Option("rgw_get_obj_max_req_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(4 << 20)
+    .set_description(""),
 
-  Option("rgw_gc_processor_period", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(3600)
-  .set_description(""),
+    Option("rgw_relaxed_s3_bucket_names", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("rgw_s3_success_create_obj_status", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("rgw_defer_to_bucket_acls", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
 
-  Option("rgw_resolve_cname", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("rgw_list_buckets_max_chunk", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(1000)
+    .set_description(""),
 
-  Option("rgw_obj_stripe_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(4 << 20)
-  .set_description(""),
+    Option("rgw_md_log_max_shards", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(64)
+    .set_description(""),
 
-  Option("rgw_extended_http_attrs", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
+    Option("rgw_num_zone_opstate_shards", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(128)
+    .set_description(""),
 
-  Option("rgw_exit_timeout_secs", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(120)
-  .set_description(""),
+    Option("rgw_opstate_ratelimit_sec", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(30)
+    .set_description(""),
 
-  Option("rgw_get_obj_window_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(16 << 20)
-  .set_description(""),
+    Option("rgw_curl_wait_timeout_ms", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(1000)
+    .set_description(""),
 
-  Option("rgw_get_obj_max_req_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(4 << 20)
-  .set_description(""),
+    Option("rgw_copy_obj_progress", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("rgw_relaxed_s3_bucket_names", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("rgw_copy_obj_progress_every_bytes", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(1024 * 1024)
+    .set_description(""),
 
-  Option("rgw_defer_to_bucket_acls", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
+    Option("rgw_obj_tombstone_cache_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(1000)
+    .set_description(""),
 
-  Option("rgw_list_buckets_max_chunk", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(1000)
-  .set_description(""),
+    Option("rgw_data_log_window", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(30)
+    .set_description(""),
 
-  Option("rgw_md_log_max_shards", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(64)
-  .set_description(""),
+    Option("rgw_data_log_changes_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(1000)
+    .set_description(""),
 
-  Option("rgw_num_zone_opstate_shards", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(128)
-  .set_description(""),
+    Option("rgw_data_log_num_shards", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(128)
+    .set_description(""),
 
-  Option("rgw_opstate_ratelimit_sec", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(30)
-  .set_description(""),
+    Option("rgw_data_log_obj_prefix", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("data_log")
+    .set_description(""),
 
-  Option("rgw_curl_wait_timeout_ms", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(1000)
-  .set_description(""),
+    Option("rgw_replica_log_obj_prefix", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("replica_log")
+    .set_description(""),
 
-  Option("rgw_copy_obj_progress", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("rgw_bucket_quota_ttl", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(600)
+    .set_description(""),
 
-  Option("rgw_copy_obj_progress_every_bytes", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(1024 * 1024)
-  .set_description(""),
+    Option("rgw_bucket_quota_soft_threshold", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(0.95)
+    .set_description(""),
 
-  Option("rgw_obj_tombstone_cache_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(1000)
-  .set_description(""),
+    Option("rgw_bucket_quota_cache_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(10000)
+    .set_description(""),
 
-  Option("rgw_data_log_window", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(30)
-  .set_description(""),
+    Option("rgw_bucket_default_quota_max_objects", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(-1)
+    .set_description(""),
 
-  Option("rgw_data_log_changes_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(1000)
-  .set_description(""),
+    Option("rgw_bucket_default_quota_max_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(-1)
+    .set_description(""),
 
-  Option("rgw_data_log_num_shards", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(128)
-  .set_description(""),
+    Option("rgw_expose_bucket", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("rgw_data_log_obj_prefix", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("data_log")
-  .set_description(""),
+    Option("rgw_frontends", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("civetweb port=7480")
+    .set_description(""),
 
-  Option("rgw_replica_log_obj_prefix", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("replica_log")
-  .set_description(""),
+    Option("rgw_user_quota_bucket_sync_interval", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(180)
+    .set_description(""),
 
-  Option("rgw_bucket_quota_ttl", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(600)
-  .set_description(""),
+    Option("rgw_user_quota_sync_interval", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(3600 * 24)
+    .set_description(""),
 
-  Option("rgw_bucket_quota_soft_threshold", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(0.95)
-  .set_description(""),
+    Option("rgw_user_quota_sync_idle_users", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("rgw_bucket_quota_cache_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(10000)
-  .set_description(""),
+    Option("rgw_user_quota_sync_wait_time", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(3600 * 24)
+    .set_description(""),
 
-  Option("rgw_bucket_default_quota_max_objects", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(-1)
-  .set_description(""),
+    Option("rgw_user_default_quota_max_objects", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(-1)
+    .set_description(""),
 
-  Option("rgw_bucket_default_quota_max_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(-1)
-  .set_description(""),
+    Option("rgw_user_default_quota_max_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(-1)
+    .set_description(""),
 
-  Option("rgw_expose_bucket", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("rgw_multipart_min_part_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(5 * 1024 * 1024)
+    .set_description(""),
 
-  Option("rgw_frontends", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("civetweb port=7480")
-  .set_description(""),
+    Option("rgw_multipart_part_upload_limit", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(10000)
+    .set_description(""),
 
-  Option("rgw_user_quota_bucket_sync_interval", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(180)
-  .set_description(""),
+    Option("rgw_max_slo_entries", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(1000)
+    .set_description(""),
 
-  Option("rgw_user_quota_sync_interval", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(3600 * 24)
-  .set_description(""),
+    Option("rgw_olh_pending_timeout_sec", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(3600)
+    .set_description(""),
 
-  Option("rgw_user_quota_sync_idle_users", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("rgw_user_max_buckets", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(1000)
+    .set_description(""),
 
-  Option("rgw_user_quota_sync_wait_time", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(3600 * 24)
-  .set_description(""),
+    Option("rgw_objexp_gc_interval", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(60 * 10)
+    .set_description(""),
 
-  Option("rgw_user_default_quota_max_objects", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(-1)
-  .set_description(""),
+    Option("rgw_objexp_time_step", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(4096)
+    .set_description(""),
 
-  Option("rgw_user_default_quota_max_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(-1)
-  .set_description(""),
+    Option("rgw_objexp_hints_num_shards", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(127)
+    .set_description(""),
 
-  Option("rgw_multipart_min_part_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(5 * 1024 * 1024)
-  .set_description(""),
+    Option("rgw_objexp_chunk_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(100)
+    .set_description(""),
 
-  Option("rgw_multipart_part_upload_limit", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(10000)
-  .set_description(""),
+    Option("rgw_enable_static_website", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("rgw_max_slo_entries", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(1000)
-  .set_description(""),
+    Option("rgw_log_http_headers", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
 
-  Option("rgw_olh_pending_timeout_sec", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(3600)
-  .set_description(""),
+    Option("rgw_num_async_rados_threads", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(32)
+    .set_description(""),
 
-  Option("rgw_user_max_buckets", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(1000)
-  .set_description(""),
+    Option("rgw_md_notify_interval_msec", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(200)
+    .set_description(""),
 
-  Option("rgw_objexp_gc_interval", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(60 * 10)
-  .set_description(""),
+    Option("rgw_run_sync_thread", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("rgw_objexp_time_step", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(4096)
-  .set_description(""),
+    Option("rgw_sync_lease_period", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(120)
+    .set_description(""),
 
-  Option("rgw_objexp_hints_num_shards", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(127)
-  .set_description(""),
+    Option("rgw_sync_log_trim_interval", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(1200)
+    .set_description(""),
 
-  Option("rgw_objexp_chunk_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(100)
-  .set_description(""),
+    Option("rgw_sync_data_inject_err_probability", Option::TYPE_FLOAT, Option::LEVEL_DEV)
+    .set_default(0)
+    .set_description(""),
 
-  Option("rgw_enable_static_website", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("rgw_sync_meta_inject_err_probability", Option::TYPE_FLOAT, Option::LEVEL_DEV)
+    .set_default(0)
+    .set_description(""),
 
-  Option("rgw_log_http_headers", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
+    Option("rgw_period_push_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(2)
+    .set_description(""),
 
-  Option("rgw_num_async_rados_threads", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(32)
-  .set_description(""),
+    Option("rgw_period_push_interval_max", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(30)
+    .set_description(""),
 
-  Option("rgw_md_notify_interval_msec", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(200)
-  .set_description(""),
+    Option("rgw_safe_max_objects_per_shard", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(100*1024)
+    .set_description(""),
 
-  Option("rgw_run_sync_thread", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("rgw_shard_warning_threshold", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(90)
+    .set_description(""),
 
-  Option("rgw_sync_lease_period", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(120)
-  .set_description(""),
+    Option("rgw_swift_versioning_enabled", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("rgw_sync_log_trim_interval", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(1200)
-  .set_description(""),
+    Option("rgw_swift_custom_header", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
 
-  Option("rgw_sync_data_inject_err_probability", Option::TYPE_FLOAT, Option::LEVEL_DEV)
-  .set_default(0)
-  .set_description(""),
+    Option("rgw_swift_need_stats", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("rgw_sync_meta_inject_err_probability", Option::TYPE_FLOAT, Option::LEVEL_DEV)
-  .set_default(0)
-  .set_description(""),
+    Option("rgw_reshard_num_logs", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(16)
+    .set_description(""),
 
-  Option("rgw_period_push_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(2)
-  .set_description(""),
+    Option("rgw_reshard_bucket_lock_duration", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(120)
+    .set_description(""),
 
-  Option("rgw_period_push_interval_max", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(30)
-  .set_description(""),
-
-  Option("rgw_safe_max_objects_per_shard", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(100*1024)
-  .set_description(""),
-
-  Option("rgw_shard_warning_threshold", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(90)
-  .set_description(""),
-
-  Option("rgw_swift_versioning_enabled", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
-
-  Option("rgw_swift_custom_header", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
-
-  Option("rgw_swift_need_stats", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
-
-  Option("rgw_reshard_num_logs", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(16)
-  .set_description(""),
-
-  Option("rgw_reshard_bucket_lock_duration", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(120)
-  .set_description(""),
-
-  Option("rgw_crypt_require_ssl", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
-
-  Option("rgw_crypt_default_encryption_key", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
-
-  Option("rgw_crypt_s3_kms_encryption_keys", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
-
-  Option("rgw_crypt_suppress_logs", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
-
-  Option("rgw_list_bucket_min_readahead", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(1000)
-  .set_description(""),
-
-  Option("rgw_rest_getusage_op_compat", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
-
-  Option("rgw_torrent_flag", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
-
-  Option("rgw_torrent_tracker", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
-
-  Option("rgw_torrent_createby", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
-
-  Option("rgw_torrent_comment", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
-
-  Option("rgw_torrent_encoding", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
-
-  Option("rgw_torrent_origin", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
-
-  Option("rgw_torrent_sha_unit", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(512*1024)
-  .set_description(""),
-
-  Option("rgw_dynamic_resharding", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
-
-  Option("rgw_max_objs_per_shard", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(100000)
-  .set_description(""),
-
-  Option("rgw_reshard_thread_interval", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(60 * 10)
-  .set_description(""),
-};
-
-std::vector<Option> rbd_options = {
-  Option("rbd_default_pool", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("rbd")
-  .set_description("")
-  .set_validator([](std::string *value, std::string *error_message){
-    boost::regex pattern("^[^@/]+$");
-    if (!boost::regex_match (*value, pattern)) {
-      *value = "rbd";
-      *error_message = "invalid RBD default pool, resetting to 'rbd'";
-    }
-    return 0;
-  }),
-
-  Option("rbd_default_data_pool", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description("")
-  .set_validator([](std::string *value, std::string *error_message){
-    boost::regex pattern("^[^@/]*$");
-    if (!boost::regex_match (*value, pattern)) {
-      *value = "";
-      *error_message = "ignoring invalid RBD data pool";
-    }
-    return 0;
-  }),
-
-  Option("rbd_default_features", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("layering,exclusive-lock,object-map,fast-diff,deep-flatten")
-  .set_description("")
-  .set_safe()
-  .set_validator([](std::string *value, std::string *error_message){
-    static const std::map<std::string, uint64_t> FEATURE_MAP = {
-      {RBD_FEATURE_NAME_LAYERING, RBD_FEATURE_LAYERING},
-      {RBD_FEATURE_NAME_STRIPINGV2, RBD_FEATURE_STRIPINGV2},
-      {RBD_FEATURE_NAME_EXCLUSIVE_LOCK, RBD_FEATURE_EXCLUSIVE_LOCK},
-      {RBD_FEATURE_NAME_OBJECT_MAP, RBD_FEATURE_OBJECT_MAP},
-      {RBD_FEATURE_NAME_FAST_DIFF, RBD_FEATURE_FAST_DIFF},
-      {RBD_FEATURE_NAME_DEEP_FLATTEN, RBD_FEATURE_DEEP_FLATTEN},
-      {RBD_FEATURE_NAME_JOURNALING, RBD_FEATURE_JOURNALING},
-      {RBD_FEATURE_NAME_DATA_POOL, RBD_FEATURE_DATA_POOL},
-    };
-    static_assert((RBD_FEATURE_DATA_POOL << 1) > RBD_FEATURES_ALL,
-                  "new RBD feature added");
-
-    // convert user-friendly comma delimited feature name list to a bitmask
-    // that is used by the librbd API
-    uint64_t features = 0;
-    error_message->clear();
-
-    try {
-      features = boost::lexical_cast<decltype(features)>(*value);
-
-      uint64_t unsupported_features = (features & ~RBD_FEATURES_ALL);
-      if (unsupported_features != 0ull) {
-        features &= RBD_FEATURES_ALL;
-
-        std::stringstream ss;
-        ss << "ignoring unknown feature mask 0x"
-           << std::hex << unsupported_features;
-        *error_message = ss.str();
-      }
-    } catch (const boost::bad_lexical_cast& ) {
-      int r = 0;
-      std::vector<std::string> feature_names;
-      boost::split(feature_names, *value, boost::is_any_of(","));
-      for (auto feature_name: feature_names) {
-        boost::trim(feature_name);
-        auto feature_it = FEATURE_MAP.find(feature_name);
-        if (feature_it != FEATURE_MAP.end()) {
-          features += feature_it->second;
-        } else {
-          if (!error_message->empty()) {
-            *error_message += ", ";
-          }
-          *error_message += "ignoring unknown feature " + feature_name;
-          r = -EINVAL;
-        }
-      }
+    Option("rgw_crypt_require_ssl", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-      if (features == 0 && r == -EINVAL) {
-        features = RBD_FEATURES_DEFAULT;
-      }
-    }
-    *value = stringify(features);
-    return 0;
-  }),
+    Option("rgw_crypt_default_encryption_key", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
+
+    Option("rgw_crypt_s3_kms_encryption_keys", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
+
+    Option("rgw_crypt_suppress_logs", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
+
+    Option("rgw_list_bucket_min_readahead", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(1000)
+    .set_description(""),
 
-  Option("rbd_op_threads", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(1)
-  .set_description(""),
+    Option("rgw_rest_getusage_op_compat", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("rbd_op_thread_timeout", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(60)
-  .set_description(""),
+    Option("rgw_torrent_flag", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("rbd_non_blocking_aio", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("rgw_torrent_tracker", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
 
-  Option("rbd_cache", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("rgw_torrent_createby", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
 
-  Option("rbd_cache_writethrough_until_flush", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("rgw_torrent_comment", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
 
-  Option("rbd_cache_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(32<<20)
-  .set_description(""),
+    Option("rgw_torrent_encoding", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
 
-  Option("rbd_cache_max_dirty", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(24<<20)
-  .set_description(""),
+    Option("rgw_data_notify_interval_msec", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(200)
+    .set_description("data changes notification interval to followers"),
 
-  Option("rbd_cache_target_dirty", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(16<<20)
-  .set_description(""),
+    Option("rgw_torrent_origin", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
 
-  Option("rbd_cache_max_dirty_age", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(1.0)
-  .set_description(""),
+    Option("rgw_torrent_sha_unit", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(512*1024)
+    .set_description(""),
 
-  Option("rbd_cache_max_dirty_object", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("rgw_dynamic_resharding", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("rbd_cache_block_writes_upfront", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("rgw_max_objs_per_shard", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(100000)
+    .set_description(""),
 
-  Option("rbd_concurrent_management_ops", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(10)
-  .set_min(1)
-  .set_description(""),
+    Option("rgw_reshard_thread_interval", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(60 * 10)
+    .set_description(""),
+  });
+}
 
-  Option("rbd_balance_snap_reads", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+static std::vector<Option> get_rbd_options() {
+  return std::vector<Option>({
+    Option("rbd_default_pool", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("rbd")
+    .set_description("")
+    .set_validator([](std::string *value, std::string *error_message){
+      // A pool name has to be non-empty and must contain neither '@' nor
+      // '/'.  Anything else is replaced by "rbd" and reported through
+      // error_message; the validator itself still returns 0.
+      const boost::regex valid_pool_name("^[^@/]+$");
+      const bool name_ok = boost::regex_match(*value, valid_pool_name);
+      if (!name_ok) {
+        *value = "rbd";
+        *error_message = "invalid RBD default pool, resetting to 'rbd'";
+      }
+      return 0;
+    }),
+
+    Option("rbd_default_data_pool", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description("")
+    .set_validator([](std::string *value, std::string *error_message){
+      // The data pool may be left empty, but when given it must contain
+      // neither '@' nor '/'.  An invalid name is cleared and reported
+      // through error_message; the validator itself still returns 0.
+      const boost::regex valid_data_pool_name("^[^@/]*$");
+      const bool name_ok = boost::regex_match(*value, valid_data_pool_name);
+      if (!name_ok) {
+        *value = "";
+        *error_message = "ignoring invalid RBD data pool";
+      }
+      return 0;
+    }),
+
+    Option("rbd_default_features", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("layering,exclusive-lock,object-map,fast-diff,deep-flatten")
+    .set_description("")
+    .set_safe()
+    .set_validator([](std::string *value, std::string *error_message){
+      static const std::map<std::string, uint64_t> FEATURE_MAP = {
+        {RBD_FEATURE_NAME_LAYERING, RBD_FEATURE_LAYERING},
+        {RBD_FEATURE_NAME_STRIPINGV2, RBD_FEATURE_STRIPINGV2},
+        {RBD_FEATURE_NAME_EXCLUSIVE_LOCK, RBD_FEATURE_EXCLUSIVE_LOCK},
+        {RBD_FEATURE_NAME_OBJECT_MAP, RBD_FEATURE_OBJECT_MAP},
+        {RBD_FEATURE_NAME_FAST_DIFF, RBD_FEATURE_FAST_DIFF},
+        {RBD_FEATURE_NAME_DEEP_FLATTEN, RBD_FEATURE_DEEP_FLATTEN},
+        {RBD_FEATURE_NAME_JOURNALING, RBD_FEATURE_JOURNALING},
+        {RBD_FEATURE_NAME_DATA_POOL, RBD_FEATURE_DATA_POOL},
+      };
+      static_assert((RBD_FEATURE_DATA_POOL << 1) > RBD_FEATURES_ALL,
+                    "new RBD feature added");
+
+      // convert user-friendly comma delimited feature name list to a bitmask
+      // that is used by the librbd API
+      uint64_t features = 0;
+      error_message->clear();
+
+      try {
+        features = boost::lexical_cast<decltype(features)>(*value);
+
+        uint64_t unsupported_features = (features & ~RBD_FEATURES_ALL);
+        if (unsupported_features != 0ull) {
+          features &= RBD_FEATURES_ALL;
+
+          std::stringstream ss;
+          ss << "ignoring unknown feature mask 0x"
+             << std::hex << unsupported_features;
+          *error_message = ss.str();
+        }
+      } catch (const boost::bad_lexical_cast& ) {
+        int r = 0;
+        std::vector<std::string> feature_names;
+        boost::split(feature_names, *value, boost::is_any_of(","));
+        for (auto feature_name: feature_names) {
+          boost::trim(feature_name);
+          auto feature_it = FEATURE_MAP.find(feature_name);
+          if (feature_it != FEATURE_MAP.end()) {
+            features += feature_it->second;
+          } else {
+            if (!error_message->empty()) {
+              *error_message += ", ";
+            }
+            *error_message += "ignoring unknown feature " + feature_name;
+            r = -EINVAL;
+          }
+        }
 
-  Option("rbd_localize_snap_reads", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+        if (features == 0 && r == -EINVAL) {
+          features = RBD_FEATURES_DEFAULT;
+        }
+      }
+      *value = stringify(features);
+      return 0;
+    }),
+
+    Option("rbd_op_threads", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(1)
+    .set_description(""),
+
+    Option("rbd_op_thread_timeout", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(60)
+    .set_description(""),
+
+    Option("rbd_non_blocking_aio", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
+
+    Option("rbd_cache", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
+
+    Option("rbd_cache_writethrough_until_flush", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
+
+    Option("rbd_cache_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(32<<20)
+    .set_description(""),
+
+    Option("rbd_cache_max_dirty", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(24<<20)
+    .set_description(""),
+
+    Option("rbd_cache_target_dirty", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(16<<20)
+    .set_description(""),
+
+    Option("rbd_cache_max_dirty_age", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(1.0)
+    .set_description(""),
+
+    Option("rbd_cache_max_dirty_object", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
+
+    Option("rbd_cache_block_writes_upfront", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
+
+    Option("rbd_concurrent_management_ops", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(10)
+    .set_min(1)
+    .set_description(""),
 
-  Option("rbd_balance_parent_reads", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("rbd_balance_snap_reads", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("rbd_localize_parent_reads", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("rbd_localize_snap_reads", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("rbd_readahead_trigger_requests", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(10)
-  .set_description(""),
+    Option("rbd_balance_parent_reads", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("rbd_readahead_max_bytes", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(512 * 1024)
-  .set_description(""),
+    Option("rbd_localize_parent_reads", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("rbd_readahead_disable_after_bytes", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(50 * 1024 * 1024)
-  .set_description(""),
+    Option("rbd_readahead_trigger_requests", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(10)
+    .set_description(""),
 
-  Option("rbd_clone_copy_on_read", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("rbd_readahead_max_bytes", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(512 * 1024)
+    .set_description(""),
 
-  Option("rbd_blacklist_on_break_lock", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("rbd_readahead_disable_after_bytes", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(50 * 1024 * 1024)
+    .set_description(""),
 
-  Option("rbd_blacklist_expire_seconds", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("rbd_clone_copy_on_read", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("rbd_request_timed_out_seconds", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(30)
-  .set_description(""),
+    Option("rbd_blacklist_on_break_lock", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("rbd_skip_partial_discard", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("rbd_blacklist_expire_seconds", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
 
-  Option("rbd_enable_alloc_hint", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("rbd_request_timed_out_seconds", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(30)
+    .set_description(""),
 
-  Option("rbd_tracing", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("rbd_skip_partial_discard", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("rbd_blkin_trace_all", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("rbd_enable_alloc_hint", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("rbd_validate_pool", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("rbd_tracing", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("rbd_validate_names", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
-
-  Option("rbd_auto_exclusive_lock_until_manual_request", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
-
-  Option("rbd_mirroring_resync_after_disconnect", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
-
-  Option("rbd_mirroring_replay_delay", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
-
-  Option("rbd_default_format", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(2)
-  .set_description(""),
-
-  Option("rbd_default_order", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(22)
-  .set_description(""),
-
-  Option("rbd_default_stripe_count", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
-
-  Option("rbd_default_stripe_unit", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
-
-  Option("rbd_default_map_options", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
+    Option("rbd_blkin_trace_all", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("rbd_journal_order", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(24)
-  .set_description(""),
+    Option("rbd_validate_pool", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("rbd_journal_splay_width", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(4)
-  .set_description(""),
+    Option("rbd_validate_names", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
+
+    Option("rbd_auto_exclusive_lock_until_manual_request", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
+
+    Option("rbd_mirroring_resync_after_disconnect", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
+
+    Option("rbd_mirroring_replay_delay", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
+
+    Option("rbd_default_format", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(2)
+    .set_description(""),
+
+    Option("rbd_default_order", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(22)
+    .set_description(""),
+
+    Option("rbd_default_stripe_count", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
+
+    Option("rbd_default_stripe_unit", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
+
+    Option("rbd_default_map_options", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
 
-  Option("rbd_journal_commit_age", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(5)
-  .set_description(""),
+    Option("rbd_journal_order", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(24)
+    .set_description(""),
 
-  Option("rbd_journal_object_flush_interval", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("rbd_journal_splay_width", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(4)
+    .set_description(""),
 
-  Option("rbd_journal_object_flush_bytes", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("rbd_journal_commit_age", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(5)
+    .set_description(""),
 
-  Option("rbd_journal_object_flush_age", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("rbd_journal_object_flush_interval", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
 
-  Option("rbd_journal_pool", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
+    Option("rbd_journal_object_flush_bytes", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
 
-  Option("rbd_journal_max_payload_bytes", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(16384)
-  .set_description(""),
+    Option("rbd_journal_object_flush_age", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
 
-  Option("rbd_journal_max_concurrent_object_sets", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("rbd_journal_pool", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
 
-  Option("rbd_mirror_journal_commit_age", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(5)
-  .set_description(""),
+    Option("rbd_journal_max_payload_bytes", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(16384)
+    .set_description(""),
 
-  Option("rbd_mirror_journal_poll_age", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(5)
-  .set_description(""),
+    Option("rbd_journal_max_concurrent_object_sets", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
 
-  Option("rbd_mirror_journal_max_fetch_bytes", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(32768)
-  .set_description(""),
+    Option("rbd_mirror_journal_commit_age", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(5)
+    .set_description(""),
 
-  Option("rbd_mirror_sync_point_update_age", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(30)
-  .set_description(""),
+    Option("rbd_mirror_journal_poll_age", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(5)
+    .set_description(""),
 
-  Option("rbd_mirror_concurrent_image_syncs", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(5)
-  .set_description(""),
+    Option("rbd_mirror_journal_max_fetch_bytes", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(32768)
+    .set_description(""),
 
-  Option("rbd_mirror_pool_replayers_refresh_interval", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(30)
-  .set_description(""),
+    Option("rbd_mirror_sync_point_update_age", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(30)
+    .set_description(""),
 
-  Option("rbd_mirror_delete_retry_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(30)
-  .set_description(""),
+    Option("rbd_mirror_concurrent_image_syncs", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(5)
+    .set_description(""),
 
-  Option("rbd_mirror_image_state_check_interval", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(30)
-  .set_min(1)
-  .set_description(""),
-
-  Option("rbd_mirror_leader_heartbeat_interval", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(5)
-  .set_min(1)
-  .set_description(""),
-
-  Option("rbd_mirror_leader_max_missed_heartbeats", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(2)
-  .set_description(""),
-
-  Option("rbd_mirror_leader_max_acquire_attempts_before_break", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(3)
-  .set_description(""),
-};
-
-std::vector<Option> mds_options = {
-  Option("mds_data", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("/var/lib/ceph/mds/$cluster-$id")
-  .set_description(""),
+    Option("rbd_mirror_pool_replayers_refresh_interval", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(30)
+    .set_description(""),
 
-  Option("mds_max_file_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(1ULL << 40)
-  .set_description(""),
+    Option("rbd_mirror_delete_retry_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(30)
+    .set_description(""),
 
-  Option("mds_max_xattr_pairs_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(64 << 10)
-  .set_description(""),
+    Option("rbd_mirror_image_state_check_interval", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(30)
+    .set_min(1)
+    .set_description(""),
+
+    Option("rbd_mirror_leader_heartbeat_interval", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(5)
+    .set_min(1)
+    .set_description(""),
+
+    Option("rbd_mirror_leader_max_missed_heartbeats", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(2)
+    .set_description(""),
+
+    Option("rbd_mirror_leader_max_acquire_attempts_before_break", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(3)
+    .set_description(""),
+  });
+}
+
+std::vector<Option> get_mds_options() {
+  return std::vector<Option>({
+    Option("mds_data", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("/var/lib/ceph/mds/$cluster-$id")
+    .set_description(""),
 
-  Option("mds_cache_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(100000)
-  .set_description(""),
+    Option("mds_max_file_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(1ULL << 40)
+    .set_description(""),
 
-  Option("mds_cache_mid", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(.7)
-  .set_description(""),
+    Option("mds_max_xattr_pairs_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(64 << 10)
+    .set_description(""),
 
-  Option("mds_max_file_recover", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(32)
-  .set_description(""),
+    Option("mds_cache_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(100000)
+    .set_description(""),
 
-  Option("mds_dir_max_commit_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(10)
-  .set_description(""),
+    Option("mds_cache_mid", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(.7)
+    .set_description(""),
 
-  Option("mds_dir_keys_per_op", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(16384)
-  .set_description(""),
+    Option("mds_max_file_recover", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(32)
+    .set_description(""),
 
-  Option("mds_decay_halflife", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(5)
-  .set_description(""),
+    Option("mds_dir_max_commit_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(10)
+    .set_description(""),
 
-  Option("mds_beacon_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(4)
-  .set_description(""),
+    Option("mds_dir_keys_per_op", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(16384)
+    .set_description(""),
 
-  Option("mds_beacon_grace", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(15)
-  .set_description(""),
+    Option("mds_decay_halflife", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(5)
+    .set_description(""),
 
-  Option("mds_enforce_unique_name", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("mds_beacon_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(4)
+    .set_description(""),
 
-  Option("mds_blacklist_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(24.0*60.0)
-  .set_description(""),
+    Option("mds_beacon_grace", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(15)
+    .set_description(""),
 
-  Option("mds_session_timeout", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(60)
-  .set_description(""),
+    Option("mds_enforce_unique_name", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("mds_session_blacklist_on_timeout", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("mds_blacklist_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(24.0*60.0)
+    .set_description(""),
 
-  Option("mds_session_blacklist_on_evict", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("mds_session_timeout", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(60)
+    .set_description(""),
 
-  Option("mds_sessionmap_keys_per_op", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(1024)
-  .set_description(""),
+    Option("mds_session_blacklist_on_timeout", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("mds_revoke_cap_timeout", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(60)
-  .set_description(""),
+    Option("mds_session_blacklist_on_evict", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("mds_recall_state_timeout", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(60)
-  .set_description(""),
+    Option("mds_sessionmap_keys_per_op", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(1024)
+    .set_description(""),
 
-  Option("mds_freeze_tree_timeout", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(30)
-  .set_description(""),
+    Option("mds_revoke_cap_timeout", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(60)
+    .set_description(""),
 
-  Option("mds_session_autoclose", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(300)
-  .set_description(""),
+    Option("mds_recall_state_timeout", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(60)
+    .set_description(""),
 
-  Option("mds_health_summarize_threshold", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(10)
-  .set_description(""),
+    Option("mds_freeze_tree_timeout", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(30)
+    .set_description(""),
 
-  Option("mds_health_cache_threshold", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(1.5)
-  .set_description(""),
+    Option("mds_session_autoclose", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(300)
+    .set_description(""),
 
-  Option("mds_reconnect_timeout", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(45)
-  .set_description(""),
+    Option("mds_health_summarize_threshold", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(10)
+    .set_description(""),
 
-  Option("mds_tick_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(5)
-  .set_description(""),
+    Option("mds_health_cache_threshold", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(1.5)
+    .set_description(""),
 
-  Option("mds_dirstat_min_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(1)
-  .set_description(""),
+    Option("mds_reconnect_timeout", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(45)
+    .set_description(""),
 
-  Option("mds_scatter_nudge_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(5)
-  .set_description(""),
+    Option("mds_tick_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(5)
+    .set_description(""),
 
-  Option("mds_client_prealloc_inos", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(1000)
-  .set_description(""),
+    Option("mds_dirstat_min_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(1)
+    .set_description(""),
 
-  Option("mds_early_reply", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("mds_scatter_nudge_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(5)
+    .set_description(""),
 
-  Option("mds_default_dir_hash", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(CEPH_STR_HASH_RJENKINS)
-  .set_description(""),
+    Option("mds_client_prealloc_inos", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(1000)
+    .set_description(""),
 
-  Option("mds_log_pause", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("mds_early_reply", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("mds_log_skip_corrupt_events", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("mds_default_dir_hash", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(CEPH_STR_HASH_RJENKINS)
+    .set_description(""),
 
-  Option("mds_log_max_events", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(-1)
-  .set_description(""),
+    Option("mds_log_pause", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("mds_log_events_per_segment", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(1024)
-  .set_description(""),
+    Option("mds_log_skip_corrupt_events", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("mds_log_segment_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("mds_log_max_events", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(-1)
+    .set_description(""),
 
-  Option("mds_log_max_segments", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(30)
-  .set_description(""),
+    Option("mds_log_events_per_segment", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(1024)
+    .set_description(""),
 
-  Option("mds_log_max_expiring", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(20)
-  .set_description(""),
+    Option("mds_log_segment_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
 
-  Option("mds_bal_export_pin", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("mds_log_max_segments", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(30)
+    .set_description(""),
 
-  Option("mds_bal_sample_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(3.0)
-  .set_description(""),
+    Option("mds_log_max_expiring", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(20)
+    .set_description(""),
 
-  Option("mds_bal_replicate_threshold", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(8000)
-  .set_description(""),
+    Option("mds_bal_export_pin", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("mds_bal_unreplicate_threshold", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("mds_bal_sample_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(3.0)
+    .set_description(""),
 
-  Option("mds_bal_frag", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("mds_bal_replicate_threshold", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(8000)
+    .set_description(""),
 
-  Option("mds_bal_split_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(10000)
-  .set_description(""),
+    Option("mds_bal_unreplicate_threshold", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
 
-  Option("mds_bal_split_rd", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(25000)
-  .set_description(""),
+    Option("mds_bal_frag", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("mds_bal_split_wr", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(10000)
-  .set_description(""),
+    Option("mds_bal_split_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(10000)
+    .set_description(""),
 
-  Option("mds_bal_split_bits", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(3)
-  .set_description(""),
+    Option("mds_bal_split_rd", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(25000)
+    .set_description(""),
 
-  Option("mds_bal_merge_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(50)
-  .set_description(""),
+    Option("mds_bal_split_wr", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(10000)
+    .set_description(""),
 
-  Option("mds_bal_interval", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(10)
-  .set_description(""),
+    Option("mds_bal_split_bits", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(3)
+    .set_description(""),
 
-  Option("mds_bal_fragment_interval", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(5)
-  .set_description(""),
+    Option("mds_bal_merge_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(50)
+    .set_description(""),
 
-  Option("mds_bal_fragment_size_max", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(10000*10)
-  .set_description(""),
+    Option("mds_bal_interval", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(10)
+    .set_description(""),
 
-  Option("mds_bal_fragment_fast_factor", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(1.5)
-  .set_description(""),
+    Option("mds_bal_fragment_interval", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(5)
+    .set_description(""),
 
-  Option("mds_bal_idle_threshold", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("mds_bal_fragment_size_max", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(10000*10)
+    .set_description(""),
 
-  Option("mds_bal_max", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(-1)
-  .set_description(""),
+    Option("mds_bal_fragment_fast_factor", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(1.5)
+    .set_description(""),
 
-  Option("mds_bal_max_until", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(-1)
-  .set_description(""),
+    Option("mds_bal_idle_threshold", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
 
-  Option("mds_bal_mode", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("mds_bal_max", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(-1)
+    .set_description(""),
 
-  Option("mds_bal_min_rebalance", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(.1)
-  .set_description(""),
+    Option("mds_bal_max_until", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(-1)
+    .set_description(""),
 
-  Option("mds_bal_min_start", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(.2)
-  .set_description(""),
+    Option("mds_bal_mode", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
 
-  Option("mds_bal_need_min", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(.8)
-  .set_description(""),
+    Option("mds_bal_min_rebalance", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(.1)
+    .set_description(""),
 
-  Option("mds_bal_need_max", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(1.2)
-  .set_description(""),
+    Option("mds_bal_min_start", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(.2)
+    .set_description(""),
 
-  Option("mds_bal_midchunk", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(.3)
-  .set_description(""),
+    Option("mds_bal_need_min", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(.8)
+    .set_description(""),
 
-  Option("mds_bal_minchunk", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(.001)
-  .set_description(""),
+    Option("mds_bal_need_max", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(1.2)
+    .set_description(""),
 
-  Option("mds_bal_target_decay", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(10.0)
-  .set_description(""),
+    Option("mds_bal_midchunk", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(.3)
+    .set_description(""),
 
-  Option("mds_replay_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(1.0)
-  .set_description(""),
+    Option("mds_bal_minchunk", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(.001)
+    .set_description(""),
 
-  Option("mds_shutdown_check", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("mds_bal_target_decay", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(10.0)
+    .set_description(""),
 
-  Option("mds_thrash_exports", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("mds_replay_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(1.0)
+    .set_description(""),
 
-  Option("mds_thrash_fragments", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("mds_shutdown_check", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
 
-  Option("mds_dump_cache_on_map", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("mds_thrash_exports", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
 
-  Option("mds_dump_cache_after_rejoin", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("mds_thrash_fragments", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
 
-  Option("mds_verify_scatter", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("mds_dump_cache_on_map", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("mds_debug_scatterstat", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .set_description(""),
+    Option("mds_dump_cache_after_rejoin", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("mds_debug_frag", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .set_description(""),
+    Option("mds_verify_scatter", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("mds_debug_auth_pins", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .set_description(""),
+    Option("mds_debug_scatterstat", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description(""),
 
-  Option("mds_debug_subtrees", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .set_description(""),
+    Option("mds_debug_frag", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description(""),
 
-  Option("mds_kill_mdstable_at", Option::TYPE_INT, Option::LEVEL_DEV)
-  .set_default(0)
-  .set_description(""),
+    Option("mds_debug_auth_pins", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description(""),
 
-  Option("mds_kill_export_at", Option::TYPE_INT, Option::LEVEL_DEV)
-  .set_default(0)
-  .set_description(""),
+    Option("mds_debug_subtrees", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description(""),
 
-  Option("mds_kill_import_at", Option::TYPE_INT, Option::LEVEL_DEV)
-  .set_default(0)
-  .set_description(""),
+    Option("mds_kill_mdstable_at", Option::TYPE_INT, Option::LEVEL_DEV)
+    .set_default(0)
+    .set_description(""),
 
-  Option("mds_kill_link_at", Option::TYPE_INT, Option::LEVEL_DEV)
-  .set_default(0)
-  .set_description(""),
+    Option("mds_kill_export_at", Option::TYPE_INT, Option::LEVEL_DEV)
+    .set_default(0)
+    .set_description(""),
 
-  Option("mds_kill_rename_at", Option::TYPE_INT, Option::LEVEL_DEV)
-  .set_default(0)
-  .set_description(""),
+    Option("mds_kill_import_at", Option::TYPE_INT, Option::LEVEL_DEV)
+    .set_default(0)
+    .set_description(""),
 
-  Option("mds_kill_openc_at", Option::TYPE_INT, Option::LEVEL_DEV)
-  .set_default(0)
-  .set_description(""),
+    Option("mds_kill_link_at", Option::TYPE_INT, Option::LEVEL_DEV)
+    .set_default(0)
+    .set_description(""),
 
-  Option("mds_kill_journal_at", Option::TYPE_INT, Option::LEVEL_DEV)
-  .set_default(0)
-  .set_description(""),
+    Option("mds_kill_rename_at", Option::TYPE_INT, Option::LEVEL_DEV)
+    .set_default(0)
+    .set_description(""),
 
-  Option("mds_kill_journal_expire_at", Option::TYPE_INT, Option::LEVEL_DEV)
-  .set_default(0)
-  .set_description(""),
+    Option("mds_kill_openc_at", Option::TYPE_INT, Option::LEVEL_DEV)
+    .set_default(0)
+    .set_description(""),
 
-  Option("mds_kill_journal_replay_at", Option::TYPE_INT, Option::LEVEL_DEV)
-  .set_default(0)
-  .set_description(""),
+    Option("mds_kill_journal_at", Option::TYPE_INT, Option::LEVEL_DEV)
+    .set_default(0)
+    .set_description(""),
 
-  Option("mds_journal_format", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(1)
-  .set_description(""),
+    Option("mds_kill_journal_expire_at", Option::TYPE_INT, Option::LEVEL_DEV)
+    .set_default(0)
+    .set_description(""),
 
-  Option("mds_kill_create_at", Option::TYPE_INT, Option::LEVEL_DEV)
-  .set_default(0)
-  .set_description(""),
+    Option("mds_kill_journal_replay_at", Option::TYPE_INT, Option::LEVEL_DEV)
+    .set_default(0)
+    .set_description(""),
 
-  Option("mds_inject_traceless_reply_probability", Option::TYPE_FLOAT, Option::LEVEL_DEV)
-  .set_default(0)
-  .set_description(""),
+    Option("mds_journal_format", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(1)
+    .set_description(""),
 
-  Option("mds_wipe_sessions", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("mds_kill_create_at", Option::TYPE_INT, Option::LEVEL_DEV)
+    .set_default(0)
+    .set_description(""),
 
-  Option("mds_wipe_ino_prealloc", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("mds_inject_traceless_reply_probability", Option::TYPE_FLOAT, Option::LEVEL_DEV)
+    .set_default(0)
+    .set_description(""),
 
-  Option("mds_skip_ino", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("mds_wipe_sessions", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
 
-  Option("mds_standby_for_name", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
+    Option("mds_wipe_ino_prealloc", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
 
-  Option("mds_standby_for_rank", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(-1)
-  .set_description(""),
+    Option("mds_skip_ino", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
 
-  Option("mds_standby_for_fscid", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(-1)
-  .set_description(""),
+    Option("mds_standby_for_name", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
 
-  Option("mds_standby_replay", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("mds_standby_for_rank", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(-1)
+    .set_description(""),
 
-  Option("mds_enable_op_tracker", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("mds_standby_for_fscid", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(-1)
+    .set_description(""),
 
-  Option("mds_op_history_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(20)
-  .set_description(""),
+    Option("mds_standby_replay", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("mds_op_history_duration", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(600)
-  .set_description(""),
+    Option("mds_enable_op_tracker", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("mds_op_complaint_time", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(30)
-  .set_description(""),
+    Option("mds_op_history_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(20)
+    .set_description(""),
 
-  Option("mds_op_log_threshold", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(5)
-  .set_description(""),
+    Option("mds_op_history_duration", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(600)
+    .set_description(""),
 
-  Option("mds_snap_min_uid", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("mds_op_complaint_time", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(30)
+    .set_description(""),
 
-  Option("mds_snap_max_uid", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(4294967294)
-  .set_description(""),
+    Option("mds_op_log_threshold", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(5)
+    .set_description(""),
 
-  Option("mds_snap_rstat", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("mds_snap_min_uid", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
 
-  Option("mds_verify_backtrace", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(1)
-  .set_description(""),
+    Option("mds_snap_max_uid", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(4294967294)
+    .set_description(""),
 
-  Option("mds_max_completed_flushes", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(100000)
-  .set_description(""),
+    Option("mds_snap_rstat", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("mds_max_completed_requests", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(100000)
-  .set_description(""),
+    Option("mds_verify_backtrace", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(1)
+    .set_description(""),
 
-  Option("mds_action_on_write_error", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(1)
-  .set_description(""),
+    Option("mds_max_completed_flushes", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(100000)
+    .set_description(""),
 
-  Option("mds_mon_shutdown_timeout", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(5)
-  .set_description(""),
+    Option("mds_max_completed_requests", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(100000)
+    .set_description(""),
 
-  Option("mds_max_purge_files", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(64)
-  .set_description(""),
+    Option("mds_action_on_write_error", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(1)
+    .set_description(""),
 
-  Option("mds_max_purge_ops", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(8192)
-  .set_description(""),
+    Option("mds_mon_shutdown_timeout", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(5)
+    .set_description(""),
 
-  Option("mds_max_purge_ops_per_pg", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(0.5)
-  .set_description(""),
+    Option("mds_max_purge_files", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(64)
+    .set_description(""),
 
-  Option("mds_purge_queue_busy_flush_period", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(1.0)
-  .set_description(""),
+    Option("mds_max_purge_ops", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(8192)
+    .set_description(""),
 
-  Option("mds_root_ino_uid", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("mds_max_purge_ops_per_pg", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(0.5)
+    .set_description(""),
 
-  Option("mds_root_ino_gid", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("mds_purge_queue_busy_flush_period", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(1.0)
+    .set_description(""),
 
-  Option("mds_max_scrub_ops_in_progress", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(5)
-  .set_description(""),
+    Option("mds_root_ino_uid", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
 
-  Option("mds_damage_table_max_entries", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(10000)
-  .set_description(""),
+    Option("mds_root_ino_gid", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
 
-  Option("mds_client_writeable_range_max_inc_objs", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(1024)
-  .set_description(""),
-};
-
-std::vector<Option> mds_client_options = {
-  Option("client_cache_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(16384)
-  .set_description(""),
+    Option("mds_max_scrub_ops_in_progress", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(5)
+    .set_description(""),
 
-  Option("client_cache_mid", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(.75)
-  .set_description(""),
+    Option("mds_damage_table_max_entries", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(10000)
+    .set_description(""),
+
+    Option("mds_client_writeable_range_max_inc_objs", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(1024)
+    .set_description(""),
+  });
+}
 
-  Option("client_use_random_mds", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+std::vector<Option> get_mds_client_options() {
+  return std::vector<Option>({
+    Option("client_cache_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(16384)
+    .set_description(""),
 
-  Option("client_mount_timeout", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(300.0)
-  .set_description(""),
+    Option("client_cache_mid", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(.75)
+    .set_description(""),
 
-  Option("client_tick_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(1.0)
-  .set_description(""),
+    Option("client_use_random_mds", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("client_trace", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
+    Option("client_mount_timeout", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(300.0)
+    .set_description(""),
 
-  Option("client_readahead_min", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(128*1024)
-  .set_description(""),
+    Option("client_tick_interval", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(1.0)
+    .set_description(""),
 
-  Option("client_readahead_max_bytes", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(0)
-  .set_description(""),
+    Option("client_trace", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
 
-  Option("client_readahead_max_periods", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(4)
-  .set_description(""),
+    Option("client_readahead_min", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(128*1024)
+    .set_description(""),
 
-  Option("client_reconnect_stale", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("client_readahead_max_bytes", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(0)
+    .set_description(""),
 
-  Option("client_snapdir", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default(".snap")
-  .set_description(""),
+    Option("client_readahead_max_periods", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(4)
+    .set_description(""),
 
-  Option("client_mountpoint", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("/")
-  .set_description(""),
+    Option("client_reconnect_stale", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("client_mount_uid", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(-1)
-  .set_description(""),
+    Option("client_snapdir", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default(".snap")
+    .set_description(""),
 
-  Option("client_mount_gid", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(-1)
-  .set_description(""),
+    Option("client_mountpoint", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("/")
+    .set_description(""),
 
-  Option("client_notify_timeout", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(10)
-  .set_description(""),
+    Option("client_mount_uid", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(-1)
+    .set_description(""),
 
-  Option("osd_client_watch_timeout", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(30)
-  .set_description(""),
+    Option("client_mount_gid", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(-1)
+    .set_description(""),
 
-  Option("client_caps_release_delay", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(5)
-  .set_description(""),
+    Option("client_notify_timeout", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(10)
+    .set_description(""),
 
-  Option("client_quota_df", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("osd_client_watch_timeout", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(30)
+    .set_description(""),
 
-  Option("client_oc", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("client_caps_release_delay", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(5)
+    .set_description(""),
 
-  Option("client_oc_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(1024*1024* 200)
-  .set_description(""),
+    Option("client_quota_df", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("client_oc_max_dirty", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(1024*1024* 100)
-  .set_description(""),
+    Option("client_oc", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("client_oc_target_dirty", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(1024*1024* 8)
-  .set_description(""),
+    Option("client_oc_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(1024*1024* 200)
+    .set_description(""),
 
-  Option("client_oc_max_dirty_age", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-  .set_default(5.0)
-  .set_description(""),
+    Option("client_oc_max_dirty", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(1024*1024* 100)
+    .set_description(""),
 
-  Option("client_oc_max_objects", Option::TYPE_INT, Option::LEVEL_ADVANCED)
-  .set_default(1000)
-  .set_description(""),
+    Option("client_oc_target_dirty", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(1024*1024* 8)
+    .set_description(""),
 
-  Option("client_debug_getattr_caps", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .set_description(""),
-
-  Option("client_debug_force_sync_read", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .set_description(""),
+    Option("client_oc_max_dirty_age", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+    .set_default(5.0)
+    .set_description(""),
 
-  Option("client_debug_inject_tick_delay", Option::TYPE_INT, Option::LEVEL_DEV)
-  .set_default(0)
-  .set_description(""),
+    Option("client_oc_max_objects", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+    .set_default(1000)
+    .set_description(""),
 
-  Option("client_max_inline_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
-  .set_default(4096)
-  .set_description(""),
+    Option("client_debug_getattr_caps", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description(""),
+
+    Option("client_debug_force_sync_read", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description(""),
 
-  Option("client_inject_release_failure", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .set_description(""),
+    Option("client_debug_inject_tick_delay", Option::TYPE_INT, Option::LEVEL_DEV)
+    .set_default(0)
+    .set_description(""),
 
-  Option("client_inject_fixed_oldest_tid", Option::TYPE_BOOL, Option::LEVEL_DEV)
-  .set_default(false)
-  .set_description(""),
+    Option("client_max_inline_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(4096)
+    .set_description(""),
 
-  Option("client_metadata", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
+    Option("client_inject_release_failure", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description(""),
 
-  Option("client_acl_type", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
+    Option("client_inject_fixed_oldest_tid", Option::TYPE_BOOL, Option::LEVEL_DEV)
+    .set_default(false)
+    .set_description(""),
 
-  Option("client_permissions", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("client_metadata", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
 
-  Option("client_dirsize_rbytes", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("client_acl_type", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
 
-  Option("fuse_use_invalidate_cb", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("client_permissions", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("fuse_disable_pagecache", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("client_dirsize_rbytes", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("fuse_allow_other", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("fuse_use_invalidate_cb", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("fuse_default_permissions", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("fuse_disable_pagecache", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("fuse_big_writes", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("fuse_allow_other", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("fuse_atomic_o_trunc", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("fuse_default_permissions", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("fuse_debug", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("fuse_big_writes", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("fuse_multithreaded", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("fuse_atomic_o_trunc", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("fuse_require_active_mds", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("fuse_debug", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("fuse_syncfs_on_mksnap", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("fuse_multithreaded", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("fuse_set_user_groups", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("fuse_require_active_mds", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("client_try_dentry_invalidate", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("fuse_syncfs_on_mksnap", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("client_die_on_failed_remount", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("fuse_set_user_groups", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
 
-  Option("client_check_pool_perm", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(true)
-  .set_description(""),
+    Option("client_try_dentry_invalidate", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("client_use_faked_inos", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
-  .set_default(false)
-  .set_description(""),
+    Option("client_die_on_failed_remount", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
 
-  Option("client_mds_namespace", Option::TYPE_STR, Option::LEVEL_ADVANCED)
-  .set_default("")
-  .set_description(""),
-};
+    Option("client_check_pool_perm", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description(""),
+
+    Option("client_use_faked_inos", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(false)
+    .set_description(""),
+
+    Option("client_mds_namespace", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+    .set_default("")
+    .set_description(""),
+  });
+}
 
 
 static std::vector<Option> build_options()
 {
-  std::vector<Option> result = global_options;
+  std::vector<Option> result = get_global_options();
 
-  auto ingest = [&result](std::vector<Option> &options, const char* svc) {
+  auto ingest = [&result](std::vector<Option>&& options, const char* svc) {
     for (const auto &o_in : options) {
       Option o(o_in);
       o.add_service(svc);
@@ -5662,10 +5937,10 @@ static std::vector<Option> build_options()
     }
   };
 
-  ingest(rgw_options, "rgw");
-  ingest(rbd_options, "rbd");
-  ingest(mds_options, "mds");
-  ingest(mds_client_options, "mds_client");
+  ingest(get_rgw_options(), "rgw");
+  ingest(get_rbd_options(), "rbd");
+  ingest(get_mds_options(), "mds");
+  ingest(get_mds_client_options(), "mds_client");
 
   return result;
 }
index d59fdaaff5cde43737c6031fee58d1b774c2e8e0..eb4f7b7cf4a6e70144da8341918ac18abcc7a309 100644 (file)
@@ -291,8 +291,6 @@ int CrushCompiler::decompile_choose_args(const std::pair<const long unsigned int
 
 int CrushCompiler::decompile(ostream &out)
 {
-  crush.cleanup_classes();
-
   out << "# begin crush map\n";
 
   // only dump tunables if they differ from the defaults
@@ -734,7 +732,7 @@ int CrushCompiler::parse_bucket(iter_t const& i)
   }
 
   for (auto &i : class_id)
-    crush.class_bucket[id][i.first] = i.second;
+    class_bucket[id][i.first] = i.second;
 
   if (verbose) err << "bucket " << name << " (" << id << ") " << size << " items and weight "
                   << (float)bucketweight / (float)0x10000 << std::endl;
@@ -1084,7 +1082,7 @@ int CrushCompiler::parse_crush(iter_t const& i)
     case crush_grammar::_crushrule:
       if (!saw_rule) {
        saw_rule = true;
-       crush.populate_classes();
+       crush.populate_classes(class_bucket);
       }
       r = parse_rule(p);
       break;
@@ -1100,7 +1098,6 @@ int CrushCompiler::parse_crush(iter_t const& i)
   }
 
   //err << "max_devices " << crush.get_max_devices() << std::endl;
-  crush.cleanup_classes();
   crush.finalize();
 
   return 0;
index 7bfd25995323903d5a27ec68d6f90d2a2eb3b5d4..f035085e70ecb5a38eb85bca33a69b4b4a1a4998 100644 (file)
@@ -53,6 +53,7 @@ class CrushCompiler {
   map<int, unsigned> item_weight;
   map<string, int> type_id;
   map<string, int> rule_id;
+  std::map<int32_t, map<int32_t, int32_t> > class_bucket; // bucket id -> class id -> shadow bucket id
 
   string string_node(node_t &node);
   int int_node(node_t &node); 
index 5bfbc14462369064abe66ba1cf07a07a904d4d25..4a3da9b82ff93d6e3095c2f065f2141cdba52999 100644 (file)
@@ -127,16 +127,33 @@ namespace CrushTreeDumper {
       touched.insert(qi.id);
 
       if (qi.is_bucket()) {
-       // queue bucket contents...
+       // queue bucket contents, sorted by (class, name)
        int s = crush->get_bucket_size(qi.id);
+       map<string,pair<int,float>> sorted;
        for (int k = s - 1; k >= 0; k--) {
          int id = crush->get_bucket_item(qi.id, k);
          if (should_dump(id)) {
-           qi.children.push_back(id);
-           push_front(Item(id, qi.id, qi.depth + 1,
-                           crush->get_bucket_item_weightf(qi.id, k)));
+           string sort_by;
+           if (id >= 0) {
+             const char *c = crush->get_item_class(id);
+             sort_by = c ? c : "";
+             sort_by += "_";
+             char nn[80];
+             snprintf(nn, sizeof(nn), "osd.%08d", id);
+             sort_by += nn;
+           } else {
+             sort_by = "_";
+             sort_by += crush->get_item_name(id);
+           }
+           sorted[sort_by] = make_pair(
+             id, crush->get_bucket_item_weightf(qi.id, k));
          }
        }
+       for (auto p = sorted.rbegin(); p != sorted.rend(); ++p) {
+         qi.children.push_back(p->second.first);
+         push_front(Item(p->second.first, qi.id, qi.depth + 1,
+                         p->second.second));
+       }
       }
       return true;
     }
index 2891056c90564c9e431b7d35ca44d14a68f5fc62..8f2d63e77c8cc3d637f8e481c3f3fd14386eb58a 100644 (file)
@@ -360,6 +360,7 @@ bool CrushWrapper::_maybe_remove_last_instance(CephContext *cct, int item, bool
       class_remove_item(item);
     }
   }
+  rebuild_roots_with_classes();
   return true;
 }
 
@@ -369,8 +370,15 @@ int CrushWrapper::remove_root(int item, bool unused)
     return 0;
 
   crush_bucket *b = get_bucket(item);
-  if (IS_ERR(b))
-    return -ENOENT;
+  if (IS_ERR(b)) {
+    // should be idempotent
+    // e.g.: we use 'crush link' to link the same host into
+    // different roots, which as a result can cause different
+    // shadow trees to reference the same hosts too. This means
+    // we may need to destroy the same buckets (hosts, racks, etc.)
+    // multiple times while rebuilding all shadow trees.
+    return 0;
+  }
 
   for (unsigned n = 0; n < b->size; n++) {
     if (b->items[n] >= 0)
@@ -1356,43 +1364,16 @@ int CrushWrapper::get_parent_of_type(int item, int type) const
   return item;
 }
 
-
-bool CrushWrapper::class_is_in_use(int class_id, ostream *ss)
+int CrushWrapper::populate_classes(
+  const std::map<int32_t, map<int32_t, int32_t>>& old_class_bucket)
 {
-  list<unsigned> rules;
-  for (unsigned i = 0; i < crush->max_rules; ++i) {
-    crush_rule *r = crush->rules[i];
-    if (!r)
-      continue;
-    for (unsigned j = 0; j < r->len; ++j) {
-      if (r->steps[j].op == CRUSH_RULE_TAKE) {
-        int root = r->steps[j].arg1;
-        for (auto &p : class_bucket) {
-          auto& q = p.second;
-          if (q.count(class_id) && q[class_id] == root) {
-            rules.push_back(i);
-          }
-        }
-      }
-    }
-  }
-  if (rules.empty()) {
-    return false;
-  }
-  if (ss) {
-    ostringstream os;
-    for (auto &p: rules) {
-      os << "'" << get_rule_name(p) <<"',";
+  // build set of previously used shadow ids
+  set<int32_t> used_ids;
+  for (auto& p : old_class_bucket) {
+    for (auto& q : p.second) {
+      used_ids.insert(q.second);
     }
-    string out(os.str());
-    out.resize(out.size() - 1); // drop last ','
-    *ss << "still referenced by crush_rule(s): " << out;
   }
-  return true;
-}
-
-int CrushWrapper::populate_classes()
-{
   set<int> roots;
   find_nonshadow_roots(roots);
   for (auto &r : roots) {
@@ -1400,7 +1381,8 @@ int CrushWrapper::populate_classes()
       continue;
     for (auto &c : class_name) {
       int clone;
-      int res = device_class_clone(r, c.first, &clone);
+      int res = device_class_clone(r, c.first, old_class_bucket, used_ids,
+                                  &clone);
       if (res < 0)
        return res;
     }
@@ -1408,11 +1390,6 @@ int CrushWrapper::populate_classes()
   return 0;
 }
 
-int CrushWrapper::cleanup_classes()
-{
-  return trim_roots_with_class(true);
-}
-
 int CrushWrapper::trim_roots_with_class(bool unused)
 {
   set<int> roots;
@@ -1828,18 +1805,6 @@ int CrushWrapper::remove_device_class(CephContext *cct, int id, ostream *ss)
   }
   class_remove_item(id);
 
-  // note that there is no need to remove ourselves from shadow parent
-  // and reweight because we are going to destroy all shadow trees
-  // rebuild them all (if necessary) later.
-
-  // see if there is any osds that still reference this class
-  set<int> devices;
-  get_devices_by_class(class_name, &devices);
-  if (devices.empty()) {
-    // class has no more devices
-    remove_class_name(class_name);
-  }
-
   int r = rebuild_roots_with_classes();
   if (r < 0) {
     *ss << "unable to rebuild roots with class '" << class_name << "' "
@@ -1849,7 +1814,11 @@ int CrushWrapper::remove_device_class(CephContext *cct, int id, ostream *ss)
   return 0;
 }
 
-int CrushWrapper::device_class_clone(int original_id, int device_class, int *clone)
+int CrushWrapper::device_class_clone(
+  int original_id, int device_class,
+  const std::map<int32_t, map<int32_t, int32_t>>& old_class_bucket,
+  const std::set<int32_t>& used_ids,
+  int *clone)
 {
   const char *item_name = get_item_name(original_id);
   if (item_name == NULL)
@@ -1863,15 +1832,13 @@ int CrushWrapper::device_class_clone(int original_id, int device_class, int *clo
     return 0;
   }
   crush_bucket *original = get_bucket(original_id);
-  if (IS_ERR(original))
-    return -ENOENT;
+  assert(!IS_ERR(original));
   crush_bucket *copy = crush_make_bucket(crush,
                                         original->alg,
                                         original->hash,
                                         original->type,
                                         0, NULL, NULL);
-  if(copy == NULL)
-    return -ENOMEM;
+  assert(copy);
   for (unsigned i = 0; i < original->size; i++) {
     int item = original->items[i];
     int weight = crush_get_bucket_item_weight(original, i);
@@ -1883,20 +1850,34 @@ int CrushWrapper::device_class_clone(int original_id, int device_class, int *clo
       }
     } else {
       int child_copy_id;
-      int res = device_class_clone(item, device_class, &child_copy_id);
+      int res = device_class_clone(item, device_class, old_class_bucket,
+                                  used_ids, &child_copy_id);
       if (res < 0)
        return res;
       crush_bucket *child_copy = get_bucket(child_copy_id);
-      if (IS_ERR(child_copy))
-       return -ENOENT;
+      assert(!IS_ERR(child_copy));
       res = bucket_add_item(copy, child_copy_id, child_copy->weight);
       if (res)
        return res;
     }
   }
-  int res = crush_add_bucket(crush, 0, copy, clone);
+  int bno = 0;
+  if (old_class_bucket.count(original_id) &&
+      old_class_bucket.at(original_id).count(device_class)) {
+    bno = old_class_bucket.at(original_id).at(device_class);
+  } else {
+    // pick a new shadow bucket id that is not used by the current map
+    // *or* any previous shadow buckets.
+    bno = -1;
+    while (((-1-bno) < crush->max_buckets && crush->buckets[-1-bno]) ||
+          used_ids.count(bno)) {
+      --bno;
+    }
+  }
+  int res = crush_add_bucket(crush, bno, copy, clone);
   if (res)
     return res;
+  assert(!bno || bno == *clone);
   res = set_item_class(*clone, device_class);
   if (res < 0)
     return res;
@@ -1908,15 +1889,50 @@ int CrushWrapper::device_class_clone(int original_id, int device_class, int *clo
   return 0;
 }
 
+// Tell whether a device class can be discarded.
+//
+// Returns false as soon as either
+//  - class_map assigns the class to an item with a non-negative id, or
+//  - some rule has a TAKE step whose target is a bucket recorded in
+//    class_bucket for this class.
+// Returns true otherwise.
+bool CrushWrapper::_class_is_dead(int class_id)
+{
+  // an item with id >= 0 still mapped to this class keeps it alive;
+  // negative ids are skipped
+  for (auto &item : class_map) {
+    if (item.first < 0)
+      continue;
+    if (item.second == class_id)
+      return false;
+  }
+  // a rule that TAKEs one of this class's per-class buckets keeps it alive
+  for (unsigned ruleno = 0; ruleno < crush->max_rules; ++ruleno) {
+    crush_rule *rule = crush->rules[ruleno];
+    if (!rule)
+      continue;
+    for (unsigned s = 0; s < rule->len; ++s) {
+      if (rule->steps[s].op != CRUSH_RULE_TAKE)
+        continue;
+      int root = rule->steps[s].arg1;
+      for (auto &b : class_bucket) {
+        auto found = b.second.find(class_id);
+        if (found != b.second.end() && found->second == root)
+          return false;
+      }
+    }
+  }
+  // no longer referenced by any devices or crush rules
+  return true;
+}
+
+// Drop every class name for which _class_is_dead() returns true.
+void CrushWrapper::cleanup_dead_classes()
+{
+  // remove_class_name() presumably erases the matching entry from
+  // class_name (TODO confirm against its definition).  Erasing the
+  // element a range-for is currently visiting invalidates the loop's
+  // iterator, so step past the entry first and pass a copy of the name:
+  // neither the iterator nor the string may point into the erased node.
+  auto p = class_name.begin();
+  while (p != class_name.end()) {
+    if (_class_is_dead(p->first)) {
+      string n = p->second;
+      ++p;
+      remove_class_name(n);
+    } else {
+      ++p;
+    }
+  }
+}
+
 int CrushWrapper::rebuild_roots_with_classes()
 {
+  std::map<int32_t, map<int32_t, int32_t> > old_class_bucket = class_bucket;
+  cleanup_dead_classes();
   int r = trim_roots_with_class(false);
   if (r < 0)
     return r;
-  r = populate_classes();
-  if (r < 0)
-    return r;
-  return trim_roots_with_class(true);
+  class_bucket.clear();
+  return populate_classes(old_class_bucket);
 }
 
 void CrushWrapper::encode(bufferlist& bl, uint64_t features) const
@@ -2171,7 +2187,6 @@ void CrushWrapper::decode(bufferlist::iterator& blp)
       for (auto &c : class_name)
        class_rname[c.second] = c.first;
       ::decode(class_bucket, blp);
-      cleanup_classes();
     }
     if (!blp.end()) {
       __u32 choose_args_size;
@@ -2650,6 +2665,7 @@ public:
 
   void dump(TextTable *tbl) {
     tbl->define_column("ID", TextTable::LEFT, TextTable::RIGHT);
+    tbl->define_column("CLASS", TextTable::LEFT, TextTable::RIGHT);
     tbl->define_column("WEIGHT", TextTable::LEFT, TextTable::RIGHT);
     for (auto& p : crush->choose_args) {
       if (p.first == CrushWrapper::DEFAULT_CHOOSE_ARGS) {
@@ -2668,7 +2684,11 @@ public:
 
 protected:
   void dump_item(const CrushTreeDumper::Item &qi, TextTable *tbl) override {
+    const char *c = crush->get_item_class(qi.id);
+    if (!c)
+      c = "";
     *tbl << qi.id
+        << c
         << weightf_t(qi.weight);
     for (auto& p : crush->choose_args) {
       if (qi.parent < 0) {
index 94730d53d19637d7f3b81740ec92fc47131a585d..737ba583ececebcd90dd8ae99ddfef6bd8423d43 100644 (file)
@@ -65,6 +65,7 @@ public:
   std::map<int32_t, string> type_map; /* bucket/device type names */
   std::map<int32_t, string> name_map; /* bucket/device names */
   std::map<int32_t, string> rule_name_map;
+
   std::map<int32_t, int32_t> class_map; /* item id -> class id */
   std::map<int32_t, string> class_name; /* class id -> class name */
   std::map<string, int32_t> class_rname; /* class name -> class id */
@@ -1208,13 +1209,18 @@ public:
 
   int update_device_class(int id, const string& class_name, const string& name, ostream *ss);
   int remove_device_class(CephContext *cct, int id, ostream *ss);
-  int device_class_clone(int original, int device_class, int *clone);
-  bool class_is_in_use(int class_id, ostream *ss = nullptr);
-  int populate_classes();
+  int device_class_clone(
+    int original, int device_class,
+    const std::map<int32_t, map<int32_t, int32_t>>& old_class_bucket,
+    const std::set<int32_t>& used_ids,
+    int *clone);
+  int populate_classes(
+    const std::map<int32_t, map<int32_t, int32_t>>& old_class_bucket);
+  bool _class_is_dead(int class_id);
+  void cleanup_dead_classes();
   int rebuild_roots_with_classes();
   /* remove unused roots generated for class devices */
   int trim_roots_with_class(bool unused);
-  int cleanup_classes();
 
   void start_choose_profile() {
     free(crush->choose_tries);
@@ -1257,7 +1263,7 @@ public:
     }
   }
 
-  bool ruleset_exists(int const ruleset) const {
+  bool ruleset_exists(const int ruleset) const {
     for (size_t i = 0; i < crush->max_rules; ++i) {
       if (rule_exists(i) && crush->rules[i]->mask.ruleset == ruleset) {
        return true;
index 9444ea1e96afd0a27250d6d989a0b41f797cdcd2..d133bc6b015067e30169952680045618dc56247b 100644 (file)
@@ -2,9 +2,19 @@ cmake_minimum_required(VERSION 2.8.11)
 
 list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake/modules")
 
+set(CMAKE_CXX_FLAGS
+  "${CMAKE_CXX_FLAGS} -std=c++11 -Wno-write-strings -Wall -pthread")
+
+if(DO_NOT_DELAY_TAG_CALC)
+  add_definitions(-DDO_NOT_DELAY_TAG_CALC)
+endif()
+
 if (NOT(TARGET gtest AND TARGET gtest_main))
-  if(NOT(GTEST_FOUND))
-    find_package(GTest REQUIRED)
+  if (NOT GTEST_FOUND)
+    find_package(GTest QUIET)
+    if (NOT GTEST_FOUND)
+      include(BuildGTest)
+    endif()
   endif()
 endif()
 
@@ -12,7 +22,6 @@ if (NOT(BOOST_FOUND))
   find_package(Boost REQUIRED)
 endif()
 
-# add_subdirectory(support/src)
 add_subdirectory(src)
 add_subdirectory(sim)
 
diff --git a/ceph/src/dmclock/cmake/modules/BuildGTest.cmake b/ceph/src/dmclock/cmake/modules/BuildGTest.cmake
new file mode 100644 (file)
index 0000000..356c7d5
--- /dev/null
@@ -0,0 +1,67 @@
+# _build_gtest(<gtest_root>)
+#
+# Configure and build googletest from the source tree at <gtest_root> as an
+# ExternalProject named "googletest", then declare STATIC IMPORTED targets
+# gtest, gtest_main, gmock and gmock_main that point at the archives expected
+# in the external project's binary directory.
+#
+# Variables set: GTEST_INCLUDE_DIRS, GMOCK_INCLUDE_DIRS, GTEST_LIBRARY,
+# GTEST_LIBRARIES, GTEST_MAIN_LIBRARY, GMOCK_LIBRARY, GMOCK_MAIN_LIBRARY and
+# the matching *_LIBRARY_PATH variables.
+macro(_build_gtest gtest_root)
+  include(ExternalProject)
+  # No install step: the archives are used straight from the build tree.
+  ExternalProject_Add(googletest
+    SOURCE_DIR ${gtest_root}
+    CMAKE_ARGS -DBUILD_GMOCK=OFF -DBUILD_GTEST=ON
+    INSTALL_COMMAND ""
+    LOG_CONFIGURE ON
+    LOG_BUILD ON)
+
+  # Header locations are derived from the external project's source dir.
+  ExternalProject_Get_Property(googletest source_dir)
+  set(GTEST_INCLUDE_DIRS ${source_dir}/googletest/include)
+  set(GMOCK_INCLUDE_DIRS ${source_dir}/googlemock/include)
+
+  # Provides CMAKE_THREAD_LIBS_INIT, attached to every imported target below.
+  find_package(Threads REQUIRED)
+
+  # NOTE(review): the archive names below are built from
+  # CMAKE_FIND_LIBRARY_PREFIXES plus a hard-coded ".a" suffix; presumably
+  # only correct on platforms where that expands to a single "lib" - confirm.
+  ExternalProject_Get_Property(googletest binary_dir)
+  set(GTEST_LIBRARY_PATH ${binary_dir}/googletest/${CMAKE_FIND_LIBRARY_PREFIXES}gtest.a)
+  set(GTEST_LIBRARY gtest)
+  add_library(${GTEST_LIBRARY} STATIC IMPORTED)
+  set_target_properties(${GTEST_LIBRARY} PROPERTIES
+    INTERFACE_INCLUDE_DIRECTORIES "${GTEST_INCLUDE_DIRS}"
+    IMPORTED_LOCATION ${GTEST_LIBRARY_PATH}
+    IMPORTED_LINK_INTERFACE_LANGUAGES "CXX"
+    IMPORTED_LINK_INTERFACE_LIBRARIES ${CMAKE_THREAD_LIBS_INIT})
+  # Make consumers of the imported target wait for the external build.
+  add_dependencies(${GTEST_LIBRARY} googletest)
+  set(GTEST_LIBRARIES ${GTEST_LIBRARY})
+
+  # NOTE(review): unlike the other three targets, gtest_main does not set
+  # IMPORTED_LINK_INTERFACE_LANGUAGES - confirm whether that is intentional.
+  set(GTEST_MAIN_LIBRARY_PATH ${binary_dir}/googletest/${CMAKE_FIND_LIBRARY_PREFIXES}gtest_main.a)
+  set(GTEST_MAIN_LIBRARY gtest_main)
+  add_library(${GTEST_MAIN_LIBRARY} STATIC IMPORTED)
+  set_target_properties(${GTEST_MAIN_LIBRARY} PROPERTIES
+    INTERFACE_INCLUDE_DIRECTORIES "${GTEST_INCLUDE_DIRS}"
+    IMPORTED_LOCATION ${GTEST_MAIN_LIBRARY_PATH}
+    IMPORTED_LINK_INTERFACE_LIBRARIES ${CMAKE_THREAD_LIBS_INIT})
+  add_dependencies(${GTEST_MAIN_LIBRARY} googletest)
+
+  # NOTE(review): the external project is configured with BUILD_GMOCK=OFF
+  # above, yet gmock targets are declared here - confirm that the gmock
+  # archives are actually produced before anything links against them.
+  set(GMOCK_LIBRARY_PATH ${binary_dir}/googlemock/${CMAKE_FIND_LIBRARY_PREFIXES}gmock.a)
+  set(GMOCK_LIBRARY gmock)
+  add_library(${GMOCK_LIBRARY} STATIC IMPORTED)
+  set_target_properties(${GMOCK_LIBRARY} PROPERTIES
+    INTERFACE_INCLUDE_DIRECTORIES "${GMOCK_INCLUDE_DIRS}"
+    IMPORTED_LOCATION "${GMOCK_LIBRARY_PATH}"
+    IMPORTED_LINK_INTERFACE_LANGUAGES "CXX"
+    IMPORTED_LINK_INTERFACE_LIBRARIES ${CMAKE_THREAD_LIBS_INIT})
+  add_dependencies(${GMOCK_LIBRARY} googletest)
+
+  set(GMOCK_MAIN_LIBRARY_PATH ${binary_dir}/googlemock/${CMAKE_FIND_LIBRARY_PREFIXES}gmock_main.a)
+  set(GMOCK_MAIN_LIBRARY gmock_main)
+  add_library(${GMOCK_MAIN_LIBRARY} STATIC IMPORTED)
+  set_target_properties(${GMOCK_MAIN_LIBRARY} PROPERTIES
+    INTERFACE_INCLUDE_DIRECTORIES "${GMOCK_INCLUDE_DIRS}"
+    IMPORTED_LOCATION ${GMOCK_MAIN_LIBRARY_PATH}
+    IMPORTED_LINK_INTERFACE_LANGUAGES "CXX"
+    IMPORTED_LINK_INTERFACE_LIBRARIES ${CMAKE_THREAD_LIBS_INIT})
+  # NOTE(review): depends on the gtest imported target rather than on the
+  # googletest external project like its siblings - confirm this is intended.
+  add_dependencies(${GMOCK_MAIN_LIBRARY} ${GTEST_LIBRARY})
+endmacro()
+
+# Locate a googletest source tree: a directory containing src/gtest.cc,
+# hinted by the GTEST_ROOT environment variable and otherwise searched in
+# the listed /usr/src locations.
+find_path(GTEST_ROOT src/gtest.cc
+  HINTS $ENV{GTEST_ROOT}
+  PATHS /usr/src/googletest/googletest /usr/src/gtest)
+
+# Build from the located tree, or report the failure via SEND_ERROR.
+if(EXISTS ${GTEST_ROOT})
+  message(STATUS "Found googletest: ${GTEST_ROOT}")
+  _build_gtest(${GTEST_ROOT})
+else()
+  message(SEND_ERROR "Could NOT find googletest")
+endif()
index 6538dab2c0811a69a372fb01238330fce0a60acf..fd4a81c76dac1a498ebaed534d2bc454514be059 100644 (file)
@@ -89,7 +89,7 @@ namespace crimson {
 
       using SubmitFunc =
        std::function<void(const ServerId&,
-                          const TestRequest&,
+                          TestRequest&&,
                           const ClientId&,
                           const ReqPm&)>;
 
@@ -240,8 +240,9 @@ namespace crimson {
              count_stats(internal_stats.mtx,
                          internal_stats.get_req_params_count);
 
-             TestRequest req(server, o, 12);
-             submit_f(server, req, id, rp);
+             submit_f(server,
+                      TestRequest{server, static_cast<uint32_t>(o), 12},
+                      id, rp);
              ++outstanding_ops;
              l.lock(); // lock for return to top of loop
 
index e318d6e90ac328e6810aa4b7a1fc4f628003ee37..42b5269d780323085c3d807ce1a5c84b0c7cc8ab 100644 (file)
@@ -147,14 +147,15 @@ namespace crimson {
        delete priority_queue;
       }
 
-      void post(const TestRequest& request,
+      void post(TestRequest&& request,
                const ClientId& client_id,
                const ReqPm& req_params)
       {
        time_stats(internal_stats.mtx,
                   internal_stats.add_request_time,
                   [&](){
-                    priority_queue->add_request(request, client_id, req_params);
+                    priority_queue->add_request(std::move(request),
+                                                client_id, req_params);
                   });
        count_stats(internal_stats.mtx,
                    internal_stats.add_request_count);
@@ -202,10 +203,9 @@ namespace crimson {
            // notify server of completion
            std::this_thread::sleep_for(op_time);
 
-           TestResponse resp(req->epoch);
            // TODO: rather than assuming this constructor exists, perhaps
            // pass in a function that does this mapping?
-           client_resp_f(client, resp, id, additional);
+           client_resp_f(client, TestResponse{req->epoch}, id, additional);
 
            time_stats(internal_stats.mtx,
                       internal_stats.request_complete_time,
index ee4c1e6e3efff40df6d0f81aba10c50e90d24ff7..610c2ef665c432ad1b2a9a9d6be4c7ebc0337a21 100644 (file)
@@ -100,10 +100,11 @@ namespace crimson {
        finishing = true;
       }
 
-      void add_request(const R& request,
+      void add_request(R&& request,
                       const C& client_id,
                       const ReqParams& req_params) {
-       add_request(RequestRef(new R(request)), client_id, req_params);
+       add_request(RequestRef(new R(std::move(request))),
+                   client_id, req_params);
       }
 
       void add_request(RequestRef&& request,
index 668869b8d72fb040d3b74997db89a071e0f9a72a..57b733e860cede4065a36d2ad2034450c35f8399 100644 (file)
@@ -136,11 +136,11 @@ int main(int argc, char* argv[]) {
     // lambda to post a request to the identified server; called by client
     test::SubmitFunc server_post_f =
         [&simulation](const ServerId& server,
-                      const sim::TestRequest& request,
+                      sim::TestRequest&& request,
                       const ClientId& client_id,
                       const test::dmc::ReqParams& req_params) {
         test::DmcServer& s = simulation->get_server(server);
-        s.post(request, client_id, req_params);
+        s.post(std::move(request), client_id, req_params);
     };
 
     std::vector<std::vector<sim::CliInst>> cli_inst;
index 6df20dc5f896eac656e594c8f8b5b69a38e29424..14ff7e9b47040eef906489d7352746506759a4a3 100644 (file)
@@ -67,11 +67,11 @@ int main(int argc, char* argv[]) {
   // lambda to post a request to the identified server; called by client
   test::SubmitFunc server_post_f =
     [&simulation](const ServerId& server_id,
-                 const sim::TestRequest& request,
+                 sim::TestRequest&& request,
                  const ClientId& client_id,
                  const ssched::ReqParams& req_params) {
     auto& server = simulation->get_server(server_id);
-    server.post(request, client_id, req_params);
+    server.post(std::move(request), client_id, req_params);
   };
 
   static std::vector<sim::CliInst> no_wait =
index 7178f266e2a41d13276ad1fbe3a92bc49135cd37..65d09f6a46915d05405a4f540dbc1261f059fea1 100644 (file)
@@ -2,23 +2,9 @@ include_directories(${GTEST_INCLUDE_DIRS})
 include_directories(${Boost_INCLUDE_DIRS})
 include_directories(../support/src)
 
-set(local_flags "-std=c++11 -Wno-write-strings -Wall -pthread")
-
-if(DO_NOT_DELAY_TAG_CALC)
-  set(local_flags "${local_flags} -DDO_NOT_DELAY_TAG_CALC")
-endif()
+set(CMAKE_CXX_FLAGS
+  "${CMAKE_CXX_FLAGS} -std=c++11 -Wno-write-strings -Wall -pthread")
 
 set(dmc_srcs dmclock_util.cc ../support/src/run_every.cc)
 
-set_source_files_properties(${dmc_srcs}
-  PROPERTIES
-  COMPILE_FLAGS "${local_flags}"
-  )
-
-if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
-  set(warnings_off " -Wno-unused-variable -Wno-unused-function")
-elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
-  set(warnings_off " -Wno-unused-but-set-variable -Wno-unused-function")
-endif()
-
 add_library(dmclock STATIC ${dmc_srcs})
index b44e1211b53f667daf23a8ed651480f4574a931d..92f4cf83bb1abbdad5c792b5377258f9227dd653 100644 (file)
@@ -19,8 +19,6 @@
 #include "dmclock_util.h"
 #include "dmclock_recs.h"
 
-#include "gtest/gtest_prod.h"
-
 
 namespace crimson {
   namespace dmclock {
@@ -57,7 +55,8 @@ namespace crimson {
     // S is server identifier type
     template<typename S>
     class ServiceTracker {
-      FRIEND_TEST(dmclock_client, server_erase);
+      // we don't want to include gtest.h just for FRIEND_TEST
+      friend class dmclock_client_server_erase_Test;
 
       using TimePoint = decltype(std::chrono::steady_clock::now());
       using Duration = std::chrono::milliseconds;
index 8aaad3977264847c0c3351d76eff05da7d629171..2c9940dc6c17f8af7ff6c9b990951a7672ddb547 100644 (file)
@@ -50,8 +50,6 @@
 #include "profile.h"
 #endif
 
-#include "gtest/gtest_prod.h"
-
 
 namespace crimson {
 
@@ -235,7 +233,8 @@ namespace crimson {
     // branching factor
     template<typename C, typename R, uint B>
     class PriorityQueueBase {
-      FRIEND_TEST(dmclock_server, client_idle_erase);
+      // we don't want to include gtest.h just for FRIEND_TEST
+      friend class dmclock_server_client_idle_erase_Test;
 
     public:
 
@@ -371,12 +370,12 @@ namespace crimson {
 
        // NB: because a deque is the underlying structure, this
        // operation might be expensive
-       bool remove_by_req_filter_fw(std::function<bool(const R&)> filter_accum) {
+       bool remove_by_req_filter_fw(std::function<bool(R&&)> filter_accum) {
          bool any_removed = false;
          for (auto i = requests.begin();
               i != requests.end();
               /* no inc */) {
-           if (filter_accum(*i->request)) {
+           if (filter_accum(std::move(*i->request))) {
              any_removed = true;
              i = requests.erase(i);
            } else {
@@ -388,12 +387,12 @@ namespace crimson {
 
        // NB: because a deque is the underlying structure, this
        // operation might be expensive
-       bool remove_by_req_filter_bw(std::function<bool(const R&)> filter_accum) {
+       bool remove_by_req_filter_bw(std::function<bool(R&&)> filter_accum) {
          bool any_removed = false;
          for (auto i = requests.rbegin();
               i != requests.rend();
               /* no inc */) {
-           if (filter_accum(*i->request)) {
+           if (filter_accum(std::move(*i->request))) {
              any_removed = true;
              i = decltype(i){ requests.erase(std::next(i).base()) };
            } else {
@@ -404,7 +403,7 @@ namespace crimson {
        }
 
        inline bool
-       remove_by_req_filter(std::function<bool(const R&)> filter_accum,
+       remove_by_req_filter(std::function<bool(R&&)> filter_accum,
                             bool visit_backwards) {
          if (visit_backwards) {
            return remove_by_req_filter_bw(filter_accum);
@@ -478,7 +477,7 @@ namespace crimson {
       }
 
 
-      bool remove_by_req_filter(std::function<bool(const R&)> filter_accum,
+      bool remove_by_req_filter(std::function<bool(R&&)> filter_accum,
                                bool visit_backwards = false) {
        bool any_removed = false;
        DataGuard g(data_mtx);
@@ -500,14 +499,14 @@ namespace crimson {
 
 
       // use as a default value when no accumulator is provide
-      static void request_sink(const R& req) {
+      static void request_sink(R&& req) {
        // do nothing
       }
 
 
       void remove_by_client(const C& client,
                            bool reverse = false,
-                           std::function<void (const R&)> accum = request_sink) {
+                           std::function<void (R&&)> accum = request_sink) {
        DataGuard g(data_mtx);
 
        auto i = client_map.find(client);
@@ -518,13 +517,13 @@ namespace crimson {
          for (auto j = i->second->requests.rbegin();
               j != i->second->requests.rend();
               ++j) {
-           accum(*j->request);
+           accum(std::move(*j->request));
          }
        } else {
          for (auto j = i->second->requests.begin();
               j != i->second->requests.end();
               ++j) {
-           accum(*j->request);
+           accum(std::move(*j->request));
          }
        }
 
@@ -881,7 +880,9 @@ namespace crimson {
        ClientRec& top = heap.top();
 
        RequestRef request = std::move(top.next_request().request);
+#ifndef DO_NOT_DELAY_TAG_CALC
        RequestTag tag = top.next_request().tag;
+#endif
 
        // pop request and adjust heaps
        top.pop_request();
@@ -1162,11 +1163,11 @@ namespace crimson {
       }
 
 
-      inline void add_request(const R& request,
+      inline void add_request(R&& request,
                              const C& client_id,
                              const ReqParams& req_params,
                              double addl_cost = 0.0) {
-       add_request(typename super::RequestRef(new R(request)),
+       add_request(typename super::RequestRef(new R(std::move(request))),
                    client_id,
                    req_params,
                    get_time(),
@@ -1174,11 +1175,11 @@ namespace crimson {
       }
 
 
-      inline void add_request(const R& request,
+      inline void add_request(R&& request,
                              const C& client_id,
                              double addl_cost = 0.0) {
        static const ReqParams null_req_params;
-       add_request(typename super::RequestRef(new R(request)),
+       add_request(typename super::RequestRef(new R(std::move(request))),
                    client_id,
                    null_req_params,
                    get_time(),
@@ -1187,12 +1188,12 @@ namespace crimson {
 
 
 
-      inline void add_request_time(const R& request,
+      inline void add_request_time(R&& request,
                                   const C& client_id,
                                   const ReqParams& req_params,
                                   const Time time,
                                   double addl_cost = 0.0) {
-       add_request(typename super::RequestRef(new R(request)),
+       add_request(typename super::RequestRef(new R(std::move(request))),
                    client_id,
                    req_params,
                    time,
@@ -1255,11 +1256,9 @@ namespace crimson {
        switch(next.type) {
        case super::NextReqType::none:
          return result;
-         break;
        case super::NextReqType::future:
          result.data = next.when_ready;
          return result;
-         break;
        case super::NextReqType::returning:
          // to avoid nesting, break out and let code below handle this case
          break;
@@ -1402,11 +1401,11 @@ namespace crimson {
 
     public:
 
-      inline void add_request(const R& request,
+      inline void add_request(R&& request,
                              const C& client_id,
                              const ReqParams& req_params,
                              double addl_cost = 0.0) {
-       add_request(typename super::RequestRef(new R(request)),
+       add_request(typename super::RequestRef(new R(std::move(request))),
                    client_id,
                    req_params,
                    get_time(),
index d12c6f9eb63c9a61b9ae6446386c3d27b82137d4..c46d09f39319a53e96f936abd25bb11e241c8110 100644 (file)
@@ -30,11 +30,19 @@ namespace crimson {
 
 
     inline Time get_time() {
+#if defined(__linux__)
+      struct timespec now;
+      auto result = clock_gettime(CLOCK_REALTIME, &now);
+      (void) result; // reference result in case assert is compiled out
+      assert(0 == result);
+      return now.tv_sec + (now.tv_nsec / 1.0e9);
+#else
       struct timeval now;
       auto result = gettimeofday(&now, NULL);
-      (void) result;
+      (void) result; // reference result in case assert is compiled out
       assert(0 == result);
-      return now.tv_sec + (now.tv_usec / 1000000.0);
+      return now.tv_sec + (now.tv_usec / 1.0e6);
+#endif
     }
 
     std::string format_time(const Time& time, uint modulo = 1000);
index 24da2e7320a5d02343355ee442b825a424bdbf17..aff35d5d77581ccfd918ead823efd75beaa7fa4c 100644 (file)
@@ -1,3 +1,8 @@
+INCLUDE (CheckIncludeFiles)
+CHECK_INCLUDE_FILES("sys/prctl.h" HAVE_SYS_PRCTL_H)
+CONFIGURE_FILE(dmtest-config.h.in dmtest-config.h)
+
+include_directories(${CMAKE_CURRENT_BINARY_DIR})
 include_directories(../src)
 include_directories(../support/src)
 include_directories(../sim/src)
@@ -15,6 +20,7 @@ set_source_files_properties(${core_srcs} ${test_srcs}
   )
 
 add_executable(dmclock-tests ${test_srcs} ${support_srcs})
+target_include_directories(dmclock-tests PRIVATE "${GTEST_INCLUDE_DIRS}")
 
 if (TARGET gtest AND TARGET gtest_main)
   add_dependencies(dmclock-tests gtest gtest_main)
diff --git a/ceph/src/dmclock/test/dmcPrCtl.h b/ceph/src/dmclock/test/dmcPrCtl.h
new file mode 100644 (file)
index 0000000..39f9829
--- /dev/null
@@ -0,0 +1,42 @@
+// essentially the same as ceph's PrCtl.h, copied into the dmclock library
+
+#include <dmtest-config.h>
+#ifdef HAVE_SYS_PRCTL_H
+#include <iostream>
+#include <sys/prctl.h>
+#include <errno.h>
+
+struct PrCtl { // scoped guard: changes the process "dumpable" flag and restores it on destruction
+  int saved_state = -1; // previous dumpable flag to restore in the destructor; -1 means nothing to restore
+  int set_dumpable(int new_state) { // returns 0 on success, -errno on failure
+    int r = prctl(PR_SET_DUMPABLE, new_state);
+    if (r) {
+      r = -errno;
+      std::cerr << "warning: unable to " << (new_state ? "set" : "unset")
+                << " dumpable flag: " << strerror(-r) // r is negative here; strerror() needs the positive errno
+                << std::endl;
+    }
+    return r;
+  }
+  PrCtl(int new_state = 0) { // default 0 clears the dumpable flag
+    int r = prctl(PR_GET_DUMPABLE);
+    if (r == -1) {
+      r = errno;
+      std::cerr << "warning: unable to get dumpable flag: " << strerror(r)
+                << std::endl;
+    } else if (r != new_state) {
+      if (!set_dumpable(new_state)) {
+        saved_state = r; // remember the old flag only if the change actually took effect
+      }
+    }
+  }
+  ~PrCtl() {
+    if (saved_state < 0) {
+      return; // flag was never changed by this object
+    }
+    set_dumpable(saved_state);
+  }
+};
+#else
+struct PrCtl {};
+#endif
diff --git a/ceph/src/dmclock/test/dmtest-config.h.in b/ceph/src/dmclock/test/dmtest-config.h.in
new file mode 100644 (file)
index 0000000..ecd2044
--- /dev/null
@@ -0,0 +1,2 @@
+/* Define to 1 if you have the <sys/prctl.h> header file. */
+#cmakedefine HAVE_SYS_PRCTL_H 1
index 4555e377323e9ff87af6341efb47740ea60207cc..95def410fb69f44ded3f78b6d8962bdf170506b9 100644 (file)
@@ -16,6 +16,9 @@
 #include "dmclock_util.h"
 #include "gtest/gtest.h"
 
+// process control to prevent core dumps during gtest death tests
+#include "dmcPrCtl.h"
+
 
 namespace dmc = crimson::dmclock;
 
@@ -61,17 +64,19 @@ namespace crimson {
       };
 
       QueueRef pq(new Queue(client_info_f, false));
-      Request req;
       ReqParams req_params(1,1);
 
-      EXPECT_DEATH_IF_SUPPORTED(pq->add_request(req, client1, req_params),
+      // Disable coredumps
+      PrCtl unset_dumpable;
+
+      EXPECT_DEATH_IF_SUPPORTED(pq->add_request(Request{}, client1, req_params),
                                "Assertion.*reservation.*max_tag.*"
                                "proportion.*max_tag") <<
        "we should fail if a client tries to generate a reservation tag "
        "where reservation and proportion are both 0";
 
 
-      EXPECT_DEATH_IF_SUPPORTED(pq->add_request(req, client2, req_params),
+      EXPECT_DEATH_IF_SUPPORTED(pq->add_request(Request{}, client2, req_params),
                                "Assertion.*reservation.*max_tag.*"
                                "proportion.*max_tag") <<
        "we should fail if a client tries to generate a reservation tag "
@@ -548,14 +553,13 @@ namespace crimson {
 
       pq = QueueRef(new Queue(client_info_f, false));
 
-      Request req;
       ReqParams req_params(1,1);
 
       auto now = dmc::get_time();
 
       for (int i = 0; i < 5; ++i) {
-       pq->add_request(req, client1, req_params);
-       pq->add_request(req, client2, req_params);
+       pq->add_request(Request{}, client1, req_params);
+       pq->add_request(Request{}, client2, req_params);
        now += 0.0001;
       }
 
@@ -602,15 +606,14 @@ namespace crimson {
 
       QueueRef pq(new Queue(client_info_f, false));
 
-      Request req;
       ReqParams req_params(1,1);
 
       // make sure all times are well before now
       auto old_time = dmc::get_time() - 100.0;
 
       for (int i = 0; i < 5; ++i) {
-       pq->add_request_time(req, client1, req_params, old_time);
-       pq->add_request_time(req, client2, req_params, old_time);
+       pq->add_request_time(Request{}, client1, req_params, old_time);
+       pq->add_request_time(Request{}, client2, req_params, old_time);
        old_time += 0.001;
       }
 
@@ -661,7 +664,6 @@ namespace crimson {
 
       QueueRef pq(new Queue(client_info_f, false));
 
-      Request req;
       ReqParams req_params(1,1);
 
       // make sure all times are well before now
@@ -669,8 +671,8 @@ namespace crimson {
 
       // add six requests; for same client reservations spaced one apart
       for (int i = 0; i < 3; ++i) {
-       pq->add_request_time(req, client1, req_params, start_time);
-       pq->add_request_time(req, client2, req_params, start_time);
+       pq->add_request_time(Request{}, client1, req_params, start_time);
+       pq->add_request_time(Request{}, client2, req_params, start_time);
       }
 
       Queue::PullReq pr = pq->pull_request(start_time + 0.5);
@@ -744,13 +746,12 @@ namespace crimson {
 
       QueueRef pq(new Queue(client_info_f, false));
 
-      Request req;
       ReqParams req_params(1,1);
 
       // make sure all times are well before now
       auto now = dmc::get_time();
 
-      pq->add_request_time(req, client1, req_params, now + 100);
+      pq->add_request_time(Request{}, client1, req_params, now + 100);
       Queue::PullReq pr = pq->pull_request(now);
 
       EXPECT_EQ(Queue::NextReqType::future, pr.type);
@@ -776,13 +777,12 @@ namespace crimson {
 
       QueueRef pq(new Queue(client_info_f, true));
 
-      Request req;
       ReqParams req_params(1,1);
 
       // make sure all times are well before now
       auto now = dmc::get_time();
 
-      pq->add_request_time(req, client1, req_params, now + 100);
+      pq->add_request_time(Request{}, client1, req_params, now + 100);
       Queue::PullReq pr = pq->pull_request(now);
 
       EXPECT_EQ(Queue::NextReqType::returning, pr.type);
@@ -808,13 +808,12 @@ namespace crimson {
 
       QueueRef pq(new Queue(client_info_f, true));
 
-      Request req;
       ReqParams req_params(1,1);
 
       // make sure all times are well before now
       auto now = dmc::get_time();
 
-      pq->add_request_time(req, client1, req_params, now + 100);
+      pq->add_request_time(Request{}, client1, req_params, now + 100);
       Queue::PullReq pr = pq->pull_request(now);
 
       EXPECT_EQ(Queue::NextReqType::returning, pr.type);
index dfb31a1007b8e146cfd642a9262b4b0fc8b10633..45cef34eb47ced86c5026275777bc07b3020b4ff 100644 (file)
@@ -1242,6 +1242,17 @@ int ceph_get_pool_name(struct ceph_mount_info *cmount, int pool, char *buf, size
  */
 int ceph_get_path_pool_name(struct ceph_mount_info *cmount, const char *path, char *buf, size_t buflen);
 
+/**
+ * Get the name of the default data pool of cephfs.
+ *
+ * Write the name of the default data pool to the buffer. If buflen is 0,
+ * return the length of the pool name so the caller can size a buffer.
+ * The name is copied with strncpy(), so the buffer is only NUL-terminated
+ * when buflen is greater than the length of the name.
+ *
+ * @param cmount the ceph mount handle to use.
+ * @param buf buffer to store the name in
+ * @param buflen size of the buffer
+ * @returns length in bytes of the pool name, -ENOTCONN if cmount is not
+ *          mounted, or -ERANGE if the buffer is not large enough.
+ */
+int ceph_get_default_data_pool_name(struct ceph_mount_info *cmount, char *buf, size_t buflen);
+
 /**
  * Get the file layout from an open file descriptor.
  *
index c6343fe1bcd14765725d98ccd573d9c715d17a0d..36dede85a4dfa12bc2cd4a07bfaba2706d67b5c1 100644 (file)
@@ -45,7 +45,7 @@ struct denc_traits<inodeno_t> {
 };
 
 inline ostream& operator<<(ostream& out, const inodeno_t& ino) {
-  return out << hex << ino.val << dec;
+  return out << hex << "0x" << ino.val << dec;
 }
 
 namespace std {
index af782e6e457f7348e2180e424f2e8944204cc686..f266c76fcab2d9095c8d20853d567b3cf5b89a71 100644 (file)
@@ -142,26 +142,6 @@ typedef struct {
   bool up;
 } rbd_mirror_image_status_t;
 
-typedef enum {
-  GROUP_IMAGE_STATE_ATTACHED,
-  GROUP_IMAGE_STATE_INCOMPLETE
-} rbd_group_image_state_t;
-
-typedef struct {
-  char *name;
-  int64_t pool;
-} rbd_group_image_spec_t;
-
-typedef struct {
-  rbd_group_image_spec_t spec;
-  rbd_group_image_state_t state;
-} rbd_group_image_status_t;
-
-typedef struct {
-  char *name;
-  int64_t pool;
-} rbd_group_spec_t;
-
 typedef enum {
   RBD_LOCK_MODE_EXCLUSIVE = 0,
   RBD_LOCK_MODE_SHARED = 1,
@@ -820,11 +800,6 @@ CEPH_RBD_API int rbd_aio_mirror_image_get_status(rbd_image_t image,
                                                  size_t status_size,
                                                  rbd_completion_t c);
 
-// RBD consistency groups support functions
-CEPH_RBD_API int rbd_group_create(rados_ioctx_t p, const char *name);
-CEPH_RBD_API int rbd_group_remove(rados_ioctx_t p, const char *name);
-CEPH_RBD_API int rbd_group_list(rados_ioctx_t p, char *names, size_t *size);
-
 /**
  * Register an image metadata change watcher.
  *
@@ -846,30 +821,6 @@ CEPH_RBD_API int rbd_update_watch(rbd_image_t image, uint64_t *handle,
  */
 CEPH_RBD_API int rbd_update_unwatch(rbd_image_t image, uint64_t handle);
 
-
-CEPH_RBD_API int rbd_group_image_add(
-                               rados_ioctx_t group_p, const char *group_name,
-                               rados_ioctx_t image_p, const char *image_name);
-CEPH_RBD_API int rbd_group_image_remove(
-                               rados_ioctx_t group_p, const char *group_name,
-                               rados_ioctx_t image_p, const char *image_name);
-CEPH_RBD_API int rbd_group_image_remove_by_id(
-                               rados_ioctx_t group_p, const char *group_name,
-                               rados_ioctx_t image_p, const char *image_id);
-CEPH_RBD_API int rbd_group_image_list(
-                                 rados_ioctx_t group_p, const char *group_name,
-                                 rbd_group_image_status_t *images,
-                                 size_t *image_size);
-CEPH_RBD_API int rbd_image_get_group(rados_ioctx_t image_p,
-                                    const char *image_name,
-                                    rbd_group_spec_t *group_spec);
-CEPH_RBD_API void rbd_group_spec_cleanup(rbd_group_spec_t *group_spec);
-CEPH_RBD_API void rbd_group_image_status_cleanup(
-                                               rbd_group_image_status_t *image
-                                               );
-CEPH_RBD_API void rbd_group_image_status_list_cleanup(
-                                             rbd_group_image_status_t *images,
-                                             size_t len);
 #ifdef __cplusplus
 }
 #endif
index bbf4270bc245d7b13bd1eb4a5700acc3eccf7054..7220514e05cf0e521ae1273138a11fd0545afe52 100644 (file)
@@ -71,19 +71,6 @@ namespace librbd {
     bool up;
   } mirror_image_status_t;
 
-  typedef rbd_group_image_state_t group_image_state_t;
-
-  typedef struct {
-    std::string name;
-    int64_t pool;
-    group_image_state_t state;
-  } group_image_status_t;
-
-  typedef struct {
-    std::string name;
-    int64_t pool;
-  } group_spec_t;
-
   typedef rbd_image_info_t image_info_t;
 
   class CEPH_RBD_API ProgressContext
@@ -185,20 +172,6 @@ public:
   int mirror_image_status_summary(IoCtx& io_ctx,
       std::map<mirror_image_status_state_t, int> *states);
 
-  // RBD consistency groups support functions
-  int group_create(IoCtx& io_ctx, const char *group_name);
-  int group_remove(IoCtx& io_ctx, const char *group_name);
-  int group_list(IoCtx& io_ctx, std::vector<std::string> *names);
-
-  int group_image_add(IoCtx& io_ctx, const char *group_name,
-                     IoCtx& image_io_ctx, const char *image_name);
-  int group_image_remove(IoCtx& io_ctx, const char *group_name,
-                        IoCtx& image_io_ctx, const char *image_name);
-  int group_image_remove_by_id(IoCtx& io_ctx, const char *group_name,
-                               IoCtx& image_io_ctx, const char *image_id);
-  int group_image_list(IoCtx& io_ctx, const char *group_name,
-                      std::vector<group_image_status_t> *images);
-
 private:
   /* We don't allow assignment or copying */
   RBD(const RBD& rhs);
@@ -259,7 +232,6 @@ public:
                    std::string *parent_id, std::string *parent_snapname);
   int old_format(uint8_t *old);
   int size(uint64_t *size);
-  int get_group(group_spec_t *group_spec);
   int features(uint64_t *features);
   int update_features(uint64_t features, bool enabled);
   int overlap(uint64_t *overlap);
index d1267142c3ecfea8bf76738c80617752e428e248..786a6ece2f5888df39b9bc06d29256ee5163c3c5 100644 (file)
@@ -938,6 +938,22 @@ public class CephMount {
   }
 
   private static native String native_ceph_get_file_pool_name(long mountp, int fd);
+  
+  /**
+   * Get the name of the default data pool of cephfs.
+   *
+   * @return The pool name.
+   */ 
+  public String get_default_data_pool_name() {
+    rlock.lock(); // rlock is held across the native call and released in finally
+    try {
+      return native_ceph_get_default_data_pool_name(instance_ptr);
+    } finally {
+      rlock.unlock();
+    }
+  }
+  
+  private static native String native_ceph_get_default_data_pool_name(long mountp);
 
   /**
    * Get the replication of a file.
index 975a092dace5a0429b3423632adedba5570306cb..080ec3cbda4ef2688ee77605b8634d40cfda671c 100644 (file)
@@ -2580,6 +2580,51 @@ out:
        return pool;
 }
 
+/*
+ * Class:     com_ceph_fs_CephMount
+ * Method:    native_ceph_get_default_data_pool_name
+ * Signature: (J)Ljava/lang/String;
+ */
+JNIEXPORT jstring JNICALL Java_com_ceph_fs_CephMount_native_1ceph_1get_1default_1data_1pool_1name
+  (JNIEnv *env, jclass clz, jlong j_mntp)
+{
+       struct ceph_mount_info *cmount = get_ceph_mount(j_mntp);
+       CephContext *cct = ceph_get_mount_context(cmount);
+       jstring pool = NULL;
+       int ret, buflen = 0;
+       char *buf = NULL;
+
+       CHECK_MOUNTED(cmount, NULL);
+
+       ldout(cct, 10) << "jni: get_default_data_pool_name" << dendl;
+
+       /* first call with a zero-length buffer only asks for the name length */
+       ret = ceph_get_default_data_pool_name(cmount, NULL, 0);
+       if (ret < 0)
+               goto out;
+       buflen = ret;
+       buf = new (std::nothrow) char[buflen+1]; /* +1 for '\0' */
+       if (!buf) {
+               cephThrowOutOfMemory(env, "head allocation failed");
+               goto out;
+       }
+       /* zero-fill so the extra byte terminates the string after the copy */
+       memset(buf, 0, (buflen+1)*sizeof(*buf));
+       ret = ceph_get_default_data_pool_name(cmount, buf, buflen);
+
+       ldout(cct, 10) << "jni: get_default_data_pool_name: ret " << ret << dendl;
+
+       if (ret < 0)
+               handle_error(env, ret);
+       else
+               pool = env->NewStringUTF(buf);
+
+out:
+       /* delete [] on a null pointer is a no-op, so no guard is needed */
+       delete [] buf;
+
+       return pool;
+}
+
+
 /*
  * Class:     com_ceph_fs_CephMount
  * Method:    native_ceph_localize_reads
index bfacd1bae51b1af157ece04f693b1efdcaa05336..74652da147280c27ee48002268d178ca2b36dd11 100644 (file)
@@ -1074,6 +1074,21 @@ extern "C" int ceph_get_path_pool_name(struct ceph_mount_info *cmount, const cha
   return name.length();
 }
 
+extern "C" int ceph_get_default_data_pool_name(struct ceph_mount_info *cmount, char *buf, size_t len)
+{ // copies the name of the client's default pool into buf; returns its length or a negative errno
+  if (!cmount->is_mounted())
+    return -ENOTCONN; // mount handle is not mounted
+  int64_t pool_id = cmount->get_client()->get_default_pool_id();
+  string name = cmount->get_client()->get_pool_name(pool_id);
+  if (len == 0)
+    return name.length(); // size query: report the name length, buf is not touched
+  if (name.length() > len)
+    return -ERANGE; // name does not fit in len bytes
+  strncpy(buf, name.c_str(), len); // NOTE: buf gets no terminating NUL when name.length() == len
+  return name.length(); 
+}
+
 extern "C" int ceph_get_file_layout(struct ceph_mount_info *cmount, int fh, int *stripe_unit, int *stripe_count, int *object_size, int *pg_pool)
 {
   file_layout_t l;
index 7131aa838acd413a646b298b5e63f61ad29801b6..904fe57c4b1df43ebe4d74855a606b63475297e0 100644 (file)
@@ -641,7 +641,8 @@ int librados::RadosClient::get_fs_stats(ceph_statfs& stats)
   int ret = 0;
 
   lock.Lock();
-  objecter->get_fs_stats(stats, new C_SafeCond(&mylock, &cond, &done, &ret));
+  objecter->get_fs_stats(stats, boost::optional<int64_t> (),
+                         new C_SafeCond(&mylock, &cond, &done, &ret));
   lock.Unlock();
 
   mylock.Lock();
index 55356e0d7e53477f51ce8b4719ac3e9fcf8468c5..8d34ce51ecd5633aad6e1b02bb2a759a7e9f6c95 100644 (file)
@@ -274,7 +274,7 @@ WriteCompletionData::WriteCompletionData
  librados::AioCompletionImpl *userCompletion,
  int n) :
   CompletionData(striper, soid, lockCookie, userCompletion, n), m_safe(0),
-  m_unlockCompletion(0) {
+  m_unlockCompletion(0), m_writeRc(0) {
   if (userCompletion) {
     m_safe = new librados::IoCtxImpl::C_aio_Complete(userCompletion);
   }
index e596bd5f499721c83196e55f19ccd28daa93e4f1..1b04ec91ab81f5b0051f99cb28d2cec0b11a3d77 100644 (file)
@@ -22,7 +22,6 @@ set(librbd_internal_srcs
   Utils.cc
   Watcher.cc
   api/DiffIterate.cc
-  api/Group.cc
   api/Image.cc
   api/Mirror.cc
   cache/ImageWriteback.cc
index af0cb1220d465554b35e4d252cd1ad5ebf7f6f3c..3fc256d72ceebf9f685604750b59d85d706e3091 100644 (file)
@@ -1428,6 +1428,11 @@ int Operations<I>::metadata_remove(const std::string &key) {
     return -EROFS;
   }
 
+  std::string value;
+  r = cls_client::metadata_get(&m_image_ctx.md_ctx, m_image_ctx.header_oid, key, &value);
+  if(r < 0)
+    return r;
+
   {
     RWLock::RLocker owner_lock(m_image_ctx.owner_lock);
     C_SaferCond metadata_ctx;
index f75d65c43def61ec6278cbe0e5d92ad241b102ae..19c634479433988903960f0edfa4c5df019df3e0 100644 (file)
@@ -97,6 +97,11 @@ struct C_AsyncCallback : public Context {
 
 std::string generate_image_id(librados::IoCtx &ioctx);
 
+template <typename T>
+inline std::string generate_image_id(librados::IoCtx &ioctx) { // T is not used in the body
+  return generate_image_id(ioctx); // forwards to the non-template overload declared above
+}
+
 const std::string group_header_name(const std::string &group_id);
 const std::string id_obj_name(const std::string &name);
 const std::string header_name(const std::string &image_id);
diff --git a/ceph/src/librbd/api/Group.cc b/ceph/src/librbd/api/Group.cc
deleted file mode 100644 (file)
index 090a980..0000000
+++ /dev/null
@@ -1,419 +0,0 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-
-#include "librbd/api/Group.h"
-#include "common/errno.h"
-#include "librbd/ImageState.h"
-#include "librbd/Utils.h"
-#include "librbd/io/AioCompletion.h"
-
-#define dout_subsys ceph_subsys_rbd
-#undef dout_prefix
-#define dout_prefix *_dout << "librbd::api::Group: " << __func__ << ": "
-
-using std::map;
-using std::pair;
-using std::set;
-using std::string;
-using std::vector;
-// list binds to list() here, so std::list is explicitly used below
-
-using ceph::bufferlist;
-using librados::snap_t;
-using librados::IoCtx;
-using librados::Rados;
-
-namespace librbd {
-namespace api {
-
-// Consistency groups functions
-
-template <typename I>
-int Group<I>::create(librados::IoCtx& io_ctx, const char *group_name)
-{
-  CephContext *cct = (CephContext *)io_ctx.cct();
-
-  Rados rados(io_ctx);
-  uint64_t bid = rados.get_instance_id();
-
-  uint32_t extra = rand() % 0xFFFFFFFF;
-  ostringstream bid_ss;
-  bid_ss << std::hex << bid << std::hex << extra;
-  string id = bid_ss.str();
-
-  ldout(cct, 2) << "adding consistency group to directory..." << dendl;
-
-  int r = cls_client::group_dir_add(&io_ctx, RBD_GROUP_DIRECTORY, group_name,
-                                    id);
-  if (r < 0) {
-    lderr(cct) << "error adding consistency group to directory: "
-              << cpp_strerror(r)
-              << dendl;
-    return r;
-  }
-  string header_oid = util::group_header_name(id);
-
-  r = cls_client::group_create(&io_ctx, header_oid);
-  if (r < 0) {
-    lderr(cct) << "error writing header: " << cpp_strerror(r) << dendl;
-    goto err_remove_from_dir;
-  }
-
-  return 0;
-
-err_remove_from_dir:
-  int remove_r = cls_client::group_dir_remove(&io_ctx, RBD_GROUP_DIRECTORY,
-                                             group_name, id);
-  if (remove_r < 0) {
-    lderr(cct) << "error cleaning up consistency group from rbd_directory "
-              << "object after creation failed: " << cpp_strerror(remove_r)
-              << dendl;
-  }
-
-  return r;
-}
-
-template <typename I>
-int Group<I>::remove(librados::IoCtx& io_ctx, const char *group_name)
-{
-  CephContext *cct((CephContext *)io_ctx.cct());
-  ldout(cct, 20) << "io_ctx=" << &io_ctx << " " << group_name << dendl;
-
-  std::vector<group_image_status_t> images;
-  int r = image_list(io_ctx, group_name, &images);
-  if (r < 0 && r != -ENOENT) {
-    lderr(cct) << "error listing group images" << dendl;
-    return r;
-  }
-
-  for (auto i : images) {
-    librados::Rados rados(io_ctx);
-    IoCtx image_ioctx;
-    rados.ioctx_create2(i.pool, image_ioctx);
-    r = image_remove(io_ctx, group_name, image_ioctx, i.name.c_str());
-    if (r < 0 && r != -ENOENT) {
-      lderr(cct) << "error removing image from a group" << dendl;
-      return r;
-    }
-  }
-
-  std::string group_id;
-  r = cls_client::dir_get_id(&io_ctx, RBD_GROUP_DIRECTORY,
-                            std::string(group_name), &group_id);
-  if (r < 0 && r != -ENOENT) {
-    lderr(cct) << "error getting id of group" << dendl;
-    return r;
-  }
-
-  string header_oid = util::group_header_name(group_id);
-
-  r = io_ctx.remove(header_oid);
-  if (r < 0 && r != -ENOENT) {
-    lderr(cct) << "error removing header: " << cpp_strerror(-r) << dendl;
-    return r;
-  }
-
-  r = cls_client::group_dir_remove(&io_ctx, RBD_GROUP_DIRECTORY, group_name,
-                                   group_id);
-  if (r < 0 && r != -ENOENT) {
-    lderr(cct) << "error removing group from directory" << dendl;
-    return r;
-  }
-
-  return 0;
-}
-
-template <typename I>
-int Group<I>::list(IoCtx& io_ctx, vector<string> *names)
-{
-  CephContext *cct = (CephContext *)io_ctx.cct();
-  ldout(cct, 20) << "io_ctx=" << &io_ctx << dendl;
-
-  int max_read = 1024;
-  string last_read = "";
-  int r;
-  do {
-    map<string, string> groups;
-    r = cls_client::group_dir_list(&io_ctx, RBD_GROUP_DIRECTORY, last_read,
-                                   max_read, &groups);
-    if (r < 0) {
-      if (r != -ENOENT) {
-        lderr(cct) << "error listing group in directory: "
-                   << cpp_strerror(r) << dendl;
-      } else {
-        r = 0;
-      }
-      return r;
-    }
-    for (pair<string, string> group : groups) {
-      names->push_back(group.first);
-    }
-    if (!groups.empty()) {
-      last_read = groups.rbegin()->first;
-    }
-    r = groups.size();
-  } while (r == max_read);
-
-  return 0;
-}
-
-template <typename I>
-int Group<I>::image_add(librados::IoCtx& group_ioctx, const char *group_name,
-                   librados::IoCtx& image_ioctx, const char *image_name)
-{
-  CephContext *cct = (CephContext *)group_ioctx.cct();
-  ldout(cct, 20) << "io_ctx=" << &group_ioctx
-                << " group name " << group_name << " image "
-                << &image_ioctx << " name " << image_name << dendl;
-
-  string group_id;
-
-  int r = cls_client::dir_get_id(&group_ioctx, RBD_GROUP_DIRECTORY, group_name,
-                                 &group_id);
-  if (r < 0) {
-    lderr(cct) << "error reading consistency group id object: "
-              << cpp_strerror(r)
-              << dendl;
-    return r;
-  }
-  string group_header_oid = util::group_header_name(group_id);
-
-
-  ldout(cct, 20) << "adding image to group name " << group_name
-                << " group id " << group_header_oid << dendl;
-
-  string image_id;
-
-  r = cls_client::dir_get_id(&image_ioctx, RBD_DIRECTORY, image_name,
-                             &image_id);
-  if (r < 0) {
-    lderr(cct) << "error reading image id object: "
-              << cpp_strerror(-r) << dendl;
-    return r;
-  }
-
-  string image_header_oid = util::header_name(image_id);
-
-  ldout(cct, 20) << "adding image " << image_name
-                << " image id " << image_header_oid << dendl;
-
-  cls::rbd::GroupImageStatus incomplete_st(
-    image_id, image_ioctx.get_id(),
-    cls::rbd::GROUP_IMAGE_LINK_STATE_INCOMPLETE);
-  cls::rbd::GroupImageStatus attached_st(
-    image_id, image_ioctx.get_id(), cls::rbd::GROUP_IMAGE_LINK_STATE_ATTACHED);
-
-  r = cls_client::group_image_set(&group_ioctx, group_header_oid,
-                                 incomplete_st);
-
-  cls::rbd::GroupSpec group_spec(group_id, group_ioctx.get_id());
-
-  if (r < 0) {
-    lderr(cct) << "error adding image reference to consistency group: "
-              << cpp_strerror(-r) << dendl;
-    return r;
-  }
-
-  r = cls_client::image_add_group(&image_ioctx, image_header_oid, group_spec);
-  if (r < 0) {
-    lderr(cct) << "error adding group reference to image: "
-              << cpp_strerror(-r) << dendl;
-    cls::rbd::GroupImageSpec spec(image_id, image_ioctx.get_id());
-    cls_client::group_image_remove(&group_ioctx, group_header_oid, spec);
-    // Ignore errors in the clean up procedure.
-    return r;
-  }
-
-  r = cls_client::group_image_set(&group_ioctx, group_header_oid,
-                                 attached_st);
-
-  return r;
-}
-
-template <typename I>
-int Group<I>::image_remove(librados::IoCtx& group_ioctx, const char *group_name,
-                          librados::IoCtx& image_ioctx, const char *image_name)
-{
-  CephContext *cct = (CephContext *)group_ioctx.cct();
-  ldout(cct, 20) << "io_ctx=" << &group_ioctx
-                << " group name " << group_name << " image "
-                << &image_ioctx << " name " << image_name << dendl;
-
-  string image_id;
-  int r = cls_client::dir_get_id(&image_ioctx, RBD_DIRECTORY, image_name,
-                                 &image_id);
-  if (r < 0) {
-    lderr(cct) << "error reading image id object: "
-               << cpp_strerror(-r) << dendl;
-    return r;
-  }
-
-  return Group<I>::image_remove_by_id(group_ioctx, group_name, image_ioctx,
-                                      image_id.c_str());
-}
-
-template <typename I>
-int Group<I>::image_remove_by_id(librados::IoCtx& group_ioctx,
-                                 const char *group_name,
-                                 librados::IoCtx& image_ioctx,
-                                 const char *image_id)
-{
-  CephContext *cct = (CephContext *)group_ioctx.cct();
-  ldout(cct, 20) << "group_remove_image_by_id " << &group_ioctx
-                 << " group name " << group_name << " image "
-                 << &image_ioctx << " id " << image_id << dendl;
-
-  string group_id;
-
-  int r = cls_client::dir_get_id(&group_ioctx, RBD_GROUP_DIRECTORY, group_name,
-                                 &group_id);
-  if (r < 0) {
-    lderr(cct) << "error reading consistency group id object: "
-              << cpp_strerror(r)
-              << dendl;
-    return r;
-  }
-  string group_header_oid = util::group_header_name(group_id);
-
-  ldout(cct, 20) << "adding image to group name " << group_name
-                << " group id " << group_header_oid << dendl;
-
-  string image_header_oid = util::header_name(image_id);
-
-  ldout(cct, 20) << "removing " << " image id " << image_header_oid << dendl;
-
-  cls::rbd::GroupSpec group_spec(group_id, group_ioctx.get_id());
-
-  cls::rbd::GroupImageStatus incomplete_st(
-    image_id, image_ioctx.get_id(),
-    cls::rbd::GROUP_IMAGE_LINK_STATE_INCOMPLETE);
-
-  cls::rbd::GroupImageSpec spec(image_id, image_ioctx.get_id());
-
-  r = cls_client::group_image_set(&group_ioctx, group_header_oid,
-                                 incomplete_st);
-
-  if (r < 0) {
-    lderr(cct) << "couldn't put image into removing state: "
-              << cpp_strerror(-r) << dendl;
-    return r;
-  }
-
-  r = cls_client::image_remove_group(&image_ioctx, image_header_oid,
-                                    group_spec);
-  if ((r < 0) && (r != -ENOENT)) {
-    lderr(cct) << "couldn't remove group reference from image"
-              << cpp_strerror(-r) << dendl;
-    return r;
-  }
-
-  r = cls_client::group_image_remove(&group_ioctx, group_header_oid, spec);
-  if (r < 0) {
-    lderr(cct) << "couldn't remove image from group"
-              << cpp_strerror(-r) << dendl;
-    return r;
-  }
-
-  return 0;
-}
-
-template <typename I>
-int Group<I>::image_list(librados::IoCtx& group_ioctx,
-                    const char *group_name,
-                    std::vector<group_image_status_t> *images)
-{
-  CephContext *cct = (CephContext *)group_ioctx.cct();
-  ldout(cct, 20) << "io_ctx=" << &group_ioctx
-                << " group name " << group_name << dendl;
-
-  string group_id;
-
-  int r = cls_client::dir_get_id(&group_ioctx, RBD_GROUP_DIRECTORY,
-                                group_name, &group_id);
-  if (r < 0) {
-    lderr(cct) << "error reading consistency group id object: "
-              << cpp_strerror(r)
-              << dendl;
-    return r;
-  }
-  string group_header_oid = util::group_header_name(group_id);
-
-  ldout(cct, 20) << "listing images in group name "
-                << group_name << " group id " << group_header_oid << dendl;
-
-  std::vector<cls::rbd::GroupImageStatus> image_ids;
-
-  const int max_read = 1024;
-  do {
-    std::vector<cls::rbd::GroupImageStatus> image_ids_page;
-    cls::rbd::GroupImageSpec start_last;
-
-    r = cls_client::group_image_list(&group_ioctx, group_header_oid,
-                                     start_last, max_read, &image_ids_page);
-
-    if (r < 0) {
-      lderr(cct) << "error reading image list from consistency group: "
-       << cpp_strerror(-r) << dendl;
-      return r;
-    }
-    image_ids.insert(image_ids.end(),
-                    image_ids_page.begin(), image_ids_page.end());
-
-    if (image_ids_page.size() > 0)
-      start_last = image_ids_page.rbegin()->spec;
-
-    r = image_ids_page.size();
-  } while (r == max_read);
-
-  for (auto i : image_ids) {
-    librados::Rados rados(group_ioctx);
-    IoCtx ioctx;
-    rados.ioctx_create2(i.spec.pool_id, ioctx);
-    std::string image_name;
-    r = cls_client::dir_get_name(&ioctx, RBD_DIRECTORY,
-                                i.spec.image_id, &image_name);
-    if (r < 0) {
-      return r;
-    }
-
-    images->push_back(
-       group_image_status_t {
-          image_name,
-          i.spec.pool_id,
-          static_cast<group_image_state_t>(i.state)});
-  }
-
-  return 0;
-}
-
-template <typename I>
-int Group<I>::image_get_group(I *ictx, group_spec_t *group_spec)
-{
-  int r = ictx->state->refresh_if_required();
-  if (r < 0)
-    return r;
-
-  if (-1 != ictx->group_spec.pool_id) {
-    librados::Rados rados(ictx->md_ctx);
-    IoCtx ioctx;
-    rados.ioctx_create2(ictx->group_spec.pool_id, ioctx);
-
-    std::string group_name;
-    r = cls_client::dir_get_name(&ioctx, RBD_GROUP_DIRECTORY,
-                                ictx->group_spec.group_id, &group_name);
-    if (r < 0)
-      return r;
-    group_spec->pool = ictx->group_spec.pool_id;
-    group_spec->name = group_name;
-  } else {
-    group_spec->pool = -1;
-    group_spec->name = "";
-  }
-
-  return 0;
-}
-
-} // namespace api
-} // namespace librbd
-
-template class librbd::api::Group<librbd::ImageCtx>;
diff --git a/ceph/src/librbd/api/Group.h b/ceph/src/librbd/api/Group.h
deleted file mode 100644 (file)
index bcc772f..0000000
+++ /dev/null
@@ -1,46 +0,0 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-
-#ifndef CEPH_LIBRBD_API_GROUP_H
-#define CEPH_LIBRBD_API_GROUP_H
-
-#include "include/rbd/librbd.hpp"
-#include <string>
-#include <vector>
-
-namespace librados { struct IoCtx; }
-
-namespace librbd {
-
-struct ImageCtx;
-
-namespace api {
-
-template <typename ImageCtxT = librbd::ImageCtx>
-struct Group {
-
-  static int create(librados::IoCtx& io_ctx, const char *imgname);
-  static int remove(librados::IoCtx& io_ctx, const char *group_name);
-  static int list(librados::IoCtx& io_ctx, std::vector<std::string> *names);
-
-  static int image_add(librados::IoCtx& group_ioctx, const char *group_name,
-                      librados::IoCtx& image_ioctx, const char *image_name);
-  static int image_remove(librados::IoCtx& group_ioctx, const char *group_name,
-                         librados::IoCtx& image_ioctx, const char *image_name);
-  static int image_remove_by_id(librados::IoCtx& group_ioctx,
-                                const char *group_name,
-                                librados::IoCtx& image_ioctx,
-                                const char *image_id);
-  static int image_list(librados::IoCtx& group_ioctx, const char *group_name,
-                       std::vector<group_image_status_t> *images);
-
-  static int image_get_group(ImageCtxT *ictx, group_spec_t *group_spec);
-
-};
-
-} // namespace api
-} // namespace librbd
-
-extern template class librbd::api::Group<librbd::ImageCtx>;
-
-#endif // CEPH_LIBRBD_API_GROUP_H
index 412d79c41b47bf6d636dbc26c8e0dd453e56019c..3753b94f85f95f160b681466b919b6686da34f7a 100644 (file)
@@ -316,7 +316,7 @@ template <typename I>
 void CloneRequest<I>::handle_refresh(int r) {
   ldout(m_cct, 20) << this << " " << __func__ << " r=" << r << dendl;
 
-  bool snap_protected;
+  bool snap_protected = false;
   if (r == 0) {
     m_p_imctx->snap_lock.get_read();
     r = m_p_imctx->is_snap_protected(m_p_imctx->snap_id, &snap_protected);
index 542dd28ab38d1c9a079299920fcd85bf07676358..31889b822a6d9e3622a2004c833e3641f5ea98d3 100644 (file)
@@ -28,7 +28,6 @@
 #include "librbd/internal.h"
 #include "librbd/Operations.h"
 #include "librbd/api/DiffIterate.h"
-#include "librbd/api/Group.h"
 #include "librbd/api/Mirror.h"
 #include "librbd/io/AioCompletion.h"
 #include "librbd/io/ImageRequestWQ.h"
@@ -104,7 +103,6 @@ struct C_AioCompletion : public Context {
 struct C_OpenComplete : public C_AioCompletion {
   librbd::ImageCtx *ictx;
   void **ictxp;
-  bool reopen;
   C_OpenComplete(librbd::ImageCtx *ictx, librbd::io::AioCompletion* comp,
                 void **ictxp)
     : C_AioCompletion(ictx, librbd::io::AIO_TYPE_OPEN, comp),
@@ -154,19 +152,6 @@ struct C_UpdateWatchCB : public librbd::UpdateWatchCtx {
   }
 };
 
-void group_image_status_cpp_to_c(const librbd::group_image_status_t &cpp_status,
-                                rbd_group_image_status_t *c_status) {
-  c_status->spec.name = strdup(cpp_status.name.c_str());
-  c_status->spec.pool = cpp_status.pool;
-  c_status->state = cpp_status.state;
-}
-
-void group_spec_cpp_to_c(const librbd::group_spec_t &cpp_spec,
-                        rbd_group_spec_t *c_spec) {
-  c_spec->name = strdup(cpp_spec.name.c_str());
-  c_spec->pool = cpp_spec.pool;
-}
-
 void mirror_image_info_cpp_to_c(const librbd::mirror_image_info_t &cpp_info,
                                rbd_mirror_image_info_t *c_info) {
   c_info->global_id = strdup(cpp_info.global_id.c_str());
@@ -676,95 +661,6 @@ namespace librbd {
     return librbd::api::Mirror<>::image_status_summary(io_ctx, states);
   }
 
-  int RBD::group_create(IoCtx& io_ctx, const char *group_name)
-  {
-    TracepointProvider::initialize<tracepoint_traits>(get_cct(io_ctx));
-    tracepoint(librbd, group_create_enter, io_ctx.get_pool_name().c_str(),
-              io_ctx.get_id(), group_name);
-    int r = librbd::api::Group<>::create(io_ctx, group_name);
-    tracepoint(librbd, group_create_exit, r);
-    return r;
-  }
-
-  int RBD::group_remove(IoCtx& io_ctx, const char *group_name)
-  {
-    TracepointProvider::initialize<tracepoint_traits>(get_cct(io_ctx));
-    tracepoint(librbd, group_remove_enter, io_ctx.get_pool_name().c_str(),
-              io_ctx.get_id(), group_name);
-    int r = librbd::api::Group<>::remove(io_ctx, group_name);
-    tracepoint(librbd, group_remove_exit, r);
-    return r;
-  }
-
-  int RBD::group_list(IoCtx& io_ctx, vector<string> *names)
-  {
-    TracepointProvider::initialize<tracepoint_traits>(get_cct(io_ctx));
-    tracepoint(librbd, group_list_enter, io_ctx.get_pool_name().c_str(),
-              io_ctx.get_id());
-
-    int r = librbd::api::Group<>::list(io_ctx, names);
-    if (r >= 0) {
-      for (auto itr : *names) {
-       tracepoint(librbd, group_list_entry, itr.c_str());
-      }
-    }
-    tracepoint(librbd, group_list_exit, r);
-    return r;
-  }
-
-  int RBD::group_image_add(IoCtx& group_ioctx, const char *group_name,
-                           IoCtx& image_ioctx, const char *image_name)
-  {
-    TracepointProvider::initialize<tracepoint_traits>(get_cct(group_ioctx));
-    tracepoint(librbd, group_image_add_enter, group_ioctx.get_pool_name().c_str(),
-              group_ioctx.get_id(), group_name, image_ioctx.get_pool_name().c_str(),
-              image_ioctx.get_id(), image_name);
-    int r = librbd::api::Group<>::image_add(group_ioctx, group_name,
-                                            image_ioctx, image_name);
-    tracepoint(librbd, group_image_add_exit, r);
-    return r;
-  }
-
-  int RBD::group_image_remove(IoCtx& group_ioctx, const char *group_name,
-                              IoCtx& image_ioctx, const char *image_name)
-  {
-    TracepointProvider::initialize<tracepoint_traits>(get_cct(group_ioctx));
-    tracepoint(librbd, group_image_remove_enter, group_ioctx.get_pool_name().c_str(),
-              group_ioctx.get_id(), group_name, image_ioctx.get_pool_name().c_str(),
-              image_ioctx.get_id(), image_name);
-    int r = librbd::api::Group<>::image_remove(group_ioctx, group_name,
-                                               image_ioctx, image_name);
-    tracepoint(librbd, group_image_remove_exit, r);
-    return r;
-  }
-
-  int RBD::group_image_remove_by_id(IoCtx& group_ioctx, const char *group_name,
-                                    IoCtx& image_ioctx, const char *image_id)
-  {
-    TracepointProvider::initialize<tracepoint_traits>(get_cct(group_ioctx));
-    tracepoint(librbd, group_image_remove_by_id_enter,
-               group_ioctx.get_pool_name().c_str(),
-               group_ioctx.get_id(), group_name,
-               image_ioctx.get_pool_name().c_str(),
-               image_ioctx.get_id(), image_id);
-    int r = librbd::api::Group<>::image_remove_by_id(group_ioctx, group_name,
-                                                     image_ioctx, image_id);
-    tracepoint(librbd, group_image_remove_by_id_exit, r);
-    return r;
-  }
-
-  int RBD::group_image_list(IoCtx& group_ioctx, const char *group_name,
-                            std::vector<group_image_status_t> *images)
-  {
-    TracepointProvider::initialize<tracepoint_traits>(get_cct(group_ioctx));
-    tracepoint(librbd, group_image_list_enter, group_ioctx.get_pool_name().c_str(),
-              group_ioctx.get_id(), group_name);
-    int r = librbd::api::Group<>::image_list(group_ioctx, group_name, images);
-    tracepoint(librbd, group_image_list_exit, r);
-    return r;
-  }
-
-
   RBD::AioCompletion::AioCompletion(void *cb_arg, callback_t complete_cb)
   {
     pc = reinterpret_cast<void*>(librbd::io::AioCompletion::create(
@@ -966,15 +862,6 @@ namespace librbd {
     return r;
   }
 
-  int Image::get_group(group_spec_t *group_spec)
-  {
-    ImageCtx *ictx = (ImageCtx *)ctx;
-    tracepoint(librbd, image_get_group_enter, ictx->name.c_str());
-    int r = librbd::api::Group<>::image_get_group(ictx, group_spec);
-    tracepoint(librbd, image_get_group_exit, r);
-    return r;
-  }
-
   int Image::features(uint64_t *features)
   {
     ImageCtx *ictx = (ImageCtx *)ctx;
@@ -4116,214 +4003,3 @@ extern "C" void rbd_aio_release(rbd_completion_t c)
   comp->release();
 }
 
-extern "C" int rbd_group_create(rados_ioctx_t p, const char *name)
-{
-  librados::IoCtx io_ctx;
-  librados::IoCtx::from_rados_ioctx_t(p, io_ctx);
-  TracepointProvider::initialize<tracepoint_traits>(get_cct(io_ctx));
-  tracepoint(librbd, group_create_enter, io_ctx.get_pool_name().c_str(),
-             io_ctx.get_id(), name);
-  int r = librbd::api::Group<>::create(io_ctx, name);
-  tracepoint(librbd, group_create_exit, r);
-  return r;
-}
-
-extern "C" int rbd_group_remove(rados_ioctx_t p, const char *name)
-{
-  librados::IoCtx io_ctx;
-  librados::IoCtx::from_rados_ioctx_t(p, io_ctx);
-  TracepointProvider::initialize<tracepoint_traits>(get_cct(io_ctx));
-  tracepoint(librbd, group_remove_enter, io_ctx.get_pool_name().c_str(),
-             io_ctx.get_id(), name);
-  int r = librbd::api::Group<>::remove(io_ctx, name);
-  tracepoint(librbd, group_remove_exit, r);
-  return r;
-}
-
-extern "C" int rbd_group_list(rados_ioctx_t p, char *names, size_t *size)
-{
-  librados::IoCtx io_ctx;
-  librados::IoCtx::from_rados_ioctx_t(p, io_ctx);
-  TracepointProvider::initialize<tracepoint_traits>(get_cct(io_ctx));
-  tracepoint(librbd, group_list_enter, io_ctx.get_pool_name().c_str(),
-             io_ctx.get_id());
-
-  vector<string> cpp_names;
-  int r = librbd::list(io_ctx, cpp_names);
-  if (r < 0) {
-    tracepoint(librbd, group_list_exit, r);
-    return r;
-  }
-
-  size_t expected_size = 0;
-
-  for (size_t i = 0; i < cpp_names.size(); i++) {
-    expected_size += cpp_names[i].size() + 1;
-  }
-  if (*size < expected_size) {
-    *size = expected_size;
-    tracepoint(librbd, group_list_exit, -ERANGE);
-    return -ERANGE;
-  }
-
-  if (!names)
-    return -EINVAL;
-
-  names[expected_size] = '\0';
-  for (int i = 0; i < (int)cpp_names.size(); i++) {
-    const char* name = cpp_names[i].c_str();
-    tracepoint(librbd, group_list_entry, name);
-    strcpy(names, name);
-    names += strlen(names) + 1;
-  }
-  tracepoint(librbd, group_list_exit, (int)expected_size);
-  return (int)expected_size;
-}
-
-extern "C" int rbd_group_image_add(
-                                 rados_ioctx_t group_p, const char *group_name,
-                                 rados_ioctx_t image_p, const char *image_name)
-{
-  librados::IoCtx group_ioctx;
-  librados::IoCtx image_ioctx;
-
-  librados::IoCtx::from_rados_ioctx_t(group_p, group_ioctx);
-  librados::IoCtx::from_rados_ioctx_t(image_p, image_ioctx);
-
-  TracepointProvider::initialize<tracepoint_traits>(get_cct(group_ioctx));
-  tracepoint(librbd, group_image_add_enter, group_ioctx.get_pool_name().c_str(),
-            group_ioctx.get_id(), group_name, image_ioctx.get_pool_name().c_str(),
-            image_ioctx.get_id(), image_name);
-
-  int r = librbd::api::Group<>::image_add(group_ioctx, group_name, image_ioctx,
-                                          image_name);
-
-  tracepoint(librbd, group_image_add_exit, r);
-  return r;
-}
-
-extern "C" int rbd_group_image_remove(
-                                rados_ioctx_t group_p, const char *group_name,
-                                rados_ioctx_t image_p, const char *image_name)
-{
-  librados::IoCtx group_ioctx;
-  librados::IoCtx image_ioctx;
-
-  librados::IoCtx::from_rados_ioctx_t(group_p, group_ioctx);
-  librados::IoCtx::from_rados_ioctx_t(image_p, image_ioctx);
-
-  TracepointProvider::initialize<tracepoint_traits>(get_cct(group_ioctx));
-  tracepoint(librbd, group_image_remove_enter, group_ioctx.get_pool_name().c_str(),
-            group_ioctx.get_id(), group_name, image_ioctx.get_pool_name().c_str(),
-            image_ioctx.get_id(), image_name);
-
-  int r = librbd::api::Group<>::image_remove(group_ioctx, group_name,
-                                             image_ioctx, image_name);
-
-  tracepoint(librbd, group_image_remove_exit, r);
-  return r;
-}
-
-extern "C" int rbd_group_image_remove_by_id(rados_ioctx_t group_p,
-                                            const char *group_name,
-                                            rados_ioctx_t image_p,
-                                            const char *image_id)
-{
-  librados::IoCtx group_ioctx;
-  librados::IoCtx image_ioctx;
-
-  librados::IoCtx::from_rados_ioctx_t(group_p, group_ioctx);
-  librados::IoCtx::from_rados_ioctx_t(image_p, image_ioctx);
-
-  TracepointProvider::initialize<tracepoint_traits>(get_cct(group_ioctx));
-  tracepoint(librbd, group_image_remove_by_id_enter,
-             group_ioctx.get_pool_name().c_str(),
-             group_ioctx.get_id(), group_name,
-             image_ioctx.get_pool_name().c_str(),
-             image_ioctx.get_id(), image_id);
-
-  int r = librbd::api::Group<>::image_remove_by_id(group_ioctx, group_name,
-                                                   image_ioctx, image_id);
-
-  tracepoint(librbd, group_image_remove_by_id_exit, r);
-  return r;
-}
-
-extern "C" int rbd_group_image_list(rados_ioctx_t group_p,
-                                   const char *group_name,
-                                   rbd_group_image_status_t *images,
-                                   size_t *image_size)
-{
-  librados::IoCtx group_ioctx;
-  librados::IoCtx::from_rados_ioctx_t(group_p, group_ioctx);
-
-  TracepointProvider::initialize<tracepoint_traits>(get_cct(group_ioctx));
-  tracepoint(librbd, group_image_list_enter, group_ioctx.get_pool_name().c_str(),
-            group_ioctx.get_id(), group_name);
-
-  std::vector<librbd::group_image_status_t> cpp_images;
-  int r = librbd::api::Group<>::image_list(group_ioctx, group_name,
-                                           &cpp_images);
-
-  if (r == -ENOENT) {
-    tracepoint(librbd, group_image_list_exit, 0);
-    return 0;
-  }
-
-  if (r < 0) {
-    tracepoint(librbd, group_image_list_exit, r);
-    return r;
-  }
-
-  if (*image_size < cpp_images.size()) {
-    tracepoint(librbd, group_image_list_exit, -ERANGE);
-    return -ERANGE;
-  }
-
-  for (size_t i = 0; i < cpp_images.size(); ++i) {
-    group_image_status_cpp_to_c(cpp_images[i], &images[i]);
-  }
-
-  tracepoint(librbd, group_image_list_exit, r);
-  return r;
-}
-
-extern "C" int rbd_image_get_group(rados_ioctx_t image_p,
-                                  const char *image_name,
-                                  rbd_group_spec_t *c_group_spec)
-{
-  librados::IoCtx io_ctx;
-  librados::IoCtx::from_rados_ioctx_t(image_p, io_ctx);
-
-  librbd::ImageCtx *ictx = new librbd::ImageCtx(image_name, "", "", io_ctx, false);
-  int r = ictx->state->open(false);
-  if (r < 0) {
-    tracepoint(librbd, open_image_exit, r);
-    return r;
-  }
-
-  tracepoint(librbd, image_get_group_enter, ictx->name.c_str());
-  librbd::group_spec_t group_spec;
-  r = librbd::api::Group<>::image_get_group(ictx, &group_spec);
-  group_spec_cpp_to_c(group_spec, c_group_spec);
-  tracepoint(librbd, image_get_group_exit, r);
-  ictx->state->close();
-  return r;
-}
-
-extern "C" void rbd_group_spec_cleanup(rbd_group_spec_t *group_spec) {
-  free(group_spec->name);
-}
-
-extern "C" void rbd_group_image_status_cleanup(
-                                             rbd_group_image_status_t *image) {
-    free(image->spec.name);
-}
-
-extern "C" void rbd_group_image_status_list_cleanup(
-                                             rbd_group_image_status_t *images,
-                                             size_t len) {
-  for (size_t i = 0; i < len; ++i) {
-    rbd_group_image_status_cleanup(&images[i]);
-  }
-}
index da5313f0c827853e09fbc8690e76f432e78a8b04..149660b65b1ac9ed84a46007beeb3943975884de 100644 (file)
@@ -997,8 +997,8 @@ void CInode::_stored(int r, version_t v, Context *fin)
 {
   if (r < 0) {
     dout(1) << "store error " << r << " v " << v << " on " << *this << dendl;
-    mdcache->mds->clog->error() << "failed to store ino " << ino() << " object,"
-                               << " errno " << r;
+    mdcache->mds->clog->error() << "failed to store inode " << ino()
+                                << " object: " << cpp_strerror(r);
     mdcache->mds->handle_write_error(r);
     fin->complete(r);
     return;
@@ -1078,8 +1078,7 @@ void CInode::_fetched(bufferlist& bl, bufferlist& bl2, Context *fin)
   } else if (bl.length()) {
     p = bl.begin();
   } else {
-    derr << "No data while reading inode 0x" << std::hex << ino()
-      << std::dec << dendl;
+    derr << "No data while reading inode " << ino() << dendl;
     fin->complete(-ENOENT);
     return;
   }
@@ -1100,8 +1099,7 @@ void CInode::_fetched(bufferlist& bl, bufferlist& bl2, Context *fin)
       fin->complete(0);
     }
   } catch (buffer::error &err) {
-    derr << "Corrupt inode 0x" << std::hex << ino() << std::dec
-      << ": " << err << dendl;
+    derr << "Corrupt inode " << ino() << ": " << err << dendl;
     fin->complete(-EINVAL);
     return;
   }
@@ -1297,7 +1295,7 @@ void CInode::verify_diri_backtrace(bufferlist &bl, int err)
 
   if (err) {
     MDSRank *mds = mdcache->mds;
-    mds->clog->error() << "bad backtrace on dir ino " << ino();
+    mds->clog->error() << "bad backtrace on directory inode " << ino();
     assert(!"bad backtrace" == (g_conf->mds_verify_backtrace > 1));
 
     _mark_dirty_parent(mds->mdlog->get_current_segment(), false);
@@ -2124,10 +2122,12 @@ void CInode::finish_scatter_gather_update(int type)
        }
       }
 
-      if (pi->dirstat.nfiles < 0 ||
-         pi->dirstat.nsubdirs < 0) {
-       clog->error() << "bad/negative fragstat on " << ino()
-           << ", inode has " << pi->dirstat;
+      if (pi->dirstat.nfiles < 0 || pi->dirstat.nsubdirs < 0)
+      {
+        std::string path;
+        make_path_string(path);
+       clog->error() << "Inconsistent statistics detected: fragstat on inode "
+                      << ino() << " (" << path << "), inode has " << pi->dirstat;
        assert(!"bad/negative fragstat" == g_conf->mds_verify_scatter);
 
        if (pi->dirstat.nfiles < 0)
@@ -2216,8 +2216,9 @@ void CInode::finish_scatter_gather_update(int type)
          if (state_test(CInode::STATE_REPAIRSTATS)) {
            dout(20) << " rstat mismatch, fixing" << dendl;
          } else {
-           clog->error() << "unmatched rstat on " << ino() << ", inode has "
-                         << pi->rstat << ", dirfrags have " << rstat;
+           clog->error() << "inconsistent rstat on inode " << ino()
+                          << ", inode has " << pi->rstat
+                          << ", directory fragments have " << rstat;
            assert(!"unmatched rstat" == g_conf->mds_verify_scatter);
          }
          // trust the dirfrag for now
@@ -3923,8 +3924,8 @@ next:
       if (!results->backtrace.passed && in->scrub_infop->header->get_repair()) {
         std::string path;
         in->make_path_string(path);
-        in->mdcache->mds->clog->warn() << "bad backtrace on inode " << *in
-                           << ", rewriting it at " << path;
+        in->mdcache->mds->clog->warn() << "bad backtrace on inode " << in->ino()
+                                       << "(" << path << "), rewriting it";
         in->_mark_dirty_parent(in->mdcache->mds->mdlog->get_current_segment(),
                            false);
       }
@@ -3934,7 +3935,7 @@ next:
       {
         InoTable *inotable = mdcache->mds->inotable;
 
-        dout(10) << "scrub: inotable ino = 0x" << std::hex << inode.ino << dendl;
+        dout(10) << "scrub: inotable ino = " << inode.ino << dendl;
         dout(10) << "scrub: inotable free says "
           << inotable->is_marked_free(inode.ino) << dendl;
 
index 10abd5d2c0d8f46450d5ea9a052d07fc8bdac4f4..c605dfcd15716a1dc1585750f110ff58c60f51e6 100644 (file)
@@ -126,16 +126,10 @@ void FSMap::print_summary(Formatter *f, ostream *out) const
       f->dump_unsigned("max", fs->mds_map.max_mds);
     }
   } else {
-    if (filesystems.size() == 1) {
-      auto fs = filesystems.begin()->second;
-      *out << fs->mds_map.up.size() << "/" << fs->mds_map.in.size() << "/"
-           << fs->mds_map.max_mds << " up";
-    } else {
-      for (auto i : filesystems) {
-        auto fs = i.second;
-        *out << fs->mds_map.fs_name << "-" << fs->mds_map.up.size() << "/"
-             << fs->mds_map.in.size() << "/" << fs->mds_map.max_mds << " up ";
-      }
+    for (auto i : filesystems) {
+      auto fs = i.second;
+      *out << fs->mds_map.fs_name << "-" << fs->mds_map.up.size() << "/"
+          << fs->mds_map.in.size() << "/" << fs->mds_map.max_mds << " up ";
     }
   }
 
@@ -341,7 +335,32 @@ void FSMap::get_health_checks(health_check_map_t *checks) const
   for (const auto &i : filesystems) {
     const auto &fs = i.second;
     health_check_map_t fschecks;
+
     fs->mds_map.get_health_checks(&fschecks);
+
+    // Some of the failed ranks might be transient (i.e. there are standbys
+    // ready to replace them).  We will report only on "stuck" failed, i.e.
+    // ranks which are failed and have no standby replacement available.
+    std::set<mds_rank_t> stuck_failed;
+
+    for (const auto &rank : fs->mds_map.failed) {
+      const mds_gid_t replacement = find_replacement_for(
+          {fs->fscid, rank}, {}, g_conf->mon_force_standby_active);
+      if (replacement == MDS_GID_NONE) {
+        stuck_failed.insert(rank);
+      }
+    }
+
+    // FS_WITH_FAILED_MDS
+    if (!stuck_failed.empty()) {
+      health_check_t& fscheck = checks->get_or_add(
+        "FS_WITH_FAILED_MDS", HEALTH_WARN,
+        "%num% filesystem%plurals% %isorare% have a failed mds daemon");
+      ostringstream ss;
+      ss << "fs " << fs->mds_map.fs_name << " has " << stuck_failed.size()
+         << " failed mds" << (stuck_failed.size() > 1 ? "s" : "");
+      fscheck.detail.push_back(ss.str()); }
+
     checks->merge(fschecks);
     standby_count_wanted = std::max(
       standby_count_wanted,
@@ -351,8 +370,8 @@ void FSMap::get_health_checks(health_check_map_t *checks) const
   // MDS_INSUFFICIENT_STANDBY
   if (standby_count_wanted) {
     std::ostringstream oss, dss;
-    oss << "insufficient standby daemons available";
-    auto& d = checks->add("MDS_INSUFFICIENT_STANDBY", HEALTH_WARN, oss.str());
+    oss << "insufficient standby MDS daemons available";
+    auto& d = checks->get_or_add("MDS_INSUFFICIENT_STANDBY", HEALTH_WARN, oss.str());
     dss << "have " << standby_daemons.size() << "; want " << standby_count_wanted
        << " more";
     d.detail.push_back(dss.str());
index 9fc11eef93b7c0e858727ae8db74c43fbb0ecf2c..e6600dbd4ea77e4cd6b10d2fb93842c24299f505 100644 (file)
@@ -8605,7 +8605,6 @@ void MDCache::do_open_ino(inodeno_t ino, open_ino_info_t& info, int err)
 {
   if (err < 0 && err != -EAGAIN) {
     info.checked.clear();
-    info.checked.insert(mds->get_nodeid());
     info.checking = MDS_RANK_NONE;
     info.check_peers = true;
     info.fetch_backtrace = true;
@@ -8617,7 +8616,13 @@ void MDCache::do_open_ino(inodeno_t ino, open_ino_info_t& info, int err)
       info.last_err = err;
   }
 
-  if (info.check_peers) {
+  if (info.check_peers || info.discover) {
+    if (info.discover) {
+      // got backtrace from peer, but failed to find inode. re-check peers
+      info.discover = false;
+      info.ancestors.clear();
+      info.checked.clear();
+    }
     info.check_peers = false;
     info.checking = MDS_RANK_NONE;
     do_open_ino_peer(ino, info);
@@ -8626,7 +8631,6 @@ void MDCache::do_open_ino(inodeno_t ino, open_ino_info_t& info, int err)
     info.fetch_backtrace = false;
     info.checking = mds->get_nodeid();
     info.checked.clear();
-    info.checked.insert(mds->get_nodeid());
     C_IO_MDC_OpenInoBacktraceFetched *fin =
       new C_IO_MDC_OpenInoBacktraceFetched(this, ino);
     fetch_backtrace(ino, info.pool, fin->bl,
@@ -8664,7 +8668,8 @@ void MDCache::do_open_ino_peer(inodeno_t ino, open_ino_info_t& info)
       }
   }
   if (peer < 0) {
-    if (all.size() > active.size() && all != info.checked) {
+    all.erase(mds->get_nodeid());
+    if (all != info.checked) {
       dout(10) << " waiting for more peers to be active" << dendl;
     } else {
       dout(10) << " all MDS peers have been checked " << dendl;
@@ -8813,7 +8818,6 @@ void MDCache::open_ino(inodeno_t ino, int64_t pool, MDSInternalContextBase* fin,
     info.waiters.push_back(fin);
   } else {
     open_ino_info_t& info = opening_inodes[ino];
-    info.checked.insert(mds->get_nodeid());
     info.want_replica = want_replica;
     info.want_xlocked = want_xlocked;
     info.tid = ++open_ino_last_tid;
@@ -8845,7 +8849,6 @@ void MDCache::find_ino_peers(inodeno_t ino, MDSInternalContextBase *c, mds_rank_
   fip.tid = tid;
   fip.fin = c;
   fip.hint = hint;
-  fip.checked.insert(mds->get_nodeid());
   _do_find_ino_peer(fip);
 }
 
@@ -8873,7 +8876,8 @@ void MDCache::_do_find_ino_peer(find_ino_peer_info_t& fip)
       }
   }
   if (m == MDS_RANK_NONE) {
-    if (all.size() > active.size()) {
+    all.erase(mds->get_nodeid());
+    if (all != fip.checked) {
       dout(10) << "_do_find_ino_peer waiting for more peers to be active" << dendl;
     } else {
       dout(10) << "_do_find_ino_peer failed on " << fip.ino << dendl;
@@ -9081,6 +9085,27 @@ void MDCache::request_finish(MDRequestRef& mdr)
     return; 
   }
 
+  switch(mdr->internal_op) {
+    case CEPH_MDS_OP_FRAGMENTDIR:
+      logger->inc(l_mdss_ireq_fragmentdir);
+      break;
+    case CEPH_MDS_OP_EXPORTDIR:
+      logger->inc(l_mdss_ireq_exportdir);
+      break;
+    case CEPH_MDS_OP_ENQUEUE_SCRUB:
+      logger->inc(l_mdss_ireq_enqueue_scrub);
+      break;
+    case CEPH_MDS_OP_FLUSH:
+      logger->inc(l_mdss_ireq_flush);
+      break;
+    case CEPH_MDS_OP_REPAIR_FRAGSTATS:
+      logger->inc(l_mdss_ireq_fragstats);
+      break;
+    case CEPH_MDS_OP_REPAIR_INODESTATS:
+      logger->inc(l_mdss_ireq_inodestats);
+      break;
+  }
+
   request_cleanup(mdr);
 }
 
@@ -9715,7 +9740,7 @@ void MDCache::handle_discover(MDiscover *dis)
 
   if (mds->get_state() <= MDSMap::STATE_REJOIN) {
     if (mds->get_state() < MDSMap::STATE_REJOIN &&
-       mds->get_want_state() != CEPH_MDS_STATE_REJOIN) {
+       mds->get_want_state() < CEPH_MDS_STATE_REJOIN) {
       dis->put();
       return;
     }
@@ -12362,6 +12387,19 @@ void MDCache::register_perfcounters()
     pcb.add_u64_counter(l_mdc_recovery_completed, "recovery_completed",
         "File recoveries completed", "recd", PerfCountersBuilder::PRIO_INTERESTING);
 
+    pcb.add_u64_counter(l_mdss_ireq_enqueue_scrub, "ireq_enqueue_scrub",
+        "Internal Request type enqueue scrub");
+    pcb.add_u64_counter(l_mdss_ireq_exportdir, "ireq_exportdir",
+        "Internal Request type export dir");
+    pcb.add_u64_counter(l_mdss_ireq_flush, "ireq_flush",
+        "Internal Request type flush");
+    pcb.add_u64_counter(l_mdss_ireq_fragmentdir, "ireq_fragmentdir",
+        "Internal Request type fragmentdir");
+    pcb.add_u64_counter(l_mdss_ireq_fragstats, "ireq_fragstats",
+        "Internal Request type frag stats");
+    pcb.add_u64_counter(l_mdss_ireq_inodestats, "ireq_inodestats",
+        "Internal Request type inode stats");
+
     logger.reset(pcb.create_perf_counters());
     g_ceph_context->get_perfcounters_collection()->add(logger.get());
     recovery_queue.set_logger(logger.get());
index 3b0801958913d60258ea8a0c57573206d1821986..8282ad7c44e778632f3cf5cbe9d8e574dbd3c616 100644 (file)
@@ -98,6 +98,13 @@ enum {
   // How many inodes ever completed size recovery
   l_mdc_recovery_completed,
 
+  l_mdss_ireq_enqueue_scrub,
+  l_mdss_ireq_exportdir,
+  l_mdss_ireq_flush,
+  l_mdss_ireq_fragmentdir,
+  l_mdss_ireq_fragstats,
+  l_mdss_ireq_inodestats,
+
   l_mdc_last,
 };
 
index b77c013b14a6ddedcdabc16b2e5988d8163b94c5..cb1f9b558b24c7ad98a70334f14ca4115d369029 100644 (file)
@@ -905,7 +905,7 @@ void MDLog::_recovery_thread(MDSInternalContextBase *completion)
   // If the pointer object is not present, then create it with
   // front = default ino and back = null
   JournalPointer jp(mds->get_nodeid(), mds->mdsmap->get_metadata_pool());
-  int const read_result = jp.load(mds->objecter);
+  const int read_result = jp.load(mds->objecter);
   if (read_result == -ENOENT) {
     inodeno_t const default_log_ino = MDS_INO_LOG_OFFSET + mds->get_nodeid();
     jp.front = default_log_ino;
index 2bbe759ed3e7db9e95b832f233407643f1b75513..883a4c3b9c68bda61d041dc1ed8c2273b2dbdedf 100644 (file)
@@ -781,6 +781,9 @@ int MDSDaemon::_handle_command(
     std::string val;
     cmd_getval(cct, cmdmap, "value", val);
     r = cct->_conf->set_val(key, val, true, &ss);
+    if (r == 0) {
+      cct->_conf->apply_changes(nullptr);
+    }
   } else if (prefix == "exit") {
     // We will send response before executing
     ss << "Exiting...";
index 1c6a7d791a90d77d1121566b678ea2b769114dc3..0c7a1a7378a3b89ffd86c943b57f3bad5132d335 100644 (file)
  * 
  */
 
-
-
 #ifndef CEPH_MDS_H
 #define CEPH_MDS_H
 
-#include "mdstypes.h"
-
-#include "msg/Dispatcher.h"
-#include "include/CompatSet.h"
-#include "include/types.h"
-#include "include/Context.h"
-#include "common/DecayCounter.h"
-#include "common/perf_counters.h"
+#include "common/LogClient.h"
 #include "common/Mutex.h"
-#include "common/Cond.h"
 #include "common/Timer.h"
-#include "common/LogClient.h"
-#include "common/TrackedOp.h"
-#include "common/Finisher.h"
-#include "common/cmdparse.h"
+#include "include/Context.h"
+#include "include/types.h"
 #include "mgr/MgrClient.h"
-
-#include "MDSRank.h"
-#include "MDSMap.h"
+#include "msg/Dispatcher.h"
 
 #include "Beacon.h"
-
+#include "MDSMap.h"
+#include "MDSRank.h"
 
 #define CEPH_MDS_PROTOCOL    30 /* cluster internal */
 
-class MonClient;
-
-class Server;
-class Locker;
-class MDCache;
-class MDBalancer;
-class MDSInternalContextBase;
-
-class Messenger;
-class Message;
-
-class SnapServer;
-class SnapClient;
-
-class MDSTableServer;
-class MDSTableClient;
-
 class AuthAuthorizeHandlerRegistry;
+class Message;
+class Messenger;
+class MonClient;
 
 class MDSDaemon : public Dispatcher, public md_config_obs_t {
  public:
index bd54469756f425cff695a4e8b34ece81fb774882..1d38f19f498e663b266cd500070bc69a46388a45 100644 (file)
@@ -408,29 +408,9 @@ void MDSMap::get_health(list<pair<health_status_t,string> >& summary,
 
 void MDSMap::get_health_checks(health_check_map_t *checks) const
 {
-  // FS_WITH_FAILED_MDS
-  // MDS_FAILED
-  if (!failed.empty()) {
-    health_check_t& fscheck = checks->add(
-      "FS_WITH_FAILED_MDS", HEALTH_WARN,
-      "%num% filesystem%plurals% %isorare% have a failed mds daemon");
-    ostringstream ss;
-    ss << "fs " << fs_name << " has " << failed.size() << " failed mds"
-       << (failed.size() > 1 ? "s" : "");
-    fscheck.detail.push_back(ss.str());
-
-    health_check_t& check = checks->add("MDS_FAILED", HEALTH_ERR,
-                                        "%num% mds daemon%plurals% down");
-    for (auto p : failed) {
-      std::ostringstream oss;
-      oss << "fs " << fs_name << " mds." << p << " has failed";
-      check.detail.push_back(oss.str());
-    }
-  }
-
-  // MDS_DAMAGED
+  // MDS_DAMAGE
   if (!damaged.empty()) {
-    health_check_t& check = checks->add("MDS_DAMAGED", HEALTH_ERR,
+    health_check_t& check = checks->get_or_add("MDS_DAMAGE", HEALTH_ERR,
                                        "%num% mds daemon%plurals% damaged");
     for (auto p : damaged) {
       std::ostringstream oss;
@@ -440,9 +420,8 @@ void MDSMap::get_health_checks(health_check_map_t *checks) const
   }
 
   // FS_DEGRADED
-  // MDS_DEGRADED
   if (is_degraded()) {
-    health_check_t& fscheck = checks->add(
+    health_check_t& fscheck = checks->get_or_add(
       "FS_DEGRADED", HEALTH_WARN,
       "%num% filesystem%plurals% %isorare% degraded");
     ostringstream ss;
@@ -469,12 +448,6 @@ void MDSMap::get_health_checks(health_check_map_t *checks) const
       if (ss.str().length())
        detail.push_back(ss.str());
     }
-    if (!detail.empty()) {
-      health_check_t& check = checks->add(
-       "MDS_DEGRADED", HEALTH_WARN,
-       "%num% mds daemon%plurals% %isorare% degraded");
-      check.detail.insert(check.detail.end(), detail.begin(), detail.end());
-    }
   }
 }
 
@@ -541,7 +514,13 @@ void MDSMap::mds_info_t::decode(bufferlist::iterator& bl)
   DECODE_FINISH(bl);
 }
 
-
+std::string MDSMap::mds_info_t::human_name() const
+{
+  // Like "daemon mds.myhost restarted", "Activating daemon mds.myhost"
+  std::ostringstream out;
+  out << "daemon mds." << name;
+  return out.str();
+}
 
 void MDSMap::encode(bufferlist& bl, uint64_t features) const
 {
index 3c774c50c27754b7af47ef8aac35a3580001b69a..744e6423508f775242bde6d987b91060e57ccc64 100644 (file)
@@ -156,6 +156,10 @@ public:
     void decode(bufferlist::iterator& p);
     void dump(Formatter *f) const;
     void print_summary(ostream &out) const;
+
+    // The long form name for use in cluster log messages
+    std::string human_name() const;
+
     static void generate_test_instances(list<mds_info_t*>& ls);
   private:
     void encode_versioned(bufferlist& bl, uint64_t features) const;
index d2501bbb56f44057fbd5fdbe74d2b200a0e5f928..52e357f0742e0ccaac5b9c1ccbf108685b7e29b0 100644 (file)
@@ -1144,8 +1144,14 @@ void MDSRank::starting_done()
 
   mdcache->open_root();
 
-  // start new segment
-  mdlog->start_new_segment();
+  if (mdcache->is_open()) {
+    mdlog->start_new_segment();
+  } else {
+    mdcache->wait_for_open(new MDSInternalContextWrapper(this,
+                          new FunctionContext([this] (int r) {
+                              mdlog->start_new_segment();
+                          })));
+  }
 }
 
 
@@ -1660,7 +1666,7 @@ void MDSRankDispatcher::handle_mds_map(
 
   // REJOIN
   // is everybody finally rejoining?
-  if (is_rejoin() || is_clientreplay() || is_active() || is_stopping()) {
+  if (is_starting() || is_rejoin() || is_clientreplay() || is_active() || is_stopping()) {
     // did we start?
     if (!oldmap->is_rejoining() && mdsmap->is_rejoining())
       rejoin_joint_start();
@@ -1670,7 +1676,8 @@ void MDSRankDispatcher::handle_mds_map(
        oldmap->is_rejoining() && !mdsmap->is_rejoining())
       mdcache->dump_cache();      // for DEBUG only
 
-    if (oldstate >= MDSMap::STATE_REJOIN) {
+    if (oldstate >= MDSMap::STATE_REJOIN ||
+       oldstate == MDSMap::STATE_STARTING) {
       // ACTIVE|CLIENTREPLAY|REJOIN => we can discover from them.
       set<mds_rank_t> olddis, dis;
       oldmap->get_mds_set(olddis, MDSMap::STATE_ACTIVE);
@@ -2262,6 +2269,7 @@ void MDSRank::command_get_subtrees(Formatter *f)
       f->dump_bool("is_auth", dir->is_auth());
       f->dump_int("auth_first", dir->get_dir_auth().first);
       f->dump_int("auth_second", dir->get_dir_auth().second);
+      f->dump_int("export_pin", dir->inode->get_export_pin());
       f->open_object_section("dir");
       dir->dump(f);
       f->close_section();
index 66a145c3c2293a8c0bf141cc72a57c4a34092763..2a595acdd7a47ea9e0cd1bbaf3e821171779f7c9 100644 (file)
@@ -393,9 +393,9 @@ void ScrubStack::_validate_inode_done(CInode *in, int r,
 
   // Inform the cluster log if we found an error
   if (!result.passed_validation) {
-    clog->warn() << "Scrub error on inode " << *in
+    clog->warn() << "Scrub error on inode " << in->ino()
                  << " (" << path << ") see " << g_conf->name
-                 << " log for details";
+                 << " log and `damage ls` output for details";
 
     // Put the verbose JSON output into the MDS log for later inspection
     JSONFormatter f;
index cf32c95f6864f4a9e3b59fba72a5a0a905043c49..952df343995eac50b17a691e53da796abedf97e2 100644 (file)
@@ -70,7 +70,6 @@ using namespace std;
 #undef dout_prefix
 #define dout_prefix *_dout << "mds." << mds->get_nodeid() << ".server "
 
-
 class ServerContext : public MDSInternalContextBase {
   protected:
   Server *server;
@@ -3095,9 +3094,11 @@ void Server::handle_client_lookup_ino(MDRequestRef& mdr,
     if (!mds->locker->acquire_locks(mdr, rdlocks, wrlocks, xlocks))
       return;
 
-    // need read access to directory inode
-    if (!check_access(mdr, diri, MAY_READ))
-      return;
+    if (diri != NULL) {
+      // need read access to directory inode
+      if (!check_access(mdr, diri, MAY_READ))
+        return;
+    }
   }
 
   if (want_parent) {
index 2f554f26aa25b8dde3d5e554fa63f09a075b9b79..752ba10b326ee01c8be60f6f6cc15c5342f17a97 100644 (file)
@@ -31,39 +31,39 @@ class MDLog;
 
 enum {
   l_mdss_first = 1000,
+  l_mdss_dispatch_client_request,
+  l_mdss_dispatch_slave_request,
   l_mdss_handle_client_request,
-  l_mdss_handle_slave_request,
   l_mdss_handle_client_session,
-  l_mdss_dispatch_client_request,
+  l_mdss_handle_slave_request,
+  l_mdss_req_create,
+  l_mdss_req_getattr,
+  l_mdss_req_getfilelock,
+  l_mdss_req_link,
+  l_mdss_req_lookup,
   l_mdss_req_lookuphash,
   l_mdss_req_lookupino,
-  l_mdss_req_lookupparent,
   l_mdss_req_lookupname,
-  l_mdss_req_lookup,
+  l_mdss_req_lookupparent,
   l_mdss_req_lookupsnap,
-  l_mdss_req_getattr,
+  l_mdss_req_lssnap,
+  l_mdss_req_mkdir,
+  l_mdss_req_mknod,
+  l_mdss_req_mksnap,
+  l_mdss_req_open,
+  l_mdss_req_readdir,
+  l_mdss_req_rename,
+  l_mdss_req_renamesnap,
+  l_mdss_req_rmdir,
+  l_mdss_req_rmsnap,
+  l_mdss_req_rmxattr,
   l_mdss_req_setattr,
-  l_mdss_req_setlayout,
   l_mdss_req_setdirlayout,
-  l_mdss_req_setxattr,
-  l_mdss_req_rmxattr,
-  l_mdss_req_readdir,
   l_mdss_req_setfilelock,
-  l_mdss_req_getfilelock,
-  l_mdss_req_create,
-  l_mdss_req_open,
-  l_mdss_req_mknod,
-  l_mdss_req_link,
-  l_mdss_req_unlink,
-  l_mdss_req_rmdir,
-  l_mdss_req_rename,
-  l_mdss_req_mkdir,
+  l_mdss_req_setlayout,
+  l_mdss_req_setxattr,
   l_mdss_req_symlink,
-  l_mdss_req_lssnap,
-  l_mdss_req_mksnap,
-  l_mdss_req_rmsnap,
-  l_mdss_req_renamesnap,
-  l_mdss_dispatch_slave_request,
+  l_mdss_req_unlink,
   l_mdss_last,
 };
 
index 561f2db9e8dc5fe8e0eaf321f7c8a6c493d2f2af..3057ba577b13e497dcabda0959ed2978e85299ec 100644 (file)
@@ -826,7 +826,7 @@ void Session::notify_cap_release(size_t n_caps)
  * in order to generate health metrics if the session doesn't see
  * a commensurate number of calls to ::notify_cap_release
  */
-void Session::notify_recall_sent(int const new_limit)
+void Session::notify_recall_sent(const int new_limit)
 {
   if (recalled_at.is_zero()) {
     // Entering recall phase, set up counters so we can later
index f96d00ab275077c6a07115d01493b51265394356..ebd4921cafb41b42702297b570acca835994b6bd 100644 (file)
@@ -148,7 +148,7 @@ public:
   interval_set<inodeno_t> pending_prealloc_inos; // journaling prealloc, will be added to prealloc_inos
 
   void notify_cap_release(size_t n_caps);
-  void notify_recall_sent(int const new_limit);
+  void notify_recall_sent(const int new_limit);
   void clear_recalled_at();
 
   inodeno_t next_ino() const {
index caa717024d165318e92268009040fec537804498..ba59221d9aa05a7db58a627c6000fff7cf8d46d6 100644 (file)
@@ -73,14 +73,12 @@ public:
 class C_IO_PurgeStrayPurged : public StrayManagerIOContext {
   CDentry *dn;
   bool only_head;
-  // How many ops_in_flight were allocated to this purge?
-  uint32_t ops_allowance;
 public:
   C_IO_PurgeStrayPurged(StrayManager *sm_, CDentry *d, bool oh) : 
     StrayManagerIOContext(sm_), dn(d), only_head(oh) { }
   void finish(int r) override {
     assert(r == 0 || r == -ENOENT);
-    sm->_purge_stray_purged(dn, ops_allowance, only_head);
+    sm->_purge_stray_purged(dn, only_head);
   }
 };
 
@@ -163,7 +161,7 @@ public:
 };
 
 void StrayManager::_purge_stray_purged(
-    CDentry *dn, uint32_t ops_allowance, bool only_head)
+    CDentry *dn, bool only_head)
 {
   CInode *in = dn->get_projected_linkage()->get_inode();
   dout(10) << "_purge_stray_purged " << *dn << " " << *in << dendl;
index 54629ee046bc127c97c89ce8455bb1f618672966..b5b7d73a17e3eeb80d0f50224c68224fba309968 100644 (file)
@@ -63,7 +63,7 @@ class StrayManager
   /**
    * Completion handler for a Filer::purge on a stray inode.
    */
-  void _purge_stray_purged(CDentry *dn, uint32_t ops, bool only_head);
+  void _purge_stray_purged(CDentry *dn, bool only_head);
 
   void _purge_stray_logged(CDentry *dn, version_t pdv, LogSegment *ls);
 
index 4b6f44cd81d3b672705bc809e3febe23312d001b..bc4d48d24f901f47a33851d161c03c7586a4ceef 100644 (file)
@@ -197,8 +197,8 @@ class MMDSCacheRejoin : public Message {
   map<dirfrag_t, map<string_snap_t, slave_reqid> > xlocked_dentries;
   
   MMDSCacheRejoin() :
-    Message(MSG_MDS_CACHEREJOIN, HEAD_VERSION, COMPAT_VERSION)
-  {}
+    Message(MSG_MDS_CACHEREJOIN, HEAD_VERSION, COMPAT_VERSION),
+    op(0) {}
   MMDSCacheRejoin(int o) : 
     Message(MSG_MDS_CACHEREJOIN, HEAD_VERSION, COMPAT_VERSION),
     op(o) {}
index 65489cd7dc92d807353fa049fba98d9f323b5fa4..b42a4ad649202763797ac6caf4ecce256bc33f25 100644 (file)
@@ -21,9 +21,9 @@
 
 class MMDSTableRequest : public Message {
  public:
-  __u16 table;
-  __s16 op;
-  uint64_t reqid;
+  __u16 table = 0;
+  __s16 op = 0;
+  uint64_t reqid = 0;
   bufferlist bl;
 
   MMDSTableRequest() : Message(MSG_MDS_TABLE_REQUEST) {}
index 6c5367e13ee42143d27d62b04d24157cc235fab4..ddf58dc6d44e0ba3f609d64e276d5038e76db042 100644 (file)
@@ -48,7 +48,7 @@ public:
     ::decode(what, p);
   }
 
-  ceph_tid_t handle;
+  ceph_tid_t handle = 0;
   string what;
 
 private:
index 55dcd2049155495adae617088ef48116e48b779d..4df114d3c0edd12b204665df6ef25ff9e3f6c047 100644 (file)
@@ -53,9 +53,9 @@ public:
       ::decode(oldest_version, p);
   }
 
-  ceph_tid_t handle;
-  version_t version;
-  version_t oldest_version;
+  ceph_tid_t handle = 0;
+  version_t version = 0;
+  version_t oldest_version = 0;
 
 private:
   ~MMonGetVersionReply() override {}
index aecdf00800d015b38e2a985946a37f2e14ee18cc..d78e63c6b892c8f1f705f2f08666d992a489e1fd 100644 (file)
@@ -26,8 +26,8 @@ struct MMonHealth : public MMonQuorumService
     OP_TELL = 1,
   };
 
-  int service_type;
-  int service_op;
+  int service_type = 0;
+  int service_op = 0;
 
   // service specific data
   DataStats data_stats;
index 4b21ee38a0ba910ff65183956372ad5e1eb47454..8a5e4f9341613fcd69dc3a47ed6d2ea0cd5aa069 100644 (file)
@@ -47,18 +47,18 @@ class MMonPaxos : public Message {
     }
   }
 
-  epoch_t epoch;   // monitor epoch
-  __s32 op;          // paxos op
-
-  version_t first_committed;  // i've committed to
-  version_t last_committed;  // i've committed to
-  version_t pn_from;         // i promise to accept after
-  version_t pn;              // with with proposal
-  version_t uncommitted_pn;     // previous pn, if we are a LAST with an uncommitted value
+  epoch_t epoch = 0;   // monitor epoch
+  __s32 op = 0;          // paxos op
+
+  version_t first_committed = 0;  // i've committed to
+  version_t last_committed = 0;  // i've committed to
+  version_t pn_from = 0;         // i promise to accept after
+  version_t pn = 0;              // with with proposal
+  version_t uncommitted_pn = 0;     // previous pn, if we are a LAST with an uncommitted value
   utime_t lease_timestamp;
   utime_t sent_timestamp;
 
-  version_t latest_version;
+  version_t latest_version = 0;
   bufferlist latest_value;
 
   map<version_t,bufferlist> values;
index 91bf116b9dd5d0fff24b6fd32d9e93d76ee67615..c98fc3a759493528c9d489fb65958157deefb819 100644 (file)
@@ -47,14 +47,14 @@ public:
   }
   
   uuid_d fsid;
-  int32_t op;
+  int32_t op = 0;
   string name;
   set<int32_t> quorum;
   bufferlist monmap_bl;
-  version_t paxos_first_version;
-  version_t paxos_last_version;
-  bool has_ever_joined;
-  uint64_t required_features;
+  version_t paxos_first_version = 0;
+  version_t paxos_last_version = 0;
+  bool has_ever_joined = 0;
+  uint64_t required_features = 0;
 
   MMonProbe()
     : Message(MSG_MON_PROBE, HEAD_VERSION, COMPAT_VERSION) {}
index d816cd15c780a44773d7df81af63ef2daf4de8f4..2b383820b4713d4b1dae3300d74c133a6143bfa9 100644 (file)
@@ -53,9 +53,9 @@ public:
     }
   }
 
-  uint32_t op;
-  uint64_t cookie;
-  version_t last_committed;
+  uint32_t op = 0;
+  uint64_t cookie = 0;
+  version_t last_committed = 0;
   pair<string,string> last_key;
   bufferlist chunk_bl;
   entity_inst_t reply_to;
index aacffa932c12ed3d31aa58157e2fcef1525c8af0..82b52e9cc1456887358c530c499b6f0919a42253 100644 (file)
@@ -21,7 +21,7 @@
 
 class MOSDAlive : public PaxosServiceMessage {
  public:
-  epoch_t want;
+  epoch_t want = 0;
 
   MOSDAlive(epoch_t h, epoch_t w) : PaxosServiceMessage(MSG_OSD_ALIVE, h), want(w) { }
   MOSDAlive() : PaxosServiceMessage(MSG_OSD_ALIVE, 0) {}
index 81a9230c53078b76234cb2077f3a7805b3e5a9ac..793b4eeb5c6c5f40f7aa5247b69dc0e78026667e 100644 (file)
@@ -33,9 +33,9 @@ class MOSDFailure : public PaxosServiceMessage {
   
   uuid_d fsid;
   entity_inst_t target_osd;
-  __u8 flags;
-  epoch_t       epoch;
-  int32_t failed_for;  // known to be failed since at least this long
+  __u8 flags = 0;
+  epoch_t       epoch = 0;
+  int32_t failed_for = 0;  // known to be failed since at least this long
 
   MOSDFailure() : PaxosServiceMessage(MSG_OSD_FAILURE, 0, HEAD_VERSION) { }
   MOSDFailure(const uuid_d &fs, const entity_inst_t& f, int duration, epoch_t e)
index 0ae30f3d0fd5f0203ff4642b7c6025dd8c78fe76..72246288a51493ca59db1636abd13f9fad3316aa 100644 (file)
@@ -28,7 +28,7 @@ class MOSDMap : public Message {
   uuid_d fsid;
   map<epoch_t, bufferlist> maps;
   map<epoch_t, bufferlist> incremental_maps;
-  epoch_t oldest_map, newest_map;
+  epoch_t oldest_map =0, newest_map = 0;
 
   epoch_t get_first() const {
     epoch_t e = 0;
index e2c948c3ed84a2072dc0c3784c8b8b4af5a234ba..0b6cb1af85372618c7b98d168ab03fe506cf57c6 100644 (file)
@@ -25,8 +25,8 @@ class MOSDMarkMeDown : public PaxosServiceMessage {
  public:
   uuid_d fsid;
   entity_inst_t target_osd;
-  epoch_t epoch;
-  bool request_ack;          // ack requested
+  epoch_t epoch = 0;
+  bool request_ack = false;          // ack requested
 
   MOSDMarkMeDown()
     : PaxosServiceMessage(MSG_OSD_MARK_ME_DOWN, 0,
index 69d12a7a9df4cf4845c54270adb588157e820acf..80c6ecf77687411867dc6d71d32e00ae3fcaa691 100755 (executable)
@@ -37,11 +37,11 @@ class MOSDOp : public MOSDFastDispatchOp {
   static const int COMPAT_VERSION = 3;
 
 private:
-  uint32_t client_inc;
-  __u32 osdmap_epoch;
-  __u32 flags;
+  uint32_t client_inc = 0;
+  __u32 osdmap_epoch = 0;
+  __u32 flags = 0;
   utime_t mtime;
-  int32_t retry_attempt;   // 0 is first attempt.  -1 if we don't know.
+  int32_t retry_attempt = -1;   // 0 is first attempt.  -1 if we don't know.
 
   hobject_t hobj;
   spg_t pgid;
index 30358fdd630f40dcad284a07591e1c9c71f1e20e..19502a5602b25f6a7d5345fb01588138181b679f 100644 (file)
@@ -38,13 +38,13 @@ class MOSDOpReply : public Message {
   object_t oid;
   pg_t pgid;
   vector<OSDOp> ops;
-  int64_t flags;
+  int64_t flags = 0;
   errorcode32_t result;
   eversion_t bad_replay_version;
   eversion_t replay_version;
-  version_t user_version;
-  epoch_t osdmap_epoch;
-  int32_t retry_attempt;
+  version_t user_version = 0;
+  epoch_t osdmap_epoch = 0;
+  int32_t retry_attempt = -1;
   bool do_redirect;
   request_redirect_t redirect;
 
index 7f637b9ef83d241e238045785bbcd345d3429419..9146dd4040b3421025973eed8470106ceca521cd 100644 (file)
@@ -35,8 +35,8 @@ public:
     }
   }
 
-  __u32 op;
-  epoch_t map_epoch, query_epoch;
+  __u32 op = 0;
+  epoch_t map_epoch = 0, query_epoch = 0;
   spg_t pgid;
   hobject_t last_backfill;
   pg_stat_t stats;
index 01072c267fd8ed2a625351e6e0db1366933d870b..97e5e3652344d21c2914542b297f3d5347b457e6 100644 (file)
@@ -28,7 +28,7 @@ struct MOSDPGCreate : public Message {
   const static int HEAD_VERSION = 3;
   const static int COMPAT_VERSION = 3;
 
-  version_t          epoch;
+  version_t          epoch = 0;
   map<pg_t,pg_create_t> mkpg;
   map<pg_t,utime_t> ctimes;
 
index 106c499cdde66447a2bc480fadd2a32fac7a8585..e17dcf7cc8d004a1204dce3b5d20f356a4e14468 100644 (file)
@@ -23,7 +23,7 @@ class MOSDPGInfo : public Message {
   static const int HEAD_VERSION = 5;
   static const int COMPAT_VERSION = 1;
 
-  epoch_t epoch;
+  epoch_t epoch = 0;
 
 public:
   vector<pair<pg_notify_t,PastIntervals> > pg_list;
index 57c8a0efe7e7e666ead11c1d1d6c64791bc3b203..ac6a1f72bd53c502eff945d21a34448fbc9fea84 100644 (file)
@@ -23,12 +23,12 @@ class MOSDPGLog : public Message {
   static const int HEAD_VERSION = 5;
   static const int COMPAT_VERSION = 2;
 
-  epoch_t epoch;
+  epoch_t epoch = 0;
   /// query_epoch is the epoch of the query being responded to, or
   /// the current epoch if this is not being sent in response to a
   /// query. This allows the recipient to disregard responses to old
   /// queries.
-  epoch_t query_epoch;
+  epoch_t query_epoch = 0;
 
 public:
   shard_id_t to;
index a93dfce187403bde568d2ffcc8ad1dca5f986208..a73bf05692a4dad1e5aa11e0fa02b4746ddd979b 100644 (file)
@@ -28,7 +28,7 @@ class MOSDPGNotify : public Message {
   static const int HEAD_VERSION = 6;
   static const int COMPAT_VERSION = 2;
 
-  epoch_t epoch;
+  epoch_t epoch = 0;
   /// query_epoch is the epoch of the query being responded to, or
   /// the current epoch if this is not being sent in response to a
   /// query. This allows the recipient to disregard responses to old
index bd6bf44150be2c2aaeb5fc4ebc114d3d0ae5a14c..9c4595687b4bb6fb0b5d50a170081db12323754f 100644 (file)
@@ -27,7 +27,7 @@ class MOSDPGQuery : public Message {
   static const int HEAD_VERSION = 4;
   static const int COMPAT_VERSION = 3;
 
-  version_t epoch;
+  version_t epoch = 0;
 
  public:
   version_t get_epoch() const { return epoch; }
index 0a6afa507218d8bf21e3b426335e8d460aacbeec..f692ad428c361edb0d01456a38049855eacdbc0c 100644 (file)
@@ -25,7 +25,7 @@ class MOSDPGRemove : public Message {
   static const int HEAD_VERSION = 3;
   static const int COMPAT_VERSION = 2;
 
-  epoch_t epoch;
+  epoch_t epoch = 0;
 
  public:
   vector<spg_t> pg_list;
index 3c01b406fd850f6175b13f93b6d432529095b924..16fa3c52294f0e31113898cdd3b01d86db17d9b9 100644 (file)
@@ -35,8 +35,8 @@ public:
     }
   }
 
-  __u32 op;
-  epoch_t map_epoch, query_epoch;
+  __u32 op = 0;
+  epoch_t map_epoch = 0, query_epoch = 0;
   pg_shard_t from;
   spg_t pgid;
   hobject_t begin, end;
index 5366fed183502c625028a1c81cd1d36261eadfd7..3ea7a211c7f4b971d00e5e246a779b94e17778e1 100644 (file)
@@ -21,7 +21,7 @@
 
 class MOSDPGTemp : public PaxosServiceMessage {
  public:
-  epoch_t map_epoch;
+  epoch_t map_epoch = 0;
   map<pg_t, vector<int32_t> > pg_temp;
 
   MOSDPGTemp(epoch_t e) : PaxosServiceMessage(MSG_OSD_PGTEMP, e), map_epoch(e) { }
index 030da7c2c8dc702c4234d1f2000a2fdab22bdd35..35168e0f484667c98f3fe6ad8b71c9bd0f7dc093 100644 (file)
@@ -23,7 +23,7 @@ class MOSDPGTrim : public Message {
   static const int COMPAT_VERSION = 2;
 
 public:
-  epoch_t epoch;
+  epoch_t epoch = 0;
   spg_t pgid;
   eversion_t trim_to;
 
index c286319ebf81ecad8f803453f48b7ff1619c7127..90ea498a7bc331834a9b0bca4b22d6488b24be0a 100644 (file)
@@ -59,8 +59,8 @@ class MOSDPing : public Message {
   }
 
   uuid_d fsid;
-  epoch_t map_epoch;
-  __u8 op;
+  epoch_t map_epoch = 0;
+  __u8 op = 0;
   utime_t stamp;
   uint32_t min_message_size;
 
index 8b2304278546543778b6510e3bc1cdcd4760a33c..ce800aac068b3543fb981a595d0b27578912affd 100644 (file)
@@ -30,7 +30,7 @@ struct MOSDRepScrub : public MOSDFastDispatchOp {
   spg_t pgid;             // PG to scrub
   eversion_t scrub_from; // only scrub log entries after scrub_from
   eversion_t scrub_to;   // last_update_applied when message sent
-  epoch_t map_epoch, min_epoch;
+  epoch_t map_epoch = 0, min_epoch = 0;
   bool chunky;           // true for chunky scrubs
   hobject_t start;       // lower bound of scrub, inclusive
   hobject_t end;         // upper bound of scrub, exclusive
index af8281e7c46fa6b05cbf8b7823fcdaff6e2c4ed9..d9659fa7fb9b0ff19b76bd682013df7a026d4fd1 100644 (file)
@@ -29,8 +29,8 @@ struct MOSDScrub : public Message {
 
   uuid_d fsid;
   vector<pg_t> scrub_pgs;
-  bool repair;
-  bool deep;
+  bool repair = false;
+  bool deep = false;
 
   MOSDScrub() : Message(MSG_OSD_SCRUB, HEAD_VERSION, COMPAT_VERSION) {}
   MOSDScrub(const uuid_d& f, bool r, bool d) :
index 3bab8f57770a7ad4fcae204eea71960675b6f1b9..4fd3a522fd32132f6ccffcf76861d3cfddc4acaf 100644 (file)
@@ -30,7 +30,7 @@ class MOSDSubOp : public MOSDFastDispatchOp {
   static const int COMPAT_VERSION = 7;
 
 public:
-  epoch_t map_epoch;
+  epoch_t map_epoch = 0;
   
   // metadata from original request
   osd_reqid_t reqid;
@@ -41,14 +41,14 @@ public:
   hobject_t poid;
   object_locator_t oloc;
   
-  __u8 acks_wanted;
+  __u8 acks_wanted = 0;
 
   // op to exec
   vector<OSDOp> ops;
   utime_t mtime;
 
-  bool old_exists;
-  uint64_t old_size;
+  bool old_exists = false;
+  uint64_t old_size = 0;
   eversion_t old_version;
 
   SnapSet snapset;
@@ -71,7 +71,7 @@ public:
   interval_set<uint64_t> data_subset;
   map<hobject_t, interval_set<uint64_t>> clone_subsets;
 
-  bool first, complete;
+  bool first = false, complete = false;
 
   interval_set<uint64_t> data_included;
   ObjectRecoveryInfo recovery_info;
index 691d4e2c1102b5ce28e68e0e31bc9af65a2694ff..332d38aa18d8135aef90d3034ecbdea1d46d7482 100644 (file)
@@ -33,7 +33,7 @@ class MOSDSubOpReply : public MOSDFastDispatchOp {
   static const int HEAD_VERSION = 2;
   static const int COMPAT_VERSION = 1;
 public:
-  epoch_t map_epoch;
+  epoch_t map_epoch = 0;
   
   // subop metadata
   osd_reqid_t reqid;
@@ -44,8 +44,8 @@ public:
   vector<OSDOp> ops;
 
   // result
-  __u8 ack_type;
-  int32_t result;
+  __u8 ack_type = 0;
+  int32_t result = 0;
   
   // piggybacked osd state
   eversion_t last_complete_ondisk;
index dcceff2b3c2083f9b67f0bcac1886899d28d621b..c368675a89d49c9c7f39882ccb4c19231a92196d 100644 (file)
@@ -23,7 +23,7 @@ public:
   uuid_d fsid;
   map<pg_t,pg_stat_t> pg_stat;
   osd_stat_t osd_stat;
-  epoch_t epoch;
+  epoch_t epoch = 0;
   utime_t had_map_for;
   
   MPGStats() : PaxosServiceMessage(MSG_PGSTATS, 0) {}
index e694bbe7c90ec2facf82c0f7e4df85ad777c9cb8..15d97faa2c4bfdd53f0a7724941a94a7b80b3126 100644 (file)
@@ -25,12 +25,12 @@ class MPoolOp : public PaxosServiceMessage {
 
 public:
   uuid_d fsid;
-  __u32 pool;
+  __u32 pool = 0;
   string name;
-  __u32 op;
-  uint64_t auid;
+  __u32 op = 0;
+  uint64_t auid = 0;
   snapid_t snapid;
-  __s16 crush_rule;
+  __s16 crush_rule = 0;
 
   MPoolOp()
     : PaxosServiceMessage(CEPH_MSG_POOLOP, 0, HEAD_VERSION, COMPAT_VERSION) { }
index abc489b5e111fbbedc688c1f3f11eee56b659339..0374c1d904a1c21e27d565470e343d8f91eb5cdd 100644 (file)
@@ -20,8 +20,8 @@
 class MPoolOpReply : public PaxosServiceMessage {
 public:
   uuid_d fsid;
-  __u32 replyCode;
-  epoch_t epoch;
+  __u32 replyCode = 0;
+  epoch_t epoch = 0;
   bufferlist response_data;
 
   MPoolOpReply() : PaxosServiceMessage(CEPH_MSG_POOLOP_REPLY, 0)
index bf7560d0734e3cf62341f4e27e59689c86f6c350..a27ef9b54276ea4d1df48a54434986e30be07f3a 100644 (file)
 #include "messages/PaxosServiceMessage.h"
 
 class MStatfs : public PaxosServiceMessage {
+
+  static const int HEAD_VERSION = 2;
+  static const int COMPAT_VERSION = 0;
+
 public:
   uuid_d fsid;
+  boost::optional<int64_t> data_pool;
 
-  MStatfs() : PaxosServiceMessage(CEPH_MSG_STATFS, 0) {}
-  MStatfs(const uuid_d& f, ceph_tid_t t, version_t v) :
-    PaxosServiceMessage(CEPH_MSG_STATFS, v), fsid(f) {
+  MStatfs() : PaxosServiceMessage(CEPH_MSG_STATFS, 0, HEAD_VERSION) {}
+  MStatfs(const uuid_d& f, ceph_tid_t t, boost::optional<int64_t> _data_pool,
+             version_t v) : PaxosServiceMessage(CEPH_MSG_STATFS, v,
+                                            HEAD_VERSION, COMPAT_VERSION),
+                                                fsid(f), data_pool(_data_pool) {
     set_tid(t);
   }
 
@@ -35,17 +42,24 @@ private:
 public:
   const char *get_type_name() const override { return "statfs"; }
   void print(ostream& out) const override {
-    out << "statfs(" << get_tid() << " v" << version << ")";
+    out << "statfs(" << get_tid() << " pool "
+        << (data_pool ? *data_pool : -1) << " v" << version << ")";
   }
 
   void encode_payload(uint64_t features) override {
     paxos_encode();
     ::encode(fsid, payload);
+    ::encode(data_pool, payload);
   }
   void decode_payload() override {
     bufferlist::iterator p = payload.begin();
     paxos_decode(p);
     ::decode(fsid, p);
+    if (header.version >= 2) {
+      ::decode(data_pool, p);
+    } else {
+      data_pool = boost::optional<int64_t> ();
+    }
   }
 };
 
index 83ba51fc89ef3923dec69755a5fd4e559681fc54..8994e9646646099b80b1c14131c1edc9f524878c 100644 (file)
@@ -25,9 +25,9 @@ struct MTimeCheck : public Message
     OP_REPORT = 3,
   };
 
-  int op;
-  version_t epoch;
-  version_t round;
+  int op = 0;
+  version_t epoch = 0;
+  version_t round = 0;
 
   utime_t timestamp;
   map<entity_inst_t, double> skews;
index 2961394bcb66158a2a89a747a54117c69b34e17c..d8b7b01cc6278dbdcea73f760931d157bf12a6d2 100644 (file)
@@ -577,7 +577,7 @@ PyObject *PyModules::get_config_prefix(const std::string &handle,
 }
 
 void PyModules::set_config(const std::string &handle,
-    const std::string &key, const std::string &val)
+    const std::string &key, const boost::optional<std::string>& val)
 {
   const std::string global_key = config_prefix + handle + "/" + key;
 
@@ -586,18 +586,25 @@ void PyModules::set_config(const std::string &handle,
     PyThreadState *tstate = PyEval_SaveThread();
     Mutex::Locker l(lock);
     PyEval_RestoreThread(tstate);
-    config_cache[global_key] = val;
+    if (val) {
+      config_cache[global_key] = *val;
+    } else {
+      config_cache.erase(global_key);
+    }
 
     std::ostringstream cmd_json;
-
     JSONFormatter jf;
     jf.open_object_section("cmd");
-    jf.dump_string("prefix", "config-key set");
-    jf.dump_string("key", global_key);
-    jf.dump_string("val", val);
+    if (val) {
+      jf.dump_string("prefix", "config-key set");
+      jf.dump_string("key", global_key);
+      jf.dump_string("val", *val);
+    } else {
+      jf.dump_string("prefix", "config-key del");
+      jf.dump_string("key", global_key);
+    }
     jf.close_section();
     jf.flush(cmd_json);
-
     set_cmd.run(&monc, cmd_json.str());
   }
   set_cmd.wait();
index 431abec76f0e3cf4b814ed51239dc5d7feb99a46..c7aad4e5df8859587ac1f7a353e3a930d6d44e16 100644 (file)
@@ -114,7 +114,7 @@ public:
   PyObject *get_config_prefix(const std::string &handle,
                              const std::string &prefix) const;
   void set_config(const std::string &handle,
-      const std::string &key, const std::string &val);
+      const std::string &key, const boost::optional<std::string> &val);
 
   void set_health_checks(const std::string& handle,
                         health_check_map_t&& checks);
index c8d3c73ff803fbe2d52fb3c0546cb237c19ec5b0..f03929102e4753ccad1c864bbe7c5375748763f0 100644 (file)
@@ -359,11 +359,14 @@ ceph_config_set(PyObject *self, PyObject *args)
   char *handle = nullptr;
   char *key = nullptr;
   char *value = nullptr;
-  if (!PyArg_ParseTuple(args, "sss:ceph_config_set", &handle, &key, &value)) {
+  if (!PyArg_ParseTuple(args, "ssz:ceph_config_set", &handle, &key, &value)) {
     return nullptr;
   }
-
-  global_handle->set_config(handle, key, value);
+  boost::optional<string> val;
+  if (value) {
+    val = value;
+  }
+  global_handle->set_config(handle, key, val);
 
   Py_RETURN_NONE;
 }
index 817e13d05542fbf4b3f3284466be8414693b6cb5..c9c836dc5151d99bedb09bcd156ce822575c6665 100644 (file)
@@ -19,6 +19,7 @@
 #include "mon/MonitorDBStore.h"
 #include "mon/ConfigKeyService.h"
 #include "mon/OSDMonitor.h"
+#include "mon/MDSMonitor.h"
 
 #include "messages/MMonCommand.h"
 #include "messages/MAuth.h"
@@ -538,6 +539,7 @@ bool AuthMonitor::preprocess_command(MonOpRequestRef op)
       prefix == "auth rm" ||
       prefix == "auth get-or-create" ||
       prefix == "auth get-or-create-key" ||
+      prefix == "fs authorize" ||
       prefix == "auth import" ||
       prefix == "auth caps") {
     return false;
@@ -1271,6 +1273,98 @@ bool AuthMonitor::prepare_command(MonOpRequestRef op)
     wait_for_finished_proposal(op, new Monitor::C_Command(mon, op, 0, rs, rdata,
                                              get_last_committed() + 1));
     return true;
+  } else if (prefix == "fs authorize") {
+    string filesystem;
+    cmd_getval(g_ceph_context, cmdmap, "filesystem", filesystem);
+    string mds_cap_string, osd_cap_string;
+    string osd_cap_wanted = "r";
+
+    for (auto it = caps_vec.begin();
+        it != caps_vec.end() && (it + 1) != caps_vec.end();
+        it += 2) {
+      const string &path = *it;
+      const string &cap = *(it+1);
+      if (cap != "r" && cap != "rw" && cap != "rwp") {
+       ss << "Only 'r', 'rw', and 'rwp' permissions are allowed for filesystems.";
+       err = -EINVAL;
+       goto done;
+      }
+      if (cap.find('w') != string::npos) {
+       osd_cap_wanted = "rw";
+      }
+
+      mds_cap_string += mds_cap_string.empty() ? "" : ", ";
+      mds_cap_string += "allow " + cap;
+      if (path != "/") {
+       mds_cap_string += " path=" + path;
+      }
+    }
+
+    auto fs = mon->mdsmon()->get_fsmap().get_filesystem(filesystem);
+    if (!fs) {
+      ss << "filesystem " << filesystem << " does not exist.";
+      err = -EINVAL;
+      goto done;
+    }
+
+    auto data_pools = fs->mds_map.get_data_pools();
+    for (auto p : data_pools) {
+      const string &pool_name = mon->osdmon()->osdmap.get_pool_name(p);
+      osd_cap_string += osd_cap_string.empty() ? "" : ", ";
+      osd_cap_string += "allow " + osd_cap_wanted + " pool=" + pool_name;
+    }
+
+    std::map<string, bufferlist> wanted_caps = {
+      { "mon", _encode_cap("allow r") },
+      { "osd", _encode_cap(osd_cap_string) },
+      { "mds", _encode_cap(mds_cap_string) }
+    };
+
+    EntityAuth entity_auth;
+    if (mon->key_server.get_auth(entity, entity_auth)) {
+      for (const auto &sys_cap : wanted_caps) {
+       if (entity_auth.caps.count(sys_cap.first) == 0 ||
+           !entity_auth.caps[sys_cap.first].contents_equal(sys_cap.second)) {
+         ss << "key for " << entity << " exists but cap " << sys_cap.first
+            << " does not match";
+         err = -EINVAL;
+         goto done;
+       }
+      }
+
+      KeyRing kr;
+      kr.add(entity, entity_auth.key);
+      if (f) {
+       kr.set_caps(entity, entity_auth.caps);
+       kr.encode_formatted("auth", f.get(), rdata);
+      } else {
+       kr.encode_plaintext(rdata);
+      }
+      err = 0;
+      goto done;
+    }
+
+    KeyServerData::Incremental auth_inc;
+    auth_inc.op = KeyServerData::AUTH_INC_ADD;
+    auth_inc.name = entity;
+    auth_inc.auth.key.create(g_ceph_context, CEPH_CRYPTO_AES);
+    auth_inc.auth.caps = wanted_caps;
+
+    push_cephx_inc(auth_inc);
+    KeyRing kr;
+    kr.add(entity, auth_inc.auth.key);
+    if (f) {
+      kr.set_caps(entity, wanted_caps);
+      kr.encode_formatted("auth", f.get(), rdata);
+    } else {
+      kr.encode_plaintext(rdata);
+    }
+
+    rdata.append(ds);
+    getline(ss, rs);
+    wait_for_finished_proposal(op, new Monitor::C_Command(mon, op, 0, rs, rdata,
+                                                 get_last_committed() + 1));
+    return true;
   } else if (prefix == "auth caps" && !entity_name.empty()) {
     KeyServerData::Incremental auth_inc;
     auth_inc.name = entity;
index a2244a3c6da04895d354c376b08e0b107de305a3..b7fde85528deff25ee46b52e537e213fa055c245 100644 (file)
@@ -35,9 +35,19 @@ static ostream& _prefix(std::ostream *_dout, Monitor *mon, epoch_t epoch) {
 void Elector::init()
 {
   epoch = mon->store->get(Monitor::MONITOR_NAME, "election_epoch");
-  if (!epoch)
+  if (!epoch) {
+    dout(1) << "init, first boot, initializing epoch at 1 " << dendl;
     epoch = 1;
-  dout(1) << "init, last seen epoch " << epoch << dendl;
+  } else if (epoch % 2) {
+    dout(1) << "init, last seen epoch " << epoch
+           << ", mid-election, bumping" << dendl;
+    ++epoch;
+    auto t(std::make_shared<MonitorDBStore::Transaction>());
+    t->put(Monitor::MONITOR_NAME, "election_epoch", epoch);
+    mon->store->apply_transaction(t);
+  } else {
+    dout(1) << "init, last seen epoch " << epoch << dendl;
+  }
 }
 
 void Elector::shutdown()
@@ -117,8 +127,14 @@ void Elector::defer(int who)
   ack_stamp = ceph_clock_now();
   MMonElection *m = new MMonElection(MMonElection::OP_ACK, epoch, mon->monmap);
   m->mon_features = ceph::features::mon::get_supported();
-  m->sharing_bl = mon->get_supported_commands_bl();
   mon->collect_metadata(&m->metadata);
+
+  // This field is unused completely in luminous, but jewel uses it to
+  // determine whether we are a dumpling mon due to some crufty old
+  // code.  It only needs to see this buffer non-empty, so put
+  // something useless there.
+  m->sharing_bl = mon->get_local_commands_bl(mon->get_required_mon_features());
+
   mon->messenger->send_message(m, mon->monmap->get_inst(who));
   
   // set a timer
@@ -201,30 +217,23 @@ void Elector::victory()
   assert(epoch % 2 == 1);  // election
   bump_epoch(epoch+1);     // is over!
 
-  // decide my supported commands for peons to advertise
-  const bufferlist *cmds_bl = NULL;
-  const MonCommand *cmds;
-  int cmdsize;
-  mon->get_locally_supported_monitor_commands(&cmds, &cmdsize);
-  cmds_bl = &mon->get_supported_commands_bl();
-  
   // tell everyone!
   for (set<int>::iterator p = quorum.begin();
        p != quorum.end();
        ++p) {
     if (*p == mon->rank) continue;
-    MMonElection *m = new MMonElection(MMonElection::OP_VICTORY, epoch, mon->monmap);
+    MMonElection *m = new MMonElection(MMonElection::OP_VICTORY, epoch,
+                                      mon->monmap);
     m->quorum = quorum;
     m->quorum_features = cluster_features;
     m->mon_features = mon_features;
-    m->sharing_bl = *cmds_bl;
+    m->sharing_bl = mon->get_local_commands_bl(mon_features);
     mon->messenger->send_message(m, mon->monmap->get_inst(*p));
   }
 
   // tell monitor
   mon->win_election(epoch, quorum,
-                    cluster_features, mon_features, metadata,
-                    cmds, cmdsize);
+                    cluster_features, mon_features, metadata);
 }
 
 
@@ -394,11 +403,10 @@ void Elector::handle_victory(MonOpRequestRef op)
 
   // stash leader's commands
   assert(m->sharing_bl.length());
-  MonCommand *new_cmds;
-  int cmdsize;
+  vector<MonCommand> new_cmds;
   bufferlist::iterator bi = m->sharing_bl.begin();
-  MonCommand::decode_array(&new_cmds, &cmdsize, bi);
-  mon->set_leader_supported_commands(new_cmds, cmdsize);
+  MonCommand::decode_vector(new_cmds, bi);
+  mon->set_leader_commands(new_cmds);
 }
 
 void Elector::nak_old_peer(MonOpRequestRef op)
index ecbe9f0400e71ccf9dac9fb00084c4ae5daf194d..9d0b50d18cfbada9920b38178b048c4c82921e4b 100644 (file)
@@ -202,7 +202,7 @@ void MDSMonitor::encode_pending(MonitorDBStore::TransactionRef t)
       health.decode(bl_i);
     }
     for (const auto &metric : health.metrics) {
-      int const rank = info.rank;
+      const int rank = info.rank;
       health_check_t *check = &new_checks.get_or_add(
        mds_metric_name(metric.type),
        metric.sev,
@@ -565,7 +565,9 @@ bool MDSMonitor::prepare_beacon(MonOpRequestRef op)
           mon->osdmon()->wait_for_writeable(op, new C_RetryMessage(this, op));
           return false;
         }
-        mon->clog->info() << "MDS daemon '" << m->get_name() << "' restarted";
+        const MDSMap::mds_info_t &existing_info =
+          pending_fsmap.get_info_gid(existing);
+        mon->clog->info() << existing_info.human_name() << " restarted";
        fail_mds_gid(existing);
         failed_mds = true;
       }
@@ -658,6 +660,13 @@ bool MDSMonitor::prepare_beacon(MonOpRequestRef op)
             << "  standby_for_rank=" << m->get_standby_for_rank()
             << dendl;
     if (state == MDSMap::STATE_STOPPED) {
+      const auto fscid = pending_fsmap.mds_roles.at(gid);
+      auto fs = pending_fsmap.get_filesystem(fscid);
+      mon->clog->info() << info.human_name() << " finished "
+                        << "deactivating rank " << info.rank << " in filesystem "
+                        << fs->mds_map.fs_name << " (now has "
+                        << fs->mds_map.get_num_in_mds() << " ranks)";
+
       auto erased = pending_fsmap.stop(gid);
       erased.push_back(gid);
 
@@ -668,6 +677,8 @@ bool MDSMonitor::prepare_beacon(MonOpRequestRef op)
           pending_daemon_health_rm.insert(erased_gid);
         }
       }
+
+
     } else if (state == MDSMap::STATE_DAMAGED) {
       if (!mon->osdmon()->is_writeable()) {
         dout(4) << __func__ << ": DAMAGED from rank " << info.rank
@@ -733,6 +744,14 @@ bool MDSMonitor::prepare_beacon(MonOpRequestRef op)
         info->state = state;
         info->state_seq = seq;
       });
+
+      if (state == MDSMap::STATE_ACTIVE) {
+        auto fscid = pending_fsmap.mds_roles.at(gid);
+        auto fs = pending_fsmap.get_filesystem(fscid);
+        mon->clog->info() << info.human_name() << " is now active in "
+                          << "filesystem " << fs->mds_map.fs_name << " as rank "
+                          << info.rank;
+      }
     }
   }
 
@@ -777,7 +796,7 @@ void MDSMonitor::_updated(MonOpRequestRef op)
   op->mark_mdsmon_event(__func__);
   MMDSBeacon *m = static_cast<MMDSBeacon*>(op->get_req());
   dout(10) << "_updated " << m->get_orig_source() << " " << *m << dendl;
-  mon->clog->info() << m->get_orig_source_inst() << " "
+  mon->clog->debug() << m->get_orig_source_inst() << " "
          << ceph_mds_state_name(m->get_state());
 
   if (m->get_state() == MDSMap::STATE_STOPPED) {
@@ -831,7 +850,7 @@ void MDSMonitor::get_health(list<pair<health_status_t, string> >& summary,
     health.decode(bl_i);
 
     for (const auto &metric : health.metrics) {
-      int const rank = info.rank;
+      const int rank = info.rank;
       std::ostringstream message;
       message << "mds" << rank << ": " << metric.message;
       summary.push_back(std::make_pair(metric.sev, message.str()));
@@ -1243,8 +1262,11 @@ mds_gid_t MDSMonitor::gid_from_arg(const std::string& arg, std::ostream &ss)
   return MDS_GID_NONE;
 }
 
-int MDSMonitor::fail_mds(std::ostream &ss, const std::string &arg)
+int MDSMonitor::fail_mds(std::ostream &ss, const std::string &arg,
+    MDSMap::mds_info_t *failed_info)
 {
+  assert(failed_info != nullptr);
+
   mds_gid_t gid = gid_from_arg(arg, ss);
   if (gid == MDS_GID_NONE) {
     return 0;
@@ -1252,6 +1274,11 @@ int MDSMonitor::fail_mds(std::ostream &ss, const std::string &arg)
   if (!mon->osdmon()->is_writeable()) {
     return -EAGAIN;
   }
+
+  // Take a copy of the info before removing the MDS from the map,
+  // so that the caller knows which mds (if any) they ended up removing.
+  *failed_info = pending_fsmap.get_info_gid(gid);
+
   fail_mds_gid(gid);
   ss << "failed mds gid " << gid;
   assert(mon->osdmon()->is_writeable());
@@ -1455,10 +1482,18 @@ int MDSMonitor::filesystem_command(
   } else if (prefix == "mds fail") {
     string who;
     cmd_getval(g_ceph_context, cmdmap, "who", who);
-    r = fail_mds(ss, who);
+
+    MDSMap::mds_info_t failed_info;
+    r = fail_mds(ss, who, &failed_info);
     if (r < 0 && r == -EAGAIN) {
       mon->osdmon()->wait_for_writeable(op, new C_RetryMessage(this, op));
       return -EAGAIN; // don't propose yet; wait for message to be retried
+    } else if (r == 0) {
+      // Only log if we really did something (not when it was already gone)
+      if (failed_info.global_id != MDS_GID_NONE) {
+        mon->clog->info() << failed_info.human_name() << " marked failed by "
+                          << op->get_session()->entity_name;
+      }
     }
   } else if (prefix == "mds rm") {
     mds_gid_t gid;
@@ -1971,8 +2006,14 @@ bool MDSMonitor::maybe_expand_cluster(std::shared_ptr<Filesystem> fs)
       break;
     }
 
-    dout(1) << "adding standby " << pending_fsmap.get_info_gid(newgid).addr
+    const auto &new_info = pending_fsmap.get_info_gid(newgid);
+    dout(1) << "assigned standby " << new_info.addr
             << " as mds." << mds << dendl;
+
+    mon->clog->info() << new_info.human_name() << " assigned to "
+                         "filesystem " << fs->mds_map.fs_name << " as rank "
+                      << mds << " (now has " << fs->mds_map.get_num_in_mds()
+                      << " ranks)";
     pending_fsmap.promote(newgid, fs, mds);
     do_propose = true;
   }
@@ -2023,10 +2064,10 @@ void MDSMonitor::maybe_replace_gid(mds_gid_t gid, const MDSMap::mds_info_t& info
       << " " << ceph_mds_state_name(info.state)
       << " with " << sgid << "/" << si.name << " " << si.addr << dendl;
 
-    mon->clog->warn() << "MDS daemon '" << info.name << "'"
+    mon->clog->warn() << info.human_name() 
                       << " is not responding, replacing it "
                       << "as rank " << info.rank
-                      << " with standby '" << si.name << "'";
+                      << " with standby " << si.human_name();
 
     // Remember what NS the old one was in
     const fs_cluster_id_t fscid = pending_fsmap.mds_roles.at(gid);
@@ -2044,9 +2085,8 @@ void MDSMonitor::maybe_replace_gid(mds_gid_t gid, const MDSMap::mds_info_t& info
     dout(10) << " failing and removing " << gid << " " << info.addr << " mds." << info.rank 
       << "." << info.inc << " " << ceph_mds_state_name(info.state)
       << dendl;
-    mon->clog->info() << "MDS standby '"  << info.name
-                      << "' is not responding, removing it from the set of "
-                      << "standbys";
+    mon->clog->info() << "Standby " << info.human_name() << " is not "
+                         "responding, dropping it";
     fail_mds_gid(gid);
     *mds_propose = true;
   } else if (!info.laggy()) {
@@ -2079,6 +2119,10 @@ bool MDSMonitor::maybe_promote_standby(std::shared_ptr<Filesystem> fs)
         const MDSMap::mds_info_t si = pending_fsmap.get_info_gid(sgid);
         dout(0) << " taking over failed mds." << f << " with " << sgid
                 << "/" << si.name << " " << si.addr << dendl;
+        mon->clog->info() << "Standby " << si.human_name()
+                          << " assigned to filesystem " << fs->mds_map.fs_name
+                          << " as rank " << f;
+
         pending_fsmap.promote(sgid, fs, f);
        do_propose = true;
       }
index f17f58f96f2c42f64361bbd06581b40173335ecf..c14c9603943cc25c1a3699f70516f5f6b6f8d6d2 100644 (file)
@@ -90,7 +90,8 @@ class MDSMonitor : public PaxosService {
   void get_health(list<pair<health_status_t,string> >& summary,
                  list<pair<health_status_t,string> > *detail,
                  CephContext *cct) const override;
-  int fail_mds(std::ostream &ss, const std::string &arg);
+  int fail_mds(std::ostream &ss, const std::string &arg,
+      MDSMap::mds_info_t *failed_info);
 
   bool preprocess_command(MonOpRequestRef op);
   bool prepare_command(MonOpRequestRef op);
@@ -147,6 +148,7 @@ protected:
   std::map<uint64_t, MDSHealth> pending_daemon_health;
   std::set<uint64_t> pending_daemon_health_rm;
 
+
   map<mds_gid_t, Metadata> pending_metadata;
 
   mds_gid_t gid_from_arg(const std::string& arg, std::ostream& err);
index fb760260f3ec695cb5e799c62cb8ad13c75c2714..fc4f08d8d70c51ec41bf23a1943f0e79d876410f 100644 (file)
@@ -938,3 +938,12 @@ int MgrMonitor::dump_metadata(const string& name, Formatter *f, ostream *err)
   return 0;
 }
 
+const std::vector<MonCommand> &MgrMonitor::get_command_descs() const
+{
+  if (command_descs.empty()) {
+    // must have just upgraded; fallback to static commands
+    return mgr_commands;
+  } else {
+    return command_descs;
+  }
+}
index f63fb714f795d6be8bb5d01eb207593225a09860..65451633dbefc0aa85c71bd4d65b50e222548922 100644 (file)
@@ -99,9 +99,7 @@ public:
 
   void print_summary(Formatter *f, std::ostream *ss) const;
 
-  const std::vector<MonCommand> &get_command_descs() const {
-    return command_descs;
-  }
+  const std::vector<MonCommand> &get_command_descs() const;
 
   int load_metadata(const string& name, std::map<string, string>& m,
                    ostream *err);
index 6a0606a4be9fac7b8f5aa874a536e7c71c56b682..3ea5f30032a80a90c7ba1bde48715515490e13f0 100644 (file)
@@ -17,7 +17,7 @@ class MgrPGStatService : public MonPGStatService {
 public:
   MgrPGStatService(PGMapDigest& d) : digest(d) {}
 
-  const pool_stat_t* get_pool_stat(int poolid) const override {
+  const pool_stat_t* get_pool_stat(int64_t poolid) const override {
     auto i = digest.pg_pool_sum.find(poolid);
     if (i != digest.pg_pool_sum.end()) {
       return &i->second;
@@ -25,8 +25,9 @@ public:
     return nullptr;
   }
 
-  ceph_statfs get_statfs() const override {
-    return digest.get_statfs();
+  ceph_statfs get_statfs(OSDMap& osdmap,
+                        boost::optional<int64_t> data_pool) const override {
+    return digest.get_statfs(osdmap, data_pool);
   }
 
   void print_summary(Formatter *f, ostream *out) const override {
@@ -295,6 +296,7 @@ bool MgrStatMonitor::preprocess_statfs(MonOpRequestRef op)
   op->mark_pgmon_event(__func__);
   auto statfs = static_cast<MStatfs*>(op->get_req());
   auto session = statfs->get_session();
+
   if (!session)
     return true;
   if (!session->is_capable("pg", MON_CAP_R)) {
@@ -316,7 +318,8 @@ bool MgrStatMonitor::preprocess_statfs(MonOpRequestRef op)
     ver = mon->pgmon()->get_last_committed();
   }
   auto reply = new MStatfsReply(statfs->fsid, statfs->get_tid(), ver);
-  reply->h.st = mon->pgservice->get_statfs();
+  reply->h.st = mon->pgservice->get_statfs(mon->osdmon()->osdmap,
+                                          statfs->data_pool);
   mon->send_reply(op, reply);
   return true;
 }
index 3096fd2b46ad16ff1983d815ab5e56ccefd5d8ef..c93644f6811e9f08065d29be961f540e46bdb37c 100644 (file)
@@ -260,6 +260,17 @@ void MonCapGrant::expand_profile_mon(const EntityName& name) const
     profile_grants.back().command_args["caps_osd"] = StringConstraint(
       StringConstraint::MATCH_TYPE_EQUAL, "allow rwx");
   }
+  if (profile == "bootstrap-rbd") {
+    profile_grants.push_back(MonCapGrant("mon", MON_CAP_R));  // read monmap
+    profile_grants.push_back(MonCapGrant("auth get-or-create"));  // FIXME: this can expose other mds keys
+    profile_grants.back().command_args["entity"] = StringConstraint(
+      StringConstraint::MATCH_TYPE_PREFIX, "client.");
+    profile_grants.back().command_args["caps_mon"] = StringConstraint(
+      StringConstraint::MATCH_TYPE_EQUAL, "profile rbd");
+    profile_grants.back().command_args["caps_osd"] = StringConstraint(
+      StringConstraint::MATCH_TYPE_REGEX,
+      "^([ ,]*profile(=|[ ]+)['\"]?rbd[^ ,'\"]*['\"]?([ ]+pool(=|[ ]+)['\"]?[^,'\"]+['\"]?)?)+$");
+  }
   if (profile == "fs-client") {
     profile_grants.push_back(MonCapGrant("mon", MON_CAP_R));
     profile_grants.push_back(MonCapGrant("mds", MON_CAP_R));
@@ -281,7 +292,7 @@ void MonCapGrant::expand_profile_mon(const EntityName& name) const
     profile_grants.back().command_args["blacklistop"] = StringConstraint(
       StringConstraint::MATCH_TYPE_EQUAL, "add");
     profile_grants.back().command_args["addr"] = StringConstraint(
-      StringConstraint::MATCH_TYPE_REGEX, "^[^/]/[0-9]*$");
+      StringConstraint::MATCH_TYPE_REGEX, "^[^/]+/[0-9]+$");
   }
 
   if (profile == "role-definer") {
@@ -330,8 +341,8 @@ mon_rwxa_t MonCapGrant::get_allowed(CephContext *cct,
         break;
       case StringConstraint::MATCH_TYPE_REGEX:
         {
-         boost::regex pattern(p->second.value,
-                               boost::regex::basic | boost::regex::no_except);
+         boost::regex pattern(
+            p->second.value, boost::regex::extended | boost::regex::no_except);
           if (pattern.empty() || !boost::regex_match(q->second, pattern))
            return 0;
         }
index e32cc5df2be24ffbc94dbdd328c1d2b953666c9d..ce2429fbc949cde15ca32f2c76b5cfdea993321c 100644 (file)
@@ -120,6 +120,39 @@ struct MonCommand {
     DECODE_FINISH(bl);
   }
 
+  // this uses a u16 for the count, so we need a special encoder/decoder.
+  static void encode_vector(const std::vector<MonCommand>& cmds,
+                           bufferlist &bl) {
+    ENCODE_START(2, 1, bl);
+    uint16_t s = cmds.size();
+    ::encode(s, bl);
+    for (unsigned i = 0; i < s; ++i) {
+      cmds[i].encode_bare(bl);
+    }
+    for (unsigned i = 0; i < s; i++) {
+      ::encode(cmds[i].flags, bl);
+    }
+    ENCODE_FINISH(bl);
+  }
+  static void decode_vector(std::vector<MonCommand> &cmds,
+                           bufferlist::iterator &bl) {
+    DECODE_START(2, bl);
+    uint16_t s = 0;
+    ::decode(s, bl);
+    cmds.resize(s);
+    for (unsigned i = 0; i < s; ++i) {
+      cmds[i].decode_bare(bl);
+    }
+    if (struct_v >= 2) {
+      for (unsigned i = 0; i < s; i++)
+        ::decode(cmds[i].flags, bl);
+    } else {
+      for (unsigned i = 0; i < s; i++)
+        cmds[i].flags = 0;
+    }
+    DECODE_FINISH(bl);
+  }
+
   bool requires_perm(char p) const {
     return (req_perms.find(p) != std::string::npos);
   }
index 97d77d625eb8b7b719db14075c395fb25fa5e897..ef3c094b2339e83749eccfcdb6d01d06a504e767 100644 (file)
  */
 
 // note: this should be replaced shortly!
-COMMAND("pg force_create_pg name=pgid,type=CephPgid", \
-       "force creation of pg <pgid>", "pg", "rw", "cli,rest")
+COMMAND_WITH_FLAG("pg force_create_pg name=pgid,type=CephPgid", \
+                 "force creation of pg <pgid>", "pg", "rw", "cli,rest",
+                 FLAG(DEPRECATED))
 COMMAND_WITH_FLAG("pg set_full_ratio name=ratio,type=CephFloat,range=0.0|1.0", \
                  "set ratio at which pgs are considered full", \
                  "pg", "rw", "cli,rest", FLAG(DEPRECATED))
@@ -175,6 +176,12 @@ COMMAND("auth get-or-create " \
        "name=caps,type=CephString,n=N,req=false", \
        "add auth info for <entity> from input file, or random key if no input given, and/or any caps specified in the command", \
        "auth", "rwx", "cli,rest")
+COMMAND("fs authorize " \
+   "name=filesystem,type=CephString " \
+   "name=entity,type=CephString " \
+       "name=caps,type=CephString,n=N", \
+       "add auth for <entity> to access file system <filesystem> based on following directory and permissions pairs", \
+       "auth", "rwx", "cli,rest")
 COMMAND("auth caps " \
        "name=entity,type=CephString " \
        "name=caps,type=CephString,n=N", \
@@ -499,9 +506,6 @@ COMMAND("osd map " \
 COMMAND("osd lspools " \
        "name=auid,type=CephInt,req=false", \
        "list pools", "osd", "r", "cli,rest")
-COMMAND("osd blacklist ls", "show blacklisted clients", "osd", "r", "cli,rest")
-COMMAND("osd blacklist clear", "clear all blacklisted clients", "osd", "rw",
-        "cli,rest")
 COMMAND_WITH_FLAG("osd crush rule list", "list crush rules", "osd", "r", "cli,rest",
                  FLAG(DEPRECATED))
 COMMAND("osd crush rule ls", "list crush rules", "osd", "r", "cli,rest")
@@ -641,10 +645,9 @@ COMMAND("osd crush tree "
         "name=shadow,type=CephChoices,strings=--show-shadow,req=false", \
        "dump crush buckets and items in a tree view",
        "osd", "r", "cli,rest")
-COMMAND("osd crush class rm " \
-       "name=class,type=CephString,goodchars=[A-Za-z0-9-_]", \
-       "remove crush device class <class>", \
-       "osd", "rw", "cli,rest")
+COMMAND("osd crush ls name=node,type=CephString,goodchars=[A-Za-z0-9-_.]",
+       "list items beneath a node in the CRUSH tree",
+       "osd", "r", "cli,rest")
 COMMAND("osd crush class ls", \
        "list all crush device classes", \
        "osd", "r", "cli,rest")
@@ -878,6 +881,9 @@ COMMAND("osd blacklist " \
        "name=expire,type=CephFloat,range=0.0,req=false", \
        "add (optionally until <expire> seconds from now) or remove <addr> from blacklist", \
        "osd", "rw", "cli,rest")
+COMMAND("osd blacklist ls", "show blacklisted clients", "osd", "r", "cli,rest")
+COMMAND("osd blacklist clear", "clear all blacklisted clients", "osd", "rw",
+        "cli,rest")
 COMMAND("osd pool mksnap " \
        "name=pool,type=CephPoolname " \
        "name=snap,type=CephString", \
index def5571c3d134275a6a8fb0be2810cbb807921a4..f0a2e0a1619dfbf9bcd78406433ce8ee6564c5d5 100644 (file)
@@ -103,16 +103,19 @@ const string Monitor::MONITOR_STORE_PREFIX = "monitor_store";
 #undef FLAG
 #undef COMMAND
 #undef COMMAND_WITH_FLAG
-MonCommand mon_commands[] = {
 #define FLAG(f) (MonCommand::FLAG_##f)
 #define COMMAND(parsesig, helptext, modulename, req_perms, avail)      \
   {parsesig, helptext, modulename, req_perms, avail, FLAG(NONE)},
 #define COMMAND_WITH_FLAG(parsesig, helptext, modulename, req_perms, avail, flags) \
   {parsesig, helptext, modulename, req_perms, avail, flags},
+MonCommand mon_commands[] = {
 #include <mon/MonCommands.h>
+};
+MonCommand pgmonitor_commands[] = {
+#include <mon/PGMonitorCommands.h>
+};
 #undef COMMAND
 #undef COMMAND_WITH_FLAG
-};
 
 
 void C_MonContext::finish(int r) {
@@ -143,8 +146,6 @@ Monitor::Monitor(CephContext* cct_, string nm, MonitorDBStore *s,
   auth_service_required(cct,
                        cct->_conf->auth_supported.empty() ?
                        cct->_conf->auth_service_required : cct->_conf->auth_supported ),
-  leader_supported_mon_commands(NULL),
-  leader_supported_mon_commands_size(0),
   mgr_messenger(mgr_m),
   mgr_client(cct_, mgr_m),
   pgservice(nullptr),
@@ -205,14 +206,25 @@ Monitor::Monitor(CephContext* cct_, string nm, MonitorDBStore *s,
 
   exited_quorum = ceph_clock_now();
 
+  // prepare local commands
+  local_mon_commands.resize(ARRAY_SIZE(mon_commands));
+  for (unsigned i = 0; i < ARRAY_SIZE(mon_commands); ++i) {
+    local_mon_commands[i] = mon_commands[i];
+  }
+  MonCommand::encode_vector(local_mon_commands, local_mon_commands_bl);
+
+  local_upgrading_mon_commands = local_mon_commands;
+  for (unsigned i = 0; i < ARRAY_SIZE(pgmonitor_commands); ++i) {
+    local_upgrading_mon_commands.push_back(pgmonitor_commands[i]);
+  }
+  MonCommand::encode_vector(local_upgrading_mon_commands,
+                           local_upgrading_mon_commands_bl);
+
   // assume our commands until we have an election.  this only means
   // we won't reply with EINVAL before the election; any command that
   // actually matters will wait until we have quorum etc and then
   // retry (and revalidate).
-  const MonCommand *cmds;
-  int cmdsize;
-  get_locally_supported_monitor_commands(&cmds, &cmdsize);
-  set_leader_supported_commands(cmds, cmdsize);
+  leader_mon_commands = local_mon_commands;
 
   // note: OSDMonitor may update this based on the luminous flag.
   pgservice = mgrstatmon()->get_pg_stat_service();
@@ -227,8 +239,6 @@ Monitor::~Monitor()
   delete paxos;
   assert(session_map.sessions.empty());
   delete mon_caps;
-  if (leader_supported_mon_commands != mon_commands)
-    delete[] leader_supported_mon_commands;
 }
 
 
@@ -786,13 +796,6 @@ int Monitor::init()
   mgr_messenger->add_dispatcher_tail(this);  // for auth ms_* calls
 
   bootstrap();
-
-  // encode command sets
-  const MonCommand *cmds;
-  int cmdsize;
-  get_locally_supported_monitor_commands(&cmds, &cmdsize);
-  MonCommand::encode_array(cmds, cmdsize, supported_commands_bl);
-
   return 0;
 }
 
@@ -1861,14 +1864,10 @@ void Monitor::win_standalone_election()
   map<int,Metadata> metadata;
   collect_metadata(&metadata[0]);
 
-  const MonCommand *my_cmds = nullptr;
-  int cmdsize = 0;
-  get_locally_supported_monitor_commands(&my_cmds, &cmdsize);
   win_election(elector.get_epoch(), q,
                CEPH_FEATURES_ALL,
                ceph::features::mon::get_supported(),
-              metadata,
-               my_cmds, cmdsize);
+              metadata);
 }
 
 const utime_t& Monitor::get_leader_since() const
@@ -1896,8 +1895,7 @@ void Monitor::_finish_svc_election()
 
 void Monitor::win_election(epoch_t epoch, set<int>& active, uint64_t features,
                            const mon_feature_t& mon_features,
-                          const map<int,Metadata>& metadata,
-                           const MonCommand *cmdset, int cmdsize)
+                          const map<int,Metadata>& metadata)
 {
   dout(10) << __func__ << " epoch " << epoch << " quorum " << active
           << " features " << features
@@ -1916,7 +1914,7 @@ void Monitor::win_election(epoch_t epoch, set<int>& active, uint64_t features,
   clog->info() << "mon." << name << "@" << rank
                << " won leader election with quorum " << quorum;
 
-  set_leader_supported_commands(cmdset, cmdsize);
+  set_leader_commands(get_local_commands(mon_features));
 
   paxos->leader_init();
   // NOTE: tell monmap monitor first.  This is important for the
@@ -2464,6 +2462,7 @@ health_status_t Monitor::get_health_status(
 {
   health_status_t r = HEALTH_OK;
   bool compat = g_conf->mon_health_preluminous_compat;
+  bool compat_warn = g_conf->get_val<bool>("mon_health_preluminous_compat_warning");
   if (f) {
     f->open_object_section("health");
     f->open_object_section("checks");
@@ -2489,25 +2488,37 @@ health_status_t Monitor::get_health_status(
     *plain += "\n";
   }
 
-  if (f && compat) {
-    f->open_array_section("summary");
-    for (auto& svc : paxos_service) {
-      svc->get_health_checks().dump_summary_compat(f);
+  if (f && (compat || compat_warn)) {
+    health_status_t cr = compat_warn ? min(HEALTH_WARN, r) : r;
+    if (compat) {
+      f->open_array_section("summary");
+      if (compat_warn) {
+       f->open_object_section("item");
+       f->dump_stream("severity") << HEALTH_WARN;
+       f->dump_string("summary", "'ceph health' JSON format has changed in luminous; update your health monitoring scripts");
+       f->close_section();
+      }
+      for (auto& svc : paxos_service) {
+       svc->get_health_checks().dump_summary_compat(f);
+      }
+      f->close_section();
     }
-    f->close_section();
-    f->dump_stream("overall_status") << r;
+    f->dump_stream("overall_status") << cr;
   }
 
   if (want_detail) {
-    if (f && compat) {
+    if (f && (compat || compat_warn)) {
       f->open_array_section("detail");
+      if (compat_warn) {
+       f->dump_string("item", "'ceph health' JSON format has changed in luminous. If you see this your monitoring system is scraping the wrong fields. Disable this with 'mon health preluminous compat warning = false'");
+      }
     }
 
     for (auto& svc : paxos_service) {
       svc->get_health_checks().dump_detail(f, plain, compat);
     }
 
-    if (f && compat) {
+    if (f && (compat || compat_warn)) {
       f->close_section();
     }
   }
@@ -2812,18 +2823,14 @@ void Monitor::_generate_command_map(map<string,cmd_vartype>& cmdmap,
 
 const MonCommand *Monitor::_get_moncommand(
   const string &cmd_prefix,
-  const MonCommand *cmds,
-  int cmds_size)
-{
-  const MonCommand *this_cmd = NULL;
-  for (const MonCommand *cp = cmds;
-       cp < &cmds[cmds_size]; cp++) {
-    if (cp->cmdstring.compare(0, cmd_prefix.size(), cmd_prefix) == 0) {
-      this_cmd = cp;
-      break;
+  const vector<MonCommand>& cmds)
+{
+  for (auto& c : cmds) {
+    if (c.cmdstring.compare(0, cmd_prefix.size(), cmd_prefix) == 0) {
+      return &c;
     }
   }
-  return this_cmd;
+  return nullptr;
 }
 
 bool Monitor::_allowed_command(MonSession *s, string &module, string &prefix,
@@ -2870,20 +2877,6 @@ void Monitor::format_command_descriptions(const std::vector<MonCommand> &command
   f->flush(*rdata);
 }
 
-void Monitor::get_locally_supported_monitor_commands(const MonCommand **cmds,
-                                                    int *count)
-{
-  *cmds = mon_commands;
-  *count = ARRAY_SIZE(mon_commands);
-}
-void Monitor::set_leader_supported_commands(const MonCommand *cmds, int size)
-{
-  if (leader_supported_mon_commands != mon_commands)
-    delete[] leader_supported_mon_commands;
-  leader_supported_mon_commands = cmds;
-  leader_supported_mon_commands_size = size;
-}
-
 bool Monitor::is_keyring_required()
 {
   string auth_cluster_required = g_conf->auth_supported.empty() ?
@@ -2975,11 +2968,16 @@ void Monitor::handle_command(MonOpRequestRef op)
       osdmon()->osdmap.require_osd_release < CEPH_RELEASE_LUMINOUS;
 
     std::vector<MonCommand> commands;
-    commands = static_cast<MgrMonitor*>(
+
+    // only include mgr commands once all mons are upgraded (and we've dropped
+    // the hard-coded PGMonitor commands)
+    if (quorum_mon_features.contains_all(ceph::features::mon::FEATURE_LUMINOUS)) {
+      commands = static_cast<MgrMonitor*>(
         paxos_service[PAXOS_MGR])->get_command_descs();
+    }
 
-    for (int i = 0 ; i < leader_supported_mon_commands_size; ++i) {
-      commands.push_back(leader_supported_mon_commands[i]);
+    for (auto& c : leader_mon_commands) {
+      commands.push_back(c);
     }
 
     format_command_descriptions(commands, f, &rdata, hide_mgr_flag);
@@ -3015,12 +3013,9 @@ void Monitor::handle_command(MonOpRequestRef op)
   const auto& mgr_cmds = mgrmon()->get_command_descs();
   const MonCommand *mgr_cmd = nullptr;
   if (!mgr_cmds.empty()) {
-    mgr_cmd = _get_moncommand(prefix, &mgr_cmds.at(0), mgr_cmds.size());
+    mgr_cmd = _get_moncommand(prefix, mgr_cmds);
   }
-  leader_cmd = _get_moncommand(prefix,
-                               // the boost underlying this isn't const for some reason
-                               const_cast<MonCommand*>(leader_supported_mon_commands),
-                               leader_supported_mon_commands_size);
+  leader_cmd = _get_moncommand(prefix, leader_mon_commands);
   if (!leader_cmd) {
     leader_cmd = mgr_cmd;
     if (!leader_cmd) {
@@ -3029,8 +3024,9 @@ void Monitor::handle_command(MonOpRequestRef op)
     }
   }
   // validate command is in our map & matches, or forward if it is allowed
-  const MonCommand *mon_cmd = _get_moncommand(prefix, mon_commands,
-                                              ARRAY_SIZE(mon_commands));
+  const MonCommand *mon_cmd = _get_moncommand(
+    prefix,
+    get_local_commands(quorum_mon_features));
   if (!mon_cmd) {
     mon_cmd = mgr_cmd;
   }
@@ -3128,7 +3124,8 @@ void Monitor::handle_command(MonOpRequestRef op)
     return;
   }
 
-  if (module == "mds" || module == "fs") {
+  if ((module == "mds" || module == "fs")  &&
+      prefix != "fs authorize") {
     mdsmon()->dispatch(op);
     return;
   }
@@ -3157,7 +3154,7 @@ void Monitor::handle_command(MonOpRequestRef op)
     monmon()->dispatch(op);
     return;
   }
-  if (module == "auth") {
+  if (module == "auth" || prefix == "fs authorize") {
     authmon()->dispatch(op);
     return;
   }
@@ -3269,6 +3266,9 @@ void Monitor::handle_command(MonOpRequestRef op)
     std::string val;
     cmd_getval(cct, cmdmap, "value", val);
     r = g_conf->set_val(key, val, true, &ss);
+    if (r == 0) {
+      g_conf->apply_changes(nullptr);
+    }
     rs = ss.str();
     goto out;
   } else if (prefix == "status" ||
@@ -5318,7 +5318,7 @@ void Monitor::scrub_check_results()
     }
   }
   if (!errors)
-    clog->info() << "scrub ok on " << quorum << ": " << mine;
+    clog->debug() << "scrub ok on " << quorum << ": " << mine;
 }
 
 inline void Monitor::scrub_timeout()
index 2716f0cb5d0dcdbbe80ec61e50bf68165f686220..fa4ba9fa8f56f26034c3335b1232037a7ec6911f 100644 (file)
@@ -165,8 +165,13 @@ public:
 
   CompatSet features;
 
-  const MonCommand *leader_supported_mon_commands;
-  int leader_supported_mon_commands_size;
+  vector<MonCommand> leader_mon_commands; // quorum leader's commands
+  vector<MonCommand> local_mon_commands;  // commands i support
+  bufferlist local_mon_commands_bl;       // encoded version of above
+
+  // for upgrading mon cluster that still uses PGMonitor
+  vector<MonCommand> local_upgrading_mon_commands;  // mixed mon cluster commands
+  bufferlist local_upgrading_mon_commands_bl;       // encoded version of above
 
   Messenger *mgr_messenger;
   MgrClient mgr_client;
@@ -248,7 +253,6 @@ private:
    * Intersection of quorum members mon-specific feature bits
    */
   mon_feature_t quorum_mon_features;
-  bufferlist supported_commands_bl; // encoded MonCommands we support
 
   set<string> outside_quorum;
 
@@ -600,18 +604,13 @@ public:
   void win_election(epoch_t epoch, set<int>& q,
                    uint64_t features,
                     const mon_feature_t& mon_features,
-                   const map<int,Metadata>& metadata,
-                   const MonCommand *cmdset, int cmdsize);
+                   const map<int,Metadata>& metadata);
   void lose_election(epoch_t epoch, set<int>& q, int l,
                     uint64_t features,
                      const mon_feature_t& mon_features);
   // end election (called by Elector)
   void finish_election();
 
-  const bufferlist& get_supported_commands_bl() {
-    return supported_commands_bl;
-  }
-
   void update_logger();
 
   /**
@@ -687,8 +686,7 @@ public:
                                     map<string,string> &param_str_map);
   static const MonCommand *_get_moncommand(
     const string &cmd_prefix,
-    const MonCommand *cmds,
-    int cmds_size);
+    const vector<MonCommand>& cmds);
   bool _allowed_command(MonSession *s, string &module, string &prefix,
                         const map<string,cmd_vartype>& cmdmap,
                         const map<string,string>& param_str_map,
@@ -967,9 +965,23 @@ public:
                                          Formatter *f,
                                          bufferlist *rdata,
                                          bool hide_mgr_flag=false);
-  void get_locally_supported_monitor_commands(const MonCommand **cmds, int *count);
-  /// the Monitor owns this pointer once you pass it in
-  void set_leader_supported_commands(const MonCommand *cmds, int size);
+
+  /// Commands this monitor supports for the given feature set: the regular
+  /// list when FEATURE_LUMINOUS is present, the upgrade-time list otherwise.
+  const std::vector<MonCommand> &get_local_commands(mon_feature_t f) {
+    return f.contains_all(ceph::features::mon::FEATURE_LUMINOUS)
+      ? local_mon_commands
+      : local_upgrading_mon_commands;
+  }
+  /// Encoded counterpart of get_local_commands(): picks the bufferlist that
+  /// matches the given feature set.
+  const bufferlist& get_local_commands_bl(mon_feature_t f) {
+    if (!f.contains_all(ceph::features::mon::FEATURE_LUMINOUS)) {
+      return local_upgrading_mon_commands_bl;
+    }
+    return local_mon_commands_bl;
+  }
+  void set_leader_commands(const std::vector<MonCommand>& cmds) {  // record the leader's command list
+    leader_mon_commands = cmds;  // copy-assigned; cmds itself is left untouched
+  }
+
   static bool is_keyring_required();
 };
 
index b2f7b3e9882b3120ee0e72e89680fb92932083e6..d1c1766c2800cd05685b93985d2d0f6dee19cb71 100644 (file)
@@ -4984,6 +4984,43 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op)
                               show_shadow);
       rdata.append(ss.str());
     }
+  } else if (prefix == "osd crush ls") {
+    string name;
+    if (!cmd_getval(g_ceph_context, cmdmap, "node", name)) {
+      ss << "no node specified";
+      r = -EINVAL;
+      goto reply;
+    }
+    if (!osdmap.crush->name_exists(name)) {
+      ss << "node '" << name << "' does not exist";
+      r = -ENOENT;
+      goto reply;
+    }
+    int id = osdmap.crush->get_item_id(name);
+    list<int> result;
+    if (id >= 0) {
+      result.push_back(id);
+    } else {
+      int num = osdmap.crush->get_bucket_size(id);
+      for (int i = 0; i < num; ++i) {
+       result.push_back(osdmap.crush->get_bucket_item(id, i));
+      }
+    }
+    if (f) {
+      f->open_array_section("items");
+      for (auto i : result) {
+       f->dump_string("item", osdmap.crush->get_item_name(i));
+      }
+      f->close_section();
+      f->flush(rdata);
+    } else {
+      ostringstream ss;
+      for (auto i : result) {
+       ss << osdmap.crush->get_item_name(i) << "\n";
+      }
+      rdata.append(ss.str());
+    }
+    r = 0;
   } else if (prefix == "osd crush class ls") {
     boost::scoped_ptr<Formatter> f(Formatter::create(format, "json-pretty", "json-pretty"));
     f->open_array_section("crush_classes");
@@ -7391,7 +7428,6 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op,
         }
 
         auto class_name = newcrush.get_item_class(osd);
-        stringstream ts;
         if (!class_name) {
           ss << "osd." << osd << " belongs to no class, ";
           continue;
@@ -7480,63 +7516,6 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op,
       goto reply;
     else
       goto update;
-  } else if (prefix == "osd crush class rm") {
-    string device_class;
-    if (!cmd_getval(g_ceph_context, cmdmap, "class", device_class)) {
-      err = -EINVAL; // no value!
-      goto reply;
-    }
-    if (osdmap.require_osd_release < CEPH_RELEASE_LUMINOUS) {
-      ss << "you must complete the upgrade and 'ceph osd require-osd-release "
-        << "luminous' before using crush device classes";
-      err = -EPERM;
-      goto reply;
-    }
-
-    CrushWrapper newcrush;
-    _get_pending_crush(newcrush);
-
-    if (!newcrush.class_exists(device_class)) {
-      err = -ENOENT;
-      ss << "class '" << device_class << "' does not exist";
-      goto reply;
-    }
-
-    int class_id = newcrush.get_class_id(device_class);
-
-    stringstream ts;
-    if (newcrush.class_is_in_use(class_id, &ts)) {
-      err = -EBUSY;
-      ss << "class '" << device_class << "' " << ts.str();
-      goto reply;
-    }
-
-    set<int> osds;
-    newcrush.get_devices_by_class(device_class, &osds);
-    for (auto& p: osds) {
-      err = newcrush.remove_device_class(g_ceph_context, p, &ss);
-      if (err < 0) {
-        // ss has reason for failure
-        goto reply;
-      }
-    }
-
-    if (osds.empty()) {
-      // empty class, remove directly
-      err = newcrush.remove_class_name(device_class);
-      if (err < 0) {
-        ss << "class '" << device_class << "' cannot be removed '"
-           << cpp_strerror(err) << "'";
-        goto reply;
-      }
-    }
-
-    pending_inc.crush.clear();
-    newcrush.encode(pending_inc.crush, mon->get_quorum_con_features());
-    ss << "removed class " << device_class << " with id " << class_id
-       << " from crush map";
-    goto update;
-
   } else if (prefix == "osd crush weight-set create" ||
             prefix == "osd crush weight-set create-compat") {
     CrushWrapper newcrush;
@@ -8730,6 +8709,11 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op,
       err = -EINVAL;
       goto reply;
     }
+    if (rel == osdmap.require_osd_release) {
+      // idempotent
+      err = 0;
+      goto reply;
+    }
     if (rel == CEPH_RELEASE_LUMINOUS) {
       if (!HAVE_FEATURE(osdmap.get_up_osd_features(), SERVER_LUMINOUS)) {
        ss << "not all up OSDs have CEPH_FEATURE_SERVER_LUMINOUS feature";
@@ -9689,9 +9673,9 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op,
          << "really do.";
       err = -EPERM;
       goto reply;
-    } else if (is_destroy && !osdmap.exists(id)) {
+    } else if (!osdmap.exists(id)) {
       ss << "osd." << id << " does not exist";
-      err = -ENOENT;
+      err = 0; // idempotent
       goto reply;
     } else if (osdmap.is_up(id)) {
       ss << "osd." << id << " is not `down`.";
index c0ead08097ca13338567b5497eaacbe029060319..9ec7d8f8a834ada0d49401d695b4a2bfd1117e40 100644 (file)
@@ -614,6 +614,68 @@ void PGMapDigest::pool_cache_io_rate_summary(Formatter *f, ostream *out,
   cache_io_rate_summary(f, out, p->second.first, ts->second);
 }
 
+/**
+ * Ratio of raw storage consumed to user data stored for one pool.
+ *
+ * Replicated pools yield their size.  Erasure-coded pools yield (k+m)/k
+ * read from the pool's erasure code profile, or 0.0 when the profile has
+ * no "k" or no "m" entry.  Any other pool type trips an assert.
+ */
+static float pool_raw_used_rate(const OSDMap &osd_map, int64_t poolid)
+{
+  const pg_pool_t *pool = osd_map.get_pg_pool(poolid);
+  const auto pool_type = pool->get_type();
+
+  if (pool_type == pg_pool_t::TYPE_REPLICATED) {
+    return pool->get_size();
+  }
+
+  if (pool_type == pg_pool_t::TYPE_ERASURE) {
+    auto& profile =
+      osd_map.get_erasure_code_profile(pool->erasure_code_profile);
+    auto m_it = profile.find("m");
+    auto k_it = profile.find("k");
+    if (m_it == profile.end() || k_it == profile.end()) {
+      return 0.0;
+    }
+    int k = atoi(k_it->second.c_str());
+    int m = atoi(m_it->second.c_str());
+    int total = m + k;
+    assert(total != 0);
+    assert(k != 0);
+    return (float)total / k;
+  }
+
+  assert(0 == "unrecognized pool type");
+}
+
+/**
+ * Build the statfs figures.
+ *
+ * When data_pool is set and that pool has an entry in pg_pool_sum, the
+ * figures describe that pool alone; otherwise they are the overall totals
+ * taken from osd_sum and pg_sum.
+ */
+ceph_statfs PGMapDigest::get_statfs(OSDMap &osdmap,
+                                   boost::optional<int64_t> data_pool) const
+{
+  ceph_statfs statfs;
+
+  if (data_pool) {
+    auto pool_it = pg_pool_sum.find(*data_pool);
+    if (pool_it != pg_pool_sum.end()) {
+      // per-pool view: byte counts are shifted down to KB
+      const object_stat_sum_t& pool_sum = pool_it->second.stats.sum;
+      statfs.kb_used = (pool_sum.num_bytes >> 10);
+      statfs.kb_avail = get_pool_free_space(osdmap, *data_pool) >> 10;
+      statfs.num_objects = pool_sum.num_objects;
+      statfs.kb = statfs.kb_used + statfs.kb_avail;
+      return statfs;
+    }
+  }
+
+  // overall view; these are in KB.
+  statfs.kb = osd_sum.kb;
+  statfs.kb_used = osd_sum.kb_used;
+  statfs.kb_avail = osd_sum.kb_avail;
+  statfs.num_objects = pg_sum.stats.sum.num_objects;
+  return statfs;
+}
+
 void PGMapDigest::dump_pool_stats_full(
   const OSDMap &osd_map,
   stringstream *ss,
@@ -668,33 +730,8 @@ void PGMapDigest::dump_pool_stats_full(
     } else {
       avail = avail_by_rule[ruleno];
     }
-    switch (pool->get_type()) {
-    case pg_pool_t::TYPE_REPLICATED:
-      avail /= pool->get_size();
-      raw_used_rate = pool->get_size();
-      break;
-    case pg_pool_t::TYPE_ERASURE:
-    {
-      auto& ecp =
-        osd_map.get_erasure_code_profile(pool->erasure_code_profile);
-      auto pm = ecp.find("m");
-      auto pk = ecp.find("k");
-      if (pm != ecp.end() && pk != ecp.end()) {
-       int k = atoi(pk->second.c_str());
-       int m = atoi(pm->second.c_str());
-       int mk = m + k;
-       assert(mk != 0);
-       avail = avail * k / mk;
-       assert(k != 0);
-       raw_used_rate = (float)mk / k;
-      } else {
-       raw_used_rate = 0.0;
-      }
-    }
-    break;
-    default:
-      assert(0 == "unrecognized pool type");
-    }
+
+    raw_used_rate = ::pool_raw_used_rate(osd_map, pool_id);
 
     if (f) {
       f->open_object_section("pool");
@@ -824,6 +861,21 @@ void PGMapDigest::dump_object_stat_sum(
   }
 }
 
+/**
+ * Estimate how much more user data the given pool can accept.
+ *
+ * Resolves the pool's CRUSH rule, takes the space available under that
+ * rule (negative values are clamped to zero) and divides it by the pool's
+ * raw-used rate as computed by pool_raw_used_rate().
+ *
+ * @param osd_map map used to resolve the pool and its CRUSH rule
+ * @param poolid  id of the pool to query
+ * @return usable free space; 0 when the pool cannot be resolved or its
+ *         raw-used rate is not positive
+ */
+int64_t PGMapDigest::get_pool_free_space(const OSDMap &osd_map,
+                                        int64_t poolid) const
+{
+  const pg_pool_t *pool = osd_map.get_pg_pool(poolid);
+  if (!pool) {
+    // defensive: the lookup hands back a pointer, so do not dereference it
+    // blindly -- presumably null when the pool is absent from this map
+    return 0;
+  }
+
+  int ruleno = osd_map.crush->find_rule(pool->get_crush_rule(),
+                                       pool->get_type(),
+                                       pool->get_size());
+  int64_t avail = get_rule_avail(ruleno);
+  if (avail < 0)
+    avail = 0;
+
+  // pool_raw_used_rate() yields 0.0 for an erasure pool whose profile lacks
+  // k or m.  Dividing by that gives inf/NaN, and converting either of those
+  // to int64_t is undefined behaviour, so report no free space instead.
+  const double rate = ::pool_raw_used_rate(osd_map, poolid);
+  if (rate <= 0.0)
+    return 0;
+
+  // divide in double: float cannot represent large counts exactly
+  return static_cast<int64_t>(avail / rate);
+}
+
 int64_t PGMap::get_rule_avail(const OSDMap& osdmap, int ruleno) const
 {
   map<int,float> wm;
@@ -3204,7 +3256,7 @@ void PGMap::get_health_checks(
   }
 
   // POOL_APP
-  {
+  if (g_conf->get_val<bool>("mon_warn_on_pool_no_app")) {
     list<string> detail;
     for (auto &it : pools) {
       const pg_pool_t &pool = it.second;
index 3e81c7e05fa618b963f7a85809ba45e72b60ed48..3432f796633cf07c25e91a434c8a5a4d6b1ee705 100644 (file)
@@ -147,6 +147,12 @@ public:
   void pool_cache_io_rate_summary(Formatter *f, ostream *out,
                                   uint64_t poolid) const;
 
+  /**
+   * Return the number of additional bytes that can be stored in this
+   * pool before the first OSD fills up, accounting for PG overhead.
+   */
+  int64_t get_pool_free_space(const OSDMap &osd_map, int64_t poolid) const;
+
   virtual void dump_pool_stats_full(const OSDMap &osd_map, stringstream *ss,
                                    Formatter *f, bool verbose) const;
   void dump_fs_stats(stringstream *ss, Formatter *f, bool verbose) const;
@@ -171,15 +177,8 @@ public:
       return p->second.primary;
   }
 
-  ceph_statfs get_statfs() const {
-    ceph_statfs statfs;
-    // these are in KB.
-    statfs.kb = osd_sum.kb;
-    statfs.kb_used = osd_sum.kb_used;
-    statfs.kb_avail = osd_sum.kb_avail;
-    statfs.num_objects = pg_sum.stats.sum.num_objects;
-    return statfs;
-  }
+  ceph_statfs get_statfs(OSDMap &osdmap,
+                         boost::optional<int64_t> data_pool) const;
 
   int64_t get_rule_avail(int ruleno) const {
     auto i = avail_space_by_rule.find(ruleno);
@@ -597,7 +596,7 @@ public:
 
   bool is_readable() const override { return true; }
 
-  const pool_stat_t* get_pool_stat(int poolid) const override {
+  const pool_stat_t* get_pool_stat(int64_t poolid) const override {
     auto i = pgmap.pg_pool_sum.find(poolid);
     if (i != pgmap.pg_pool_sum.end()) {
       return &i->second;
@@ -639,7 +638,8 @@ public:
   size_t get_num_pg_by_osd(int osd) const override {
     return pgmap.get_num_pg_by_osd(osd);
   }
-  ceph_statfs get_statfs() const override {
+  ceph_statfs get_statfs(OSDMap& osd_map,
+                        boost::optional<int64_t> data_pool) const override {
     ceph_statfs statfs;
     statfs.kb = pgmap.osd_sum.kb;
     statfs.kb_used = pgmap.osd_sum.kb_used;
diff --git a/ceph/src/mon/PGMonitorCommands.h b/ceph/src/mon/PGMonitorCommands.h
new file mode 100644 (file)
index 0000000..12f6d83
--- /dev/null
@@ -0,0 +1,120 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/* no guard; may be included multiple times */
+
+COMMAND("pg stat", "show placement group status.",
+       "pg", "r", "cli,rest")
+COMMAND("pg getmap", "get binary pg map to -o/stdout", "pg", "r", "cli,rest")
+
+COMMAND("pg dump "                                                     \
+       "name=dumpcontents,type=CephChoices,strings=all|summary|sum|delta|pools|osds|pgs|pgs_brief,n=N,req=false", \
+       "show human-readable versions of pg map (only 'all' valid with plain)", "pg", "r", "cli,rest")
+COMMAND("pg dump_json "                                                        \
+       "name=dumpcontents,type=CephChoices,strings=all|summary|sum|pools|osds|pgs,n=N,req=false", \
+       "show human-readable version of pg map in json only",\
+       "pg", "r", "cli,rest")
+COMMAND("pg dump_pools_json", "show pg pools info in json only",\
+       "pg", "r", "cli,rest")
+
+COMMAND("pg ls-by-pool "               \
+        "name=poolstr,type=CephString " \
+       "name=states,type=CephString,n=N,req=false", \
+       "list pg with pool = [poolname]", "pg", "r", "cli,rest")
+COMMAND("pg ls-by-primary " \
+        "name=osd,type=CephOsdName " \
+        "name=pool,type=CephInt,req=false " \
+       "name=states,type=CephString,n=N,req=false", \
+       "list pg with primary = [osd]", "pg", "r", "cli,rest")
+COMMAND("pg ls-by-osd " \
+        "name=osd,type=CephOsdName " \
+        "name=pool,type=CephInt,req=false " \
+       "name=states,type=CephString,n=N,req=false", \
+       "list pg on osd [osd]", "pg", "r", "cli,rest")
+COMMAND("pg ls " \
+        "name=pool,type=CephInt,req=false " \
+       "name=states,type=CephString,n=N,req=false", \
+       "list pg with specific pool, osd, state", "pg", "r", "cli,rest")
+COMMAND("pg dump_stuck " \
+       "name=stuckops,type=CephChoices,strings=inactive|unclean|stale|undersized|degraded,n=N,req=false " \
+       "name=threshold,type=CephInt,req=false",
+       "show information about stuck pgs",\
+       "pg", "r", "cli,rest")
+COMMAND("pg debug " \
+       "name=debugop,type=CephChoices,strings=unfound_objects_exist|degraded_pgs_exist", \
+       "show debug info about pgs", "pg", "r", "cli,rest")
+
+COMMAND("pg scrub name=pgid,type=CephPgid", "start scrub on <pgid>", \
+       "pg", "rw", "cli,rest")
+COMMAND("pg deep-scrub name=pgid,type=CephPgid", "start deep-scrub on <pgid>", \
+       "pg", "rw", "cli,rest")
+COMMAND("pg repair name=pgid,type=CephPgid", "start repair on <pgid>", \
+       "pg", "rw", "cli,rest")
+
+// stuff in osd namespace
+COMMAND("osd perf", \
+        "print dump of OSD perf summary stats", \
+        "osd", \
+        "r", \
+        "cli,rest")
+COMMAND("osd df " \
+       "name=output_method,type=CephChoices,strings=plain|tree,req=false", \
+       "show OSD utilization", "osd", "r", "cli,rest")
+COMMAND("osd blocked-by", \
+       "print histogram of which OSDs are blocking their peers", \
+       "osd", "r", "cli,rest")
+COMMAND("osd pool stats " \
+        "name=name,type=CephString,req=false",
+        "obtain stats from all pools, or from specified pool",
+        "osd", "r", "cli,rest")
+COMMAND("osd reweight-by-utilization " \
+       "name=oload,type=CephInt,req=false " \
+       "name=max_change,type=CephFloat,req=false "                     \
+       "name=max_osds,type=CephInt,req=false "                 \
+       "name=no_increasing,type=CephChoices,strings=--no-increasing,req=false",\
+       "reweight OSDs by utilization [overload-percentage-for-consideration, default 120]", \
+       "osd", "rw", "cli,rest")
+COMMAND("osd test-reweight-by-utilization " \
+       "name=oload,type=CephInt,req=false " \
+       "name=max_change,type=CephFloat,req=false "                     \
+       "name=max_osds,type=CephInt,req=false "                 \
+       "name=no_increasing,type=CephChoices,strings=--no-increasing,req=false",\
+       "dry run of reweight OSDs by utilization [overload-percentage-for-consideration, default 120]", \
+       "osd", "r", "cli,rest")
+COMMAND("osd reweight-by-pg " \
+       "name=oload,type=CephInt,req=false " \
+       "name=max_change,type=CephFloat,req=false "                     \
+       "name=max_osds,type=CephInt,req=false "                 \
+       "name=pools,type=CephPoolname,n=N,req=false",                   \
+       "reweight OSDs by PG distribution [overload-percentage-for-consideration, default 120]", \
+       "osd", "rw", "cli,rest")
+COMMAND("osd test-reweight-by-pg " \
+       "name=oload,type=CephInt,req=false " \
+       "name=max_change,type=CephFloat,req=false "                     \
+       "name=max_osds,type=CephInt,req=false "                 \
+       "name=pools,type=CephPoolname,n=N,req=false",                   \
+       "dry run of reweight OSDs by PG distribution [overload-percentage-for-consideration, default 120]", \
+       "osd", "r", "cli,rest")
+
+COMMAND("osd scrub " \
+       "name=who,type=CephString", \
+       "initiate scrub on osd <who>, or use <all|any|*> to scrub all", \
+        "osd", "rw", "cli,rest")
+COMMAND("osd deep-scrub " \
+       "name=who,type=CephString", \
+       "initiate deep scrub on osd <who>, or use <all|any|*> to deep scrub all", \
+        "osd", "rw", "cli,rest")
+COMMAND("osd repair " \
+       "name=who,type=CephString", \
+       "initiate repair on osd <who>, or use <all|any|*> to repair all", \
+        "osd", "rw", "cli,rest")
+
+COMMAND("pg force_create_pg name=pgid,type=CephPgid", \
+       "force creation of pg <pgid>", "pg", "rw", "cli,rest")
+COMMAND_WITH_FLAG("pg set_full_ratio name=ratio,type=CephFloat,range=0.0|1.0", \
+                 "set ratio at which pgs are considered full", \
+                 "pg", "rw", "cli,rest", FLAG(DEPRECATED))
+COMMAND_WITH_FLAG("pg set_nearfull_ratio "                             \
+                 "name=ratio,type=CephFloat,range=0.0|1.0",            \
+                 "set ratio at which pgs are considered nearly full",  \
+                 "pg", "rw", "cli,rest", FLAG(DEPRECATED))
index 7c8cb004bfa151925c689ecac65e8f91c74620f3..1354ed3d4cbb9aea627badfae7186f50c41a2c82 100644 (file)
@@ -37,7 +37,7 @@ public:
    *  post-luminous, but not when we are redirecting to the PGMonitor
    */
   virtual bool is_readable() const { return true; }
-  virtual const pool_stat_t* get_pool_stat(int poolid) const = 0;
+  virtual const pool_stat_t* get_pool_stat(int64_t poolid) const = 0;
   virtual const osd_stat_t& get_osd_sum() const {
     ceph_abort();
   }
@@ -73,7 +73,8 @@ public:
   virtual size_t get_num_pg_by_osd(int osd) const {
     ceph_abort();
   }
-  virtual ceph_statfs get_statfs() const = 0;
+  virtual ceph_statfs get_statfs(OSDMap &osd_map,
+                                boost::optional<int64_t> data_pool) const = 0;
   virtual void print_summary(Formatter *f, ostream *out) const = 0;
   virtual void dump_info(Formatter *f) const = 0;
   virtual void dump_fs_stats(stringstream *ss, Formatter *f, bool verbose) const = 0;
index 51f0e1095d8e5179d4481f94c81f4844d189935a..a8971ce44244d5b523b84348a6aaf58177900e42 100644 (file)
@@ -35,10 +35,16 @@ struct health_check_t {
 
   void dump(Formatter *f) const {
     f->dump_stream("severity") << severity;
-    f->dump_string("summary", summary);
+
+    f->open_object_section("summary");
+    f->dump_string("message", summary);
+    f->close_section();
+
     f->open_array_section("detail");
     for (auto& p : detail) {
-      f->dump_string("item", p);
+      f->open_object_section("detail_item");
+      f->dump_string("message", p);
+      f->close_section();
     }
     f->close_section();
   }
@@ -138,11 +144,17 @@ struct health_check_map_t {
       if (f) {
        f->open_object_section(p.first.c_str());
        f->dump_stream("severity") << p.second.severity;
-       f->dump_string("message", p.second.summary);
+
+        f->open_object_section("summary");
+        f->dump_string("message", p.second.summary);
+        f->close_section();
+
        if (detail) {
          f->open_array_section("detail");
          for (auto& d : p.second.detail) {
-           f->dump_string("item", d);
+            f->open_object_section("detail_item");
+            f->dump_string("message", d);
+            f->close_section();
          }
          f->close_section();
        }
index e732f08e9f397d420844e91e35f13f4c5d11d130..068896c982ac7e68602395316b7067f45a30a696 100644 (file)
@@ -217,14 +217,14 @@ class Infiniband {
   };
 
  private:
-  uint32_t max_send_wr;
-  uint32_t max_recv_wr;
-  uint32_t max_sge;
-  uint8_t  ib_physical_port;
-  MemoryManager* memory_manager;
-  ibv_srq* srq;             // shared receive work queue
-  Device *device;
-  ProtectionDomain *pd;
+  uint32_t max_send_wr = 0;
+  uint32_t max_recv_wr = 0;
+  uint32_t max_sge = 0;
+  uint8_t  ib_physical_port = 0;
+  MemoryManager* memory_manager = nullptr;
+  ibv_srq* srq = nullptr;             // shared receive work queue
+  Device *device = NULL;
+  ProtectionDomain *pd = NULL;
   DeviceList *device_list = nullptr;
   RDMADispatcher *dispatcher = nullptr;
   void wire_gid_to_gid(const char *wgid, union ibv_gid *gid);
index 27373fb6487ae425754b3ebe571cc7c4a2e9f000..3c2d90094d140ad374d8926896ceabb57df36592 100644 (file)
@@ -300,7 +300,6 @@ class RDMAServerSocketImpl : public ServerSocketImpl {
 class RDMAStack : public NetworkStack {
   vector<std::thread> threads;
   RDMADispatcher *dispatcher;
-  PerfCounters *perf_counter;
 
   std::atomic<bool> fork_finished = {false};
 
index c88cf09c63af518db187968404694fe2634ee60a..97624c09fca651dea2d43d193dc4de986c41a27f 100644 (file)
@@ -1575,6 +1575,19 @@ public:
     return true;
   }
 
+  /**
+   * is_journal_rotational
+   *
+   * Check whether journal is backed by a rotational (HDD) or non-rotational
+   * (SSD) device.
+   *
+   * This base implementation always answers true; the method is virtual so
+   * that a derived store can report its own journal device instead.
+   *
+   * @return true for HDD, false for SSD
+   */
+  virtual bool is_journal_rotational() {
+    return true;
+  }
+
   virtual string get_default_device_class() {
     return is_rotational() ? "hdd" : "ssd";
   }
index d3fdbb71a8da7942fdd03576227f23c69897971f..075d3eff6ef18451c6ba04cdd41463ad6cbc0ee6 100644 (file)
@@ -191,10 +191,10 @@ int get_next_clear_bit(bufferlist& bl, int start)
   const char *p = bl.c_str();
   int bits = bl.length() << 3;
   while (start < bits) {
-    int which_byte = start / 8;
-    int which_bit = start % 8;
-    unsigned char byte_mask = 1 << which_bit;
-    if ((p[which_byte] & byte_mask) == 0) {
+    // byte = start / 8 (or start >> 3)
+    // bit = start % 8 (or start & 7)
+    unsigned char byte_mask = 1 << (start & 7);
+    if ((p[start >> 3] & byte_mask) == 0) {
       return start;
     }
     ++start;
index ed94e60067c230e90cd5a661fd2cbfc6032dffa9..2a30e36e953d11ea6f58738ea4e6078278eab85b 100644 (file)
@@ -2292,3 +2292,10 @@ int BlueFS::unlink(const string& dirname, const string& filename)
   _drop_link(file);
   return 0;
 }
+
+/**
+ * Report whether the WAL device is rotational.
+ *
+ * Answers true when no BDEV_WAL device is set or when that device says it
+ * is rotational; false otherwise.
+ */
+bool BlueFS::wal_is_rotational()
+{
+  return !bdev[BDEV_WAL] || bdev[BDEV_WAL]->is_rotational();
+}
index bb14ffc3697d035347d9b502aa8e93266b7441b6..1b38a6ab1d493840046c58a8bcdb0c7ff547f920 100644 (file)
@@ -370,6 +370,7 @@ public:
   int unlink(const string& dirname, const string& filename);
   int mkdir(const string& dirname);
   int rmdir(const string& dirname);
+  bool wal_is_rotational();
 
   bool dir_exists(const string& dirname);
   int stat(const string& dirname, const string& filename,
index 1b1e2e903572f918ee0df75ba6ae5a3314e13181..34e8a2fea0b8eb30e67a92f907063e015d31c34a 100644 (file)
@@ -407,6 +407,7 @@ rocksdb::Status BlueRocksEnv::GetChildren(
   const std::string& dir,
   std::vector<std::string>* result)
 {
+  result->clear();
   int r = fs->readdir(dir, result);
   if (r < 0)
     return rocksdb::Status::IOError(dir, strerror(ENOENT));//    return err_to_status(r);
index 1ad6c81e6c389762b29f91beebad1742c294631f..e74ea82c1a79f0367d7a5229a69cd437acc8655e 100644 (file)
@@ -3736,17 +3736,17 @@ int BlueStore::_set_cache_sizes()
     (double)1.0 - (double)cache_meta_ratio - (double)cache_kv_ratio;
 
   if (cache_meta_ratio < 0 || cache_meta_ratio > 1.0) {
-    derr << __func__ << "bluestore_cache_meta_ratio (" << cache_meta_ratio
+    derr << __func__ << " bluestore_cache_meta_ratio (" << cache_meta_ratio
         << ") must be in range [0,1.0]" << dendl;
     return -EINVAL;
   }
   if (cache_kv_ratio < 0 || cache_kv_ratio > 1.0) {
-    derr << __func__ << "bluestore_cache_kv_ratio (" << cache_kv_ratio
+    derr << __func__ << " bluestore_cache_kv_ratio (" << cache_kv_ratio
         << ") must be in range [0,1.0]" << dendl;
     return -EINVAL;
   }
   if (cache_meta_ratio + cache_kv_ratio > 1.0) {
-    derr << __func__ << "bluestore_cache_meta_ratio (" << cache_meta_ratio
+    derr << __func__ << " bluestore_cache_meta_ratio (" << cache_meta_ratio
         << ") + bluestore_cache_kv_ratio (" << cache_kv_ratio
         << ") = " << cache_meta_ratio + cache_kv_ratio << "; must be <= 1.0"
         << dendl;
@@ -4396,6 +4396,17 @@ bool BlueStore::is_rotational()
   return rotational;
 }
 
+/**
+ * Report whether the journal sits on a rotational device.
+ *
+ * Without bluefs this falls back to is_rotational(); otherwise the answer
+ * comes from bluefs->wal_is_rotational().
+ */
+bool BlueStore::is_journal_rotational()
+{
+  if (!bluefs) {
+    dout(5) << __func__ << " bluefs disabled, default to store media type"
+            << dendl;
+    return is_rotational();
+  }
+  // query once so the logged value and the returned value cannot differ
+  const bool wal_rotational = bluefs->wal_is_rotational();
+  dout(10) << __func__ << " " << (int)wal_rotational << dendl;
+  return wal_rotational;
+}
+
 bool BlueStore::test_mount_in_use()
 {
   // most error conditions mean the mount is not in use (e.g., because
@@ -8814,9 +8825,13 @@ int BlueStore::queue_transactions(
   if (txc->deferred_txn) {
     // ensure we do not block here because of deferred writes
     if (!throttle_deferred_bytes.get_or_fail(txc->cost)) {
+      dout(10) << __func__ << " failed get throttle_deferred_bytes, aggressive"
+              << dendl;
+      ++deferred_aggressive;
       deferred_try_submit();
       throttle_deferred_bytes.get(txc->cost);
-    }
+      --deferred_aggressive;
+   }
   }
   utime_t tend = ceph_clock_now();
 
@@ -10499,8 +10514,6 @@ int BlueStore::_do_remove(
     return 0;
   }
 
-  uint32_t b_start = OBJECT_MAX_SIZE;
-  uint32_t b_end = 0;
   for (auto& e : h->extent_map.extent_map) {
     const bluestore_blob_t& b = e.blob->get_blob();
     SharedBlob *sb = e.blob->shared_blob.get();
@@ -10510,17 +10523,9 @@ int BlueStore::_do_remove(
       dout(20) << __func__ << "  unsharing " << e << dendl;
       bluestore_blob_t& blob = e.blob->dirty_blob();
       blob.clear_flag(bluestore_blob_t::FLAG_SHARED);
-      if (e.logical_offset < b_start) {
-        b_start = e.logical_offset;
-      }
-      if (e.logical_end() > b_end) {
-        b_end = e.logical_end();
-      }
+      h->extent_map.dirty_range(e.logical_offset, 1);
     }
   }
-
-  assert(b_end > b_start);
-  h->extent_map.dirty_range(b_start, b_end - b_start);
   txc->write_onode(h);
 
   return 0;
@@ -10928,7 +10933,7 @@ int BlueStore::_do_clone_range(
       // make sure it is shared
       if (!blob.is_shared()) {
        c->make_blob_shared(_assign_blobid(txc), e.blob);
-       if (dirty_range_begin == 0) {
+       if (dirty_range_begin == 0 && dirty_range_end == 0) {
           dirty_range_begin = e.logical_offset;
         }
         assert(e.logical_end() > 0);
index 06a5418a64bf9e835765802db9f7ddeaa4de2952..d7a1980320ebb16151784804f71124919f802977 100644 (file)
@@ -2109,6 +2109,7 @@ public:
   bool allows_journal() override { return false; };
 
   bool is_rotational() override;
+  bool is_journal_rotational() override;
 
   string get_default_device_class() override {
     string device_class;
index f9b931a62861640e0edf94820a22aaf9794a6ef4..b097a3c18a61ba812932f636680b0470ebae1109 100644 (file)
@@ -1157,6 +1157,30 @@ bool FileStore::is_rotational()
   return rotational;
 }
 
+bool FileStore::is_journal_rotational()
+{
+  bool journal_rotational;
+  if (backend) {
+    journal_rotational = backend->is_journal_rotational();
+  } else {
+    int fd = ::open(journalpath.c_str(), O_RDONLY);
+    if (fd < 0)
+      return true;
+    struct statfs st;
+    int r = ::fstatfs(fd, &st);
+    ::close(fd);
+    if (r < 0) {
+      return true;
+    }
+    create_backend(st.f_type);
+    journal_rotational = backend->is_journal_rotational();
+    delete backend;
+    backend = NULL;
+  }
+  dout(10) << __func__ << " " << (int)journal_rotational << dendl;
+  return journal_rotational;
+}
+
 int FileStore::_detect_fs()
 {
   struct statfs st;
index 7f2edb91a0339afb0e75af2bf3ae9648e58e2449..7af6863fed41eec7b7afa2065529bdcdba3ec667 100644 (file)
@@ -484,6 +484,7 @@ public:
   }
 
   bool is_rotational() override;
+  bool is_journal_rotational() override;
 
   void dump_perf_counters(Formatter *f) override {
     f->open_object_section("perf_counters");
@@ -837,6 +838,9 @@ protected:
   const string& get_basedir_path() {
     return filestore->basedir;
   }
+  const string& get_journal_path() {
+    return filestore->journalpath;
+  }
   const string& get_current_path() {
     return filestore->current_fn;
   }
@@ -874,6 +878,7 @@ public:
   virtual bool has_fiemap() = 0;
   virtual bool has_seek_data_hole() = 0;
   virtual bool is_rotational() = 0;
+  virtual bool is_journal_rotational() = 0;
   virtual int do_fiemap(int fd, off_t start, size_t len, struct fiemap **pfiemap) = 0;
   virtual int clone_range(int from, int to, uint64_t srcoff, uint64_t len, uint64_t dstoff) = 0;
   virtual int set_alloc_hint(int fd, uint64_t hint) = 0;
index 33680a4ca6b79ad5ac36fc97dbbfbcb12722ee4f..3ed4e4b5d941bc7ef4aa939bc6c2a1505a886d27 100644 (file)
@@ -87,6 +87,27 @@ GenericFileStoreBackend::GenericFileStoreBackend(FileStore *fs):
     }
     ::close(fd);
   }
+  // journal rotational?
+  {
+    // NOTE: the below won't work on btrfs; we'll assume rotational.
+    string fn = get_journal_path();
+    int fd = ::open(fn.c_str(), O_RDONLY);
+    if (fd < 0) {
+      return;
+    }
+    char partition[PATH_MAX], devname[PATH_MAX];
+    int r = get_device_by_fd(fd, partition, devname, sizeof(devname));
+    if (r < 0) {
+      dout(1) << "unable to get journal device name for "
+              << get_journal_path() << ": " << cpp_strerror(r) << dendl;
+      m_journal_rotational = true;
+    } else {
+      m_journal_rotational = block_device_is_rotational(devname);
+      dout(20) << __func__ << " journal devname " << devname
+               << " journal rotational " << (int)m_journal_rotational << dendl;
+    }
+    ::close(fd);
+  }
 }
 
 int GenericFileStoreBackend::detect_features()
index 8478067fa8fb86b72e13621e0bc29a60be359be3..207c3d0d40c63445bc3f4957f7caa91c109e3897 100644 (file)
@@ -29,6 +29,7 @@ private:
   bool m_filestore_fsync_flushes_journal_data;
   bool m_filestore_splice;
   bool m_rotational = true;
+  bool m_journal_rotational = true;
 public:
   explicit GenericFileStoreBackend(FileStore *fs);
   ~GenericFileStoreBackend() override {}
@@ -42,6 +43,9 @@ public:
   bool is_rotational() override {
     return m_rotational;
   }
+  bool is_journal_rotational() override {
+    return m_journal_rotational;
+  }
   int list_checkpoints(list<string>& ls) override { return 0; }
   int create_checkpoint(const string& name, uint64_t *cid) override { return -EOPNOTSUPP; }
   int sync_checkpoint(uint64_t id) override { return -EOPNOTSUPP; }
index 3e721e9bc9843cd6eb5ecd6328477ef214c6a94e..58245ecb49de3eb2ad53bcef1a0c7e8e8d77a933 100644 (file)
@@ -2357,10 +2357,12 @@ float OSD::get_osd_recovery_sleep()
 {
   if (cct->_conf->osd_recovery_sleep)
     return cct->_conf->osd_recovery_sleep;
-  if (store_is_rotational)
-    return cct->_conf->osd_recovery_sleep_hdd;
-  else
+  if (!store_is_rotational && !journal_is_rotational)
     return cct->_conf->osd_recovery_sleep_ssd;
+  else if (store_is_rotational && !journal_is_rotational)
+    return cct->_conf->get_val<double>("osd_recovery_sleep_hybrid");
+  else
+    return cct->_conf->osd_recovery_sleep_hdd;
 }
 
 int OSD::init()
@@ -2379,6 +2381,7 @@ int OSD::init()
   dout(2) << "init " << dev_path
          << " (looks like " << (store_is_rotational ? "hdd" : "ssd") << ")"
          << dendl;
+  dout(2) << "journal " << journal_path << dendl;
   assert(store);  // call pre_init() first!
 
   store->set_cache_shards(get_num_op_shards());
@@ -2388,6 +2391,9 @@ int OSD::init()
     derr << "OSD:init: unable to mount object store" << dendl;
     return r;
   }
+  journal_is_rotational = store->is_journal_rotational();
+  dout(2) << "journal looks like " << (journal_is_rotational ? "hdd" : "ssd")
+          << dendl;
 
   enable_disable_fuse(false);
 
@@ -2515,6 +2521,9 @@ int OSD::init()
 
   clear_temp_objects();
 
+  // initialize osdmap references in sharded wq
+  op_shardedwq.prune_pg_waiters(osdmap, whoami);
+
   // load up pgs (as they previously existed)
   load_pgs();
 
@@ -2637,15 +2646,15 @@ int OSD::init()
 
   r = update_crush_device_class();
   if (r < 0) {
-    derr << __func__ <<" unable to update_crush_device_class: "
-         << cpp_strerror(r) << dendl;
+    derr << __func__ << " unable to update_crush_device_class: "
+        << cpp_strerror(r) << dendl;
     osd_lock.Lock();
     goto monout;
   }
 
   r = update_crush_location();
   if (r < 0) {
-    derr << __func__ <<" unable to update_crush_location: "
+    derr << __func__ << " unable to update_crush_location: "
          << cpp_strerror(r) << dendl;
     osd_lock.Lock();
     goto monout;
@@ -3526,6 +3535,7 @@ int OSD::update_crush_device_class()
   }
 
   if (device_class.empty()) {
+    dout(20) << __func__ << " no device class stored locally" << dendl;
     return 0;
   }
 
@@ -3535,11 +3545,12 @@ int OSD::update_crush_device_class()
     string("\"ids\": [\"") + stringify(whoami) + string("\"]}");
 
   r = mon_cmd_maybe_osd_create(cmd);
-  if (r == -EPERM) {
-    r = 0;
-  }
-
-  return r;
+  // the above cmd can fail for various reasons, e.g.:
+  //   (1) we are connecting to a pre-luminous monitor
+  //   (2) user manually specify a class other than
+  //       'ceph-disk prepare --crush-device-class'
+  // simply skip result-checking for now
+  return 0;
 }
 
 void OSD::write_superblock(ObjectStore::Transaction& t)
@@ -5882,6 +5893,7 @@ void OSD::_collect_metadata(map<string,string> *pm)
   // backend
   (*pm)["osd_objectstore"] = store->get_type();
   (*pm)["rotational"] = store_is_rotational ? "1" : "0";
+  (*pm)["journal_rotational"] = journal_is_rotational ? "1" : "0";
   (*pm)["default_device_class"] = store->get_default_device_class();
   store->collect_metadata(pm);
 
@@ -6404,6 +6416,9 @@ void OSD::do_command(Connection *con, ceph_tid_t tid, vector<string>& cmd, buffe
     cmd_getval(cct, cmdmap, "value", val);
     osd_lock.Unlock();
     r = cct->_conf->set_val(key, val, true, &ss);
+    if (r == 0) {
+      cct->_conf->apply_changes(nullptr);
+    }
     osd_lock.Lock();
   }
   else if (prefix == "cluster_log") {
@@ -8955,17 +8970,19 @@ void OSD::handle_force_recovery(Message *m)
 {
   MOSDForceRecovery *msg = static_cast<MOSDForceRecovery*>(m);
   assert(msg->get_type() == MSG_OSD_FORCE_RECOVERY);
-  RWLock::RLocker l(pg_map_lock);
 
-  vector<PG*> local_pgs;
+  vector<PGRef> local_pgs;
   local_pgs.reserve(msg->forced_pgs.size());
 
-  for (auto& i : msg->forced_pgs) {
-    spg_t locpg;
-    if (osdmap->get_primary_shard(i, &locpg)) {
-      auto pg_map_entry = pg_map.find(locpg);
-      if (pg_map_entry != pg_map.end()) {
-       local_pgs.push_back(pg_map_entry->second);
+  {
+    RWLock::RLocker l(pg_map_lock);
+    for (auto& i : msg->forced_pgs) {
+      spg_t locpg;
+      if (osdmap->get_primary_shard(i, &locpg)) {
+       auto pg_map_entry = pg_map.find(locpg);
+       if (pg_map_entry != pg_map.end()) {
+         local_pgs.push_back(pg_map_entry->second);
+       }
       }
     }
   }
@@ -9231,14 +9248,12 @@ bool OSDService::_recover_now(uint64_t *available_pushes)
 }
 
 
-void OSDService::adjust_pg_priorities(vector<PG*> pgs, int newflags)
+void OSDService::adjust_pg_priorities(const vector<PGRef>& pgs, int newflags)
 {
   if (!pgs.size() || !(newflags & (OFR_BACKFILL | OFR_RECOVERY)))
     return;
   int newstate = 0;
 
-  Mutex::Locker l(recovery_lock);
-
   if (newflags & OFR_BACKFILL) {
     newstate = PG_STATE_FORCED_BACKFILL;
   } else if (newflags & OFR_RECOVERY) {
@@ -9259,17 +9274,21 @@ void OSDService::adjust_pg_priorities(vector<PG*> pgs, int newflags)
 
   if (newflags & OFR_CANCEL) {
     for (auto& i : pgs) {
-      i->change_recovery_force_mode(newstate, true);
+      i->lock();
+      i->_change_recovery_force_mode(newstate, true);
+      i->unlock();
     }
   } else {
     for (auto& i : pgs) {
       // make sure the PG is in correct state before forcing backfill or recovery, or
       // else we'll make PG keeping FORCE_* flag forever, requiring osds restart
       // or forcing somehow recovery/backfill.
+      i->lock();
       int pgstate = i->get_state();
       if ( ((newstate == PG_STATE_FORCED_RECOVERY) && (pgstate & (PG_STATE_DEGRADED | PG_STATE_RECOVERY_WAIT | PG_STATE_RECOVERING))) ||
            ((newstate == PG_STATE_FORCED_BACKFILL) && (pgstate & (PG_STATE_DEGRADED | PG_STATE_BACKFILL_WAIT | PG_STATE_BACKFILL))) )
-        i->change_recovery_force_mode(newstate, false);
+        i->_change_recovery_force_mode(newstate, false);
+      i->unlock();
     }
   }
 }
index 573235d8ca4a2c4cd27a385cde61ed208cae6791..a4244e5d76ca42a2e7124a7e53f0ea76ea462484 100644 (file)
@@ -962,7 +962,7 @@ public:
     _queue_for_recovery(make_pair(queued, pg), reserved_pushes);
   }
 
-  void adjust_pg_priorities(vector<PG*> pgs, int newflags);
+  void adjust_pg_priorities(const vector<PGRef>& pgs, int newflags);
 
   // osd map cache (past osd maps)
   Mutex map_cache_lock;
@@ -1222,6 +1222,7 @@ protected:
   std::string dev_path, journal_path;
 
   bool store_is_rotational = true;
+  bool journal_is_rotational = true;
 
   ZTracer::Endpoint trace_endpoint;
   void create_logger();
@@ -2335,14 +2336,15 @@ private:
       srand(time(NULL));
       unsigned which = rand() % (sizeof(index_lookup) / sizeof(index_lookup[0]));
       return index_lookup[which];
-    } else if (cct->_conf->osd_op_queue == "wpq") {
-      return io_queue::weightedpriority;
+    } else if (cct->_conf->osd_op_queue == "prioritized") {
+      return io_queue::prioritized;
     } else if (cct->_conf->osd_op_queue == "mclock_opclass") {
       return io_queue::mclock_opclass;
     } else if (cct->_conf->osd_op_queue == "mclock_client") {
       return io_queue::mclock_client;
     } else {
-      return io_queue::prioritized;
+      // default / catch-all is 'wpq'
+      return io_queue::weightedpriority;
     }
   }
 
@@ -2350,10 +2352,11 @@ private:
     if (cct->_conf->osd_op_queue_cut_off == "debug_random") {
       srand(time(NULL));
       return (rand() % 2 < 1) ? CEPH_MSG_PRIO_HIGH : CEPH_MSG_PRIO_LOW;
-    } else if (cct->_conf->osd_op_queue_cut_off == "low") {
-      return CEPH_MSG_PRIO_LOW;
-    } else {
+    } else if (cct->_conf->osd_op_queue_cut_off == "high") {
       return CEPH_MSG_PRIO_HIGH;
+    } else {
+      // default / catch-all is 'low'
+      return CEPH_MSG_PRIO_LOW;
     }
   }
 
index 57035c4c8ea8e704597a7afeb75a905b562e5e6c..718c69223ef3fba021eab2e3078c2841cc444f87 100644 (file)
@@ -3311,7 +3311,7 @@ int OSDMap::build_simple_optioned(CephContext *cct, epoch_t e, uuid_d &fsid,
 
   int poolbase = get_max_osd() ? get_max_osd() : 1;
 
-  int const default_replicated_rule = crush->get_osd_pool_default_crush_replicated_ruleset(cct);
+  const int default_replicated_rule = crush->get_osd_pool_default_crush_replicated_ruleset(cct);
   assert(default_replicated_rule >= 0);
 
   if (default_pool) {
index 37ae7d797831fc015084cd0fd9d7747f0bdcb8db..e0707efe2d581bf8ce1ca81ff5c3ab2391e8c01c 100644 (file)
@@ -1644,6 +1644,7 @@ void PG::activate(ObjectStore::Transaction& t,
       dout(10) << "activate peer osd." << peer << " " << pi << dendl;
 
       MOSDPGLog *m = 0;
+      assert(peer_missing.count(peer));
       pg_missing_t& pm = peer_missing[peer];
 
       bool needs_past_intervals = pi.dne();
@@ -2041,17 +2042,17 @@ void PG::mark_clean()
   kick_snap_trim();
 }
 
-void PG::change_recovery_force_mode(int new_mode, bool clear)
+void PG::_change_recovery_force_mode(int new_mode, bool clear)
 {
-  lock(true);
-  if (clear) {
-    state_clear(new_mode);
-  } else {
-    state_set(new_mode);
+  if (!deleting) {
+    // we can't and shouldn't do anything if the PG is being deleted locally
+    if (clear) {
+      state_clear(new_mode);
+    } else {
+      state_set(new_mode);
+    }
+    publish_stats_to_osd();
   }
-  publish_stats_to_osd();
-
-  unlock();
 }
 
 inline int PG::clamp_recovery_priority(int priority)
@@ -2229,6 +2230,10 @@ void PG::split_into(pg_t child_pgid, PG *child, unsigned split_bits)
     // in the future).
     info.set_last_backfill(hobject_t());
     child->info.set_last_backfill(hobject_t());
+    // restarting backfill implies that the missing set is empty,
+    // since it is only used for objects prior to last_backfill
+    pg_log.reset_backfill();
+    child->pg_log.reset_backfill();
   }
 
   child->info.stats = info.stats;
@@ -3310,6 +3315,17 @@ void PG::read_state(ObjectStore *store, bufferlist &bl)
 
   last_written_info = info;
 
+  // if we are upgrading from jewel, we need to force rebuild of
+  // missing set.  v9 was fastinfo, added v11.0.2-331-g1d5dc29a13
+  // (before kraken).  persisted missing set was circa
+  // v11.0.0-866-gb0e239da95 (a bit earlier, also before kraken).
+  // v8 was pre-jewel (per-pg meta object).
+  bool force_rebuild_missing = info_struct_v < 9;
+  if (force_rebuild_missing) {
+    dout(10) << __func__ << " detected upgrade from jewel, force_rebuild_missing"
+            << dendl;
+  }
+
   ostringstream oss;
   pg_log.read_log_and_missing(
     store,
@@ -3317,12 +3333,19 @@ void PG::read_state(ObjectStore *store, bufferlist &bl)
     info_struct_v < 8 ? coll_t::meta() : coll,
     ghobject_t(info_struct_v < 8 ? OSD::make_pg_log_oid(pg_id) : pgmeta_oid),
     info,
+    force_rebuild_missing,
     oss,
     cct->_conf->osd_ignore_stale_divergent_priors,
     cct->_conf->osd_debug_verify_missing_on_start);
   if (oss.tellp())
     osd->clog->error() << oss.rdbuf();
 
+  if (force_rebuild_missing) {
+    dout(10) << __func__ << " forced rebuild of missing got "
+            << pg_log.get_missing()
+            << dendl;
+  }
+
   // log any weirdness
   log_weirdness();
 }
@@ -4492,7 +4515,7 @@ void PG::chunky_scrub(ThreadPool::TPHandle &handle)
          const char *mode = (repair ? "repair": (deep_scrub ? "deep-scrub" : "scrub"));
          stringstream oss;
          oss << info.pgid.pgid << " " << mode << " starts" << std::endl;
-         osd->clog->info(oss);
+         osd->clog->debug(oss);
        }
 
        scrubber.seed = -1;
@@ -4931,7 +4954,7 @@ void PG::scrub_finish()
     if (total_errors)
       osd->clog->error(oss);
     else
-      osd->clog->info(oss);
+      osd->clog->debug(oss);
   }
 
   // finish up
@@ -5069,6 +5092,7 @@ void PG::merge_new_log_entries(
     assert(peer_missing.count(peer));
     assert(peer_info.count(peer));
     pg_missing_t& pmissing(peer_missing[peer]);
+    dout(20) << __func__ << " peer_missing for " << peer << " = " << pmissing << dendl;
     pg_info_t& pinfo(peer_info[peer]);
     bool invalidate_stats = PGLog::append_log_entries_update_missing(
       pinfo.last_backfill,
@@ -8003,18 +8027,23 @@ PG::RecoveryState::GetMissing::GetMissing(my_context ctx)
        ++i) {
     if (*i == pg->get_primary()) continue;
     const pg_info_t& pi = pg->peer_info[*i];
+    // reset this so to make sure the pg_missing_t is initialized and
+    // has the correct semantics even if we don't need to get a
+    // missing set from a shard. This way later additions due to
+    // lost+unfound delete work properly.
+    pg->peer_missing[*i].may_include_deletes = !pg->perform_deletes_during_peering();
 
     if (pi.is_empty())
       continue;                                // no pg data, nothing divergent
 
     if (pi.last_update < pg->pg_log.get_tail()) {
       ldout(pg->cct, 10) << " osd." << *i << " is not contiguous, will restart backfill" << dendl;
-      pg->peer_missing[*i];
+      pg->peer_missing[*i].clear();
       continue;
     }
     if (pi.last_backfill == hobject_t()) {
       ldout(pg->cct, 10) << " osd." << *i << " will fully backfill; can infer empty missing set" << dendl;
-      pg->peer_missing[*i];
+      pg->peer_missing[*i].clear();
       continue;
     }
 
@@ -8025,7 +8054,7 @@ PG::RecoveryState::GetMissing::GetMissing(my_context ctx)
       // FIXME: we can do better here.  if last_update==last_complete we
       //        can infer the rest!
       ldout(pg->cct, 10) << " osd." << *i << " has no missing, identical log" << dendl;
-      pg->peer_missing[*i];
+      pg->peer_missing[*i].clear();
       continue;
     }
 
index 5ce78db003ae19d4e28575fb4db1ca4069c8b0af..80b75dc5eb604c82ad4e2de1682e74f2427b574e 100644 (file)
@@ -982,7 +982,7 @@ public:
   unsigned get_backfill_priority();
 
   void mark_clean();  ///< mark an active pg clean
-  void change_recovery_force_mode(int new_mode, bool clear);
+  void _change_recovery_force_mode(int new_mode, bool clear);
 
   /// return [start,end) bounds for required past_intervals
   static pair<epoch_t, epoch_t> get_required_past_interval_bounds(
index 19405de25be67aac67edb054f9a7c6d9ecf94f2e..c86bfb5bd1364325d09ca34a0c3b560b3b7f2879 100644 (file)
@@ -476,13 +476,13 @@ public:
     }
 
     void index(pg_log_dup_t& e) {
-      if (PGLOG_INDEXED_DUPS) {
+      if (indexed_data & PGLOG_INDEXED_DUPS) {
        dup_index[e.reqid] = &e;
       }
     }
 
     void unindex(const pg_log_dup_t& e) {
-      if (PGLOG_INDEXED_DUPS) {
+      if (indexed_data & PGLOG_INDEXED_DUPS) {
        auto i = dup_index.find(e.reqid);
        if (i != dup_index.end()) {
          dup_index.erase(i);
@@ -758,6 +758,7 @@ public:
           missing.get_items().at(
             missing.get_rmissing().begin()->second
             ).need) {
+      assert(log.complete_to != log.log.end());
       ++log.complete_to;
     }
     assert(log.complete_to != log.log.end());
@@ -840,23 +841,38 @@ protected:
     // strip out and ignore ERROR entries
     mempool::osd_pglog::list<pg_log_entry_t> entries;
     eversion_t last;
+    bool seen_non_error = false;
     for (list<pg_log_entry_t>::const_iterator i = orig_entries.begin();
         i != orig_entries.end();
         ++i) {
       // all entries are on hoid
       assert(i->soid == hoid);
-      if (i != orig_entries.begin() && i->prior_version != eversion_t()) {
+      // did not see error entries before this entry and this entry is not error
+      // then this entry is the first non error entry
+      bool first_non_error = ! seen_non_error && ! i->is_error();
+      if (! i->is_error() ) {
+        // see a non error entry now
+        seen_non_error = true;
+      }
+      
+      // No need to check the first entry since it prior_version is unavailable
+      // in the list
+      // No need to check if the prior_version is the minimal version
+      // No need to check the first non-error entry since the leading error
+      // entries are not its prior version
+      if (i != orig_entries.begin() && i->prior_version != eversion_t() &&
+          ! first_non_error) {
        // in increasing order of version
        assert(i->version > last);
        // prior_version correct (unless it is an ERROR entry)
        assert(i->prior_version == last || i->is_error());
       }
-      last = i->version;
       if (i->is_error()) {
        ldpp_dout(dpp, 20) << __func__ << ": ignoring " << *i << dendl;
       } else {
        ldpp_dout(dpp, 20) << __func__ << ": keeping " << *i << dendl;
        entries.push_back(*i);
+       last = i->version;
       }
     }
     if (entries.empty()) {
@@ -1222,13 +1238,14 @@ public:
     coll_t log_coll,
     ghobject_t log_oid,
     const pg_info_t &info,
+    bool force_rebuild_missing,
     ostringstream &oss,
     bool tolerate_divergent_missing_log,
     bool debug_verify_stored_missing = false
     ) {
     return read_log_and_missing(
       store, pg_coll, log_coll, log_oid, info,
-      log, missing, oss,
+      log, missing, force_rebuild_missing, oss,
       tolerate_divergent_missing_log,
       &clear_divergent_priors,
       this,
@@ -1245,6 +1262,7 @@ public:
     const pg_info_t &info,
     IndexedLog &log,
     missing_type &missing,
+    bool force_rebuild_missing,
     ostringstream &oss,
     bool tolerate_divergent_missing_log,
     bool *clear_divergent_priors = nullptr,
@@ -1266,7 +1284,6 @@ public:
     eversion_t on_disk_rollback_info_trimmed_to = eversion_t();
     ObjectMap::ObjectMapIterator p = store->get_omap_iterator(log_coll, log_oid);
     map<eversion_t, hobject_t> divergent_priors;
-    bool has_divergent_priors = false;
     missing.may_include_deletes = false;
     list<pg_log_entry_t> entries;
     list<pg_log_dup_t> dups;
@@ -1281,7 +1298,7 @@ public:
          ::decode(divergent_priors, bp);
          ldpp_dout(dpp, 20) << "read_log_and_missing " << divergent_priors.size()
                             << " divergent_priors" << dendl;
-         has_divergent_priors = true;
+         assert(force_rebuild_missing);
          debug_verify_stored_missing = false;
        } else if (p->key() == "can_rollback_to") {
          ::decode(on_disk_can_rollback_to, bp);
@@ -1328,7 +1345,7 @@ public:
       std::move(entries),
       std::move(dups));
 
-    if (has_divergent_priors || debug_verify_stored_missing) {
+    if (force_rebuild_missing || debug_verify_stored_missing) {
       // build missing
       if (debug_verify_stored_missing || info.last_complete < info.last_update) {
        ldpp_dout(dpp, 10)
@@ -1421,7 +1438,7 @@ public:
            }
          }
        } else {
-         assert(has_divergent_priors);
+         assert(force_rebuild_missing);
          for (map<eversion_t, hobject_t>::reverse_iterator i =
                 divergent_priors.rbegin();
               i != divergent_priors.rend();
@@ -1475,7 +1492,7 @@ public:
       }
     }
 
-    if (!has_divergent_priors) {
+    if (!force_rebuild_missing) {
       if (clear_divergent_priors)
        (*clear_divergent_priors) = false;
       missing.flush();
index b91cf4385a7b9f86dbd9064e06c57b7455a4e199..46bee0125e6a2c01f33d7bd106499a21679e5f2c 100644 (file)
@@ -1326,7 +1326,7 @@ void Objecter::handle_osd_map(MOSDMap *m)
     if (!op->session) {
       _calc_target(&op->target, nullptr);
       OSDSession *s = NULL;
-      int const r = _get_session(op->target.osd, &s, sul);
+      const int r = _get_session(op->target.osd, &s, sul);
       assert(r == 0);
       assert(s != NULL);
       op->session = s;
@@ -4235,7 +4235,9 @@ void Objecter::_finish_pool_stat_op(PoolStatOp *op, int r)
   delete op;
 }
 
-void Objecter::get_fs_stats(ceph_statfs& result, Context *onfinish)
+void Objecter::get_fs_stats(ceph_statfs& result,
+                           boost::optional<int64_t> data_pool,
+                           Context *onfinish)
 {
   ldout(cct, 10) << "get_fs_stats" << dendl;
   unique_lock l(rwlock);
@@ -4243,6 +4245,7 @@ void Objecter::get_fs_stats(ceph_statfs& result, Context *onfinish)
   StatfsOp *op = new StatfsOp;
   op->tid = ++last_tid;
   op->stats = &result;
+  op->data_pool = data_pool;
   op->onfinish = onfinish;
   if (mon_timeout > timespan(0)) {
     op->ontimeout = timer.add_event(mon_timeout,
@@ -4265,6 +4268,7 @@ void Objecter::_fs_stats_submit(StatfsOp *op)
 
   ldout(cct, 10) << "fs_stats_submit" << op->tid << dendl;
   monc->send_mon_message(new MStatfs(monc->get_fsid(), op->tid,
+                                    op->data_pool,
                                     last_seen_pgmap_version));
   op->last_submit = ceph::mono_clock::now();
 
index edcee5b922d9f52eda5702d030a6326ac35cb80c..527022b5d2ac830318965a8723bef33111e4cc45 100644 (file)
@@ -1555,6 +1555,7 @@ public:
   struct StatfsOp {
     ceph_tid_t tid;
     struct ceph_statfs *stats;
+    boost::optional<int64_t> data_pool;
     Context *onfinish;
     uint64_t ontimeout;
 
@@ -2929,7 +2930,8 @@ private:
   void _fs_stats_submit(StatfsOp *op);
 public:
   void handle_fs_stats_reply(MStatfsReply *m);
-  void get_fs_stats(struct ceph_statfs& result, Context *onfinish);
+  void get_fs_stats(struct ceph_statfs& result, boost::optional<int64_t> poolid,
+                   Context *onfinish);
   int statfs_op_cancel(ceph_tid_t tid, int r);
   void _finish_statfs_op(StatfsOp *op, int r);
 
index 70cc7f8efe727eb19bbf2128e1abdfeb75fec29c..59dc312d9fa11ec7bfa9dd64f94018345316f4ea 100644 (file)
                         <ul>
                             <li rv-each-check="health.checks">
                                 <span rv-style="check.severity | health_color">{check.type}</span>:
-                                {check.message}
+                                {check.summary.message}
                             </li>
                         </ul>
                     </div>
index fe68c16321e7ed4c627e4992f53634caa9ac8faf..113180191dc9b4a02cbd99cd396eb21ef2d9f6cc 100644 (file)
@@ -278,8 +278,8 @@ cdef extern from "rados/librados.h" nogil:
     void rados_write_op_truncate(rados_write_op_t write_op, uint64_t offset)
     void rados_write_op_zero(rados_write_op_t write_op, uint64_t offset, uint64_t len)
 
-    void rados_read_op_omap_get_vals(rados_read_op_t read_op, const char * start_after, const char * filter_prefix, uint64_t max_return, rados_omap_iter_t * iter, int * prval)
-    void rados_read_op_omap_get_keys(rados_read_op_t read_op, const char * start_after, uint64_t max_return, rados_omap_iter_t * iter, int * prval)
+    void rados_read_op_omap_get_vals2(rados_read_op_t read_op, const char * start_after, const char * filter_prefix, uint64_t max_return, rados_omap_iter_t * iter, unsigned char *pmore, int * prval)
+    void rados_read_op_omap_get_keys2(rados_read_op_t read_op, const char * start_after, uint64_t max_return, rados_omap_iter_t * iter, unsigned char *pmore, int * prval)
     void rados_read_op_omap_get_vals_by_keys(rados_read_op_t read_op, const char * const* keys, size_t keys_len, rados_omap_iter_t * iter, int * prval)
     int rados_read_op_operate(rados_read_op_t read_op, rados_ioctx_t io, const char * oid, int flags)
     int rados_aio_read_op_operate(rados_read_op_t read_op, rados_ioctx_t io, rados_completion_t completion, const char *oid, int flags)
@@ -3341,8 +3341,8 @@ returned %d, but should return zero on success." % (self.name, ret))
             int prval = 0
 
         with nogil:
-            rados_read_op_omap_get_vals(_read_op.read_op, _start_after, _filter_prefix,
-                                        _max_return, &iter_addr,  &prval)
+            rados_read_op_omap_get_vals2(_read_op.read_op, _start_after, _filter_prefix,
+                                         _max_return, &iter_addr, NULL, &prval)
         it = OmapIterator(self)
         it.ctx = iter_addr
         return it, int(prval)
@@ -3368,8 +3368,8 @@ returned %d, but should return zero on success." % (self.name, ret))
             int prval = 0
 
         with nogil:
-            rados_read_op_omap_get_keys(_read_op.read_op, _start_after,
-                                        _max_return, &iter_addr,  &prval)
+            rados_read_op_omap_get_keys2(_read_op.read_op, _start_after,
+                                         _max_return, &iter_addr, NULL, &prval)
         it = OmapIterator(self)
         it.ctx = iter_addr
         return it, int(prval)
index 618992b776a1d876708f4c166cd98d45efb1def2..efeda706cc8a5beac236bcdc1ddc662e09d0cad6 100644 (file)
@@ -1,5 +1,5 @@
 add_executable(rbd-fuse
   rbd-fuse.cc)
 target_link_libraries(rbd-fuse
-  ceph-common librbd librados ${FUSE_LIBRARIES})
+  librbd librados ${FUSE_LIBRARIES})
 install(TARGETS rbd-fuse DESTINATION bin)
index c55f513f3c46a10140f538d2a7cd8f5f8a280984..6295097ca7fce5a868943137b012886067960e9c 100644 (file)
@@ -19,6 +19,7 @@
 #include <getopt.h>
 #include <assert.h>
 #include <string>
+#include <mutex>
 
 #if defined(__FreeBSD__)
 #include <sys/param.h>
@@ -26,7 +27,6 @@
 
 #include "include/compat.h"
 #include "include/rbd/librbd.h"
-#include "common/Mutex.h"
 
 static int gotrados = 0;
 char *pool_name;
@@ -34,7 +34,7 @@ char *mount_image_name;
 rados_t cluster;
 rados_ioctx_t ioctx;
 
-Mutex readdir_lock("read_dir");
+std::mutex readdir_lock;
 
 struct rbd_stat {
        u_char valid;
@@ -213,11 +213,11 @@ iter_images(void *cookie,
 {
        struct rbd_image *im;
 
-       readdir_lock.Lock();
-       
+       readdir_lock.lock();
+
        for (im = rbd_image_data.images; im != NULL; im = im->next)
                iter(cookie, im->image_name);
-       readdir_lock.Unlock();
+       readdir_lock.unlock();
 }
 
 static void count_images_cb(void *cookie, const char *image)
@@ -229,9 +229,9 @@ static int count_images(void)
 {
        unsigned int count = 0;
 
-       readdir_lock.Lock();
+       readdir_lock.lock();
        enumerate_images(&rbd_image_data);
-       readdir_lock.Unlock();
+       readdir_lock.unlock();
 
        iter_images(&count, count_images_cb);
        return count;
@@ -270,9 +270,9 @@ static int rbdfs_getattr(const char *path, struct stat *stbuf)
        }
 
        if (!in_opendir) {
-               readdir_lock.Lock();
+               readdir_lock.lock();
                enumerate_images(&rbd_image_data);
-               readdir_lock.Unlock();
+               readdir_lock.unlock();
        }
        fd = open_rbd_image(path + 1);
        if (fd < 0)
@@ -304,9 +304,9 @@ static int rbdfs_open(const char *path, struct fuse_file_info *fi)
        if (path[0] == 0)
                return -ENOENT;
 
-       readdir_lock.Lock();
+       readdir_lock.lock();
        enumerate_images(&rbd_image_data);
-       readdir_lock.Unlock();
+       readdir_lock.unlock();
        fd = open_rbd_image(path + 1);
        if (fd < 0)
                return -ENOENT;
@@ -402,9 +402,9 @@ static int rbdfs_statfs(const char *path, struct statvfs *buf)
 
        num[0] = 1;
        num[1] = 0;
-       readdir_lock.Lock();
+       readdir_lock.lock();
        enumerate_images(&rbd_image_data);
-       readdir_lock.Unlock();
+       readdir_lock.unlock();
        iter_images(num, rbdfs_statfs_image_cb);
 
 #define        RBDFS_BSIZE     4096
@@ -435,10 +435,10 @@ static int rbdfs_fsync(const char *path, int datasync,
 static int rbdfs_opendir(const char *path, struct fuse_file_info *fi)
 {
        // only one directory, so global "in_opendir" flag should be fine
-       readdir_lock.Lock();
+       readdir_lock.lock();
        in_opendir++;
        enumerate_images(&rbd_image_data);
-       readdir_lock.Unlock();
+       readdir_lock.unlock();
        return 0;
 }
 
@@ -476,9 +476,9 @@ static int rbdfs_readdir(const char *path, void *buf, fuse_fill_dir_t filler,
 static int rbdfs_releasedir(const char *path, struct fuse_file_info *fi)
 {
        // see opendir comments
-       readdir_lock.Lock();
+       readdir_lock.lock();
        in_opendir--;
-       readdir_lock.Unlock();
+       readdir_lock.unlock();
        return 0;
 }
 
index c5017c7654e9aebba382853d3b41653de9d7c4a3..fc3a33a0fdfd3279980fdb7633bcc2a4a16d7a13 100644 (file)
@@ -110,6 +110,7 @@ set(rgw_a_srcs
   rgw_rest_usage.cc
   rgw_rest_user.cc
   rgw_role.cc
+  rgw_string.cc
   rgw_swift_auth.cc
   rgw_tag.cc
   rgw_tag_s3.cc
index 9bb0b766aa29ebafb18257db3de95fb7dcf65f07..fb820e6384d06c044271d3dc131af8f5ae9e1657 100644 (file)
@@ -53,7 +53,7 @@ using namespace std;
 
 static RGWRados *store = NULL;
 
-void _usage()
+void usage()
 {
   cout << "usage: radosgw-admin <cmd> [options...]" << std::endl;
   cout << "commands:\n";
@@ -327,12 +327,6 @@ void _usage()
   generic_client_usage();
 }
 
-int usage()
-{
-  _usage();
-  return 1;
-}
-
 enum {
   OPT_NO_CMD = 0,
   OPT_USER_CREATE,
@@ -2215,7 +2209,6 @@ static void sync_status(Formatter *formatter)
 
   for (auto iter : store->zone_conn_map) {
     const string& source_id = iter.first;
-    string zone_name;
     string source_str = "source: ";
     string s = source_str + source_id;
     auto siter = store->zone_by_id.find(source_id);
@@ -2478,7 +2471,7 @@ int main(int argc, const char **argv)
       break;
     } else if (ceph_argparse_flag(args, i, "-h", "--help", (char*)NULL)) {
       usage();
-      return 0;
+      assert(false);
     } else if (ceph_argparse_witharg(args, i, &val, "-i", "--uid", (char*)NULL)) {
       user_id.from_str(val);
     } else if (ceph_argparse_witharg(args, i, &val, "--tenant", (char*)NULL)) {
@@ -2519,7 +2512,8 @@ int main(int argc, const char **argv)
         key_type = KEY_TYPE_S3;
       } else {
         cerr << "bad key type: " << key_type_str << std::endl;
-        return usage();
+        usage();
+       assert(false);
       }
     } else if (ceph_argparse_witharg(args, i, &val, "--job-id", (char*)NULL)) {
       job_id = val;
@@ -2632,7 +2626,8 @@ int main(int argc, const char **argv)
       bucket_id = val;
       if (bucket_id.empty()) {
         cerr << "bad bucket-id" << std::endl;
-        return usage();
+        usage();
+       assert(false);
       }
     } else if (ceph_argparse_witharg(args, i, &val, "--format", (char*)NULL)) {
       format = val;
@@ -2806,7 +2801,8 @@ int main(int argc, const char **argv)
   }
 
   if (args.empty()) {
-    return usage();
+    usage();
+    assert(false);
   }
   else {
     const char *prev_cmd = NULL;
@@ -2816,7 +2812,8 @@ int main(int argc, const char **argv)
       opt_cmd = get_cmd(*i, prev_cmd, prev_prev_cmd, &need_more);
       if (opt_cmd < 0) {
        cerr << "unrecognized arg " << *i << std::endl;
-       return usage();
+       usage();
+       assert(false);
       }
       if (!need_more) {
        ++i;
@@ -2826,8 +2823,10 @@ int main(int argc, const char **argv)
       prev_cmd = *i;
     }
 
-    if (opt_cmd == OPT_NO_CMD)
-      return usage();
+    if (opt_cmd == OPT_NO_CMD) {
+      usage();
+      assert(false);
+    }
 
     /* some commands may have an optional extra param */
     if (i != args.end()) {
@@ -2883,7 +2882,8 @@ int main(int argc, const char **argv)
     formatter = new JSONFormatter(pretty_format);
   else {
     cerr << "unrecognized format: " << format << std::endl;
-    return usage();
+    usage();
+    assert(false);
   }
 
   realm_name = g_conf->rgw_realm;
@@ -4436,6 +4436,9 @@ int main(int argc, const char **argv)
     ret = user.add(user_op, &err_msg);
     if (ret < 0) {
       cerr << "could not create user: " << err_msg << std::endl;
+      if (ret == -ERR_INVALID_TENANT_NAME)
+       ret = -EINVAL;
+
       return -ret;
     }
     if (!subuser.empty()) {
@@ -5013,7 +5016,8 @@ int main(int argc, const char **argv)
   if (opt_cmd == OPT_LOG_SHOW || opt_cmd == OPT_LOG_RM) {
     if (object.empty() && (date.empty() || bucket_name.empty() || bucket_id.empty())) {
       cerr << "specify an object or a date, bucket and bucket-id" << std::endl;
-      return usage();
+      usage();
+      assert(false);
     }
 
     string oid;
@@ -5111,7 +5115,8 @@ next:
   if (opt_cmd == OPT_POOL_ADD) {
     if (pool_name.empty()) {
       cerr << "need to specify pool to add!" << std::endl;
-      return usage();
+      usage();
+      assert(false);
     }
 
     int ret = store->add_bucket_placement(pool);
@@ -5122,7 +5127,8 @@ next:
   if (opt_cmd == OPT_POOL_RM) {
     if (pool_name.empty()) {
       cerr << "need to specify pool to remove!" << std::endl;
-      return usage();
+      usage();
+      assert(false);
     }
 
     int ret = store->remove_bucket_placement(pool);
index a8c0ec86ad5a04d5b200b2acabb86fe335f2230b..65a8b034f97932b121b68709eebefe90ac067d5e 100644 (file)
@@ -528,9 +528,9 @@ rgw::auth::AnonymousEngine::authenticate(const req_state* const s) const
     RGWUserInfo user_info;
     rgw_get_anon_user(user_info);
 
-    // FIXME: over 80 columns
-    auto apl = apl_factory->create_apl_local(cct, s, user_info,
-                                             rgw::auth::LocalApplier::NO_SUBUSER);
+    auto apl = \
+      apl_factory->create_apl_local(cct, s, user_info,
+                                    rgw::auth::LocalApplier::NO_SUBUSER);
     return result_t::grant(std::move(apl));
   }
 }
index 2b918f4fc3d208503809c9d091ffd439b03945d3..08a93c73dac2f4c9bb52ad87a2b6135c209ab85d 100644 (file)
@@ -21,14 +21,16 @@ namespace auth {
 /* A class aggregating the knowledge about all Strategies in RadosGW. It is
  * responsible for handling the dynamic reconfiguration on e.g. realm update. */
 class StrategyRegistry {
-  template <class AbstractorT>
-  using s3_strategy_t = rgw::auth::s3::AWSAuthStrategy<AbstractorT>;
+  template <class AbstractorT,
+            bool AllowAnonAccessT = false>
+  using s3_strategy_t = \
+    rgw::auth::s3::AWSAuthStrategy<AbstractorT, AllowAnonAccessT>;
 
   struct s3_main_strategy_t : public Strategy {
     using s3_main_strategy_plain_t = \
-      s3_strategy_t<rgw::auth::s3::AWSGeneralAbstractor>;
+      s3_strategy_t<rgw::auth::s3::AWSGeneralAbstractor, true>;
     using s3_main_strategy_boto2_t = \
-      s3_strategy_t<rgw::auth::s3::AWSGeneralBoto2Abstractor>;
+      s3_strategy_t<rgw::auth::s3::AWSGeneralBoto2Abstractor, true>;
 
     s3_main_strategy_plain_t s3_main_strategy_plain;
     s3_main_strategy_boto2_t s3_main_strategy_boto2;
index d82fd7b02207c93a09c09b5df39e3705689f4e02..9369864259f045a00465cc37f6dee059c24c5fd4 100644 (file)
@@ -80,7 +80,8 @@ public:
 };
 
 
-template <class AbstractorT>
+template <class AbstractorT,
+          bool AllowAnonAccessT = false>
 class AWSAuthStrategy : public rgw::auth::Strategy,
                         public rgw::auth::LocalApplier::Factory {
   typedef rgw::auth::IdentityApplier::aplptr_t aplptr_t;
@@ -92,6 +93,7 @@ class AWSAuthStrategy : public rgw::auth::Strategy,
   RGWRados* const store;
   AbstractorT ver_abstractor;
 
+  S3AnonymousEngine anonymous_engine;
   ExternalAuthStrategy external_engines;
   LocalEngine local_engine;
 
@@ -110,10 +112,17 @@ public:
                   RGWRados* const store)
     : store(store),
       ver_abstractor(cct),
+      anonymous_engine(cct,
+                       static_cast<rgw::auth::LocalApplier::Factory*>(this)),
       external_engines(cct, store, &ver_abstractor),
       local_engine(cct, store, ver_abstractor,
                    static_cast<rgw::auth::LocalApplier::Factory*>(this)) {
+    /* The anonymous auth. */
+    if (AllowAnonAccessT) {
+      add_engine(Control::SUFFICIENT, anonymous_engine);
+    }
 
+    /* The external auth. */
     Control local_engine_mode;
     if (! external_engines.is_empty()) {
       add_engine(Control::SUFFICIENT, external_engines);
@@ -123,6 +132,7 @@ public:
       local_engine_mode = Control::SUFFICIENT;
     }
 
+    /* The local auth. */
     if (cct->_conf->rgw_s3_auth_use_rados) {
       add_engine(local_engine_mode, local_engine);
     }
index 4648d94aee18b67e46c43a1574ebafff0dd7d33f..41b7d12315066a5ffd7188bd4a9e275e6806069c 100644 (file)
@@ -422,16 +422,9 @@ int rgw_bucket_set_attrs(RGWRados *store, RGWBucketInfo& bucket_info,
 static void dump_mulipart_index_results(list<rgw_obj_index_key>& objs_to_unlink,
         Formatter *f)
 {
-  // make sure that an appropiately titled header has been opened previously
-  auto oiter = objs_to_unlink.begin();
-
-  f->open_array_section("invalid_multipart_entries");
-
-  for ( ; oiter != objs_to_unlink.end(); ++oiter) {
-    f->dump_string("object",  oiter->name);
+  for (const auto& o : objs_to_unlink) {
+    f->dump_string("object",  o.name);
   }
-
-  f->close_section();
 }
 
 void check_bad_user_bucket_mapping(RGWRados *store, const rgw_user& user_id,
@@ -1005,12 +998,12 @@ static void dump_index_check(map<RGWObjCategory, RGWStorageStats> existing_stats
 }
 
 int RGWBucket::check_bad_index_multipart(RGWBucketAdminOpState& op_state,
-        list<rgw_obj_index_key>& objs_to_unlink, std::string *err_msg)
+               RGWFormatterFlusher& flusher ,std::string *err_msg)
 {
   bool fix_index = op_state.will_fix_index();
   rgw_bucket bucket = op_state.get_bucket();
 
-  int max = 1000;
+  size_t max = 1000;
 
   map<string, bool> common_prefixes;
 
@@ -1067,16 +1060,33 @@ int RGWBucket::check_bad_index_multipart(RGWBucketAdminOpState& op_state,
 
   } while (is_truncated);
 
+  list<rgw_obj_index_key> objs_to_unlink;
+  Formatter *f =  flusher.get_formatter();
+
+  f->open_array_section("invalid_multipart_entries");
+
   for (auto aiter = all_objs.begin(); aiter != all_objs.end(); ++aiter) {
     string& name = aiter->second;
 
     if (meta_objs.find(name) == meta_objs.end()) {
       objs_to_unlink.push_back(aiter->first);
     }
-  }
 
-  if (objs_to_unlink.empty())
-    return 0;
+    if (objs_to_unlink.size() > max) {
+      if (fix_index) {
+       int r = store->remove_objs_from_index(bucket_info, objs_to_unlink);
+       if (r < 0) {
+         set_err_msg(err_msg, "ERROR: remove_obj_from_index() returned error: " +
+                     cpp_strerror(-r));
+         return r;
+       }
+      }
+
+      dump_mulipart_index_results(objs_to_unlink, flusher.get_formatter());
+      flusher.flush();
+      objs_to_unlink.clear();
+    }
+  }
 
   if (fix_index) {
     int r = store->remove_objs_from_index(bucket_info, objs_to_unlink);
@@ -1088,6 +1098,10 @@ int RGWBucket::check_bad_index_multipart(RGWBucketAdminOpState& op_state,
     }
   }
 
+  dump_mulipart_index_results(objs_to_unlink, f);
+  f->close_section();
+  flusher.flush();
+
   return 0;
 }
 
@@ -1309,10 +1323,9 @@ int RGWBucketAdminOp::check_index(RGWRados *store, RGWBucketAdminOpState& op_sta
                   RGWFormatterFlusher& flusher)
 {
   int ret;
-  map<string, rgw_bucket_dir_entry> result;
   map<RGWObjCategory, RGWStorageStats> existing_stats;
   map<RGWObjCategory, RGWStorageStats> calculated_stats;
-  list<rgw_obj_index_key> objs_to_unlink;
+
 
   RGWBucket bucket;
 
@@ -1323,13 +1336,10 @@ int RGWBucketAdminOp::check_index(RGWRados *store, RGWBucketAdminOpState& op_sta
   Formatter *formatter = flusher.get_formatter();
   flusher.start(0);
 
-  ret = bucket.check_bad_index_multipart(op_state, objs_to_unlink);
+  ret = bucket.check_bad_index_multipart(op_state, flusher);
   if (ret < 0)
     return ret;
 
-  dump_mulipart_index_results(objs_to_unlink, formatter);
-  flusher.flush();
-
   ret = bucket.check_object_index(op_state, flusher);
   if (ret < 0)
     return ret;
index b4c32b2c570e26f2e7e315ce7c8637d7b030e1be..6cc20eb8f6149107e5b6900fc1b85f2c3accb978 100644 (file)
@@ -274,7 +274,7 @@ public:
   int init(RGWRados *storage, RGWBucketAdminOpState& op_state);
 
   int check_bad_index_multipart(RGWBucketAdminOpState& op_state,
-          list<rgw_obj_index_key>& objs_to_unlink, std::string *err_msg = NULL);
+              RGWFormatterFlusher& flusher, std::string *err_msg = NULL);
 
   int check_object_index(RGWBucketAdminOpState& op_state,
                          RGWFormatterFlusher& flusher,
index caad672c120c6857e837e1d538ba683e7aa1b8a4..02b807efc1e4281da9362dff523ce39737032694 100644 (file)
@@ -999,6 +999,18 @@ const string& RGWHTTPArgs::get(const string& name, bool *exists) const
   return empty_str;
 }
 
+boost::optional<const std::string&>
+RGWHTTPArgs::get_optional(const std::string& name) const
+{
+  bool exists;
+  const std::string& value = get(name, &exists);
+  if (exists) {
+    return value;
+  } else {
+    return boost::none;
+  }
+}
+
 int RGWHTTPArgs::get_bool(const string& name, bool *val, bool *exists)
 {
   map<string, string>::iterator iter;
@@ -1849,77 +1861,28 @@ int rgw_parse_op_type_list(const string& str, uint32_t *perm)
   return parse_list_of_flags(op_type_mapping, str, perm);
 }
 
-static int match_internal(boost::string_ref pattern, boost::string_ref input, int (*function)(const char&, const char&))
-{
-  boost::string_ref::iterator it1 = pattern.begin();
-  boost::string_ref::iterator it2 = input.begin();
-  while(true) {
-    if (it1 == pattern.end() && it2 == input.end())
-        return 1;
-    if (it1 == pattern.end() || it2 == input.end())
-        return 0;
-    if (*it1 == '*' && (it1 + 1) == pattern.end() && it2 != input.end())
-      return 1;
-    if (*it1 == '*' && (it1 + 1) == pattern.end() && it2 == input.end())
-      return 0;
-    if (function(*it1, *it2) || *it1 == '?') {
-      ++it1;
-      ++it2;
-      continue;
-    }
-    if (*it1 == '*') {
-      if (function(*(it1 + 1), *it2))
-        ++it1;
-      else
-        ++it2;
-      continue;
-    }
-    return 0;
-  }
-  return 0;
-}
-
-static int matchcase(const char& c1, const char& c2)
-{
-  if (c1 == c2)
-      return 1;
-  return 0;
-}
-
-static int matchignorecase(const char& c1, const char& c2)
-{
-  if (tolower(c1) == tolower(c2))
-      return 1;
-  return 0;
-}
-
-int match(const string& pattern, const string& input, uint32_t flag)
+bool match_policy(boost::string_view pattern, boost::string_view input,
+                  uint32_t flag)
 {
-  auto last_pos_input = 0, last_pos_pattern = 0;
+  const uint32_t flag2 = flag & (MATCH_POLICY_ACTION|MATCH_POLICY_ARN) ?
+      MATCH_CASE_INSENSITIVE : 0;
 
-  while(true) {
+  const auto npos = boost::string_view::npos;
+  boost::string_view::size_type last_pos_input = 0, last_pos_pattern = 0;
+  while (true) {
     auto cur_pos_input = input.find(":", last_pos_input);
     auto cur_pos_pattern = pattern.find(":", last_pos_pattern);
 
-    string substr_input = input.substr(last_pos_input, cur_pos_input);
-    string substr_pattern = pattern.substr(last_pos_pattern, cur_pos_pattern);
+    auto substr_input = input.substr(last_pos_input, cur_pos_input);
+    auto substr_pattern = pattern.substr(last_pos_pattern, cur_pos_pattern);
 
-    int res;
-    if (substr_pattern == "*") {
-      res = 1;
-    } else if (flag & MATCH_POLICY_ACTION || flag & MATCH_POLICY_ARN) {
-      res = match_internal(substr_pattern, substr_input, &matchignorecase);
-    } else {
-      res = match_internal(substr_pattern, substr_input, &matchcase);
-    }
-    if (res == 0)
-      return 0;
-
-    if (cur_pos_pattern == string::npos && cur_pos_input == string::npos)
-      return 1;
-    else if ((cur_pos_pattern == string::npos && cur_pos_input != string::npos) ||
-            (cur_pos_pattern != string::npos && cur_pos_input == string::npos))
-      return 0;
+    if (!match_wildcards(substr_pattern, substr_input, flag2))
+      return false;
+
+    if (cur_pos_pattern == npos)
+      return cur_pos_input == npos;
+    if (cur_pos_input == npos)
+      return false;
 
     last_pos_pattern = cur_pos_pattern + 1;
     last_pos_input = cur_pos_input + 1;
index 6129d24e86b6e58b4cbc7795fde9ee346ed2c9ea..4fb476859e70b3baec11ce99fc87358247e63882 100644 (file)
@@ -337,6 +337,8 @@ class RGWHTTPArgs
   void append(const string& name, const string& val);
   /** Get the value for a specific argument parameter */
   const string& get(const string& name, bool *exists = NULL) const;
+  boost::optional<const std::string&>
+  get_optional(const std::string& name) const;
   int get_bool(const string& name, bool *val, bool *exists);
   int get_bool(const char *name, bool *val, bool *exists);
   void get_bool(const char *name, bool *val, bool def_val);
@@ -484,6 +486,9 @@ enum RGWOpType {
   RGW_OP_PUT_OBJ_TAGGING,
   RGW_OP_GET_OBJ_TAGGING,
   RGW_OP_DELETE_OBJ_TAGGING,
+  RGW_OP_PUT_LC,
+  RGW_OP_GET_LC,
+  RGW_OP_DELETE_LC,
   /* rgw specific */
   RGW_OP_ADMIN_SET_METADATA,
   RGW_OP_GET_OBJ_LAYOUT,
@@ -1630,7 +1635,6 @@ struct rgw_obj_key {
    * part of the given namespace, it returns false.
    */
   static bool oid_to_key_in_ns(const string& oid, rgw_obj_key *key, const string& ns) {
-    string obj_ns;
     bool ret = parse_raw_oid(oid, key);
     if (!ret) {
       return ret;
@@ -2307,12 +2311,12 @@ extern std::string calc_hash_sha256_restart_stream(ceph::crypto::SHA256** phash)
 
 extern int rgw_parse_op_type_list(const string& str, uint32_t *perm);
 
-namespace {
-  constexpr uint32_t MATCH_POLICY_ACTION = 0x01;
-  constexpr uint32_t MATCH_POLICY_RESOURCE = 0x02;
-  constexpr uint32_t MATCH_POLICY_ARN = 0x04;
-  constexpr uint32_t MATCH_POLICY_STRING = 0x08;
-}
+static constexpr uint32_t MATCH_POLICY_ACTION = 0x01;
+static constexpr uint32_t MATCH_POLICY_RESOURCE = 0x02;
+static constexpr uint32_t MATCH_POLICY_ARN = 0x04;
+static constexpr uint32_t MATCH_POLICY_STRING = 0x08;
+
+extern bool match_policy(boost::string_view pattern, boost::string_view input,
+                         uint32_t flag);
 
-int match(const std::string& pattern, const std::string& input, uint32_t flag);
 #endif
index dabc0be97a8ca2d6a2e0ff90c9857700b6516647..88abd3b833d687c782b5f3c45073842d91f12b17 100644 (file)
@@ -1022,21 +1022,27 @@ int rgw_s3_prepare_encrypt(struct req_state* s,
         get_crypt_attribute(s->info.env, parts, X_AMZ_SERVER_SIDE_ENCRYPTION_CUSTOMER_ALGORITHM);
     if (! req_sse_ca.empty()) {
       if (req_sse_ca != "AES256") {
+        ldout(s->cct, 5) << "ERROR: Invalid value for header "
+                         << "x-amz-server-side-encryption-customer-algorithm"
+                         << dendl;
         return -ERR_INVALID_REQUEST;
       }
       if (s->cct->_conf->rgw_crypt_require_ssl &&
           !s->info.env->exists("SERVER_PORT_SECURE")) {
+        ldout(s->cct, 5) << "ERROR: Insecure request, rgw_crypt_require_ssl is set" << dendl;
         return -ERR_INVALID_REQUEST;
       }
       std::string key_bin = from_base64(
           get_crypt_attribute(s->info.env, parts, X_AMZ_SERVER_SIDE_ENCRYPTION_CUSTOMER_KEY) );
       if (key_bin.size() != AES_256_CBC::AES_256_KEYSIZE) {
+        ldout(s->cct, 5) << "ERROR: invalid encryption key size" << dendl;
         return -ERR_INVALID_REQUEST;
       }
       boost::string_view keymd5 =
           get_crypt_attribute(s->info.env, parts, X_AMZ_SERVER_SIDE_ENCRYPTION_CUSTOMER_KEY_MD5);
       std::string keymd5_bin = from_base64(keymd5);
       if (keymd5_bin.size() != CEPH_CRYPTO_MD5_DIGESTSIZE) {
+        ldout(s->cct, 5) << "ERROR: Invalid key md5 size" << dendl;
         return -ERR_INVALID_DIGEST;
       }
       MD5 key_hash;
@@ -1045,6 +1051,7 @@ int rgw_s3_prepare_encrypt(struct req_state* s,
       key_hash.Final(key_hash_res);
 
       if (memcmp(key_hash_res, keymd5_bin.c_str(), CEPH_CRYPTO_MD5_DIGESTSIZE) != 0) {
+        ldout(s->cct, 5) << "ERROR: Invalid key md5 hash" << dendl;
         return -ERR_INVALID_DIGEST;
       }
 
@@ -1066,10 +1073,13 @@ int rgw_s3_prepare_encrypt(struct req_state* s,
         get_crypt_attribute(s->info.env, parts, X_AMZ_SERVER_SIDE_ENCRYPTION);
     if (! req_sse.empty()) {
       if (req_sse != "aws:kms") {
+        ldout(s->cct, 5) << "ERROR: Invalid value for header x-amz-server-side-encryption"
+                         << dendl;
         return -ERR_INVALID_REQUEST;
       }
       if (s->cct->_conf->rgw_crypt_require_ssl &&
           !s->info.env->exists("SERVER_PORT_SECURE")) {
+        ldout(s->cct, 5) << "ERROR: insecure request, rgw_crypt_require_ssl is set" << dendl;
         return -ERR_INVALID_REQUEST;
       }
       boost::string_view key_id =
@@ -1148,18 +1158,23 @@ int rgw_s3_prepare_decrypt(struct req_state* s,
   if (stored_mode == "SSE-C-AES256") {
     if (s->cct->_conf->rgw_crypt_require_ssl &&
         !s->info.env->exists("SERVER_PORT_SECURE")) {
+      ldout(s->cct, 5) << "ERROR: Insecure request, rgw_crypt_require_ssl is set" << dendl;
       return -ERR_INVALID_REQUEST;
     }
     const char *req_cust_alg =
         s->info.env->get("HTTP_X_AMZ_SERVER_SIDE_ENCRYPTION_CUSTOMER_ALGORITHM", NULL);
 
     if ((nullptr == req_cust_alg) || (strcmp(req_cust_alg, "AES256") != 0)) {
+      ldout(s->cct, 5) << "ERROR: Invalid value for header "
+                       << "x-amz-server-side-encryption-customer-algorithm"
+                       << dendl;
       return -ERR_INVALID_REQUEST;
     }
 
     std::string key_bin =
         from_base64(s->info.env->get("HTTP_X_AMZ_SERVER_SIDE_ENCRYPTION_CUSTOMER_KEY", ""));
     if (key_bin.size() != AES_256_CBC::AES_256_KEYSIZE) {
+      ldout(s->cct, 5) << "ERROR: Invalid encryption key size" << dendl;
       return -ERR_INVALID_REQUEST;
     }
 
@@ -1167,6 +1182,7 @@ int rgw_s3_prepare_decrypt(struct req_state* s,
         s->info.env->get("HTTP_X_AMZ_SERVER_SIDE_ENCRYPTION_CUSTOMER_KEY_MD5", "");
     std::string keymd5_bin = from_base64(keymd5);
     if (keymd5_bin.size() != CEPH_CRYPTO_MD5_DIGESTSIZE) {
+      ldout(s->cct, 5) << "ERROR: Invalid key md5 size " << dendl;
       return -ERR_INVALID_DIGEST;
     }
 
@@ -1191,6 +1207,7 @@ int rgw_s3_prepare_decrypt(struct req_state* s,
   if (stored_mode == "SSE-KMS") {
     if (s->cct->_conf->rgw_crypt_require_ssl &&
         !s->info.env->exists("SERVER_PORT_SECURE")) {
+      ldout(s->cct, 5) << "ERROR: Insecure request, rgw_crypt_require_ssl is set" << dendl;
       return -ERR_INVALID_REQUEST;
     }
     /* try to retrieve actual key */
index 78b2865ed3327d8186f4c4ac9829f50e9cc6a7dc..43145835b5831161f4592b0e3bf54a80f92e0978 100644 (file)
@@ -22,8 +22,6 @@ int main(int argc, char *argv[])
 
   common_init_finish(g_ceph_context);
 
-  list<string> infix;
-
   string expr;
 
   if (argc > 1) {
index e79468e79cb5f94d4ed8c4655d632cd3412d656b..28b97d04d5c3ee2918abd50d3c1243f83b6176a7 100644 (file)
@@ -11,6 +11,7 @@
 #include <iostream>
 #include "rapidjson/reader.h"
 
+#include "common/backport14.h"
 #include "rgw_auth.h"
 #include <arpa/inet.h>
 #include "rgw_iam_policy.h"
@@ -389,15 +390,15 @@ bool ARN::match(const ARN& candidate) const {
     return false;
   }
 
-  if (!::match(region, candidate.region, MATCH_POLICY_ARN)) {
+  if (!match_policy(region, candidate.region, MATCH_POLICY_ARN)) {
     return false;
   }
 
-  if (!::match(account, candidate.account, MATCH_POLICY_ARN)) {
+  if (!match_policy(account, candidate.account, MATCH_POLICY_ARN)) {
     return false;
   }
 
-  if (!::match(resource, candidate.resource, MATCH_POLICY_ARN)) {
+  if (!match_policy(resource, candidate.resource, MATCH_POLICY_ARN)) {
     return false;
   }
 
@@ -506,7 +507,7 @@ struct PolicyParser : public BaseReaderHandler<UTF8<>, PolicyParser> {
   CephContext* cct;
   const string& tenant;
   Policy& policy;
-  std::set<TokenID> v;
+  uint32_t v = 0;
 
   uint32_t seen = 0;
 
@@ -553,49 +554,59 @@ struct PolicyParser : public BaseReaderHandler<UTF8<>, PolicyParser> {
   }
   void set(TokenID in) {
     seen |= dex(in);
-    if (in == TokenID::Sid || in == TokenID::Effect || in == TokenID::Principal || in == TokenID::NotPrincipal ||
-         in == TokenID::Action || in == TokenID::NotAction || in == TokenID::Resource || in == TokenID::NotResource ||
-            in == TokenID::Condition || in == TokenID::AWS || in == TokenID::Federated || in == TokenID::Service ||
-              in == TokenID::CanonicalUser) {
-      v.insert(in);
+    if (dex(in) & (dex(TokenID::Sid) | dex(TokenID::Effect) |
+                  dex(TokenID::Principal) | dex(TokenID::NotPrincipal) |
+                  dex(TokenID::Action) | dex(TokenID::NotAction) |
+                  dex(TokenID::Resource) | dex(TokenID::NotResource) |
+                  dex(TokenID::Condition) | dex(TokenID::AWS) |
+                  dex(TokenID::Federated) | dex(TokenID::Service) |
+                  dex(TokenID::CanonicalUser))) {
+      v |= dex(in);
     }
   }
   void set(std::initializer_list<TokenID> l) {
     for (auto in : l) {
       seen |= dex(in);
-      if (in == TokenID::Sid || in == TokenID::Effect || in == TokenID::Principal || in == TokenID::NotPrincipal ||
-         in == TokenID::Action || in == TokenID::NotAction || in == TokenID::Resource || in == TokenID::NotResource ||
-            in == TokenID::Condition || in == TokenID::AWS || in == TokenID::Federated || in == TokenID::Service ||
-              in == TokenID::CanonicalUser) {
-        v.insert(in);
+      if (dex(in) & (dex(TokenID::Sid) | dex(TokenID::Effect) |
+                    dex(TokenID::Principal) | dex(TokenID::NotPrincipal) |
+                    dex(TokenID::Action) | dex(TokenID::NotAction) |
+                    dex(TokenID::Resource) | dex(TokenID::NotResource) |
+                    dex(TokenID::Condition) | dex(TokenID::AWS) |
+                    dex(TokenID::Federated) | dex(TokenID::Service) |
+                    dex(TokenID::CanonicalUser))) {
+       v |= dex(in);
       }
     }
   }
   void reset(TokenID in) {
     seen &= ~dex(in);
-    if (in == TokenID::Sid || in == TokenID::Effect || in == TokenID::Principal || in == TokenID::NotPrincipal ||
-         in == TokenID::Action || in == TokenID::NotAction || in == TokenID::Resource || in == TokenID::NotResource ||
-            in == TokenID::Condition || in == TokenID::AWS || in == TokenID::Federated || in == TokenID::Service ||
-              in == TokenID::CanonicalUser) {
-      v.erase(in);
+    if (dex(in) & (dex(TokenID::Sid) | dex(TokenID::Effect) |
+                  dex(TokenID::Principal) | dex(TokenID::NotPrincipal) |
+                  dex(TokenID::Action) | dex(TokenID::NotAction) |
+                  dex(TokenID::Resource) | dex(TokenID::NotResource) |
+                  dex(TokenID::Condition) | dex(TokenID::AWS) |
+                  dex(TokenID::Federated) | dex(TokenID::Service) |
+                  dex(TokenID::CanonicalUser))) {
+      v &= ~dex(in);
     }
   }
   void reset(std::initializer_list<TokenID> l) {
     for (auto in : l) {
       seen &= ~dex(in);
-      if (in == TokenID::Sid || in == TokenID::Effect || in == TokenID::Principal || in == TokenID::NotPrincipal ||
-         in == TokenID::Action || in == TokenID::NotAction || in == TokenID::Resource || in == TokenID::NotResource ||
-            in == TokenID::Condition || in == TokenID::AWS || in == TokenID::Federated || in == TokenID::Service ||
-              in == TokenID::CanonicalUser) {
-        v.erase(in);
+      if (dex(in) & (dex(TokenID::Sid) | dex(TokenID::Effect) |
+                    dex(TokenID::Principal) | dex(TokenID::NotPrincipal) |
+                    dex(TokenID::Action) | dex(TokenID::NotAction) |
+                    dex(TokenID::Resource) | dex(TokenID::NotResource) |
+                    dex(TokenID::Condition) | dex(TokenID::AWS) |
+                    dex(TokenID::Federated) | dex(TokenID::Service) |
+                    dex(TokenID::CanonicalUser))) {
+       v &= ~dex(in);
       }
     }
   }
-  void reset(std::set<TokenID> v) {
-    for (auto in : v) {
-      seen &= ~dex(in);
-      v.erase(in);
-    }
+  void reset(uint32_t& v) {
+    seen &= ~v;
+    v = 0;
   }
 
   PolicyParser(CephContext* cct, const string& tenant, Policy& policy)
@@ -687,9 +698,11 @@ bool ParseState::key(const char* s, size_t l) {
 
   if (!k) {
     if (w->kind == TokenKind::cond_op) {
+      auto id = w->id;
       auto& t = pp->policy.statements.back();
+      auto c_ife =  cond_ifexists;
       pp->s.emplace_back(pp, cond_key);
-      t.conditions.emplace_back(w->id, s, l, cond_ifexists);
+      t.conditions.emplace_back(id, s, l, c_ife);
       return true;
     } else {
       return false;
@@ -802,7 +815,7 @@ bool ParseState::do_string(CephContext* cct, const char* s, size_t l) {
   } else if ((w->id == TokenID::Action) ||
             (w->id == TokenID::NotAction)) {
     for (auto& p : actpairs) {
-      if (match({s, l}, p.name, MATCH_POLICY_ACTION)) {
+      if (match_policy({s, l}, p.name, MATCH_POLICY_ACTION)) {
        (w->id == TokenID::Action ? t->action : t->notaction) |= p.bit;
       }
     }
@@ -947,28 +960,27 @@ bool Condition::eval(const Environment& env) const {
     return orrible(std::equal_to<std::string>(), s, vals);
 
   case TokenID::StringNotEquals:
-    return orrible(std::not2(std::equal_to<std::string>()),
+    return orrible(ceph::not_fn(std::equal_to<std::string>()),
                   s, vals);
 
   case TokenID::StringEqualsIgnoreCase:
     return orrible(ci_equal_to(), s, vals);
 
   case TokenID::StringNotEqualsIgnoreCase:
-    return orrible(std::not2(ci_equal_to()), s, vals);
+    return orrible(ceph::not_fn(ci_equal_to()), s, vals);
 
-    // Implement actual StringLike with wildcarding later
   case TokenID::StringLike:
-    return orrible(std::equal_to<std::string>(), s, vals);
+    return orrible(string_like(), s, vals);
+
   case TokenID::StringNotLike:
-    return orrible(std::not2(std::equal_to<std::string>()),
-                  s, vals);
+    return orrible(ceph::not_fn(string_like()), s, vals);
 
     // Numeric
   case TokenID::NumericEquals:
     return shortible(std::equal_to<double>(), as_number, s, vals);
 
   case TokenID::NumericNotEquals:
-    return shortible(std::not2(std::equal_to<double>()),
+    return shortible(ceph::not_fn(std::equal_to<double>()),
                     as_number, s, vals);
 
 
@@ -990,7 +1002,7 @@ bool Condition::eval(const Environment& env) const {
     return shortible(std::equal_to<ceph::real_time>(), as_date, s, vals);
 
   case TokenID::DateNotEquals:
-    return shortible(std::not2(std::equal_to<ceph::real_time>()),
+    return shortible(ceph::not_fn(std::equal_to<ceph::real_time>()),
                     as_date, s, vals);
 
   case TokenID::DateLessThan:
@@ -1021,7 +1033,7 @@ bool Condition::eval(const Environment& env) const {
     return shortible(std::equal_to<MaskedIP>(), as_network, s, vals);
 
   case TokenID::NotIpAddress:
-    return shortible(std::not2(std::equal_to<MaskedIP>()), as_network, s,
+    return shortible(ceph::not_fn(std::equal_to<MaskedIP>()), as_network, s,
                     vals);
 
 #if 0
index aa121f5d0add7007010ed671d93a30c8d0ce6471..5236e6b02192179af17101d59518cffa19e5bbf1 100644 (file)
 #include "rapidjson/error/error.h"
 #include "rapidjson/error/en.h"
 
-#include "fnmatch.h"
-
 #include "rgw_acl.h"
 #include "rgw_basic_types.h"
 #include "rgw_iam_policy_keywords.h"
+#include "rgw_string.h"
 
 #include "include/assert.h" // razzin' frazzin' ...grrr.
 
@@ -353,15 +352,19 @@ struct Condition {
   static boost::optional<MaskedIP> as_network(const std::string& s);
 
 
-  struct ci_equal_to : public std::binary_function<const std::string,
-                                                  const std::string,
-                                                  bool> {
+  struct ci_equal_to {
     bool operator ()(const std::string& s1,
                     const std::string& s2) const {
       return boost::iequals(s1, s2);
     }
   };
 
+  /// Binary predicate used for the StringLike / StringNotLike conditions:
+  /// returns whether `input` matches the wildcard `pattern`.
+  /// Note the argument swap: this predicate takes (input, pattern), while
+  /// match_wildcards() takes (pattern, input). Flags are 0, i.e. the
+  /// MATCH_CASE_INSENSITIVE bit is not set.
+  struct string_like {
+    bool operator ()(const std::string& input,
+                     const std::string& pattern) const {
+      return match_wildcards(pattern, input, 0);
+    }
+  };
 
   template<typename F>
   static bool orrible(F&& f, const std::string& c,
index 72a4ec86fe28cdb889e6f6409486a4e8cddc3e45..a4694e3df1209c9e0d09a93792a0bd604f96608e 100644 (file)
@@ -2523,7 +2523,7 @@ void RGWCreateBucket::execute()
       return;
   }
 
-  if (!store->get_zonegroup().is_master_zonegroup() &&
+  if (!store->get_zonegroup().is_master_zonegroup() && !location_constraint.empty() &&
       store->get_zonegroup().api_name != location_constraint) {
     ldout(s->cct, 0) << "location constraint (" << location_constraint << ")"
                      << " doesn't match zonegroup" << " (" << store->get_zonegroup().api_name << ")"
@@ -5276,6 +5276,33 @@ void RGWCompleteMultipart::execute()
   meta_obj.set_in_extra_data(true);
   meta_obj.index_hash_source = s->object.name;
 
+  /*take a cls lock on meta_obj to prevent racing completions (or retries)
+    from deleting the parts*/
+  rgw_pool meta_pool;
+  rgw_raw_obj raw_obj;
+  librados::ObjectWriteOperation op;
+  librados::IoCtx ioctx;
+  rados::cls::lock::Lock l("RGWCompleteMultipart");
+  int max_lock_secs_mp = s->cct->_conf->get_val<int64_t>("rgw_mp_lock_max_time");
+
+  op.assert_exists();
+  store->obj_to_raw((s->bucket_info).placement_rule, meta_obj, &raw_obj);
+  store->get_obj_data_pool((s->bucket_info).placement_rule,meta_obj,&meta_pool);
+  store->open_pool_ctx(meta_pool, ioctx);
+
+  const string raw_meta_oid = raw_obj.oid;
+  utime_t time(max_lock_secs_mp, 0);
+  l.set_duration(time);
+  l.lock_exclusive(&op);
+  op_ret = ioctx.operate(raw_meta_oid, &op);
+
+  if (op_ret < 0) {
+    dout(0) << "RGWCompleteMultipart::execute() failed to acquire lock " << dendl;
+    op_ret = -ERR_INTERNAL_ERROR;
+    s->err.message = "This multipart completion is already in progress";
+    return;
+  }
+
   op_ret = get_obj_attrs(store, s, meta_obj, attrs);
 
   if (op_ret < 0) {
@@ -5426,6 +5453,10 @@ void RGWCompleteMultipart::execute()
                            s->bucket_info, meta_obj, 0);
   if (r < 0) {
     ldout(store->ctx(), 0) << "WARNING: failed to remove object " << meta_obj << dendl;
+    r = l.unlock(&ioctx, raw_meta_oid);
+    if (r < 0) {
+      ldout(store->ctx(), 0) << "WARNING: failed to unlock " << raw_meta_oid << dendl;
+    }
   }
 }
 
index ed91951aba9dd33e6f61482c5264ad2d625012d0..d3a63aee73d17c0818d3e7ea1afec1ade642f944 100644 (file)
@@ -1390,6 +1390,7 @@ public:
 
   void send_response() override = 0;
   const string name() override { return "get_lifecycle"; }
+  RGWOpType get_type() override { return RGW_OP_GET_LC; }
   uint32_t op_mask() override { return RGW_OP_TYPE_READ; }
 };
 
@@ -1425,6 +1426,7 @@ public:
   virtual int get_params() = 0;
   void send_response() override = 0;
   const string name() override { return "put_lifecycle"; }
+  RGWOpType get_type() override { return RGW_OP_PUT_LC; }
   uint32_t op_mask() override { return RGW_OP_TYPE_WRITE; }
 };
 
@@ -1448,6 +1450,7 @@ public:
 
   void send_response() override = 0;
   const string name() override { return "delete_lifecycle"; }
+  RGWOpType get_type() override { return RGW_OP_DELETE_LC; }
   uint32_t op_mask() override { return RGW_OP_TYPE_WRITE; }
 };
 
index 297d7f90e66ec290bafa1a8648f4a3b149b71088..7a1fce857151b99077ef251e0afaa9ba02cfd1cd 100644 (file)
@@ -3115,7 +3115,7 @@ class RGWDataNotifier : public RGWRadosThread {
   RGWDataNotifierManager notify_mgr;
 
   uint64_t interval_msec() override {
-    return cct->_conf->rgw_md_notify_interval_msec;
+    return cct->_conf->get_val<int64_t>("rgw_data_notify_interval_msec");
   }
 public:
   RGWDataNotifier(RGWRados *_store) : RGWRadosThread(_store, "data-notifier"), notify_mgr(_store) {}
@@ -8122,7 +8122,6 @@ int RGWRados::copy_obj(RGWObjectCtx& obj_ctx,
     if (tail_placement.bucket.name.empty()) {
       manifest.set_tail_placement(tail_placement.placement_rule, src_obj.bucket);
     }
-    string oid, key;
     for (; miter != astate->manifest.obj_end(); ++miter) {
       ObjectWriteOperation op;
       cls_refcount_get(op, tag, true);
@@ -8176,8 +8175,6 @@ done_ret:
   if (!copy_itself) {
     vector<rgw_raw_obj>::iterator riter;
 
-    string oid, key;
-
     /* rollback reference */
     for (riter = ref_objs.begin(); riter != ref_objs.end(); ++riter) {
       ObjectWriteOperation op;
@@ -10092,7 +10089,6 @@ int RGWRados::Object::Read::read(int64_t ofs, int64_t end, bufferlist& bl)
   RGWRados *store = source->get_store();
   CephContext *cct = store->ctx();
 
-  std::string oid, key;
   rgw_raw_obj read_obj;
   uint64_t read_ofs = ofs;
   uint64_t len, read_len;
@@ -11588,7 +11584,6 @@ int RGWRados::get_bucket_stats(RGWBucketInfo& bucket_info, int shard_id, string
   BucketIndexShardsManager ver_mgr;
   BucketIndexShardsManager master_ver_mgr;
   BucketIndexShardsManager marker_mgr;
-  string shard_marker;
   char buf[64];
   for(; iter != headers.end(); ++iter, ++viter) {
     accumulate_raw_stats(iter->second, stats);
index 15a3d247dad1287ecf58bd5e76fa15367fd3b7e2..23caafd6314f48a58ba33ee624ec8ca836b58c65 100644 (file)
@@ -1254,7 +1254,7 @@ struct RGWZoneParams : RGWSystemMetaObj {
     if (struct_v >= 7) {
       ::decode(lc_pool, bl);
     } else {
-      lc_pool.init(name + ".rgw.lc");
+      lc_pool = log_pool.name + ":lc";
     }
     if (struct_v >= 8) {
       ::decode(tier_config, bl);
@@ -1262,7 +1262,7 @@ struct RGWZoneParams : RGWSystemMetaObj {
     if (struct_v >= 9) {
       ::decode(roles_pool, bl);
     } else {
-      roles_pool = name + ".rgw.roles";
+      roles_pool = name + ".rgw.meta:roles";
     }
     if (struct_v >= 10) {
       ::decode(reshard_pool, bl);
@@ -2213,6 +2213,7 @@ class RGWRados
   friend class RGWReshard;
   friend class RGWBucketReshard;
   friend class BucketIndexLockGuard;
+  friend class RGWCompleteMultipart;
 
   /** Open the pool used as root for this gateway */
   int open_root_pool_ctx();
index 505596242aa394095da75f767667b956d0efa869..f67a076c6222acf8b37dd521e999e969cd722b6f 100644 (file)
@@ -1834,18 +1834,6 @@ int RGWHandler_REST::allocate_formatter(struct req_state *s,
   return 0;
 }
 
-int RGWHandler_REST::validate_tenant_name(string const& t)
-{
-  struct tench {
-    static bool is_good(char ch) {
-      return isalnum(ch) || ch == '_';
-    }
-  };
-  std::string::const_iterator it =
-    std::find_if_not(t.begin(), t.end(), tench::is_good);
-  return (it == t.end())? 0: -ERR_INVALID_TENANT_NAME;
-}
-
 // This function enforces Amazon's spec for bucket names.
 // (The requirements, not the recommendations.)
 int RGWHandler_REST::validate_bucket_name(const string& bucket)
index 615ececc595c6d5c320ea170c43ab25b5bdc5606..f780ab4abacd75b55d2171bb438d9ede5ca69439 100644 (file)
@@ -501,7 +501,6 @@ public:
   RGWHandler_REST() {}
   ~RGWHandler_REST() override {}
 
-  static int validate_tenant_name(const string& bucket);
   static int validate_bucket_name(const string& bucket);
   static int validate_object_name(const string& object);
 
index 2652792bb753f1bb96d704ae4978ae7a7c2b99d2..9c504ca591023275963e9991c1d6d0568755576e 100644 (file)
@@ -3206,7 +3206,7 @@ int RGWHandler_REST_S3::postauth_init()
            << " s->bucket=" << rgw_make_bucket_entry_name(s->bucket_tenant, s->bucket_name) << dendl;
 
   int ret;
-  ret = validate_tenant_name(s->bucket_tenant);
+  ret = rgw_validate_tenant_name(s->bucket_tenant);
   if (ret)
     return ret;
   if (!s->bucket_name.empty()) {
@@ -3221,7 +3221,7 @@ int RGWHandler_REST_S3::postauth_init()
   if (!t->src_bucket.empty()) {
     rgw_parse_url_bucket(t->src_bucket, s->user->user_id.tenant,
                        s->src_tenant_name, s->src_bucket_name);
-    ret = validate_tenant_name(s->src_tenant_name);
+    ret = rgw_validate_tenant_name(s->src_tenant_name);
     if (ret)
       return ret;
     ret = valid_s3_bucket_name(s->src_bucket_name, relaxed_names);
@@ -3237,8 +3237,8 @@ int RGWHandler_REST_S3::init(RGWRados *store, struct req_state *s,
   int ret;
 
   s->dialect = "s3";
-  
-  ret = validate_tenant_name(s->bucket_tenant);
+
+  ret = rgw_validate_tenant_name(s->bucket_tenant);
   if (ret)
     return ret;
   bool relaxed_names = s->cct->_conf->rgw_relaxed_s3_bucket_names;
@@ -3345,32 +3345,6 @@ int RGW_Auth_S3::authorize(RGWRados* const store,
     return -EPERM;
   }
 
-  if (s->op == OP_OPTIONS) {
-    init_anon_user(s);
-    return 0;
-  }
-
-  AwsVersion version;
-  AwsRoute route;
-  std::tie(version, route) = discover_aws_flavour(s->info);
-
-  if (route == AwsRoute::QUERY_STRING && version == AwsVersion::UNKOWN) {
-    /* FIXME(rzarzynski): handle anon user. */
-    init_anon_user(const_cast<req_state*>(s));
-    return 0;
-  }
-
-  return authorize_v2(store, auth_registry, s);
-}
-
-
-/*
- * handle v2 signatures
- */
-int RGW_Auth_S3::authorize_v2(RGWRados* const store,
-                              const rgw::auth::StrategyRegistry& auth_registry,
-                              struct req_state* const s)
-{
   const auto ret = rgw::auth::Strategy::apply(auth_registry.get_s3_main(), s);
   if (ret == 0) {
     /* Populate the owner info. */
@@ -3847,6 +3821,7 @@ AWSGeneralAbstractor::get_auth_data_v4(const req_state* const s,
         case RGW_OP_SET_BUCKET_WEBSITE:
         case RGW_OP_PUT_BUCKET_POLICY:
         case RGW_OP_PUT_OBJ_TAGGING:
+        case RGW_OP_PUT_LC:
           break;
         default:
           dout(10) << "ERROR: AWS4 completion for this operation NOT IMPLEMENTED" << dendl;
@@ -4206,3 +4181,17 @@ rgw::auth::s3::LocalEngine::authenticate(
   auto apl = apl_factory->create_apl_local(cct, s, user_info, k.subuser);
   return result_t::grant(std::move(apl), completer_factory(k.key));
 }
+
+/* Tell whether this engine applies to the request: it does for every
+ * OPTIONS request, and otherwise only when discover_aws_flavour() reports
+ * the query-string route together with an unknown AWS version. */
+bool rgw::auth::s3::S3AnonymousEngine::is_applicable(
+  const req_state* s
+) const noexcept {
+  if (s->op != OP_OPTIONS) {
+    AwsVersion version;
+    AwsRoute route;
+    std::tie(version, route) = discover_aws_flavour(s->info);
+
+    const bool via_query_string = (route == AwsRoute::QUERY_STRING);
+    const bool version_unknown = (version == AwsVersion::UNKOWN);
+    return via_query_string && version_unknown;
+  }
+
+  return true;
+}
index 75615b9b868f93b5360c9fda3a931a9eee3edf2c..d0aa098fa1b82fc74533aec9fb4f51fdfd5a2b19 100644 (file)
@@ -470,10 +470,6 @@ public:
 };
 
 class RGW_Auth_S3 {
-private:
-  static int authorize_v2(RGWRados *store,
-                          const rgw::auth::StrategyRegistry& auth_registry,
-                          struct req_state *s);
 public:
   static int authorize(RGWRados *store,
                        const rgw::auth::StrategyRegistry& auth_registry,
@@ -887,6 +883,19 @@ public:
 };
 
 
+/* Anonymous engine specialisation declared alongside the other S3 auth
+ * classes. It inherits everything from rgw::auth::AnonymousEngine and
+ * overrides only is_applicable() (defined out of line) and the name
+ * reported by get_name(). */
+class S3AnonymousEngine : public rgw::auth::AnonymousEngine {
+  /* Decides whether this engine should handle the given request. */
+  bool is_applicable(const req_state* s) const noexcept override;
+
+public:
+  /* Let's reuse the parent class' constructor. */
+  using rgw::auth::AnonymousEngine::AnonymousEngine;
+
+  /* Fully qualified engine name. */
+  const char* get_name() const noexcept override {
+    return "rgw::auth::s3::S3AnonymousEngine";
+  }
+};
+
+
 class S3AuthFactory : public rgw::auth::RemoteApplier::Factory,
                       public rgw::auth::LocalApplier::Factory {
   typedef rgw::auth::IdentityApplier::aplptr_t aplptr_t;
index f1b47a4fa54547bb9b084073bcfc0faa0c24aa00..96f7cb7e5053201709969eba074f9a5f632a00f8 100644 (file)
@@ -2558,7 +2558,7 @@ int RGWHandler_REST_SWIFT::postauth_init()
           << dendl;
 
   int ret;
-  ret = validate_tenant_name(s->bucket_tenant);
+  ret = rgw_validate_tenant_name(s->bucket_tenant);
   if (ret)
     return ret;
   ret = validate_bucket_name(s->bucket_name);
index 711fd3a0c1dca6c0d593d2b73ec1e1e437a30844..8539c3ed766409a923b70e836b88a1429e68d0b8 100644 (file)
@@ -76,6 +76,7 @@ void RGWOp_User_Create::execute()
   std::string secret_key;
   std::string key_type_str;
   std::string caps;
+  std::string tenant_name;
 
   bool gen_key;
   bool suspended;
@@ -96,6 +97,7 @@ void RGWOp_User_Create::execute()
   RESTArgs::get_string(s, "secret-key", secret_key, &secret_key);
   RESTArgs::get_string(s, "key-type", key_type_str, &key_type_str);
   RESTArgs::get_string(s, "user-caps", caps, &caps);
+  RESTArgs::get_string(s, "tenant", tenant_name, &tenant_name);
   RESTArgs::get_bool(s, "generate-key", true, &gen_key);
   RESTArgs::get_bool(s, "suspended", false, &suspended);
   RESTArgs::get_int32(s, "max-buckets", default_max_buckets, &max_buckets);
@@ -108,6 +110,10 @@ void RGWOp_User_Create::execute()
     return;
   }
 
+  if (!tenant_name.empty()) {
+    uid.tenant = tenant_name;
+  }
+
   // TODO: validate required args are passed in. (for eg. uid and display_name here)
   op_state.set_user_id(uid);
   op_state.set_display_name(display_name);
diff --git a/ceph/src/rgw/rgw_string.cc b/ceph/src/rgw/rgw_string.cc
new file mode 100644 (file)
index 0000000..d49bba7
--- /dev/null
@@ -0,0 +1,45 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "rgw_string.h"
+
+/// exact character comparison; match_wildcards() uses it when
+/// MATCH_CASE_INSENSITIVE is not set
+static bool char_eq(char c1, char c2)
+{
+  return c1 == c2;
+}
+
+/// case-insensitive character comparison; match_wildcards() uses it when
+/// MATCH_CASE_INSENSITIVE is set
+static bool ci_char_eq(char c1, char c2)
+{
+  // tolower() takes an int that must be representable as unsigned char (or
+  // be EOF); passing a negative plain char is undefined behaviour, so
+  // convert first
+  return tolower(static_cast<unsigned char>(c1)) ==
+         tolower(static_cast<unsigned char>(c2));
+}
+
+/// Match `input` against `pattern`, where '*' in the pattern matches any
+/// (possibly empty) run of characters and '?' matches exactly one character.
+/// All other pattern characters must compare equal to the input character,
+/// case-insensitively if MATCH_CASE_INSENSITIVE is set in `flags`.
+///
+/// Returns true only if the whole input is consumed by the whole pattern.
+///
+/// The matcher backtracks to the most recent '*': a purely greedy scan that
+/// commits to the first occurrence of the character following '*' rejects
+/// valid matches such as pattern "*ab" against input "aab".
+bool match_wildcards(boost::string_view pattern, boost::string_view input,
+                     uint32_t flags)
+{
+  const auto eq = (flags & MATCH_CASE_INSENSITIVE) ? &ci_char_eq : &char_eq;
+
+  auto p = pattern.begin();
+  auto s = input.begin();
+
+  // backtracking state for the most recently seen '*'
+  bool have_star = false;
+  auto star_p = pattern.end(); // pattern position right after that '*'
+  auto star_s = input.end();   // first input position not yet absorbed by it
+
+  while (s != input.end()) {
+    if (p != pattern.end() && *p == '*') {
+      // remember the star; initially let it match the empty string
+      have_star = true;
+      star_p = ++p;
+      star_s = s;
+      continue;
+    }
+    if (p != pattern.end() && (*p == '?' || eq(*p, *s))) {
+      ++p;
+      ++s;
+      continue;
+    }
+    if (!have_star)
+      return false;
+    // mismatch after a '*': let the star absorb one more input character
+    // and retry the remainder of the pattern from there
+    p = star_p;
+    s = ++star_s;
+  }
+
+  // input exhausted: only trailing '*' may remain in the pattern
+  while (p != pattern.end() && *p == '*')
+    ++p;
+  return p == pattern.end();
+}
index 062880cc1acac23830ae88a4911eb730c7f895ce..c56667533c099571ace87fb4dbc43d4d07278ffc 100644 (file)
@@ -223,4 +223,14 @@ std::string string_join_reserve(char delim, const Args&... args)
   return string_join_reserve(boost::string_view{&delim, 1}, args...);
 }
 
+
+/// use case-insensitive comparison in match_wildcards()
+static constexpr uint32_t MATCH_CASE_INSENSITIVE = 0x01;
+
+/// attempt to match the given input string with the pattern, which may contain
+/// the wildcard characters * and ?
+extern bool match_wildcards(boost::string_view pattern,
+                            boost::string_view input,
+                            uint32_t flags = 0);
+
 #endif
index 9674424b03539f88bdea71dc18ac2752924e4e71..525671c34b4ad0c96b3fb6eead805cad69157596 100644 (file)
@@ -3,8 +3,10 @@
 
 #include <array>
 
-#include <boost/utility/string_ref.hpp>
+#include <boost/utility/string_view.hpp>
 #include <boost/container/static_vector.hpp>
+#include <boost/algorithm/string/predicate.hpp>
+#include <boost/algorithm/string.hpp>
 
 #include "rgw_swift_auth.h"
 #include "rgw_rest.h"
@@ -160,8 +162,8 @@ public:
   SignatureHelper() = default;
 
   const char* calc(const std::string& key,
-                   const boost::string_ref& method,
-                   const boost::string_ref& path,
+                   const boost::string_view& method,
+                   const boost::string_view& path,
                    const std::string& expires) {
 
     using ceph::crypto::HMACSHA1;
@@ -190,6 +192,53 @@ public:
 
 }; /* TempURLEngine::SignatureHelper */
 
+/* SignatureHelper variant that additionally supports prefix-based TempURLs.
+ *
+ * Without a prefix it behaves like the base helper. With a prefix, the
+ * signature is calculated over "prefix:" + <URI without object name> +
+ * <prefix> instead of the given path, and a signature match is accepted only
+ * if the request URI really starts with <URI without object name> + <prefix>.
+ *
+ * The helper keeps views of (and a reference to) the strings passed to its
+ * constructor, so they must outlive the helper. */
+class TempURLEngine::PrefixableSignatureHelper
+    : private TempURLEngine::SignatureHelper {
+  using base_t = SignatureHelper;
+
+  const boost::string_view decoded_uri;
+  const boost::string_view object_name;
+  boost::string_view no_obj_uri;
+
+  const boost::optional<const std::string&> prefix;
+
+public:
+  PrefixableSignatureHelper(const std::string& _decoded_uri,
+                            const std::string& _object_name,
+                            const boost::optional<const std::string&> _prefix)
+    : decoded_uri(_decoded_uri),
+      object_name(_object_name),
+      prefix(_prefix) {
+    /* Transform: v1/acct/cont/obj - > v1/acct/cont/
+     *
+     * NOTE: substr() has to be called on the boost::string_view member, not
+     * on the std::string argument. std::string::substr() returns a temporary
+     * string and no_obj_uri would dangle as soon as it is destroyed; that is
+     * why the constructor arguments must not shadow the members. */
+    no_obj_uri =
+      decoded_uri.substr(0, decoded_uri.length() - object_name.length());
+  }
+
+  /* Calculate the signature. If no prefix was given, `path` is signed as-is;
+   * otherwise `path` is ignored and the "prefix:..." form is signed. */
+  const char* calc(const std::string& key,
+                   const boost::string_view& method,
+                   const boost::string_view& path,
+                   const std::string& expires) {
+    if (!prefix) {
+      return base_t::calc(key, method, path, expires);
+    } else {
+      const auto prefixed_path =
+        string_cat_reserve("prefix:", no_obj_uri, *prefix);
+      return base_t::calc(key, method, prefixed_path, expires);
+    }
+  }
+
+  /* Compare the calculated signature with `rhs`. For prefixed TempURLs the
+   * request URI must additionally start with the signed prefix. */
+  bool is_equal_to(const std::string& rhs) const {
+    bool is_auth_ok = base_t::is_equal_to(rhs);
+
+    if (prefix && is_auth_ok) {
+      const auto prefix_uri = string_cat_reserve(no_obj_uri, *prefix);
+      is_auth_ok = boost::algorithm::starts_with(decoded_uri, prefix_uri);
+    }
+
+    return is_auth_ok;
+  }
+}; /* TempURLEngine::PrefixableSignatureHelper */
+
 TempURLEngine::result_t
 TempURLEngine::authenticate(const req_state* const s) const
 {
@@ -197,13 +246,22 @@ TempURLEngine::authenticate(const req_state* const s) const
     return result_t::deny();
   }
 
-  const string& temp_url_sig = s->info.args.get("temp_url_sig");
-  const string& temp_url_expires = s->info.args.get("temp_url_expires");
+  /* NOTE(rzarzynski): RGWHTTPArgs::get(), in contrast to RGWEnv::get(),
+   * never returns nullptr. If the requested parameter is absent, we will
+   * get the empty string. */
+  const std::string& temp_url_sig = s->info.args.get("temp_url_sig");
+  const std::string& temp_url_expires = s->info.args.get("temp_url_expires");
 
   if (temp_url_sig.empty() || temp_url_expires.empty()) {
     return result_t::deny();
   }
 
+  /* Though, for prefixed tempurls we need to differentiate between empty
+   * prefix and lack of prefix. Empty prefix means allowance for whole
+   * container. */
+  const boost::optional<const std::string&> temp_url_prefix = \
+    s->info.args.get_optional("temp_url_prefix");
+
   RGWUserInfo owner_info;
   try {
     get_owner_info(s, owner_info);
@@ -228,14 +286,14 @@ TempURLEngine::authenticate(const req_state* const s) const
 
   /* XXX can we search this ONCE? */
   const size_t pos = g_conf->rgw_swift_url_prefix.find_last_not_of('/') + 1;
-  boost::string_ref ref_uri = s->decoded_uri;
-  const std::array<boost::string_ref, 2> allowed_paths = {
+  const boost::string_view ref_uri = s->decoded_uri;
+  const std::array<boost::string_view, 2> allowed_paths = {
     ref_uri,
     ref_uri.substr(pos + 1)
   };
 
   /* Account owner calculates the signature also against a HTTP method. */
-  boost::container::static_vector<boost::string_ref, 3> allowed_methods;
+  boost::container::static_vector<boost::string_view, 3> allowed_methods;
   if (strcmp("HEAD", s->info.method) == 0) {
     /* HEAD requests are specially handled. */
     /* TODO: after getting a newer boost (with static_vector supporting
@@ -250,7 +308,12 @@ TempURLEngine::authenticate(const req_state* const s) const
   }
 
   /* Need to try each combination of keys, allowed path and methods. */
-  SignatureHelper sig_helper;
+  PrefixableSignatureHelper sig_helper {
+    s->decoded_uri,
+    s->object.name,
+    temp_url_prefix
+  };
+
   for (const auto& kv : owner_info.temp_url_keys) {
     const int temp_url_key_num = kv.first;
     const string& temp_url_key = kv.second;
index 055d541d3273a0837a42c597a8f6473b2edae82e..afab8e069745a38db28b741c95b339295bccd330 100644 (file)
@@ -49,6 +49,7 @@ class TempURLEngine : public rgw::auth::Engine {
   bool is_expired(const std::string& expires) const;
 
   class SignatureHelper;
+  class PrefixableSignatureHelper;
 
 public:
   TempURLEngine(CephContext* const cct,
index 0809548f6da85953514bf6fbcbadd6dd83bfcce3..41a5492b93089129c22db4f626d12d7f3c4e76b4 100644 (file)
@@ -578,6 +578,18 @@ uint32_t rgw_str_to_perm(const char *str)
   return RGW_PERM_INVALID;
 }
 
+/* Validate a tenant name: every character must be accepted by isalnum() or
+ * be '_'. An empty name is considered valid.
+ *
+ * Returns 0 if the name is valid, -ERR_INVALID_TENANT_NAME otherwise. */
+int rgw_validate_tenant_name(const string& t)
+{
+  struct tench {
+    static bool is_good(char ch) {
+      // isalnum() takes an int that must be representable as unsigned char
+      // (or be EOF); a negative plain char, e.g. a byte of a UTF-8 sequence,
+      // would be undefined behaviour
+      return isalnum(static_cast<unsigned char>(ch)) || ch == '_';
+    }
+  };
+  std::string::const_iterator it =
+    std::find_if_not(t.begin(), t.end(), tench::is_good);
+  return (it == t.end())? 0: -ERR_INVALID_TENANT_NAME;
+}
+
 static bool validate_access_key(string& key)
 {
   const char *p = key.c_str();
@@ -1886,6 +1898,13 @@ int RGWUser::check_op(RGWUserAdminOpState& op_state, std::string *err_msg)
     return -EINVAL;
   }
 
+  int ret = rgw_validate_tenant_name(op_id.tenant);
+  if (ret) {
+    set_err_msg(err_msg,
+               "invalid tenant only alphanumeric and _ characters are allowed");
+    return ret;
+  }
+
   //set key type when it not set or set by context
   if ((op_state.get_key_type() < 0) || op_state.key_type_setbycontext) {
     op_state.set_key_type(KEY_TYPE_S3);
index 35738ce7d284b2a90ad8b4b9748832b16298f625..5e6754b0380ccee2fef8fc5a7f0af8e5311b7a51 100644 (file)
@@ -115,9 +115,6 @@ extern int rgw_get_user_attrs_by_uid(RGWRados *store,
  * Given an RGWUserInfo, deletes the user and its bucket ACLs.
  */
 extern int rgw_delete_user(RGWRados *store, RGWUserInfo& user, RGWObjVersionTracker& objv_tracker);
-/**
- * Store a list of the user's buckets, with associated functinos.
- */
 
 /*
  * remove the different indexes
@@ -127,14 +124,11 @@ extern int rgw_remove_uid_index(RGWRados *store, rgw_user& uid);
 extern int rgw_remove_email_index(RGWRados *store, string& email);
 extern int rgw_remove_swift_name_index(RGWRados *store, string& swift_name);
 
-/*
- * An RGWUser class along with supporting classes created
- * to support the creation of an RESTful administrative API
- */
-
 extern void rgw_perm_to_str(uint32_t mask, char *buf, int len);
 extern uint32_t rgw_str_to_perm(const char *str);
 
+extern int rgw_validate_tenant_name(const string& t);
+
 enum ObjectKeyType {
   KEY_TYPE_SWIFT,
   KEY_TYPE_S3,
@@ -153,6 +147,10 @@ enum RGWUserId {
   RGW_ACCESS_KEY,
 };
 
+/*
+ * An RGWUser class along with supporting classes created
+ * to support the creation of an RESTful administrative API
+ */
 struct RGWUserAdminOpState {
   // user attributes
   RGWUserInfo info;
@@ -201,7 +199,7 @@ struct RGWUserAdminOpState {
   bool op_mask_specified;
   bool caps_specified;
   bool suspension_op;
-  bool admin_specified;
+  bool admin_specified = false;
   bool system_specified;
   bool key_op;
   bool temp_url_key_specified;
index 189438acfd772445490002c185648331e890f756..07f3238a4e4d1271e811a4992e4d8eb5b9400212 100644 (file)
@@ -548,6 +548,9 @@ add_dependencies(tests
   cython_modules)
 if(WITH_RBD)
   add_dependencies(tests unittest_librbd rbd)
+  if(FREEBSD)
+    add_dependencies(tests rbd-ggate)
+  endif(FREEBSD)
 endif(WITH_RBD)
 if(WITH_RADOSGW)
   add_dependencies(tests radosgw-admin)
@@ -558,6 +561,9 @@ endif(NOT FREEBSD)
 
 if(WITH_RBD)
   add_ceph_test(run-rbd-unit-tests.sh ${CMAKE_CURRENT_SOURCE_DIR}/run-rbd-unit-tests.sh)
+  if(FREEBSD)
+    add_ceph_test(rbd-ggate.sh ${CMAKE_CURRENT_SOURCE_DIR}/rbd-ggate.sh)
+  endif(FREEBSD)
 endif(WITH_RBD)
 add_ceph_test(run-cli-tests ${CMAKE_CURRENT_SOURCE_DIR}/run-cli-tests)
 add_ceph_test(test_objectstore_memstore.sh ${CMAKE_CURRENT_SOURCE_DIR}/test_objectstore_memstore.sh)
index aefdbf91e344a7016c8c03d1df4a00fc39fa1051..69d7679d9d7e910e63535b7484e10e149853e312 100644 (file)
@@ -2,6 +2,7 @@ ls on empty pool never containing images
 ========================================
   $ ceph osd pool create rbd_other 8
   pool 'rbd_other' created
+  $ rbd pool init rbd_other
   $ rados -p rbd rm rbd_directory >/dev/null 2>&1 || true
   $ rbd ls
   $ rbd ls --format json
index 540bee1807a5467ab2580fd325ae11ad3d1c4158..a83e37196c4fa41e1602fff27e08ebd0dcd88594 100644 (file)
@@ -6,43 +6,43 @@
   $ map="$TESTDIR/foo"
   $ crushtool --outfn "$map" --build --set-chooseleaf-vary-r 0 --set-chooseleaf-stable 0 --num_osds 25 node straw 5 rack straw 1 root straw 0 --reweight-item osd.2 99 -o "$map" --tree
   crushtool reweighting item osd.2 to 99
-  ID  WEIGHT    TYPE NAME          
-  -11 123.00000 root root          
-   -6 103.00000     rack rack0     
-   -1 103.00000         node node0 
-    0   1.00000             osd.0  
-    1   1.00000             osd.1  
-    2  99.00000             osd.2  
-    3   1.00000             osd.3  
-    4   1.00000             osd.4  
-   -7   5.00000     rack rack1     
-   -2   5.00000         node node1 
-    5   1.00000             osd.5  
-    6   1.00000             osd.6  
-    7   1.00000             osd.7  
-    8   1.00000             osd.8  
-    9   1.00000             osd.9  
-   -8   5.00000     rack rack2     
-   -3   5.00000         node node2 
-   10   1.00000             osd.10 
-   11   1.00000             osd.11 
-   12   1.00000             osd.12 
-   13   1.00000             osd.13 
-   14   1.00000             osd.14 
-   -9   5.00000     rack rack3     
-   -4   5.00000         node node3 
-   15   1.00000             osd.15 
-   16   1.00000             osd.16 
-   17   1.00000             osd.17 
-   18   1.00000             osd.18 
-   19   1.00000             osd.19 
-  -10   5.00000     rack rack4     
-   -5   5.00000         node node4 
-   20   1.00000             osd.20 
-   21   1.00000             osd.21 
-   22   1.00000             osd.22 
-   23   1.00000             osd.23 
-   24   1.00000             osd.24 
+  ID  CLASS WEIGHT    TYPE NAME          
+  -11       123.00000 root root          
+   -6       103.00000     rack rack0     
+   -1       103.00000         node node0 
+    0         1.00000             osd.0  
+    1         1.00000             osd.1  
+    2        99.00000             osd.2  
+    3         1.00000             osd.3  
+    4         1.00000             osd.4  
+   -7         5.00000     rack rack1     
+   -2         5.00000         node node1 
+    5         1.00000             osd.5  
+    6         1.00000             osd.6  
+    7         1.00000             osd.7  
+    8         1.00000             osd.8  
+    9         1.00000             osd.9  
+   -8         5.00000     rack rack2     
+   -3         5.00000         node node2 
+   10         1.00000             osd.10 
+   11         1.00000             osd.11 
+   12         1.00000             osd.12 
+   13         1.00000             osd.13 
+   14         1.00000             osd.14 
+   -9         5.00000     rack rack3     
+   -4         5.00000         node node3 
+   15         1.00000             osd.15 
+   16         1.00000             osd.16 
+   17         1.00000             osd.17 
+   18         1.00000             osd.18 
+   19         1.00000             osd.19 
+  -10         5.00000     rack rack4     
+   -5         5.00000         node node4 
+   20         1.00000             osd.20 
+   21         1.00000             osd.21 
+   22         1.00000             osd.22 
+   23         1.00000             osd.23 
+   24         1.00000             osd.24 
   $ crushtool -d "$map"
   # begin crush map
   tunable choose_local_tries 0
index 7c38ae6025505964dcf163d31365448033dd5183..a509ffd4e2d3d3aa9afecfc458daaf581c63146b 100644 (file)
@@ -1,3 +1,7 @@
+Skip test on FreeBSD as it generates different output there.
+
+  $ test "$(uname)" = "FreeBSD" && exit 80 || true
+
   $ rbd --help
   usage: rbd <command> ...
   
       feature enable              Enable the specified image feature.
       flatten                     Fill clone with parent data (make it
                                   independent).
-      group create                Create a consistency group.
-      group image add             Add an image to a consistency group.
-      group image list            List images in a consistency group.
-      group image remove          Remove an image from a consistency group.
-      group list (group ls)       List rbd consistency groups.
-      group remove (group rm)     Delete a consistency group.
       image-meta get              Image metadata get the value associated with
                                   the key.
       image-meta list             Image metadata list keys with values.
     --image arg          image name
     --no-progress        disable progress output
   
-  rbd help group create
-  usage: rbd group create [--pool <pool>] [--group <group>] 
-                          <group-spec> 
-  
-  Create a consistency group.
-  
-  Positional arguments
-    <group-spec>         group specification
-                         (example: [<pool-name>/]<group-name>)
-  
-  Optional arguments
-    -p [ --pool ] arg    pool name
-    --group arg          group name
-  
-  rbd help group image add
-  usage: rbd group image add [--group-pool <group-pool>] [--group <group>] 
-                             [--image-pool <image-pool>] [--image <image>] 
-                             [--pool <pool>] 
-                             <group-spec> <image-spec> 
-  
-  Add an image to a consistency group.
-  
-  Positional arguments
-    <group-spec>         group specification
-                         (example: [<pool-name>/]<group-name>)
-    <image-spec>         image specification
-                         (example: [<pool-name>/]<image-name>)
-  
-  Optional arguments
-    --group-pool arg     group pool name
-    --group arg          group name
-    --image-pool arg     image pool name
-    --image arg          image name
-    -p [ --pool ] arg    pool name unless overridden
-  
-  rbd help group image list
-  usage: rbd group image list [--format <format>] [--pretty-format] 
-                              [--pool <pool>] [--group <group>] 
-                              <group-spec> 
-  
-  List images in a consistency group.
-  
-  Positional arguments
-    <group-spec>         group specification
-                         (example: [<pool-name>/]<group-name>)
-  
-  Optional arguments
-    --format arg         output format [plain, json, or xml]
-    --pretty-format      pretty formatting (json and xml)
-    -p [ --pool ] arg    pool name
-    --group arg          group name
-  
-  rbd help group image remove
-  usage: rbd group image remove [--group-pool <group-pool>] [--group <group>] 
-                                [--image-pool <image-pool>] [--image <image>] 
-                                [--pool <pool>] [--image-id <image-id>] 
-                                <group-spec> <image-spec> 
-  
-  Remove an image from a consistency group.
-  
-  Positional arguments
-    <group-spec>         group specification
-                         (example: [<pool-name>/]<group-name>)
-    <image-spec>         image specification
-                         (example: [<pool-name>/]<image-name>)
-  
-  Optional arguments
-    --group-pool arg     group pool name
-    --group arg          group name
-    --image-pool arg     image pool name
-    --image arg          image name
-    -p [ --pool ] arg    pool name unless overridden
-    --image-id arg       image id
-  
-  rbd help group list
-  usage: rbd group list [--pool <pool>] [--format <format>] [--pretty-format] 
-  
-  List rbd consistency groups.
-  
-  Optional arguments
-    -p [ --pool ] arg    pool name
-    --format arg         output format [plain, json, or xml]
-    --pretty-format      pretty formatting (json and xml)
-  
-  rbd help group remove
-  usage: rbd group remove [--pool <pool>] [--group <group>] 
-                          <group-spec> 
-  
-  Delete a consistency group.
-  
-  Positional arguments
-    <group-spec>         group specification
-                         (example: [<pool-name>/]<group-name>)
-  
-  Optional arguments
-    -p [ --pool ] arg    pool name
-    --group arg          group name
-  
   rbd help image-meta get
   usage: rbd image-meta get [--pool <pool>] [--image <image>] 
                             <image-spec> <key> 
index fb274391471ae0ac4c7fccf90b681fc5bf42e94f..50c59cf058b199fd891d6553bbdc38fddd03b9f8 100644 (file)
@@ -1013,7 +1013,6 @@ TEST_F(TestClsRbd, snapshots_namespaces)
 
   ASSERT_EQ(0, create_image(&ioctx, oid, 10, 22, 0, oid, -1));
 
-  vector<string> snap_names;
   vector<cls::rbd::SnapshotNamespace> snap_namespaces;
   SnapContext snapc;
 
@@ -1051,7 +1050,6 @@ TEST_F(TestClsRbd, snapshots_timestamps)
 
   ASSERT_EQ(0, create_image(&ioctx, oid, 10, 22, 0, oid, -1));
 
-  vector<string> snap_names;
   vector<utime_t> snap_timestamps;
   SnapContext snapc;
 
index 17034b67aee39b6a758970fdf89014eb69cb1f86..9a2be3df293cc96ca29e8f10b53085086b7b51e9 100644 (file)
@@ -271,4 +271,8 @@ add_executable(unittest_iso_8601
     test_iso_8601.cc)
 target_link_libraries(unittest_iso_8601 ceph-common)
 add_ceph_unittest(unittest_iso_8601
-  ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/unittest_hostname)
+  ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/unittest_iso_8601)
+
+add_executable(unittest_backport14 test_backport14.cc)
+add_ceph_unittest(unittest_backport14
+  ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/unittest_backport14)
diff --git a/ceph/src/test/common/test_backport14.cc b/ceph/src/test/common/test_backport14.cc
new file mode 100644 (file)
index 0000000..63ef5d0
--- /dev/null
@@ -0,0 +1,94 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2017 Red Hat, Inc.
+ *
+ * Author: Casey Bodley <cbodley@redhat.com>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#include "common/backport14.h" // include first: tests that header is standalone
+#include <gtest/gtest.h>
+
+int int_func() { return 1; }
+bool bool_func0() { return true; }
+bool bool_func1(int a) { return true; }
+bool bool_func2(const std::string& a, int b) { return true; }
+
+// given a callable and argument list, test that the result of ceph::not_fn
+// evaluates to false as both an lvalue and rvalue
+template <typename F, typename ...Args>
+void test_not(F&& fn, Args&&... args)
+{
+  auto res = ceph::not_fn(std::forward<F>(fn));
+  // test res as lvalue
+  EXPECT_FALSE(res(std::forward<Args>(args)...));
+  // test res as rvalue
+  // note: this forwards args twice, but it's okay if none are rvalues
+  EXPECT_FALSE(std::move(res)(std::forward<Args>(args)...));
+}
+
+TEST(Backport14, not_fn)
+{
+  // function pointers
+  test_not(int_func);
+  test_not(&int_func);
+  test_not(bool_func0);
+  test_not(&bool_func0);
+  test_not(bool_func1, 5);
+  test_not(bool_func2, "foo", 5);
+
+  // lambdas
+  auto int_lambda = [] { return 1; };
+  auto bool_lambda0 = [] { return true; };
+  auto bool_lambda1 = [] (int a) { return true; };
+  auto bool_lambda2 = [] (const std::string& a, int b) { return true; };
+
+  test_not(int_lambda);
+  test_not(bool_lambda0);
+  test_not(bool_lambda1, 5);
+  test_not(bool_lambda2, "foo", 5);
+
+  // functors
+  struct int_functor {
+    int operator()() { return 1; }
+  };
+  test_not(int_functor{});
+
+  struct bool_functor {
+    bool operator()() { return true; }
+    bool operator()(int a) { return true; }
+    bool operator()(const std::string& a, int b) { return true; }
+  };
+
+  test_not(bool_functor{});
+  test_not(bool_functor{}, 5);
+  test_not(bool_functor{}, "foo", 5);
+
+  // lvalue-only overload
+  struct lvalue_only_functor {
+    bool operator()() & { return true; } // no overload for rvalue
+  };
+  auto lvalue_result = ceph::not_fn(lvalue_only_functor{});
+  EXPECT_FALSE(lvalue_result());
+  // should not compile:
+  //   EXPECT_FALSE(std::move(lvalue_result)());
+
+  // rvalue-only overload
+  struct rvalue_only_functor {
+    bool operator()() && { return true; } // no overload for lvalue
+  };
+  EXPECT_FALSE(ceph::not_fn(rvalue_only_functor{})());
+  auto lvalue_functor = rvalue_only_functor{};
+  EXPECT_FALSE(ceph::not_fn(lvalue_functor)()); // lvalue functor, rvalue result
+  // should not compile:
+  //   auto lvalue_result2 = ceph::not_fn(rvalue_only_functor{});
+  //   EXPECT_FALSE(lvalue_result2());
+}
index fd85d55bd8a69ef08a3bb81cf7b1c7e91c4c953f..4502d0d0e69b379984f75437651bbeb8fbf0555d 100644 (file)
@@ -1110,7 +1110,11 @@ TEST(CrushWrapper, trim_roots_with_class) {
 
   int root_id = c.get_item_id("default");
   int clone_id;
-  ASSERT_EQ(c.device_class_clone(root_id, cl, &clone_id), 0);
+  map<int32_t, map<int32_t, int32_t>> old_class_bucket;
+  set<int32_t> used_ids;
+
+  ASSERT_EQ(c.device_class_clone(root_id, cl, old_class_bucket, used_ids,
+                                &clone_id), 0);
 
   ASSERT_TRUE(c.name_exists("default"));
   ASSERT_TRUE(c.name_exists("default~ssd"));
@@ -1144,9 +1148,12 @@ TEST(CrushWrapper, device_class_clone) {
 
   c.reweight(g_ceph_context);
 
+  map<int32_t, map<int32_t, int32_t>> old_class_bucket;
+  set<int32_t> used_ids;
   int root_id = c.get_item_id("default");
   int clone_id;
-  ASSERT_EQ(c.device_class_clone(root_id, cl, &clone_id), 0);
+  ASSERT_EQ(c.device_class_clone(root_id, cl, old_class_bucket, used_ids,
+                                &clone_id), 0);
   ASSERT_TRUE(c.name_exists("default~ssd"));
   ASSERT_EQ(clone_id, c.get_item_id("default~ssd"));
   ASSERT_TRUE(c.subtree_contains(clone_id, item));
@@ -1156,11 +1163,14 @@ TEST(CrushWrapper, device_class_clone) {
   ASSERT_EQ(c.get_item_weightf(clone_id), 1);
   // cloning again does nothing and returns the existing one
   int other_clone_id;
-  ASSERT_EQ(c.device_class_clone(root_id, cl, &other_clone_id), 0);
+  ASSERT_EQ(c.device_class_clone(root_id, cl, old_class_bucket, used_ids,
+                                &other_clone_id), 0);
   ASSERT_EQ(clone_id, other_clone_id);
   // invalid arguments
-  ASSERT_EQ(c.device_class_clone(12345, cl, &other_clone_id), -ECHILD);
-  ASSERT_EQ(c.device_class_clone(root_id, 12345, &other_clone_id), -EBADF);
+  ASSERT_EQ(c.device_class_clone(12345, cl, old_class_bucket, used_ids,
+                                &other_clone_id), -ECHILD);
+  ASSERT_EQ(c.device_class_clone(root_id, 12345, old_class_bucket, used_ids,
+                                &other_clone_id), -EBADF);
 }
 
 TEST(CrushWrapper, split_id_class) {
@@ -1177,9 +1187,12 @@ TEST(CrushWrapper, split_id_class) {
   int class_id = c.get_or_create_class_id("ssd");
   c.class_map[item] = class_id;
 
+  map<int32_t, map<int32_t, int32_t>> old_class_bucket;
+  set<int32_t> used_ids;
   int item_id = c.get_item_id("default");
   int clone_id;
-  ASSERT_EQ(c.device_class_clone(item_id, class_id, &clone_id), 0);
+  ASSERT_EQ(c.device_class_clone(item_id, class_id, old_class_bucket, used_ids,
+                                &clone_id), 0);
   int retrieved_item_id;
   int retrieved_class_id;
   ASSERT_EQ(c.split_id_class(clone_id, &retrieved_item_id, &retrieved_class_id), 0);
@@ -1191,7 +1204,7 @@ TEST(CrushWrapper, split_id_class) {
   ASSERT_EQ(-1, retrieved_class_id);
 }
 
-TEST(CrushWrapper, populate_and_cleanup_classes) {
+TEST(CrushWrapper, populate_classes) {
   CrushWrapper c;
   c.create();
   c.set_type_name(1, "root");
@@ -1205,13 +1218,14 @@ TEST(CrushWrapper, populate_and_cleanup_classes) {
   int class_id = c.get_or_create_class_id("ssd");
   c.class_map[item] = class_id;
 
-  ASSERT_EQ(c.populate_classes(), 0);
+  map<int32_t, map<int32_t, int32_t>> old_class_bucket;
+  ASSERT_EQ(c.populate_classes(old_class_bucket), 0);
 
   ASSERT_TRUE(c.name_exists("default~ssd"));
 
-  c.class_bucket.clear();
-  ASSERT_EQ(c.cleanup_classes(), 0);
-  ASSERT_FALSE(c.name_exists("default~ssd"));
+  old_class_bucket = c.class_bucket;
+  ASSERT_EQ(c.populate_classes(old_class_bucket), 0);
+  ASSERT_EQ(old_class_bucket, c.class_bucket);
 }
 
 TEST(CrushWrapper, remove_class_name) {
index ee0bd26223e2d5d16729bb1ac625d0e21a7a3222..5f546918788c1e04ad3b9f052b734b00b3bce053 100755 (executable)
@@ -29,8 +29,8 @@ test_object() {
     local failed=0
     local numtests=0
 
-    tmp1=`mktemp /tmp/typ-XXXXXXXXX`
-    tmp2=`mktemp /tmp/typ-XXXXXXXXX`
+    tmp1=`mktemp /tmp/test_object_1-XXXXXXXXX`
+    tmp2=`mktemp /tmp/test_object_2-XXXXXXXXX`
 
     rm -f $output_file
     if $CEPH_DENCODER type $type 2>/dev/null; then
@@ -62,6 +62,7 @@ test_object() {
         fi
 
         if [ "$iv" = "$version" ]; then
+          rm -rf $tmp1 $tmp2
           break
         fi
       done
@@ -69,6 +70,7 @@ test_object() {
       if [ -n "$incompat" ]; then
         if [ -z "$incompat_paths" ]; then
           echo "skipping incompat $type version $arversion, changed at $incompat < code $myversion"
+          rm -rf $tmp1 $tmp2
          return
         else
           # If we are ignoring not whole type, but objects that are in $incompat_path,
@@ -132,14 +134,15 @@ test_object() {
           failed=$(($failed + 1))
         fi
         numtests=$(($numtests + 1))
+       rm -f $tmp1 $tmp2
       done
     else
       echo "skipping unrecognized type $type"
+      rm -f $tmp1 $tmp2
     fi
 
     echo "failed=$failed" > $output_file
     echo "numtests=$numtests" >> $output_file
-    rm -f $tmp1 $tmp2
 }
 
 waitall() { # PID...
@@ -197,7 +200,7 @@ else
   max_parallel_jobs=${MAX_PARALLEL_JOBS:-$(nproc)}
 fi
 
-output_file=`mktemp /tmp/typ-XXXXXXXXX`
+output_file=`mktemp /tmp/output_file-XXXXXXXXX`
 running_jobs=0
 
 for arversion in `ls $dir/archive | sort -n`; do
@@ -219,6 +222,7 @@ for arversion in `ls $dir/archive | sort -n`; do
     if [ "$running_jobs" -eq "$max_parallel_jobs" ]; then
        do_join
     fi
+    rm -f ${output_file}*
   done
 done
 
index e37d3550ba61233ca6a4a4653fa2a8cc2cc76459..2182c8db4fc1a3471d3404b5b7fd14e18f82ee27 100644 (file)
@@ -13,9 +13,7 @@
 #include <string>
 
 class Context;
-class ContextWQ;
 class Mutex;
-class SafeTimer;
 
 namespace journal {
 
@@ -146,8 +144,10 @@ struct MockJournalerProxy {
     MockJournaler::get_instance().construct();
   }
 
-  MockJournalerProxy(ContextWQ *work_queue, SafeTimer *timer, Mutex *timer_lock,
-                     librados::IoCtx &header_ioctx, const std::string &journal_id,
+  template <typename WorkQueue, typename Timer>
+  MockJournalerProxy(WorkQueue *work_queue, Timer *timer, Mutex *timer_lock,
+                     librados::IoCtx &header_ioctx,
+                     const std::string &journal_id,
                      const std::string &client_id, const Settings&) {
     MockJournaler::get_instance().construct();
   }
index ff5064ba353e1244b8112c07cef680ebcbe3af43..d795b84683fc50d2a50b37e3a3f7bd50b2e3f26f 100644 (file)
@@ -68,7 +68,7 @@ public:
     return "";
   }
 
-  sem_t *m_sem;
+  sem_t *m_sem = nullptr;
   rados_t m_cluster;
   rados_ioctx_t m_ioctx;
   std::string m_pool_name;
@@ -131,7 +131,7 @@ public:
     return "";
   }
 
-  sem_t *m_sem;
+  sem_t *m_sem = nullptr;
   Rados m_cluster;
   IoCtx m_ioctx;
   std::string m_pool_name;
@@ -2570,7 +2570,7 @@ public:
     return "";
   }
 
-  sem_t *m_sem;
+  sem_t *m_sem = nullptr;
   Rados m_cluster;
   IoCtx m_ioctx;
   std::string m_pool_name;
index c76fb3cf8cb208ac4842c7b744e032ccbff5b5d5..c0f0c6f54556c5b3ec3210af263c0c9b7fd42f9e 100644 (file)
@@ -78,8 +78,8 @@ protected:
 private:
   TestMemIoCtxImpl(const TestMemIoCtxImpl&);
 
-  TestMemRadosClient *m_client;
-  TestMemCluster::Pool *m_pool;
+  TestMemRadosClient *m_client = nullptr;
+  TestMemCluster::Pool *m_pool = nullptr;
 
   void append_clone(bufferlist& src, bufferlist* dest);
   size_t clip_io(size_t off, size_t len, size_t bl_len);
index a73f2e634bb0ae92bfefb5b51705f05e03c0b4b7..2833d22e68a2629f17a9b8763ee22112773df589 100644 (file)
@@ -6,7 +6,6 @@ set(librbd_test
   test_internal.cc
   test_mirroring.cc
   test_BlockGuard.cc
-  test_Groups.cc
   test_MirroringWatcher.cc
   test_ObjectMap.cc
   test_Operations.cc
index 940edf6397b56c8f201fad6a7f80e47081b3ef50..85596ef575628d7b64a9018705b6e57b1de2f861 100644 (file)
@@ -2393,12 +2393,14 @@ test(void)
                        log4(OP_SKIPPED, OP_WRITESAME, offset, size);
                        goto out;
                }
+               break;
         case OP_COMPARE_AND_WRITE:
                 /* compare_and_write not implemented */
                 if (!ops->compare_and_write) {
                         log4(OP_SKIPPED, OP_COMPARE_AND_WRITE, offset, size);
                         goto out;
                 }
+               break;
        }
 
        switch (op) {
diff --git a/ceph/src/test/librbd/test_Groups.cc b/ceph/src/test/librbd/test_Groups.cc
deleted file mode 100644 (file)
index 39f3095..0000000
+++ /dev/null
@@ -1,91 +0,0 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-#include "test/librbd/test_fixture.h"
-#include "test/librbd/test_support.h"
-#include "include/int_types.h"
-#include "include/stringify.h"
-#include "include/rados/librados.h"
-#include "include/rbd/librbd.hpp"
-#include "common/Cond.h"
-#include "common/errno.h"
-#include "common/Mutex.h"
-#include "common/RWLock.h"
-#include "cls/lock/cls_lock_client.h"
-#include "cls/lock/cls_lock_types.h"
-#include "librbd/internal.h"
-#include "librbd/ImageCtx.h"
-#include "librbd/ImageWatcher.h"
-#include "librbd/WatchNotifyTypes.h"
-#include "librbd/io/AioCompletion.h"
-#include "librbd/io/ImageRequestWQ.h"
-#include "test/librados/test.h"
-#include "gtest/gtest.h"
-#include <boost/assign/std/set.hpp>
-#include <boost/assign/std/map.hpp>
-#include <boost/bind.hpp>
-#include <boost/scope_exit.hpp>
-#include <boost/thread/thread.hpp>
-#include <iostream>
-#include <map>
-#include <set>
-#include <sstream>
-#include <vector>
-
-using namespace ceph;
-using namespace boost::assign;
-using namespace librbd::watch_notify;
-
-void register_test_groups() {
-}
-
-class TestLibCG : public TestFixture {
-
-};
-
-TEST_F(TestLibCG, group_create)
-{
-  librados::IoCtx ioctx;
-  ASSERT_EQ(0, _rados.ioctx_create(_pool_name.c_str(), ioctx));
-
-  librbd::RBD rbd;
-  ASSERT_EQ(0, rbd.group_create(ioctx, "mygroup"));
-
-  vector<string> groups;
-  ASSERT_EQ(0, rbd.group_list(ioctx, &groups));
-  ASSERT_EQ(1U, groups.size());
-  ASSERT_EQ("mygroup", groups[0]);
-
-  ASSERT_EQ(0, rbd.group_remove(ioctx, "mygroup"));
-
-  groups.clear();
-  ASSERT_EQ(0, rbd.group_list(ioctx, &groups));
-  ASSERT_EQ(0U, groups.size());
-}
-
-TEST_F(TestLibCG, add_image)
-{
-  librados::IoCtx ioctx;
-  ASSERT_EQ(0, _rados.ioctx_create(_pool_name.c_str(), ioctx));
-
-  const char *group_name = "mycg";
-  const char *image_name = "myimage";
-  librbd::RBD rbd;
-  ASSERT_EQ(0, rbd.group_create(ioctx, group_name));
-  int order = 14;
-  ASSERT_EQ(0, rbd.create2(ioctx, image_name, 65535,
-                          RBD_FEATURE_LAYERING, &order)); // Specified features make image of new format.
-
-  ASSERT_EQ(0, rbd.group_image_add(ioctx, group_name, ioctx, image_name));
-
-  vector<librbd::group_image_status_t> images;
-  ASSERT_EQ(0, rbd.group_image_list(ioctx, group_name, &images));
-  ASSERT_EQ(1U, images.size());
-  ASSERT_EQ("myimage", images[0].name);
-  ASSERT_EQ(ioctx.get_id(), images[0].pool);
-
-  ASSERT_EQ(0, rbd.group_image_remove(ioctx, group_name, ioctx, image_name));
-
-  images.clear();
-  ASSERT_EQ(0, rbd.group_image_list(ioctx, group_name, &images));
-  ASSERT_EQ(0U, images.size());
-}
index 130a699a96b2167ccad340f23ef769363d778337..dd8d5a8cb0c63ca383fc836e2276bc21190bbc27 100644 (file)
@@ -17,6 +17,7 @@
 #include "librbd/io/ImageRequestWQ.h"
 #include "osdc/Striper.h"
 #include <boost/scope_exit.hpp>
+#include <boost/algorithm/string/predicate.hpp>
 #include <boost/assign/list_of.hpp>
 #include <utility>
 #include <vector>
@@ -1063,6 +1064,8 @@ TEST_F(TestInternal, TestCoR)
     ASSERT_EQ(TEST_IO_SIZE, image.write(itr->second, TEST_IO_SIZE, bl));
   }
 
+  ASSERT_EQ(0, image.flush());
+
   bufferlist readbl;
   printf("verify written data by reading\n");
   {
@@ -1074,15 +1077,18 @@ TEST_F(TestInternal, TestCoR)
 
   int64_t data_pool_id = image.get_data_pool_id();
   rados_ioctx_t d_ioctx;
-  rados_ioctx_create2(_cluster, data_pool_id, &d_ioctx);
+  ASSERT_EQ(0, rados_wait_for_latest_osdmap(_cluster));
+  ASSERT_EQ(0, rados_ioctx_create2(_cluster, data_pool_id, &d_ioctx));
+
+  std::string block_name_prefix = image.get_block_name_prefix() + ".";
 
   const char *entry;
   rados_list_ctx_t list_ctx;
   set<string> obj_checker;
   ASSERT_EQ(0, rados_nobjects_list_open(d_ioctx, &list_ctx));
   while (rados_nobjects_list_next(list_ctx, &entry, NULL, NULL) != -ENOENT) {
-    if (strstr(entry, info.block_name_prefix)) {
-      const char *block_name_suffix = entry + strlen(info.block_name_prefix) + 1;
+    if (boost::starts_with(entry, block_name_prefix)) {
+      const char *block_name_suffix = entry + block_name_prefix.length();
       obj_checker.insert(block_name_suffix);
     }
   }
@@ -1130,12 +1136,12 @@ TEST_F(TestInternal, TestCoR)
 
   printf("check whether child image has the same set of objects as parent\n");
   ASSERT_EQ(0, m_rbd.open(m_ioctx, image, clonename.c_str(), NULL));
-  ASSERT_EQ(0, image.stat(info, sizeof(info)));
+  block_name_prefix = image.get_block_name_prefix() + ".";
 
   ASSERT_EQ(0, rados_nobjects_list_open(d_ioctx, &list_ctx));
   while (rados_nobjects_list_next(list_ctx, &entry, NULL, NULL) != -ENOENT) {
-    if (strstr(entry, info.block_name_prefix)) {
-      const char *block_name_suffix = entry + strlen(info.block_name_prefix) + 1;
+    if (boost::starts_with(entry, block_name_prefix)) {
+      const char *block_name_suffix = entry + block_name_prefix.length();
       set<string>::iterator it = obj_checker.find(block_name_suffix);
       ASSERT_TRUE(it != obj_checker.end());
       obj_checker.erase(it);
@@ -1200,6 +1206,8 @@ TEST_F(TestInternal, FlattenNoEmptyObjects)
     ASSERT_EQ(TEST_IO_SIZE, image.write(itr->second, TEST_IO_SIZE, bl));
   }
 
+  ASSERT_EQ(0, image.flush());
+
   bufferlist readbl;
   printf("verify written data by reading\n");
   {
@@ -1211,15 +1219,18 @@ TEST_F(TestInternal, FlattenNoEmptyObjects)
 
   int64_t data_pool_id = image.get_data_pool_id();
   rados_ioctx_t d_ioctx;
-  rados_ioctx_create2(_cluster, data_pool_id, &d_ioctx);
+  ASSERT_EQ(0, rados_wait_for_latest_osdmap(_cluster));
+  ASSERT_EQ(0, rados_ioctx_create2(_cluster, data_pool_id, &d_ioctx));
+
+  std::string block_name_prefix = image.get_block_name_prefix() + ".";
 
   const char *entry;
   rados_list_ctx_t list_ctx;
   set<string> obj_checker;
   ASSERT_EQ(0, rados_nobjects_list_open(d_ioctx, &list_ctx));
   while (rados_nobjects_list_next(list_ctx, &entry, NULL, NULL) != -ENOENT) {
-    if (strstr(entry, info.block_name_prefix)) {
-      const char *block_name_suffix = entry + strlen(info.block_name_prefix) + 1;
+    if (boost::starts_with(entry, block_name_prefix)) {
+      const char *block_name_suffix = entry + block_name_prefix.length();
       obj_checker.insert(block_name_suffix);
     }
   }
@@ -1244,12 +1255,12 @@ TEST_F(TestInternal, FlattenNoEmptyObjects)
   ASSERT_EQ(0, image.flatten());
 
   printf("check whether child image has the same set of objects as parent\n");
-  ASSERT_EQ(0, image.stat(info, sizeof(info)));
+  block_name_prefix = image.get_block_name_prefix() + ".";
 
   ASSERT_EQ(0, rados_nobjects_list_open(d_ioctx, &list_ctx));
   while (rados_nobjects_list_next(list_ctx, &entry, NULL, NULL) != -ENOENT) {
-    if (strstr(entry, info.block_name_prefix)) {
-      const char *block_name_suffix = entry + strlen(info.block_name_prefix) + 1;
+    if (boost::starts_with(entry, block_name_prefix)) {
+      const char *block_name_suffix = entry + block_name_prefix.length();
       set<string>::iterator it = obj_checker.find(block_name_suffix);
       ASSERT_TRUE(it != obj_checker.end());
       obj_checker.erase(it);
index d1184bbd8b8a81e9658e94c2c5ba81047e90f900..92a9b5c2cc112d3616771b2404bd74db694f71d0 100644 (file)
@@ -4457,7 +4457,7 @@ TEST_F(TestLibRBD, Metadata)
   ASSERT_STREQ(vals + strlen(vals) + 1, "value2");
 
   ASSERT_EQ(0, rbd_metadata_remove(image1, "key1"));
-  ASSERT_EQ(0, rbd_metadata_remove(image1, "key3"));
+  ASSERT_EQ(-ENOENT, rbd_metadata_remove(image1, "key3"));
   value_len = sizeof(value);
   ASSERT_EQ(-ENOENT, rbd_metadata_get(image1, "key3", value, &value_len));
   ASSERT_EQ(0, rbd_metadata_list(image1, "", 0, keys, &keys_len, vals,
@@ -4623,7 +4623,7 @@ TEST_F(TestLibRBD, MetadataPP)
 
   pairs.clear();
   ASSERT_EQ(0, image1.metadata_remove("key1"));
-  ASSERT_EQ(0, image1.metadata_remove("key3"));
+  ASSERT_EQ(-ENOENT, image1.metadata_remove("key3"));
   ASSERT_TRUE(image1.metadata_get("key3", &value) < 0);
   ASSERT_EQ(0, image1.metadata_list("", 0, &pairs));
   ASSERT_EQ(1U, pairs.size());
index 0aa6aeeb81c9603ac4d07b1569fa97aa40292007..be6145c4b2b3edfa89e00b903848b460b4308e53 100644 (file)
@@ -10,7 +10,6 @@
 
 extern void register_test_librbd();
 #ifdef TEST_LIBRBD_INTERNALS
-extern void register_test_groups();
 extern void register_test_image_watcher();
 extern void register_test_internal();
 extern void register_test_journal_entries();
@@ -25,7 +24,6 @@ int main(int argc, char **argv)
 {
   register_test_librbd();
 #ifdef TEST_LIBRBD_INTERNALS
-  register_test_groups();
   register_test_image_watcher();
   register_test_internal();
   register_test_journal_entries();
index 126bd45dee1593293430ccc88178de6c77bbcbc9..38ecfca7f37814fe4098b00f5916924d8fae3874 100644 (file)
@@ -260,3 +260,30 @@ TEST(MonCap, CommandRegEx) {
   ASSERT_FALSE(cap.is_capable(nullptr, CEPH_ENTITY_TYPE_OSD, name, "",
                               "abc", {{"arg", ""}}, true, true, true));
 }
+
+TEST(MonCap, ProfileBootstrapRBD) {
+  MonCap cap;
+  ASSERT_FALSE(cap.is_allow_all());
+  ASSERT_TRUE(cap.parse("profile bootstrap-rbd", NULL));
+
+  EntityName name;
+  name.from_str("mon.a");
+  ASSERT_TRUE(cap.is_capable(nullptr, CEPH_ENTITY_TYPE_MON, name, "",
+                             "auth get-or-create", {
+                               {"entity", "client.rbd"},
+                               {"caps_mon", "profile rbd"},
+                               {"caps_osd", "profile rbd pool=foo, profile rbd-read-only"},
+                             }, true, true, true));
+  ASSERT_FALSE(cap.is_capable(nullptr, CEPH_ENTITY_TYPE_MON, name, "",
+                              "auth get-or-create", {
+                                {"entity", "client.rbd"},
+                                {"caps_mon", "allow *"},
+                                {"caps_osd", "profile rbd"},
+                              }, true, true, true));
+  ASSERT_FALSE(cap.is_capable(nullptr, CEPH_ENTITY_TYPE_MON, name, "",
+                              "auth get-or-create", {
+                                {"entity", "client.rbd"},
+                                {"caps_mon", "profile rbd"},
+                                {"caps_osd", "profile rbd pool=foo, allow *, profile rbd-read-only"},
+                              }, true, true, true));
+}
index a372ff602794050ab3d12186f09ca6aa0364e966..66fab30d07ac20d562fe06cae1eb0333dccf49ed 100644 (file)
@@ -63,6 +63,7 @@ public:
   explicit MonClientHelper(CephContext *cct_)
     : Dispatcher(cct_),
       cct(cct_),
+      msg(NULL),
       monc(cct_),
       lock("mon-msg-test::lock")
   { }
index fb7a0c9f97f1779efd8e1e5affd9d520a8083e52..77919999117f49e7c3a7d165ff9c3f36f0bb0572 100644 (file)
@@ -452,6 +452,10 @@ TEST_F(TestLFNIndex, get_mangled_name) {
 
 int main(int argc, char **argv) {
   int fd = ::creat("detect", 0600);
+  if (fd < 0){
+    cerr << "failed to create file detect" << std::endl;
+    return EXIT_FAILURE;
+  }
   int ret = chain_fsetxattr(fd, "user.test", "A", 1);
   ::close(fd);
   ::unlink("detect");
index 70a71328a848d77d1300795ed4f2f42cb13766e9..bf29f87c4e55fda6d174835da9f36b3ade15f24e 100644 (file)
@@ -93,6 +93,16 @@ struct PGLogTestBase {
     e.reqid = reqid;
     return e;
   }
+  static pg_log_entry_t mk_ple_err(
+    const hobject_t &hoid, eversion_t v, osd_reqid_t reqid) {
+    pg_log_entry_t e;
+    e.op = pg_log_entry_t::ERROR;
+    e.soid = hoid;
+    e.version = v;
+    e.prior_version = eversion_t(0, 0);
+    e.reqid = reqid;
+    return e;
+  }
   static pg_log_entry_t mk_ple_mod(
     const hobject_t &hoid, eversion_t v, eversion_t pv) {
     return mk_ple_mod(hoid, v, pv, osd_reqid_t());
@@ -109,6 +119,10 @@ struct PGLogTestBase {
     const hobject_t &hoid, eversion_t v, eversion_t pv) {
     return mk_ple_dt_rb(hoid, v, pv, osd_reqid_t());
   }
+  static pg_log_entry_t mk_ple_err(
+    const hobject_t &hoid, eversion_t v) {
+    return mk_ple_err(hoid, v, osd_reqid_t());
+  }
 }; // PGLogTestBase
 
 
@@ -2909,6 +2923,62 @@ TEST_F(PGLogTrimTest, TestGetRequest) {
   EXPECT_FALSE(result);
 }
 
+TEST_F(PGLogTest, _merge_object_divergent_entries) {
+  {
+    // Test for issue 20843
+    clear();
+    hobject_t hoid(object_t(/*name*/"notify.7"),
+                   /*key*/string(""),
+                   /*snap*/7,
+                   /*hash*/77,
+                   /*pool*/5,
+                   /*nspace*/string(""));
+    mempool::osd_pglog::list<pg_log_entry_t> orig_entries;
+    orig_entries.push_back(mk_ple_mod(hoid, eversion_t(8336, 957), eversion_t(8336, 952)));
+    orig_entries.push_back(mk_ple_err(hoid, eversion_t(8336, 958)));
+    orig_entries.push_back(mk_ple_err(hoid, eversion_t(8336, 959)));
+    orig_entries.push_back(mk_ple_mod(hoid, eversion_t(8336, 960), eversion_t(8336, 957)));
+    log.add(mk_ple_mod(hoid, eversion_t(8973, 1075), eversion_t(8971, 1070)));
+    missing.add(hoid,
+                /*need*/eversion_t(8971, 1070),
+                /*have*/eversion_t(8336, 952),
+                false);
+    pg_info_t oinfo;
+    LogHandler rollbacker;
+    _merge_object_divergent_entries(log, hoid,
+                                    orig_entries, oinfo,
+                                    log.get_can_rollback_to(),
+                                    missing, &rollbacker,
+                                    this);
+    // No core dump
+  }
+  {
+    // skip leading error entries
+    clear();
+    hobject_t hoid(object_t(/*name*/"notify.7"),
+                   /*key*/string(""),
+                   /*snap*/7,
+                   /*hash*/77,
+                   /*pool*/5,
+                   /*nspace*/string(""));
+    mempool::osd_pglog::list<pg_log_entry_t> orig_entries;
+    orig_entries.push_back(mk_ple_err(hoid, eversion_t(8336, 956)));
+    orig_entries.push_back(mk_ple_mod(hoid, eversion_t(8336, 957), eversion_t(8336, 952)));
+    log.add(mk_ple_mod(hoid, eversion_t(8973, 1075), eversion_t(8971, 1070)));
+    missing.add(hoid,
+                /*need*/eversion_t(8971, 1070),
+                /*have*/eversion_t(8336, 952),
+                false);
+    pg_info_t oinfo;
+    LogHandler rollbacker;
+    _merge_object_divergent_entries(log, hoid,
+                                    orig_entries, oinfo,
+                                    log.get_can_rollback_to(),
+                                    missing, &rollbacker,
+                                    this);
+    // No core dump
+  }
+}
 
 // Local Variables:
 // compile-command: "cd ../.. ; make unittest_pglog ; ./unittest_pglog --log-to-stderr=true  --debug-osd=20 # --gtest_filter=*.* "
index 5e8e18d19043366503cda6b27e8fff1a96f29fa4..e72045a5918887c5f56d6e8658aa7665d7f53f0f 100644 (file)
@@ -42,6 +42,7 @@ def setup_module():
     rados.create_pool(pool_name)
     global ioctx
     ioctx = rados.open_ioctx(pool_name)
+    ioctx.application_enable('rbd')
     global features
     features = os.getenv("RBD_FEATURES")
     features = int(features) if features is not None else 61
@@ -949,6 +950,7 @@ class TestClone(object):
         pool_name2 = get_temp_pool_name()
         rados.create_pool(pool_name2)
         other_ioctx = rados.open_ioctx(pool_name2)
+        other_ioctx.application_enable('rbd')
 
         # ...with a clone of the same parent
         other_clone_name = get_temp_image_name()
diff --git a/ceph/src/test/rbd-ggate.sh b/ceph/src/test/rbd-ggate.sh
new file mode 100755 (executable)
index 0000000..397a9ae
--- /dev/null
@@ -0,0 +1,24 @@
+#!/bin/bash -ex
+#
+# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com>
+# Copyright (C) 2013 Cloudwatt <libre.licensing@cloudwatt.com>
+#
+# Author: Loic Dachary <loic@dachary.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Library Public License for more details.
+#
+source $(dirname $0)/detect-build-env-vars.sh
+
+test `uname` = FreeBSD
+
+CEPH_CLI_TEST_DUP_COMMAND=1 \
+MON=1 OSD=3 MDS=0 MGR=1 CEPH_PORT=7206 $CEPH_ROOT/src/test/vstart_wrapper.sh \
+    $CEPH_ROOT/qa/workunits/rbd/rbd-ggate.sh \
index ba5e13f3e1f7eb03c5a11e29862e84cbc53c2144..47029cd9034cfbe1533dd37be9da3c90202a57dc 100644 (file)
@@ -26,7 +26,9 @@ add_executable(unittest_rbd_mirror
   image_replayer/test_mock_BootstrapRequest.cc
   image_replayer/test_mock_CreateImageRequest.cc
   image_replayer/test_mock_EventPreprocessor.cc
+  image_replayer/test_mock_GetMirrorImageIdRequest.cc
   image_replayer/test_mock_PrepareLocalImageRequest.cc
+  image_replayer/test_mock_PrepareRemoteImageRequest.cc
   image_sync/test_mock_ImageCopyRequest.cc
   image_sync/test_mock_ObjectCopyRequest.cc
   image_sync/test_mock_SnapshotCopyRequest.cc
index c7b995387e1d3b8e8f0dcf9d3064c028fb7ba008..c372a4b9fc3970ca15403304d019e4e2d99ef78e 100644 (file)
@@ -36,6 +36,18 @@ struct TypeTraits<librbd::MockTestImageCtx> {
 };
 
 } // namespace journal
+
+namespace util {
+
+static std::string s_image_id;
+
+template <>
+std::string generate_image_id<MockTestImageCtx>(librados::IoCtx&) {
+  assert(!s_image_id.empty());
+  return s_image_id;
+}
+
+} // namespace util
 } // namespace librbd
 
 namespace rbd {
@@ -114,7 +126,6 @@ struct CloseImageRequest<librbd::MockTestImageCtx> {
 template<>
 struct CreateImageRequest<librbd::MockTestImageCtx> {
   static CreateImageRequest* s_instance;
-  std::string *local_image_id = nullptr;
   Context *on_finish = nullptr;
 
   static CreateImageRequest* create(librados::IoCtx &local_io_ctx,
@@ -122,12 +133,12 @@ struct CreateImageRequest<librbd::MockTestImageCtx> {
                                     const std::string &global_image_id,
                                     const std::string &remote_mirror_uuid,
                                     const std::string &local_image_name,
+                                   const std::string &local_image_id,
                                     librbd::MockTestImageCtx *remote_image_ctx,
-                                   std::string *local_image_id,
                                     Context *on_finish) {
     assert(s_instance != nullptr);
-    s_instance->local_image_id = local_image_id;
     s_instance->on_finish = on_finish;
+    s_instance->construct(local_image_id);
     return s_instance;
   }
 
@@ -139,6 +150,7 @@ struct CreateImageRequest<librbd::MockTestImageCtx> {
     s_instance = nullptr;
   }
 
+  MOCK_METHOD1(construct, void(const std::string&));
   MOCK_METHOD0(send, void());
 };
 
@@ -325,6 +337,14 @@ public:
                                   })));
   }
 
+  void expect_journaler_unregister_client(::journal::MockJournaler &mock_journaler,
+                                          int r) {
+    EXPECT_CALL(mock_journaler, unregister_client(_))
+      .WillOnce(Invoke([this, r](Context *on_finish) {
+                  m_threads->work_queue->queue(on_finish, r);
+                }));
+  }
+
   void expect_journaler_update_client(::journal::MockJournaler &mock_journaler,
                                       const librbd::journal::ClientData &client_data,
                                       int r) {
@@ -399,9 +419,9 @@ public:
 
   void expect_create_image(MockCreateImageRequest &mock_create_image_request,
                            const std::string &image_id, int r) {
+    EXPECT_CALL(mock_create_image_request, construct(image_id));
     EXPECT_CALL(mock_create_image_request, send())
-      .WillOnce(Invoke([this, &mock_create_image_request, image_id, r]() {
-          *mock_create_image_request.local_image_id = image_id;
+      .WillOnce(Invoke([this, &mock_create_image_request, r]() {
           m_threads->work_queue->queue(mock_create_image_request.on_finish, r);
         }));
   }
@@ -468,6 +488,13 @@ TEST_F(TestMockImageReplayerBootstrapRequest, NonPrimaryRemoteSyncingState) {
                               librbd::Journal<>::IMAGE_CLIENT_ID,
                               client, 0);
 
+  // open the remote image
+  librbd::MockJournal mock_journal;
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  MockOpenImageRequest mock_open_image_request;
+  expect_open_image(mock_open_image_request, m_remote_io_ctx,
+                    mock_remote_image_ctx.id, mock_remote_image_ctx, 0);
+
   // lookup local peer in remote journal
   librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
   librbd::journal::MirrorPeerClientMeta mirror_peer_client_meta{
@@ -479,12 +506,7 @@ TEST_F(TestMockImageReplayerBootstrapRequest, NonPrimaryRemoteSyncingState) {
   expect_journaler_get_client(mock_journaler, "local mirror uuid",
                               client, 0);
 
-  // open the remote image
-  librbd::MockJournal mock_journal;
-  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
-  MockOpenImageRequest mock_open_image_request;
-  expect_open_image(mock_open_image_request, m_remote_io_ctx,
-                    mock_remote_image_ctx.id, mock_remote_image_ctx, 0);
+  // test if remote image is primary
   MockIsPrimaryRequest mock_is_primary_request;
   expect_is_primary(mock_is_primary_request, false, 0);
 
@@ -521,6 +543,13 @@ TEST_F(TestMockImageReplayerBootstrapRequest, RemoteDemotePromote) {
                               librbd::Journal<>::IMAGE_CLIENT_ID,
                               client, 0);
 
+  // open the remote image
+  librbd::MockJournal mock_journal;
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  MockOpenImageRequest mock_open_image_request;
+  expect_open_image(mock_open_image_request, m_remote_io_ctx,
+                    mock_remote_image_ctx.id, mock_remote_image_ctx, 0);
+
   // lookup local peer in remote journal
   librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
   librbd::journal::MirrorPeerClientMeta mirror_peer_client_meta{
@@ -532,12 +561,7 @@ TEST_F(TestMockImageReplayerBootstrapRequest, RemoteDemotePromote) {
   expect_journaler_get_client(mock_journaler, "local mirror uuid",
                               client, 0);
 
-  // open the remote image
-  librbd::MockJournal mock_journal;
-  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
-  MockOpenImageRequest mock_open_image_request;
-  expect_open_image(mock_open_image_request, m_remote_io_ctx,
-                    mock_remote_image_ctx.id, mock_remote_image_ctx, 0);
+  // test if remote image is primary
   MockIsPrimaryRequest mock_is_primary_request;
   expect_is_primary(mock_is_primary_request, true, 0);
 
@@ -595,6 +619,13 @@ TEST_F(TestMockImageReplayerBootstrapRequest, MultipleRemoteDemotePromotes) {
                               librbd::Journal<>::IMAGE_CLIENT_ID,
                               client, 0);
 
+  // open the remote image
+  librbd::MockJournal mock_journal;
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  MockOpenImageRequest mock_open_image_request;
+  expect_open_image(mock_open_image_request, m_remote_io_ctx,
+                    mock_remote_image_ctx.id, mock_remote_image_ctx, 0);
+
   // lookup local peer in remote journal
   librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
   librbd::journal::MirrorPeerClientMeta mirror_peer_client_meta{
@@ -606,12 +637,7 @@ TEST_F(TestMockImageReplayerBootstrapRequest, MultipleRemoteDemotePromotes) {
   expect_journaler_get_client(mock_journaler, "local mirror uuid",
                               client, 0);
 
-  // open the remote image
-  librbd::MockJournal mock_journal;
-  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
-  MockOpenImageRequest mock_open_image_request;
-  expect_open_image(mock_open_image_request, m_remote_io_ctx,
-                    mock_remote_image_ctx.id, mock_remote_image_ctx, 0);
+  // test if remote image is primary
   MockIsPrimaryRequest mock_is_primary_request;
   expect_is_primary(mock_is_primary_request, true, 0);
 
@@ -679,6 +705,13 @@ TEST_F(TestMockImageReplayerBootstrapRequest, LocalDemoteRemotePromote) {
                               librbd::Journal<>::IMAGE_CLIENT_ID,
                               client, 0);
 
+  // open the remote image
+  librbd::MockJournal mock_journal;
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  MockOpenImageRequest mock_open_image_request;
+  expect_open_image(mock_open_image_request, m_remote_io_ctx,
+                    mock_remote_image_ctx.id, mock_remote_image_ctx, 0);
+
   // lookup local peer in remote journal
   librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
   librbd::journal::MirrorPeerClientMeta mirror_peer_client_meta{
@@ -690,12 +723,7 @@ TEST_F(TestMockImageReplayerBootstrapRequest, LocalDemoteRemotePromote) {
   expect_journaler_get_client(mock_journaler, "local mirror uuid",
                               client, 0);
 
-  // open the remote image
-  librbd::MockJournal mock_journal;
-  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
-  MockOpenImageRequest mock_open_image_request;
-  expect_open_image(mock_open_image_request, m_remote_io_ctx,
-                    mock_remote_image_ctx.id, mock_remote_image_ctx, 0);
+  // test if remote image is primary
   MockIsPrimaryRequest mock_is_primary_request;
   expect_is_primary(mock_is_primary_request, true, 0);
 
@@ -751,6 +779,13 @@ TEST_F(TestMockImageReplayerBootstrapRequest, SplitBrainForcePromote) {
                               librbd::Journal<>::IMAGE_CLIENT_ID,
                               client, 0);
 
+  // open the remote image
+  librbd::MockJournal mock_journal;
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  MockOpenImageRequest mock_open_image_request;
+  expect_open_image(mock_open_image_request, m_remote_io_ctx,
+                    mock_remote_image_ctx.id, mock_remote_image_ctx, 0);
+
   // lookup local peer in remote journal
   librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
   librbd::journal::MirrorPeerClientMeta mirror_peer_client_meta{
@@ -762,12 +797,7 @@ TEST_F(TestMockImageReplayerBootstrapRequest, SplitBrainForcePromote) {
   expect_journaler_get_client(mock_journaler, "local mirror uuid",
                               client, 0);
 
-  // open the remote image
-  librbd::MockJournal mock_journal;
-  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
-  MockOpenImageRequest mock_open_image_request;
-  expect_open_image(mock_open_image_request, m_remote_io_ctx,
-                    mock_remote_image_ctx.id, mock_remote_image_ctx, 0);
+  // test if remote image is primary
   MockIsPrimaryRequest mock_is_primary_request;
   expect_is_primary(mock_is_primary_request, true, 0);
 
@@ -823,6 +853,13 @@ TEST_F(TestMockImageReplayerBootstrapRequest, ResyncRequested) {
                               librbd::Journal<>::IMAGE_CLIENT_ID,
                               client, 0);
 
+  // open the remote image
+  librbd::MockJournal mock_journal;
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  MockOpenImageRequest mock_open_image_request;
+  expect_open_image(mock_open_image_request, m_remote_io_ctx,
+                    mock_remote_image_ctx.id, mock_remote_image_ctx, 0);
+
   // lookup local peer in remote journal
   librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
   librbd::journal::MirrorPeerClientMeta mirror_peer_client_meta{
@@ -834,12 +871,7 @@ TEST_F(TestMockImageReplayerBootstrapRequest, ResyncRequested) {
   expect_journaler_get_client(mock_journaler, "local mirror uuid",
                               client, 0);
 
-  // open the remote image
-  librbd::MockJournal mock_journal;
-  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
-  MockOpenImageRequest mock_open_image_request;
-  expect_open_image(mock_open_image_request, m_remote_io_ctx,
-                    mock_remote_image_ctx.id, mock_remote_image_ctx, 0);
+  // test if remote image is primary
   MockIsPrimaryRequest mock_is_primary_request;
   expect_is_primary(mock_is_primary_request, true, 0);
 
@@ -883,6 +915,13 @@ TEST_F(TestMockImageReplayerBootstrapRequest, PrimaryRemote) {
                               librbd::Journal<>::IMAGE_CLIENT_ID,
                               client, 0);
 
+  // open the remote image
+  librbd::MockJournal mock_journal;
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  MockOpenImageRequest mock_open_image_request;
+  expect_open_image(mock_open_image_request, m_remote_io_ctx,
+                    mock_remote_image_ctx.id, mock_remote_image_ctx, 0);
+
   // lookup local peer in remote journal
   client = {};
   expect_journaler_get_client(mock_journaler, "local mirror uuid",
@@ -893,19 +932,23 @@ TEST_F(TestMockImageReplayerBootstrapRequest, PrimaryRemote) {
   client_data.client_meta = mirror_peer_client_meta;
   expect_journaler_register_client(mock_journaler, client_data, 0);
 
-  // open the remote image
-  librbd::MockJournal mock_journal;
-  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
-  MockOpenImageRequest mock_open_image_request;
-  expect_open_image(mock_open_image_request, m_remote_io_ctx,
-                    mock_remote_image_ctx.id, mock_remote_image_ctx, 0);
+  // test if remote image is primary
   MockIsPrimaryRequest mock_is_primary_request;
   expect_is_primary(mock_is_primary_request, true, 0);
 
-  // create the local image
+  // update client state back to syncing
   librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
   mock_local_image_ctx.journal = &mock_journal;
 
+  librbd::util::s_image_id = mock_local_image_ctx.id;
+  mirror_peer_client_meta = {mock_local_image_ctx.id};
+  mirror_peer_client_meta.state = librbd::journal::MIRROR_PEER_STATE_SYNCING;
+  client_data.client_meta = mirror_peer_client_meta;
+  client.data.clear();
+  ::encode(client_data, client.data);
+  expect_journaler_update_client(mock_journaler, client_data, 0);
+
+  // create the local image
   MockCreateImageRequest mock_create_image_request;
   expect_create_image(mock_create_image_request, mock_local_image_ctx.id, 0);
 
@@ -915,14 +958,6 @@ TEST_F(TestMockImageReplayerBootstrapRequest, PrimaryRemote) {
                           mock_local_image_ctx.id, &mock_local_image_ctx, 0);
   expect_is_resync_requested(mock_journal, false, 0);
 
-  // update client state back to syncing
-  mirror_peer_client_meta = {mock_local_image_ctx.id};
-  mirror_peer_client_meta.state = librbd::journal::MIRROR_PEER_STATE_SYNCING;
-  client_data.client_meta = mirror_peer_client_meta;
-  client.data.clear();
-  ::encode(client_data, client.data);
-  expect_journaler_update_client(mock_journaler, client_data, 0);
-
   // sync the remote image to the local image
   MockImageSync mock_image_sync;
   expect_image_sync(mock_image_sync, 0);
@@ -955,6 +990,13 @@ TEST_F(TestMockImageReplayerBootstrapRequest, PrimaryRemoteLocalDeleted) {
                               librbd::Journal<>::IMAGE_CLIENT_ID,
                               client, 0);
 
+  // open the remote image
+  librbd::MockJournal mock_journal;
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  MockOpenImageRequest mock_open_image_request;
+  expect_open_image(mock_open_image_request, m_remote_io_ctx,
+                    mock_remote_image_ctx.id, mock_remote_image_ctx, 0);
+
   // lookup local peer in remote journal
   librbd::journal::MirrorPeerClientMeta mirror_peer_client_meta{
     "missing image id"};
@@ -965,12 +1007,7 @@ TEST_F(TestMockImageReplayerBootstrapRequest, PrimaryRemoteLocalDeleted) {
   expect_journaler_get_client(mock_journaler, "local mirror uuid",
                               client, 0);
 
-  // open the remote image
-  librbd::MockJournal mock_journal;
-  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
-  MockOpenImageRequest mock_open_image_request;
-  expect_open_image(mock_open_image_request, m_remote_io_ctx,
-                    mock_remote_image_ctx.id, mock_remote_image_ctx, 0);
+  // test if remote image is primary
   MockIsPrimaryRequest mock_is_primary_request;
   expect_is_primary(mock_is_primary_request, true, 0);
 
@@ -979,10 +1016,28 @@ TEST_F(TestMockImageReplayerBootstrapRequest, PrimaryRemoteLocalDeleted) {
   expect_open_local_image(mock_open_local_image_request, m_local_io_ctx,
                           "missing image id", nullptr, -ENOENT);
 
-  // create the missing local image
+  // re-register the client
+  expect_journaler_unregister_client(mock_journaler, 0);
+  mirror_peer_client_meta = {};
+  client_data.client_meta = mirror_peer_client_meta;
+  expect_journaler_register_client(mock_journaler, client_data, 0);
+
+  // test if remote image is primary
+  expect_is_primary(mock_is_primary_request, true, 0);
+
+  // update client state back to syncing
   librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
   mock_local_image_ctx.journal = &mock_journal;
 
+  librbd::util::s_image_id = mock_local_image_ctx.id;
+  mirror_peer_client_meta = {mock_local_image_ctx.id};
+  mirror_peer_client_meta.state = librbd::journal::MIRROR_PEER_STATE_SYNCING;
+  client_data.client_meta = mirror_peer_client_meta;
+  client.data.clear();
+  ::encode(client_data, client.data);
+  expect_journaler_update_client(mock_journaler, client_data, 0);
+
+  // create the missing local image
   MockCreateImageRequest mock_create_image_request;
   expect_create_image(mock_create_image_request, mock_local_image_ctx.id, 0);
 
@@ -991,14 +1046,6 @@ TEST_F(TestMockImageReplayerBootstrapRequest, PrimaryRemoteLocalDeleted) {
                           mock_local_image_ctx.id, &mock_local_image_ctx, 0);
   expect_is_resync_requested(mock_journal, false, 0);
 
-  // update client state back to syncing
-  mirror_peer_client_meta = {mock_local_image_ctx.id};
-  mirror_peer_client_meta.state = librbd::journal::MIRROR_PEER_STATE_SYNCING;
-  client_data.client_meta = mirror_peer_client_meta;
-  client.data.clear();
-  ::encode(client_data, client.data);
-  expect_journaler_update_client(mock_journaler, client_data, 0);
-
   // sync the remote image to the local image
   MockImageSync mock_image_sync;
   expect_image_sync(mock_image_sync, 0);
index 9abbbcbfdf0ab133a6c9adfef6b014f4a551f4fc..df0c4b2eac6f89c93b1ecee3196dcbf1c59d4a09 100644 (file)
@@ -313,13 +313,13 @@ public:
   MockCreateImageRequest *create_request(const std::string &global_image_id,
                                          const std::string &remote_mirror_uuid,
                                          const std::string &local_image_name,
+                                        const std::string &local_image_id,
                                          librbd::MockTestImageCtx &mock_remote_image_ctx,
-                                        std::string *local_image_id,
                                          Context *on_finish) {
     return new MockCreateImageRequest(m_local_io_ctx, m_threads->work_queue,
                                       global_image_id, remote_mirror_uuid,
-                                      local_image_name, &mock_remote_image_ctx,
-                                      local_image_id, on_finish);
+                                      local_image_name, local_image_id,
+                                      &mock_remote_image_ctx, on_finish);
   }
 
   librbd::ImageCtx *m_remote_image_ctx;
@@ -333,14 +333,11 @@ TEST_F(TestMockImageReplayerCreateImageRequest, Create) {
   expect_create_image(mock_create_request, m_local_io_ctx, 0);
 
   C_SaferCond ctx;
-  std::string local_image_id;
   MockCreateImageRequest *request = create_request("global uuid", "remote uuid",
-                                                   "image name",
-                                                   mock_remote_image_ctx,
-                                                  &local_image_id, &ctx);
+                                                   "image name", "101241a7c4c9",
+                                                   mock_remote_image_ctx, &ctx);
   request->send();
   ASSERT_EQ(0, ctx.wait());
-  ASSERT_FALSE(local_image_id.empty());
 }
 
 TEST_F(TestMockImageReplayerCreateImageRequest, CreateError) {
@@ -351,11 +348,9 @@ TEST_F(TestMockImageReplayerCreateImageRequest, CreateError) {
   expect_create_image(mock_create_request, m_local_io_ctx, -EINVAL);
 
   C_SaferCond ctx;
-  std::string local_image_id;
   MockCreateImageRequest *request = create_request("global uuid", "remote uuid",
-                                                   "image name",
-                                                   mock_remote_image_ctx,
-                                                   &local_image_id, &ctx);
+                                                   "image name", "101241a7c4c9",
+                                                   mock_remote_image_ctx, &ctx);
   request->send();
   ASSERT_EQ(-EINVAL, ctx.wait());
 }
@@ -396,14 +391,12 @@ TEST_F(TestMockImageReplayerCreateImageRequest, Clone) {
   expect_close_image(mock_close_image_request, mock_remote_parent_image_ctx, 0);
 
   C_SaferCond ctx;
-  std::string local_image_id;
   MockCreateImageRequest *request = create_request("global uuid", "remote uuid",
-                                                   "image name",
+                                                   "image name", "101241a7c4c9",
                                                    mock_remote_clone_image_ctx,
-                                                   &local_image_id, &ctx);
+                                                   &ctx);
   request->send();
   ASSERT_EQ(0, ctx.wait());
-  ASSERT_FALSE(local_image_id.empty());
 }
 
 TEST_F(TestMockImageReplayerCreateImageRequest, CloneGetGlobalImageIdError) {
@@ -423,11 +416,10 @@ TEST_F(TestMockImageReplayerCreateImageRequest, CloneGetGlobalImageIdError) {
   expect_get_parent_global_image_id(m_remote_io_ctx, "global uuid", -ENOENT);
 
   C_SaferCond ctx;
-  std::string local_image_id;
   MockCreateImageRequest *request = create_request("global uuid", "remote uuid",
-                                                   "image name",
+                                                   "image name", "101241a7c4c9",
                                                    mock_remote_clone_image_ctx,
-                                                   &local_image_id, &ctx);
+                                                   &ctx);
   request->send();
   ASSERT_EQ(-ENOENT, ctx.wait());
 }
@@ -450,11 +442,10 @@ TEST_F(TestMockImageReplayerCreateImageRequest, CloneGetLocalParentImageIdError)
   expect_mirror_image_get_image_id(m_local_io_ctx, "local parent id", -ENOENT);
 
   C_SaferCond ctx;
-  std::string local_image_id;
   MockCreateImageRequest *request = create_request("global uuid", "remote uuid",
-                                                   "image name",
+                                                   "image name", "101241a7c4c9",
                                                    mock_remote_clone_image_ctx,
-                                                   &local_image_id, &ctx);
+                                                   &ctx);
   request->send();
   ASSERT_EQ(-ENOENT, ctx.wait());
 }
@@ -482,11 +473,10 @@ TEST_F(TestMockImageReplayerCreateImageRequest, CloneOpenRemoteParentError) {
                     m_remote_image_ctx->id, mock_remote_parent_image_ctx, -ENOENT);
 
   C_SaferCond ctx;
-  std::string local_image_id;
   MockCreateImageRequest *request = create_request("global uuid", "remote uuid",
-                                                   "image name",
+                                                   "image name", "101241a7c4c9",
                                                    mock_remote_clone_image_ctx,
-                                                   &local_image_id, &ctx);
+                                                   &ctx);
   request->send();
   ASSERT_EQ(-ENOENT, ctx.wait());
 }
@@ -524,11 +514,10 @@ TEST_F(TestMockImageReplayerCreateImageRequest, CloneOpenLocalParentError) {
   expect_close_image(mock_close_image_request, mock_remote_parent_image_ctx, 0);
 
   C_SaferCond ctx;
-  std::string local_image_id;
   MockCreateImageRequest *request = create_request("global uuid", "remote uuid",
-                                                   "image name",
+                                                   "image name", "101241a7c4c9",
                                                    mock_remote_clone_image_ctx,
-                                                   &local_image_id, &ctx);
+                                                   &ctx);
   request->send();
   ASSERT_EQ(-ENOENT, ctx.wait());
 }
@@ -568,11 +557,10 @@ TEST_F(TestMockImageReplayerCreateImageRequest, CloneSnapSetError) {
   expect_close_image(mock_close_image_request, mock_remote_parent_image_ctx, 0);
 
   C_SaferCond ctx;
-  std::string local_image_id;
   MockCreateImageRequest *request = create_request("global uuid", "remote uuid",
-                                                   "image name",
+                                                   "image name", "101241a7c4c9",
                                                    mock_remote_clone_image_ctx,
-                                                   &local_image_id, &ctx);
+                                                   &ctx);
   request->send();
   ASSERT_EQ(-ENOENT, ctx.wait());
 }
@@ -613,11 +601,10 @@ TEST_F(TestMockImageReplayerCreateImageRequest, CloneError) {
   expect_close_image(mock_close_image_request, mock_remote_parent_image_ctx, 0);
 
   C_SaferCond ctx;
-  std::string local_image_id;
   MockCreateImageRequest *request = create_request("global uuid", "remote uuid",
-                                                   "image name",
+                                                   "image name", "101241a7c4c9",
                                                    mock_remote_clone_image_ctx,
-                                                   &local_image_id, &ctx);
+                                                   &ctx);
   request->send();
   ASSERT_EQ(-EINVAL, ctx.wait());
 }
@@ -658,14 +645,12 @@ TEST_F(TestMockImageReplayerCreateImageRequest, CloneLocalParentCloseError) {
   expect_close_image(mock_close_image_request, mock_remote_parent_image_ctx, 0);
 
   C_SaferCond ctx;
-  std::string local_image_id;
   MockCreateImageRequest *request = create_request("global uuid", "remote uuid",
-                                                   "image name",
+                                                   "image name", "101241a7c4c9",
                                                    mock_remote_clone_image_ctx,
-                                                   &local_image_id, &ctx);
+                                                   &ctx);
   request->send();
   ASSERT_EQ(0, ctx.wait());
-  ASSERT_FALSE(local_image_id.empty());
 }
 
 TEST_F(TestMockImageReplayerCreateImageRequest, CloneRemoteParentCloseError) {
@@ -704,14 +689,12 @@ TEST_F(TestMockImageReplayerCreateImageRequest, CloneRemoteParentCloseError) {
   expect_close_image(mock_close_image_request, mock_remote_parent_image_ctx, -EINVAL);
 
   C_SaferCond ctx;
-  std::string local_image_id;
   MockCreateImageRequest *request = create_request("global uuid", "remote uuid",
-                                                   "image name",
+                                                   "image name", "101241a7c4c9",
                                                    mock_remote_clone_image_ctx,
-                                                   &local_image_id, &ctx);
+                                                   &ctx);
   request->send();
   ASSERT_EQ(0, ctx.wait());
-  ASSERT_FALSE(local_image_id.empty());
 }
 
 } // namespace image_replayer
diff --git a/ceph/src/test/rbd_mirror/image_replayer/test_mock_GetMirrorImageIdRequest.cc b/ceph/src/test/rbd_mirror/image_replayer/test_mock_GetMirrorImageIdRequest.cc
new file mode 100644 (file)
index 0000000..eaa9882
--- /dev/null
@@ -0,0 +1,106 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "test/rbd_mirror/test_mock_fixture.h"
+#include "cls/rbd/cls_rbd_types.h"
+#include "librbd/journal/TypeTraits.h"
+#include "tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h"
+#include "test/journal/mock/MockJournaler.h"
+#include "test/librados_test_stub/MockTestMemIoCtxImpl.h"
+#include "test/librbd/mock/MockImageCtx.h"
+#include "test/librbd/mock/MockJournal.h"
+
+namespace librbd {
+
+namespace {
+
+struct MockTestImageCtx : public librbd::MockImageCtx {
+  MockTestImageCtx(librbd::ImageCtx &image_ctx)
+    : librbd::MockImageCtx(image_ctx) {
+  }
+};
+
+} // anonymous namespace
+} // namespace librbd
+
+// template definitions
+#include "tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.cc"
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+using ::testing::_;
+using ::testing::DoAll;
+using ::testing::InSequence;
+using ::testing::Invoke;
+using ::testing::Return;
+using ::testing::StrEq;
+using ::testing::WithArg;
+using ::testing::WithArgs;
+
+class TestMockImageReplayerGetMirrorImageIdRequest : public TestMockFixture {
+public:
+  typedef GetMirrorImageIdRequest<librbd::MockTestImageCtx> MockGetMirrorImageIdRequest;
+
+  void expect_mirror_image_get_image_id(librados::IoCtx &io_ctx,
+                                        const std::string &image_id, int r) {
+    bufferlist bl;
+    ::encode(image_id, bl);
+
+    EXPECT_CALL(get_mock_io_ctx(io_ctx),
+                exec(RBD_MIRRORING, _, StrEq("rbd"), StrEq("mirror_image_get_image_id"), _, _, _))
+      .WillOnce(DoAll(WithArg<5>(Invoke([bl](bufferlist *out_bl) {
+                                          *out_bl = bl;
+                                        })),
+                      Return(r)));
+  }
+
+};
+
+TEST_F(TestMockImageReplayerGetMirrorImageIdRequest, Success) {
+  InSequence seq;
+  expect_mirror_image_get_image_id(m_local_io_ctx, "image id", 0);
+
+  std::string image_id;
+  C_SaferCond ctx;
+  auto req = MockGetMirrorImageIdRequest::create(m_local_io_ctx,
+                                                 "global image id",
+                                                 &image_id, &ctx);
+  req->send();
+
+  ASSERT_EQ(0, ctx.wait());
+  ASSERT_EQ(std::string("image id"), image_id);
+}
+
+TEST_F(TestMockImageReplayerGetMirrorImageIdRequest, MirrorImageIdDNE) {
+  InSequence seq;
+  expect_mirror_image_get_image_id(m_local_io_ctx, "", -ENOENT);
+
+  std::string image_id;
+  C_SaferCond ctx;
+  auto req = MockGetMirrorImageIdRequest::create(m_local_io_ctx,
+                                                 "global image id",
+                                                 &image_id, &ctx);
+  req->send();
+
+  ASSERT_EQ(-ENOENT, ctx.wait());
+}
+
+TEST_F(TestMockImageReplayerGetMirrorImageIdRequest, MirrorImageIdError) {
+  InSequence seq;
+  expect_mirror_image_get_image_id(m_local_io_ctx, "", -EINVAL);
+
+  std::string image_id;
+  C_SaferCond ctx;
+  auto req = MockGetMirrorImageIdRequest::create(m_local_io_ctx,
+                                                 "global image id",
+                                                 &image_id, &ctx);
+  req->send();
+
+  ASSERT_EQ(-EINVAL, ctx.wait());
+}
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
index b79d4de9e18d5f468225595e56d001e2a2762a11..279debead9bf0c39b8a626b2652c1d85cf42013c 100644 (file)
@@ -4,6 +4,7 @@
 #include "test/rbd_mirror/test_mock_fixture.h"
 #include "cls/rbd/cls_rbd_types.h"
 #include "librbd/journal/TypeTraits.h"
+#include "tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h"
 #include "tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.h"
 #include "test/journal/mock/MockJournaler.h"
 #include "test/librados_test_stub/MockTestMemIoCtxImpl.h"
@@ -23,6 +24,39 @@ struct MockTestImageCtx : public librbd::MockImageCtx {
 } // anonymous namespace
 } // namespace librbd
 
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+template <>
+struct GetMirrorImageIdRequest<librbd::MockTestImageCtx> {
+  static GetMirrorImageIdRequest* s_instance;
+  std::string* image_id = nullptr;
+  Context* on_finish = nullptr;
+
+  static GetMirrorImageIdRequest* create(librados::IoCtx& io_ctx,
+                                         const std::string& global_image_id,
+                                         std::string* image_id,
+                                         Context* on_finish) {
+    assert(s_instance != nullptr);
+    s_instance->image_id = image_id;
+    s_instance->on_finish = on_finish;
+    return s_instance;
+  }
+
+  GetMirrorImageIdRequest() {
+    s_instance = this;
+  }
+
+  MOCK_METHOD0(send, void());
+};
+
+GetMirrorImageIdRequest<librbd::MockTestImageCtx>* GetMirrorImageIdRequest<librbd::MockTestImageCtx>::s_instance = nullptr;
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
 // template definitions
 #include "tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.cc"
 
@@ -42,18 +76,15 @@ using ::testing::WithArgs;
 class TestMockImageReplayerPrepareLocalImageRequest : public TestMockFixture {
 public:
   typedef PrepareLocalImageRequest<librbd::MockTestImageCtx> MockPrepareLocalImageRequest;
-
-  void expect_mirror_image_get_image_id(librados::IoCtx &io_ctx,
-                                        const std::string &image_id, int r) {
-    bufferlist bl;
-    ::encode(image_id, bl);
-
-    EXPECT_CALL(get_mock_io_ctx(io_ctx),
-                exec(RBD_MIRRORING, _, StrEq("rbd"), StrEq("mirror_image_get_image_id"), _, _, _))
-      .WillOnce(DoAll(WithArg<5>(Invoke([bl](bufferlist *out_bl) {
-                                          *out_bl = bl;
-                                        })),
-                      Return(r)));
+  typedef GetMirrorImageIdRequest<librbd::MockTestImageCtx> MockGetMirrorImageIdRequest;
+
+  void expect_get_mirror_image_id(MockGetMirrorImageIdRequest& mock_get_mirror_image_id_request,
+                                  const std::string& image_id, int r) {
+    EXPECT_CALL(mock_get_mirror_image_id_request, send())
+      .WillOnce(Invoke([&mock_get_mirror_image_id_request, image_id, r]() {
+                  *mock_get_mirror_image_id_request.image_id = image_id;
+                  mock_get_mirror_image_id_request.on_finish->complete(r);
+                }));
   }
 
   void expect_mirror_image_get(librados::IoCtx &io_ctx,
@@ -88,7 +119,9 @@ public:
 
 TEST_F(TestMockImageReplayerPrepareLocalImageRequest, Success) {
   InSequence seq;
-  expect_mirror_image_get_image_id(m_local_io_ctx, "local image id", 0);
+  MockGetMirrorImageIdRequest mock_get_mirror_image_id_request;
+  expect_get_mirror_image_id(mock_get_mirror_image_id_request, "local image id",
+                             0);
   expect_mirror_image_get(m_local_io_ctx, cls::rbd::MIRROR_IMAGE_STATE_ENABLED,
                           "global image id", 0);
 
@@ -111,27 +144,10 @@ TEST_F(TestMockImageReplayerPrepareLocalImageRequest, Success) {
   ASSERT_EQ(std::string("remote mirror uuid"), tag_owner);
 }
 
-TEST_F(TestMockImageReplayerPrepareLocalImageRequest, MirrorImageIdDNE) {
-  InSequence seq;
-  expect_mirror_image_get_image_id(m_local_io_ctx, "", -ENOENT);
-
-  std::string local_image_id;
-  std::string tag_owner;
-  C_SaferCond ctx;
-  auto req = MockPrepareLocalImageRequest::create(m_local_io_ctx,
-                                                  "global image id",
-                                                  &local_image_id,
-                                                  &tag_owner,
-                                                  m_threads->work_queue,
-                                                  &ctx);
-  req->send();
-
-  ASSERT_EQ(-ENOENT, ctx.wait());
-}
-
 TEST_F(TestMockImageReplayerPrepareLocalImageRequest, MirrorImageIdError) {
   InSequence seq;
-  expect_mirror_image_get_image_id(m_local_io_ctx, "", -EINVAL);
+  MockGetMirrorImageIdRequest mock_get_mirror_image_id_request;
+  expect_get_mirror_image_id(mock_get_mirror_image_id_request, "", -EINVAL);
 
   std::string local_image_id;
   std::string tag_owner;
@@ -149,7 +165,9 @@ TEST_F(TestMockImageReplayerPrepareLocalImageRequest, MirrorImageIdError) {
 
 TEST_F(TestMockImageReplayerPrepareLocalImageRequest, MirrorImageError) {
   InSequence seq;
-  expect_mirror_image_get_image_id(m_local_io_ctx, "local image id", 0);
+  MockGetMirrorImageIdRequest mock_get_mirror_image_id_request;
+  expect_get_mirror_image_id(mock_get_mirror_image_id_request, "local image id",
+                             0);
   expect_mirror_image_get(m_local_io_ctx, cls::rbd::MIRROR_IMAGE_STATE_DISABLED,
                           "", -EINVAL);
 
@@ -169,7 +187,9 @@ TEST_F(TestMockImageReplayerPrepareLocalImageRequest, MirrorImageError) {
 
 TEST_F(TestMockImageReplayerPrepareLocalImageRequest, TagOwnerError) {
   InSequence seq;
-  expect_mirror_image_get_image_id(m_local_io_ctx, "local image id", 0);
+  MockGetMirrorImageIdRequest mock_get_mirror_image_id_request;
+  expect_get_mirror_image_id(mock_get_mirror_image_id_request, "local image id",
+                             0);
   expect_mirror_image_get(m_local_io_ctx, cls::rbd::MIRROR_IMAGE_STATE_ENABLED,
                           "global image id", 0);
 
diff --git a/ceph/src/test/rbd_mirror/image_replayer/test_mock_PrepareRemoteImageRequest.cc b/ceph/src/test/rbd_mirror/image_replayer/test_mock_PrepareRemoteImageRequest.cc
new file mode 100644 (file)
index 0000000..7938118
--- /dev/null
@@ -0,0 +1,163 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "test/rbd_mirror/test_mock_fixture.h"
+#include "cls/rbd/cls_rbd_types.h"
+#include "librbd/journal/TypeTraits.h"
+#include "tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h"
+#include "tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.h"
+#include "test/librados_test_stub/MockTestMemIoCtxImpl.h"
+#include "test/librbd/mock/MockImageCtx.h"
+
+namespace librbd {
+
+namespace {
+
+struct MockTestImageCtx : public librbd::MockImageCtx {  // file-local MockImageCtx subclass; used as the template argument of the mocked request types in this file
+  MockTestImageCtx(librbd::ImageCtx &image_ctx)
+    : librbd::MockImageCtx(image_ctx) {  // simply forwards the real image context to the base mock
+  }
+};
+
+} // anonymous namespace
+} // namespace librbd
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+// Mock specialization of GetMirrorImageIdRequest for MockTestImageCtx.
+// Constructing an instance registers it in s_instance; create() hands that
+// same instance back after recording the caller's output pointer and
+// completion context, so a test can drive them from its send() expectation.
+template <>
+struct GetMirrorImageIdRequest<librbd::MockTestImageCtx> {
+  static GetMirrorImageIdRequest* s_instance;
+  std::string* image_id = nullptr;
+  Context* on_finish = nullptr;
+
+  // Factory used in place of the real request: io_ctx and global_image_id
+  // are ignored, image_id and on_finish are stored on the registered mock.
+  // Asserts that a mock has been constructed first.
+  static GetMirrorImageIdRequest* create(librados::IoCtx& io_ctx,
+                                         const std::string& global_image_id,
+                                         std::string* image_id,
+                                         Context* on_finish) {
+    assert(s_instance != nullptr);
+    s_instance->image_id = image_id;
+    s_instance->on_finish = on_finish;
+    return s_instance;
+  }
+
+  GetMirrorImageIdRequest() {
+    s_instance = this;
+  }
+
+  // Unregister on destruction so that a create() issued after the mock has
+  // gone out of scope trips the assert above instead of dereferencing a
+  // dangling pointer. Only clear the slot if it still refers to this object.
+  ~GetMirrorImageIdRequest() {
+    if (s_instance == this) {
+      s_instance = nullptr;
+    }
+  }
+
+  MOCK_METHOD0(send, void());
+};
+
+GetMirrorImageIdRequest<librbd::MockTestImageCtx>* GetMirrorImageIdRequest<librbd::MockTestImageCtx>::s_instance = nullptr;
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+// template definitions
+#include "tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.cc"
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+using ::testing::_;
+using ::testing::DoAll;
+using ::testing::InSequence;
+using ::testing::Invoke;
+using ::testing::Return;
+using ::testing::StrEq;
+using ::testing::WithArg;
+
+class TestMockImageReplayerPrepareRemoteImageRequest : public TestMockFixture {  // fixture with expectation helpers for the PrepareRemoteImageRequest tests
+public:
+  typedef PrepareRemoteImageRequest<librbd::MockTestImageCtx> MockPrepareRemoteImageRequest;
+  typedef GetMirrorImageIdRequest<librbd::MockTestImageCtx> MockGetMirrorImageIdRequest;
+
+  void expect_get_mirror_image_id(MockGetMirrorImageIdRequest& mock_get_mirror_image_id_request,  // arm the mocked id lookup for a single send()
+                                  const std::string& image_id, int r) {  // image_id: value to publish; r: completion code
+    EXPECT_CALL(mock_get_mirror_image_id_request, send())
+      .WillOnce(Invoke([&mock_get_mirror_image_id_request, image_id, r]() {
+                  *mock_get_mirror_image_id_request.image_id = image_id;  // write through the pointer captured by create()
+                  mock_get_mirror_image_id_request.on_finish->complete(r);  // then complete the captured context with r
+                }));
+  }
+
+  void expect_mirror_uuid_get(librados::IoCtx &io_ctx,  // expect one "rbd"/"mirror_uuid_get" exec on the mocked io_ctx
+                              const std::string &mirror_uuid, int r) {  // mirror_uuid: reply payload; r: exec return value
+    bufferlist bl;
+    ::encode(mirror_uuid, bl);  // pre-encode the reply that the exec call will hand back
+
+    EXPECT_CALL(get_mock_io_ctx(io_ctx),
+                exec(RBD_MIRRORING, _, StrEq("rbd"), StrEq("mirror_uuid_get"), _, _, _))
+      .WillOnce(DoAll(WithArg<5>(Invoke([bl](bufferlist *out_bl) {  // argument index 5 is treated as the output bufferlist
+                                          *out_bl = bl;
+                                        })),
+                      Return(r)));
+  }
+};
+
+TEST_F(TestMockImageReplayerPrepareRemoteImageRequest, Success) {  // both lookups succeed; outputs must carry the mocked values
+  InSequence seq;  // expectations below must be satisfied in declaration order
+  expect_mirror_uuid_get(m_remote_io_ctx, "remote mirror uuid", 0);
+  MockGetMirrorImageIdRequest mock_get_mirror_image_id_request;
+  expect_get_mirror_image_id(mock_get_mirror_image_id_request,
+                             "remote image id", 0);
+
+  std::string remote_mirror_uuid;
+  std::string remote_image_id;
+  C_SaferCond ctx;
+  auto req = MockPrepareRemoteImageRequest::create(m_remote_io_ctx,
+                                                   "global image id",
+                                                   &remote_mirror_uuid,
+                                                   &remote_image_id,
+                                                   &ctx);
+  req->send();
+
+  ASSERT_EQ(0, ctx.wait());  // request completes successfully
+  ASSERT_EQ(std::string("remote mirror uuid"), remote_mirror_uuid);
+  ASSERT_EQ(std::string("remote image id"), remote_image_id);
+}
+
+TEST_F(TestMockImageReplayerPrepareRemoteImageRequest, MirrorUuidError) {  // mirror_uuid_get fails; the error must reach the caller
+  InSequence seq;
+  expect_mirror_uuid_get(m_remote_io_ctx, "", -EINVAL);  // only expectation: no image-id lookup mock is set up in this test
+
+  std::string remote_mirror_uuid;
+  std::string remote_image_id;
+  C_SaferCond ctx;
+  auto req = MockPrepareRemoteImageRequest::create(m_remote_io_ctx,
+                                                   "global image id",
+                                                   &remote_mirror_uuid,
+                                                   &remote_image_id,
+                                                   &ctx);
+  req->send();
+
+  ASSERT_EQ(-EINVAL, ctx.wait());
+  ASSERT_EQ(std::string(""), remote_mirror_uuid);  // uuid output stays empty on failure
+}
+
+TEST_F(TestMockImageReplayerPrepareRemoteImageRequest, MirrorImageIdError) {  // uuid lookup succeeds, image-id lookup fails with -EINVAL
+  InSequence seq;
+  expect_mirror_uuid_get(m_remote_io_ctx, "remote mirror uuid", 0);
+  MockGetMirrorImageIdRequest mock_get_mirror_image_id_request;
+  expect_get_mirror_image_id(mock_get_mirror_image_id_request, "", -EINVAL);
+
+  std::string remote_mirror_uuid;
+  std::string remote_image_id;
+  C_SaferCond ctx;
+  auto req = MockPrepareRemoteImageRequest::create(m_remote_io_ctx,
+                                                   "global image id",
+                                                   &remote_mirror_uuid,
+                                                   &remote_image_id,
+                                                   &ctx);
+  req->send();
+
+  ASSERT_EQ(-EINVAL, ctx.wait());  // the image-id error is what the caller sees
+  ASSERT_EQ(std::string("remote mirror uuid"), remote_mirror_uuid);  // uuid from the first step is still reported
+}
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
index 4926660b9ea36ced357245796b4f96b0f6542367..3de5fbcdbf5cc4e24a5c83fc2fba6bfc63a1300b 100644 (file)
@@ -10,7 +10,7 @@ struct Context;
 
 struct MockSafeTimer {  // gmock double exposing the two timer calls the tests intercept
   MOCK_METHOD2(add_event_after, void(double, Context*));
-  MOCK_METHOD1(cancel_event, void(Context *));
+  MOCK_METHOD1(cancel_event, bool(Context *));  // now yields a bool result instead of void
 };
 
 #endif // CEPH_MOCK_SAFE_TIMER_H
index 5ac673314233f717adecb0509bd21d5353a10d92..c06a4662532a45dc25ad04cee79c2c8fcdf26815 100644 (file)
@@ -17,7 +17,6 @@
 #include "cls/rbd/cls_rbd_types.h"
 #include "cls/rbd/cls_rbd_client.h"
 #include "tools/rbd_mirror/ImageDeleter.h"
-#include "tools/rbd_mirror/ImageReplayer.h"
 #include "tools/rbd_mirror/ServiceDaemon.h"
 #include "tools/rbd_mirror/Threads.h"
 #include "librbd/ImageCtx.h"
index 7b30e30cabf0df7239e7fcb5d7cd720069cb46c7..d8e159f5f3d4e540321f31c8f5d3229434102758 100644 (file)
@@ -115,6 +115,7 @@ public:
     EXPECT_EQ(0, librbd::create(m_remote_ioctx, m_image_name.c_str(), 1 << 22,
                                false, features, &order, 0, 0));
     m_remote_image_id = get_image_id(m_remote_ioctx, m_image_name);
+    m_global_image_id = get_global_image_id(m_remote_ioctx, m_remote_image_id);
 
     m_threads.reset(new rbd::mirror::Threads<>(reinterpret_cast<CephContext*>(
       m_local_ioctx.cct())));
@@ -148,9 +149,8 @@ public:
     m_replayer = new ImageReplayerT(
         m_threads.get(), m_image_deleter.get(), m_instance_watcher,
         rbd::mirror::RadosRef(new librados::Rados(m_local_ioctx)),
-        m_local_mirror_uuid, m_local_ioctx.get_id(), "global image id");
-    m_replayer->add_remote_image(m_remote_mirror_uuid, m_remote_image_id,
-                                 m_remote_ioctx);
+        m_local_mirror_uuid, m_local_ioctx.get_id(), m_global_image_id);
+    m_replayer->add_peer("peer uuid", m_remote_ioctx);
   }
 
   void start()
@@ -205,6 +205,14 @@ public:
     return id;
   }
 
+  std::string get_global_image_id(librados::IoCtx& io_ctx,  // look up the mirroring record of image_id in io_ctx
+                                  const std::string& image_id) {
+    cls::rbd::MirrorImage mirror_image;
+    EXPECT_EQ(0, librbd::cls_client::mirror_image_get(&io_ctx, image_id,  // non-fatal check: on failure the test is marked failed but execution continues
+                                                      &mirror_image));
+    return mirror_image.global_image_id;  // returns whatever the record holds, even if the lookup above failed
+  }
+
   void open_image(librados::IoCtx &ioctx, const std::string &image_name,
                  bool readonly, librbd::ImageCtx **ictxp)
   {
@@ -385,6 +393,7 @@ public:
   std::string m_image_name;
   int64_t m_remote_pool_id;
   std::string m_remote_image_id;
+  std::string m_global_image_id;
   rbd::mirror::ImageReplayer<> *m_replayer;
   C_WatchCtx *m_watch_ctx;
   uint64_t m_watch_handle;
@@ -412,14 +421,7 @@ TEST_F(TestImageReplayer, BootstrapErrorLocalImageExists)
 
 TEST_F(TestImageReplayer, BootstrapErrorNoJournal)
 {
-  // disable remote image journaling
-  librbd::ImageCtx *ictx;
-  open_remote_image(&ictx);
-  uint64_t features;
-  ASSERT_EQ(0, librbd::get_features(ictx, &features));
-  ASSERT_EQ(0, ictx->operations->update_features(RBD_FEATURE_JOURNALING,
-                                                 false));
-  close_image(ictx);
+  ASSERT_EQ(0, librbd::Journal<>::remove(m_remote_ioctx, m_remote_image_id));
 
   create_replayer<>();
   C_SaferCond cond;
@@ -522,7 +524,7 @@ TEST_F(TestImageReplayer, ErrorNoJournal)
 
   C_SaferCond cond;
   m_replayer->start(&cond);
-  ASSERT_EQ(-ENOENT, cond.wait());
+  ASSERT_EQ(0, cond.wait());
 }
 
 TEST_F(TestImageReplayer, StartStop)
@@ -944,7 +946,7 @@ TEST_F(TestImageReplayer, Disconnect)
   close_image(ictx);
   C_SaferCond cond2;
   m_replayer->start(&cond2);
-  ASSERT_EQ(-ENOTCONN, cond2.wait());
+  ASSERT_EQ(0, cond2.wait());
   C_SaferCond delete_cond;
   m_image_deleter->wait_for_scheduled_deletion(
     m_local_ioctx.get_id(), m_replayer->get_global_image_id(), &delete_cond);
index 221d618d4923c1d2ce950ba655459f53d2560e9e..67a7932d8f885f07b8d75384f61b62f9e8d66c71 100644 (file)
@@ -141,12 +141,19 @@ int TestFixture::create_image_data_pool(std::string &data_pool) {
   }
 
   r = _rados->pool_create(pool.c_str());
-  if (r == 0) {
-    data_pool = pool;
-    return 0;
+  if (r < 0) {
+    return r;
+  }
+
+  librados::IoCtx data_ioctx;
+  r = _rados->ioctx_create(pool.c_str(), data_ioctx);
+  if (r < 0) {
+    return r;
   }
 
-  return r;
+  data_ioctx.application_enable("rbd", true);
+  data_pool = pool;
+  return 0;
 }
 
 } // namespace mirror
index 71a94e5896b3edc60d7029e2a8c19b1da27ab7a5..7a0bb6706f64393596a75baff2e60b00e728ecf6 100644 (file)
@@ -7,14 +7,18 @@
 #include "tools/rbd_mirror/ImageDeleter.h"
 #include "tools/rbd_mirror/ImageReplayer.h"
 #include "tools/rbd_mirror/InstanceWatcher.h"
+#include "tools/rbd_mirror/Threads.h"
 #include "tools/rbd_mirror/image_replayer/BootstrapRequest.h"
 #include "tools/rbd_mirror/image_replayer/CloseImageRequest.h"
 #include "tools/rbd_mirror/image_replayer/EventPreprocessor.h"
 #include "tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.h"
+#include "tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.h"
 #include "test/rbd_mirror/test_mock_fixture.h"
 #include "test/journal/mock/MockJournaler.h"
 #include "test/librbd/mock/MockImageCtx.h"
 #include "test/librbd/mock/MockJournal.h"
+#include "test/rbd_mirror/mock/MockContextWQ.h"
+#include "test/rbd_mirror/mock/MockSafeTimer.h"
 
 namespace librbd {
 
@@ -61,10 +65,31 @@ struct MirrorPeerClientMeta;
 namespace rbd {
 namespace mirror {
 
+// Mock specialization of Threads for MockTestImageCtx. It owns a freshly
+// allocated MockSafeTimer and MockContextWQ and borrows timer_lock from the
+// real Threads<librbd::ImageCtx> instance passed to the constructor.
+template <>
+struct Threads<librbd::MockTestImageCtx> {
+  MockSafeTimer *timer;
+  Mutex &timer_lock;
+
+  MockContextWQ *work_queue;
+
+  // threads: real Threads object whose timer_lock is shared by reference;
+  // it must outlive this mock.
+  Threads(Threads<librbd::ImageCtx> *threads)
+    : timer(new MockSafeTimer()),
+      timer_lock(threads->timer_lock),
+      work_queue(new MockContextWQ()) {
+  }
+
+  // The raw pointers are owned and deleted below, so a copy would lead to a
+  // double delete; forbid copying outright.
+  Threads(const Threads&) = delete;
+  Threads& operator=(const Threads&) = delete;
+
+  ~Threads() {
+    delete timer;
+    delete work_queue;
+  }
+};
+
 template <>
 struct ImageDeleter<librbd::MockTestImageCtx> {  // gmock double for the image deleter calls the tests intercept
   MOCK_METHOD4(schedule_image_delete, void(RadosRef, int64_t,
                                            const std::string&, bool));
+  MOCK_METHOD4(wait_for_scheduled_deletion,  // tests complete the Context* argument themselves
+               void(int64_t, const std::string&, Context*, bool));
+  MOCK_METHOD2(cancel_waiter, void(int64_t, const std::string&));
 };
 
 template<>
@@ -94,7 +119,7 @@ struct PrepareLocalImageRequest<librbd::MockTestImageCtx> {
                                           const std::string &global_image_id,
                                           std::string *local_image_id,
                                           std::string *tag_owner,
-                                          ContextWQ *work_queue,
+                                          MockContextWQ *work_queue,
                                           Context *on_finish) {
     assert(s_instance != nullptr);
     s_instance->local_image_id = local_image_id;
@@ -110,6 +135,32 @@ struct PrepareLocalImageRequest<librbd::MockTestImageCtx> {
   MOCK_METHOD0(send, void());
 };
 
+// Mock specialization of PrepareRemoteImageRequest for MockTestImageCtx.
+// Constructing an instance registers it in s_instance; create() returns that
+// instance after recording the caller's output pointers and completion
+// context so a send() expectation can fill them in.
+template<>
+struct PrepareRemoteImageRequest<librbd::MockTestImageCtx> {
+  static PrepareRemoteImageRequest* s_instance;
+  std::string *remote_mirror_uuid = nullptr;
+  std::string *remote_image_id = nullptr;
+  Context *on_finish = nullptr;
+
+  // Factory used in place of the real request: the io context and
+  // global_image_id are ignored; the output pointers and on_finish are
+  // stored on the registered mock. Asserts that a mock exists.
+  static PrepareRemoteImageRequest* create(librados::IoCtx &,
+                                           const std::string &global_image_id,
+                                           std::string *remote_mirror_uuid,
+                                           std::string *remote_image_id,
+                                           Context *on_finish) {
+    assert(s_instance != nullptr);
+    s_instance->remote_mirror_uuid = remote_mirror_uuid;
+    s_instance->remote_image_id = remote_image_id;
+    s_instance->on_finish = on_finish;
+    return s_instance;
+  }
+
+  PrepareRemoteImageRequest() {
+    s_instance = this;
+  }
+
+  // Unregister on destruction so that a create() issued by a test that did
+  // not set up this mock trips the assert above rather than reusing a
+  // pointer to a destroyed object from an earlier test.
+  ~PrepareRemoteImageRequest() {
+    if (s_instance == this) {
+      s_instance = nullptr;
+    }
+  }
+
+  MOCK_METHOD0(send, void());
+};
+
 template<>
 struct BootstrapRequest<librbd::MockTestImageCtx> {
   static BootstrapRequest* s_instance;
@@ -122,8 +173,9 @@ struct BootstrapRequest<librbd::MockTestImageCtx> {
       rbd::mirror::InstanceWatcher<librbd::MockTestImageCtx> *instance_watcher,
       librbd::MockTestImageCtx **local_image_ctx,
       const std::string &local_image_name, const std::string &remote_image_id,
-      const std::string &global_image_id, ContextWQ *work_queue,
-      SafeTimer *timer, Mutex *timer_lock, const std::string &local_mirror_uuid,
+      const std::string &global_image_id, MockContextWQ *work_queue,
+      MockSafeTimer *timer, Mutex *timer_lock,
+      const std::string &local_mirror_uuid,
       const std::string &remote_mirror_uuid,
       ::journal::MockJournalerProxy *journaler,
       librbd::journal::MirrorPeerClientMeta *client_meta,
@@ -195,7 +247,7 @@ struct EventPreprocessor<librbd::MockTestImageCtx> {
                                    ::journal::MockJournalerProxy &remote_journaler,
                                    const std::string &local_mirror_uuid,
                                    librbd::journal::MirrorPeerClientMeta *client_meta,
-                                   ContextWQ *work_queue) {
+                                   MockContextWQ *work_queue) {
     assert(s_instance != nullptr);
     return s_instance;
   }
@@ -247,6 +299,7 @@ BootstrapRequest<librbd::MockTestImageCtx>* BootstrapRequest<librbd::MockTestIma
 CloseImageRequest<librbd::MockTestImageCtx>* CloseImageRequest<librbd::MockTestImageCtx>::s_instance = nullptr;
 EventPreprocessor<librbd::MockTestImageCtx>* EventPreprocessor<librbd::MockTestImageCtx>::s_instance = nullptr;
 PrepareLocalImageRequest<librbd::MockTestImageCtx>* PrepareLocalImageRequest<librbd::MockTestImageCtx>::s_instance = nullptr;
+PrepareRemoteImageRequest<librbd::MockTestImageCtx>* PrepareRemoteImageRequest<librbd::MockTestImageCtx>::s_instance = nullptr;
 ReplayStatusFormatter<librbd::MockTestImageCtx>* ReplayStatusFormatter<librbd::MockTestImageCtx>::s_instance = nullptr;
 
 } // namespace image_replayer
@@ -261,11 +314,13 @@ namespace mirror {
 
 class TestMockImageReplayer : public TestMockFixture {
 public:
+  typedef Threads<librbd::MockTestImageCtx> MockThreads;
   typedef ImageDeleter<librbd::MockTestImageCtx> MockImageDeleter;
   typedef BootstrapRequest<librbd::MockTestImageCtx> MockBootstrapRequest;
   typedef CloseImageRequest<librbd::MockTestImageCtx> MockCloseImageRequest;
   typedef EventPreprocessor<librbd::MockTestImageCtx> MockEventPreprocessor;
   typedef PrepareLocalImageRequest<librbd::MockTestImageCtx> MockPrepareLocalImageRequest;
+  typedef PrepareRemoteImageRequest<librbd::MockTestImageCtx> MockPrepareRemoteImageRequest;
   typedef ReplayStatusFormatter<librbd::MockTestImageCtx> MockReplayStatusFormatter;
   typedef librbd::journal::Replay<librbd::MockTestImageCtx> MockReplay;
   typedef ImageReplayer<librbd::MockTestImageCtx> MockImageReplayer;
@@ -291,6 +346,48 @@ public:
     ASSERT_EQ(0, open_image(m_local_io_ctx, m_image_name, &m_local_image_ctx));
   }
 
+  void expect_work_queue_repeatedly(MockThreads &mock_threads) {  // let any number of queue() calls on the mock work queue pass through
+    EXPECT_CALL(*mock_threads.work_queue, queue(_, _))
+      .WillRepeatedly(Invoke([this](Context *ctx, int r) {
+          m_threads->work_queue->queue(ctx, r);  // forward to the fixture's work queue (m_threads)
+        }));
+  }
+
+  void expect_add_event_after_repeatedly(MockThreads &mock_threads) {  // let timer add/cancel calls on the mock pass through, any number of times
+    EXPECT_CALL(*mock_threads.timer, add_event_after(_, _))
+      .WillRepeatedly(
+        Invoke([this](double seconds, Context *ctx) {
+          m_threads->timer->add_event_after(seconds, ctx);  // forward to the fixture's timer (m_threads)
+        }));
+    EXPECT_CALL(*mock_threads.timer, cancel_event(_))
+      .WillRepeatedly(
+        Invoke([this](Context *ctx) {
+          return m_threads->timer->cancel_event(ctx);  // propagate the forwarded timer's result to the caller
+        }));
+  }
+
+  void expect_wait_for_scheduled_deletion(MockImageDeleter& mock_image_deleter,  // expect one wait_for_scheduled_deletion() for global_image_id
+                                          const std::string& global_image_id,
+                                          int r) {  // r: result the waiter context is completed with
+    EXPECT_CALL(mock_image_deleter,
+                wait_for_scheduled_deletion(_, global_image_id, _, false))  // pool id and context are unconstrained; last arg must be false
+      .WillOnce(WithArg<2>(Invoke([this, r](Context *ctx) {
+                             m_threads->work_queue->queue(ctx, r);  // complete asynchronously via the fixture's work queue
+                           })));
+  }
+
+  // Expect a cancel_waiter() call on the mocked image deleter for the local
+  // pool id and the given global image id. The id defaults to the value the
+  // fixture passes to the image replayer, so existing callers are unchanged;
+  // taking it as a parameter matches the sibling expect_* helpers.
+  void expect_cancel_waiter(MockImageDeleter& mock_image_deleter,
+                            const std::string& global_image_id =
+                              "global image id") {
+    EXPECT_CALL(mock_image_deleter, cancel_waiter(m_local_io_ctx.get_id(),
+                                                  global_image_id));
+  }
+
+  void expect_schedule_image_delete(MockImageDeleter& mock_image_deleter,  // expect a schedule_image_delete() call for global_image_id
+                                    const std::string& global_image_id,
+                                    bool ignore_orphan) {  // ignore_orphan: required value of the call's last argument
+    EXPECT_CALL(mock_image_deleter,
+                schedule_image_delete(_, _, global_image_id, ignore_orphan));  // first two arguments are unconstrained
+  }
+
   bufferlist encode_tag_data(const librbd::journal::TagData &tag_data) {
     bufferlist bl;
     ::encode(tag_data, bl);
@@ -318,6 +415,17 @@ public:
         }));
   }
 
+  void expect_send(MockPrepareRemoteImageRequest& mock_request,  // arm the mocked remote-prepare request for a single send()
+                   const std::string& mirror_uuid, const std::string& image_id,
+                   int r) {  // r: completion code delivered to on_finish
+    EXPECT_CALL(mock_request, send())
+      .WillOnce(Invoke([&mock_request, image_id, mirror_uuid, r]() {
+                  *mock_request.remote_mirror_uuid = mirror_uuid;  // outputs are written even when r is an error
+                  *mock_request.remote_image_id = image_id;
+                  mock_request.on_finish->complete(r);
+                }));
+  }
+
   void expect_send(MockBootstrapRequest &mock_bootstrap_request,
                    librbd::MockTestImageCtx &mock_local_image_ctx,
                    bool do_resync, int r) {
@@ -448,13 +556,13 @@ public:
                       WithArg<2>(CompleteContext(on_commit_r))));
   }
 
-  void create_image_replayer(MockImageDeleter &mock_image_deleter) {
+  void create_image_replayer(MockThreads &mock_threads,  // build m_image_replayer on top of the supplied mocks
+                             MockImageDeleter &mock_image_deleter) {
     m_image_replayer = new MockImageReplayer(
-      m_threads, &mock_image_deleter, &m_instance_watcher,
+      &mock_threads, &mock_image_deleter, &m_instance_watcher,  // mock threads replace the fixture's m_threads here
       rbd::mirror::RadosRef(new librados::Rados(m_local_io_ctx)),
       "local_mirror_uuid", m_local_io_ctx.get_id(), "global image id");
-    m_image_replayer->add_remote_image(
-      "remote_mirror_uuid", m_remote_image_ctx->id, m_remote_io_ctx);
+    m_image_replayer->add_peer("peer_uuid", m_remote_io_ctx);  // only a peer uuid and io context are registered; no remote image id is passed
   }
 
   librbd::ImageCtx *m_remote_image_ctx;
@@ -473,7 +581,13 @@ TEST_F(TestMockImageReplayer, StartStop) {
   mock_local_image_ctx.journal = &mock_local_journal;
 
   journal::MockJournaler mock_remote_journaler;
+  MockThreads mock_threads(m_threads);
+  expect_work_queue_repeatedly(mock_threads);
+  expect_add_event_after_repeatedly(mock_threads);
+
+  MockImageDeleter mock_image_deleter;
   MockPrepareLocalImageRequest mock_prepare_local_image_request;
+  MockPrepareRemoteImageRequest mock_prepare_remote_image_request;
   MockBootstrapRequest mock_bootstrap_request;
   MockReplay mock_local_replay;
   MockEventPreprocessor mock_event_preprocessor;
@@ -482,8 +596,11 @@ TEST_F(TestMockImageReplayer, StartStop) {
   expect_get_or_send_update(mock_replay_status_formatter);
 
   InSequence seq;
+  expect_wait_for_scheduled_deletion(mock_image_deleter, "global image id", 0);
   expect_send(mock_prepare_local_image_request, mock_local_image_ctx.id,
               "remote mirror uuid", 0);
+  expect_send(mock_prepare_remote_image_request, "remote mirror uuid",
+              m_remote_image_ctx->id, 0);
   EXPECT_CALL(mock_remote_journaler, construct());
   expect_send(mock_bootstrap_request, mock_local_image_ctx, false, 0);
 
@@ -498,8 +615,7 @@ TEST_F(TestMockImageReplayer, StartStop) {
 
   EXPECT_CALL(mock_remote_journaler, start_live_replay(_, _));
 
-  MockImageDeleter mock_image_deleter;
-  create_image_replayer(mock_image_deleter);
+  create_image_replayer(mock_threads, mock_image_deleter);
 
   C_SaferCond start_ctx;
   m_image_replayer->start(&start_ctx);
@@ -511,6 +627,7 @@ TEST_F(TestMockImageReplayer, StartStop) {
 
   MockCloseImageRequest mock_close_local_image_request;
 
+  expect_cancel_waiter(mock_image_deleter);
   expect_shut_down(mock_local_replay, true, 0);
   EXPECT_CALL(mock_local_journal, remove_listener(_));
   EXPECT_CALL(mock_local_journal, stop_external_replay());
@@ -531,17 +648,22 @@ TEST_F(TestMockImageReplayer, LocalImagePrimary) {
   create_local_image();
   librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
 
+  MockThreads mock_threads(m_threads);
+  expect_work_queue_repeatedly(mock_threads);
+  expect_add_event_after_repeatedly(mock_threads);
+
+  MockImageDeleter mock_image_deleter;
   MockPrepareLocalImageRequest mock_prepare_local_image_request;
   MockReplayStatusFormatter mock_replay_status_formatter;
 
   expect_get_or_send_update(mock_replay_status_formatter);
 
   InSequence seq;
+  expect_wait_for_scheduled_deletion(mock_image_deleter, "global image id", 0);
   expect_send(mock_prepare_local_image_request, mock_local_image_ctx.id,
               "", 0);
 
-  MockImageDeleter mock_image_deleter;
-  create_image_replayer(mock_image_deleter);
+  create_image_replayer(mock_threads, mock_image_deleter);
 
   C_SaferCond start_ctx;
   m_image_replayer->start(&start_ctx);
@@ -553,22 +675,30 @@ TEST_F(TestMockImageReplayer, LocalImageDNE) {
   librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
 
   journal::MockJournaler mock_remote_journaler;
+  MockThreads mock_threads(m_threads);
+  expect_work_queue_repeatedly(mock_threads);
+  expect_add_event_after_repeatedly(mock_threads);
+
+  MockImageDeleter mock_image_deleter;
   MockPrepareLocalImageRequest mock_prepare_local_image_request;
+  MockPrepareRemoteImageRequest mock_prepare_remote_image_request;
   MockBootstrapRequest mock_bootstrap_request;
   MockReplayStatusFormatter mock_replay_status_formatter;
 
   expect_get_or_send_update(mock_replay_status_formatter);
 
   InSequence seq;
+  expect_wait_for_scheduled_deletion(mock_image_deleter, "global image id", 0);
   expect_send(mock_prepare_local_image_request, "", "", -ENOENT);
+  expect_send(mock_prepare_remote_image_request, "remote mirror uuid",
+              m_remote_image_ctx->id, 0);
   EXPECT_CALL(mock_remote_journaler, construct());
   expect_send(mock_bootstrap_request, mock_local_image_ctx, false, -EREMOTEIO);
 
   EXPECT_CALL(mock_remote_journaler, remove_listener(_));
   expect_shut_down(mock_remote_journaler, 0);
 
-  MockImageDeleter mock_image_deleter;
-  create_image_replayer(mock_image_deleter);
+  create_image_replayer(mock_threads, mock_image_deleter);
 
   C_SaferCond start_ctx;
   m_image_replayer->start(&start_ctx);
@@ -579,17 +709,110 @@ TEST_F(TestMockImageReplayer, PrepareLocalImageError) {
   create_local_image();
   librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
 
+  MockThreads mock_threads(m_threads);
+  expect_work_queue_repeatedly(mock_threads);
+  expect_add_event_after_repeatedly(mock_threads);
+
+  MockImageDeleter mock_image_deleter;
   MockPrepareLocalImageRequest mock_prepare_local_image_request;
   MockReplayStatusFormatter mock_replay_status_formatter;
 
   expect_get_or_send_update(mock_replay_status_formatter);
 
   InSequence seq;
+  expect_wait_for_scheduled_deletion(mock_image_deleter, "global image id", 0);
   expect_send(mock_prepare_local_image_request, mock_local_image_ctx.id,
               "remote mirror uuid", -EINVAL);
 
+  create_image_replayer(mock_threads, mock_image_deleter);
+
+  C_SaferCond start_ctx;
+  m_image_replayer->start(&start_ctx);
+  ASSERT_EQ(-EINVAL, start_ctx.wait());
+}
+
+TEST_F(TestMockImageReplayer, GetRemoteImageIdDNE) {  // remote prepare reports -ENOENT while the local tag owner equals the remote mirror uuid
+  create_local_image();
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
+
+  MockThreads mock_threads(m_threads);
+  expect_work_queue_repeatedly(mock_threads);
+  expect_add_event_after_repeatedly(mock_threads);
+
   MockImageDeleter mock_image_deleter;
-  create_image_replayer(mock_image_deleter);
+  MockPrepareLocalImageRequest mock_prepare_local_image_request;
+  MockPrepareRemoteImageRequest mock_prepare_remote_image_request;
+  MockReplayStatusFormatter mock_replay_status_formatter;
+
+  expect_get_or_send_update(mock_replay_status_formatter);
+
+  InSequence seq;  // expectations from here on must occur in this order
+  expect_wait_for_scheduled_deletion(mock_image_deleter, "global image id", 0);
+  expect_send(mock_prepare_local_image_request, mock_local_image_ctx.id,
+              "remote mirror uuid", 0);
+  expect_send(mock_prepare_remote_image_request, "remote mirror uuid",
+              "", -ENOENT);
+  expect_schedule_image_delete(mock_image_deleter, "global image id", false);  // local image is expected to be scheduled for deletion
+
+  create_image_replayer(mock_threads, mock_image_deleter);
+
+  C_SaferCond start_ctx;
+  m_image_replayer->start(&start_ctx);
+  ASSERT_EQ(0, start_ctx.wait());  // start() itself is expected to report success in this case
+}
+
+TEST_F(TestMockImageReplayer, GetRemoteImageIdNonLinkedDNE) {  // remote prepare reports -ENOENT and the local tag owner is a different mirror uuid
+  create_local_image();
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
+
+  MockThreads mock_threads(m_threads);
+  expect_work_queue_repeatedly(mock_threads);
+  expect_add_event_after_repeatedly(mock_threads);
+
+  MockImageDeleter mock_image_deleter;
+  MockPrepareLocalImageRequest mock_prepare_local_image_request;
+  MockPrepareRemoteImageRequest mock_prepare_remote_image_request;
+  MockReplayStatusFormatter mock_replay_status_formatter;
+
+  expect_get_or_send_update(mock_replay_status_formatter);
+
+  InSequence seq;  // expectations from here on must occur in this order
+  expect_wait_for_scheduled_deletion(mock_image_deleter, "global image id", 0);
+  expect_send(mock_prepare_local_image_request, mock_local_image_ctx.id,
+              "some other mirror uuid", 0);  // tag owner deliberately differs from the remote mirror uuid below
+  expect_send(mock_prepare_remote_image_request, "remote mirror uuid",
+              "", -ENOENT);
+
+  create_image_replayer(mock_threads, mock_image_deleter);  // note: no schedule_image_delete expectation is set in this test
+
+  C_SaferCond start_ctx;
+  m_image_replayer->start(&start_ctx);
+  ASSERT_EQ(-ENOENT, start_ctx.wait());  // the -ENOENT is surfaced to the caller
+}
+
+TEST_F(TestMockImageReplayer, GetRemoteImageIdError) {
+  create_local_image();
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
+
+  MockThreads mock_threads(m_threads);
+  expect_work_queue_repeatedly(mock_threads);
+  expect_add_event_after_repeatedly(mock_threads);
+
+  MockImageDeleter mock_image_deleter;
+  MockPrepareLocalImageRequest mock_prepare_local_image_request;
+  MockPrepareRemoteImageRequest mock_prepare_remote_image_request;
+  MockReplayStatusFormatter mock_replay_status_formatter;
+
+  expect_get_or_send_update(mock_replay_status_formatter);
+
+  InSequence seq;
+  expect_wait_for_scheduled_deletion(mock_image_deleter, "global image id", 0);
+  expect_send(mock_prepare_local_image_request, mock_local_image_ctx.id,
+              "remote mirror uuid", 0);
+  expect_send(mock_prepare_remote_image_request, "remote mirror uuid",
+              m_remote_image_ctx->id, -EINVAL);
+
+  create_image_replayer(mock_threads, mock_image_deleter);
 
   C_SaferCond start_ctx;
   m_image_replayer->start(&start_ctx);
@@ -602,23 +825,31 @@ TEST_F(TestMockImageReplayer, BootstrapError) {
   librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
 
   journal::MockJournaler mock_remote_journaler;
+  MockThreads mock_threads(m_threads);
+  expect_work_queue_repeatedly(mock_threads);
+  expect_add_event_after_repeatedly(mock_threads);
+
+  MockImageDeleter mock_image_deleter;
   MockPrepareLocalImageRequest mock_prepare_local_image_request;
+  MockPrepareRemoteImageRequest mock_prepare_remote_image_request;
   MockBootstrapRequest mock_bootstrap_request;
   MockReplayStatusFormatter mock_replay_status_formatter;
 
   expect_get_or_send_update(mock_replay_status_formatter);
 
   InSequence seq;
+  expect_wait_for_scheduled_deletion(mock_image_deleter, "global image id", 0);
   expect_send(mock_prepare_local_image_request, mock_local_image_ctx.id,
               "remote mirror uuid", 0);
+  expect_send(mock_prepare_remote_image_request, "remote mirror uuid",
+              m_remote_image_ctx->id, 0);
   EXPECT_CALL(mock_remote_journaler, construct());
   expect_send(mock_bootstrap_request, mock_local_image_ctx, false, -EINVAL);
 
   EXPECT_CALL(mock_remote_journaler, remove_listener(_));
   expect_shut_down(mock_remote_journaler, 0);
 
-  MockImageDeleter mock_image_deleter;
-  create_image_replayer(mock_image_deleter);
+  create_image_replayer(mock_threads, mock_image_deleter);
 
   C_SaferCond start_ctx;
   m_image_replayer->start(&start_ctx);
@@ -635,7 +866,13 @@ TEST_F(TestMockImageReplayer, StartExternalReplayError) {
   mock_local_image_ctx.journal = &mock_local_journal;
 
   journal::MockJournaler mock_remote_journaler;
+  MockThreads mock_threads(m_threads);
+  expect_work_queue_repeatedly(mock_threads);
+  expect_add_event_after_repeatedly(mock_threads);
+
+  MockImageDeleter mock_image_deleter;
   MockPrepareLocalImageRequest mock_prepare_local_image_request;
+  MockPrepareRemoteImageRequest mock_prepare_remote_image_request;
   MockBootstrapRequest mock_bootstrap_request;
   MockReplay mock_local_replay;
   MockEventPreprocessor mock_event_preprocessor;
@@ -644,8 +881,11 @@ TEST_F(TestMockImageReplayer, StartExternalReplayError) {
   expect_get_or_send_update(mock_replay_status_formatter);
 
   InSequence seq;
+  expect_wait_for_scheduled_deletion(mock_image_deleter, "global image id", 0);
   expect_send(mock_prepare_local_image_request, mock_local_image_ctx.id,
               "remote mirror uuid", 0);
+  expect_send(mock_prepare_remote_image_request, "remote mirror uuid",
+              m_remote_image_ctx->id, 0);
   EXPECT_CALL(mock_remote_journaler, construct());
   expect_send(mock_bootstrap_request, mock_local_image_ctx, false, 0);
 
@@ -665,8 +905,7 @@ TEST_F(TestMockImageReplayer, StartExternalReplayError) {
   EXPECT_CALL(mock_remote_journaler, remove_listener(_));
   expect_shut_down(mock_remote_journaler, 0);
 
-  MockImageDeleter mock_image_deleter;
-  create_image_replayer(mock_image_deleter);
+  create_image_replayer(mock_threads, mock_image_deleter);
 
   C_SaferCond start_ctx;
   m_image_replayer->start(&start_ctx);
@@ -685,7 +924,13 @@ TEST_F(TestMockImageReplayer, StopError) {
   mock_local_image_ctx.journal = &mock_local_journal;
 
   journal::MockJournaler mock_remote_journaler;
+  MockThreads mock_threads(m_threads);
+  expect_work_queue_repeatedly(mock_threads);
+  expect_add_event_after_repeatedly(mock_threads);
+
+  MockImageDeleter mock_image_deleter;
   MockPrepareLocalImageRequest mock_prepare_local_image_request;
+  MockPrepareRemoteImageRequest mock_prepare_remote_image_request;
   MockBootstrapRequest mock_bootstrap_request;
   MockReplay mock_local_replay;
   MockEventPreprocessor mock_event_preprocessor;
@@ -694,8 +939,11 @@ TEST_F(TestMockImageReplayer, StopError) {
   expect_get_or_send_update(mock_replay_status_formatter);
 
   InSequence seq;
+  expect_wait_for_scheduled_deletion(mock_image_deleter, "global image id", 0);
   expect_send(mock_prepare_local_image_request, mock_local_image_ctx.id,
               "remote mirror uuid", 0);
+  expect_send(mock_prepare_remote_image_request, "remote mirror uuid",
+              m_remote_image_ctx->id, 0);
   EXPECT_CALL(mock_remote_journaler, construct());
   expect_send(mock_bootstrap_request, mock_local_image_ctx, false, 0);
 
@@ -710,8 +958,7 @@ TEST_F(TestMockImageReplayer, StopError) {
 
   EXPECT_CALL(mock_remote_journaler, start_live_replay(_, _));
 
-  MockImageDeleter mock_image_deleter;
-  create_image_replayer(mock_image_deleter);
+  create_image_replayer(mock_threads, mock_image_deleter);
 
   C_SaferCond start_ctx;
   m_image_replayer->start(&start_ctx);
@@ -721,6 +968,7 @@ TEST_F(TestMockImageReplayer, StopError) {
 
   MockCloseImageRequest mock_close_local_image_request;
 
+  expect_cancel_waiter(mock_image_deleter);
   expect_shut_down(mock_local_replay, true, -EINVAL);
   EXPECT_CALL(mock_local_journal, remove_listener(_));
   EXPECT_CALL(mock_local_journal, stop_external_replay());
@@ -745,7 +993,13 @@ TEST_F(TestMockImageReplayer, Replay) {
   mock_local_image_ctx.journal = &mock_local_journal;
 
   journal::MockJournaler mock_remote_journaler;
+  MockThreads mock_threads(m_threads);
+  expect_work_queue_repeatedly(mock_threads);
+  expect_add_event_after_repeatedly(mock_threads);
+
+  MockImageDeleter mock_image_deleter;
   MockPrepareLocalImageRequest mock_prepare_local_image_request;
+  MockPrepareRemoteImageRequest mock_prepare_remote_image_request;
   MockBootstrapRequest mock_bootstrap_request;
   MockReplay mock_local_replay;
   MockEventPreprocessor mock_event_preprocessor;
@@ -757,8 +1011,11 @@ TEST_F(TestMockImageReplayer, Replay) {
   expect_committed(mock_remote_journaler, 2);
 
   InSequence seq;
+  expect_wait_for_scheduled_deletion(mock_image_deleter, "global image id", 0);
   expect_send(mock_prepare_local_image_request, mock_local_image_ctx.id,
               "remote mirror uuid", 0);
+  expect_send(mock_prepare_remote_image_request, "remote mirror uuid",
+              m_remote_image_ctx->id, 0);
   EXPECT_CALL(mock_remote_journaler, construct());
   expect_send(mock_bootstrap_request, mock_local_image_ctx, false, 0);
 
@@ -773,8 +1030,7 @@ TEST_F(TestMockImageReplayer, Replay) {
 
   EXPECT_CALL(mock_remote_journaler, start_live_replay(_, _));
 
-  MockImageDeleter mock_image_deleter;
-  create_image_replayer(mock_image_deleter);
+  create_image_replayer(mock_threads, mock_image_deleter);
 
   C_SaferCond start_ctx;
   m_image_replayer->start(&start_ctx);
@@ -822,6 +1078,7 @@ TEST_F(TestMockImageReplayer, Replay) {
   // STOP
 
   MockCloseImageRequest mock_close_local_image_request;
+  expect_cancel_waiter(mock_image_deleter);
   expect_shut_down(mock_local_replay, true, 0);
   EXPECT_CALL(mock_local_journal, remove_listener(_));
   EXPECT_CALL(mock_local_journal, stop_external_replay());
@@ -846,7 +1103,13 @@ TEST_F(TestMockImageReplayer, DecodeError) {
   mock_local_image_ctx.journal = &mock_local_journal;
 
   journal::MockJournaler mock_remote_journaler;
+  MockThreads mock_threads(m_threads);
+  expect_work_queue_repeatedly(mock_threads);
+  expect_add_event_after_repeatedly(mock_threads);
+
+  MockImageDeleter mock_image_deleter;
   MockPrepareLocalImageRequest mock_prepare_local_image_request;
+  MockPrepareRemoteImageRequest mock_prepare_remote_image_request;
   MockBootstrapRequest mock_bootstrap_request;
   MockReplay mock_local_replay;
   MockEventPreprocessor mock_event_preprocessor;
@@ -857,8 +1120,11 @@ TEST_F(TestMockImageReplayer, DecodeError) {
   expect_get_commit_tid_in_debug(mock_replay_entry);
 
   InSequence seq;
+  expect_wait_for_scheduled_deletion(mock_image_deleter, "global image id", 0);
   expect_send(mock_prepare_local_image_request, mock_local_image_ctx.id,
               "remote mirror uuid", 0);
+  expect_send(mock_prepare_remote_image_request, "remote mirror uuid",
+              m_remote_image_ctx->id, 0);
   EXPECT_CALL(mock_remote_journaler, construct());
   expect_send(mock_bootstrap_request, mock_local_image_ctx, false, 0);
 
@@ -873,8 +1139,7 @@ TEST_F(TestMockImageReplayer, DecodeError) {
 
   EXPECT_CALL(mock_remote_journaler, start_live_replay(_, _));
 
-  MockImageDeleter mock_image_deleter;
-  create_image_replayer(mock_image_deleter);
+  create_image_replayer(mock_threads, mock_image_deleter);
 
   C_SaferCond start_ctx;
   m_image_replayer->start(&start_ctx);
@@ -939,7 +1204,13 @@ TEST_F(TestMockImageReplayer, DelayedReplay) {
   mock_local_image_ctx.journal = &mock_local_journal;
 
   journal::MockJournaler mock_remote_journaler;
+  MockThreads mock_threads(m_threads);
+  expect_work_queue_repeatedly(mock_threads);
+  expect_add_event_after_repeatedly(mock_threads);
+
+  MockImageDeleter mock_image_deleter;
   MockPrepareLocalImageRequest mock_prepare_local_image_request;
+  MockPrepareRemoteImageRequest mock_prepare_remote_image_request;
   MockBootstrapRequest mock_bootstrap_request;
   MockReplay mock_local_replay;
   MockEventPreprocessor mock_event_preprocessor;
@@ -951,8 +1222,11 @@ TEST_F(TestMockImageReplayer, DelayedReplay) {
   expect_committed(mock_remote_journaler, 1);
 
   InSequence seq;
+  expect_wait_for_scheduled_deletion(mock_image_deleter, "global image id", 0);
   expect_send(mock_prepare_local_image_request, mock_local_image_ctx.id,
               "remote mirror uuid", 0);
+  expect_send(mock_prepare_remote_image_request, "remote mirror uuid",
+              m_remote_image_ctx->id, 0);
   EXPECT_CALL(mock_remote_journaler, construct());
   expect_send(mock_bootstrap_request, mock_local_image_ctx, false, 0);
 
@@ -967,8 +1241,7 @@ TEST_F(TestMockImageReplayer, DelayedReplay) {
 
   EXPECT_CALL(mock_remote_journaler, start_live_replay(_, _));
 
-  MockImageDeleter mock_image_deleter;
-  create_image_replayer(mock_image_deleter);
+  create_image_replayer(mock_threads, mock_image_deleter);
 
   C_SaferCond start_ctx;
   m_image_replayer->start(&start_ctx);
@@ -1028,6 +1301,7 @@ TEST_F(TestMockImageReplayer, DelayedReplay) {
 
   MockCloseImageRequest mock_close_local_image_request;
 
+  expect_cancel_waiter(mock_image_deleter);
   expect_shut_down(mock_local_replay, true, 0);
   EXPECT_CALL(mock_local_journal, remove_listener(_));
   EXPECT_CALL(mock_local_journal, stop_external_replay());
@@ -1042,5 +1316,6 @@ TEST_F(TestMockImageReplayer, DelayedReplay) {
   ASSERT_EQ(0, stop_ctx.wait());
 }
 
+
 } // namespace mirror
 } // namespace rbd
index 6e2d58c0aa0e234e60f2dbf8afebc3a429c16744..1903c55f2c9032b84c585614bf534155cea12c4c 100644 (file)
@@ -3,6 +3,8 @@
 
 #include "test/librbd/mock/MockImageCtx.h"
 #include "test/rbd_mirror/test_mock_fixture.h"
+#include "test/rbd_mirror/mock/MockContextWQ.h"
+#include "test/rbd_mirror/mock/MockSafeTimer.h"
 #include "tools/rbd_mirror/ImageDeleter.h"
 #include "tools/rbd_mirror/ImageReplayer.h"
 #include "tools/rbd_mirror/InstanceWatcher.h"
@@ -30,22 +32,25 @@ namespace mirror {
 
 template <>
 struct Threads<librbd::MockTestImageCtx> {
+  MockSafeTimer *timer;
   Mutex &timer_lock;
-  SafeTimer *timer;
-  ContextWQ *work_queue;
+  Cond timer_cond;
+
+  MockContextWQ *work_queue;
 
   Threads(Threads<librbd::ImageCtx> *threads)
-    : timer_lock(threads->timer_lock), timer(threads->timer),
-      work_queue(threads->work_queue) {
+    : timer(new MockSafeTimer()),
+      timer_lock(threads->timer_lock),
+      work_queue(new MockContextWQ()) {
+  }
+  ~Threads() {
+    delete timer;
+    delete work_queue;
   }
 };
 
 template <>
 struct ImageDeleter<librbd::MockTestImageCtx> {
-  MOCK_METHOD4(schedule_image_delete, void(RadosRef, int64_t,
-                                           const std::string&, bool));
-  MOCK_METHOD4(wait_for_scheduled_deletion,
-               void(int64_t, const std::string&, Context*, bool));
 };
 
 template<>
@@ -91,19 +96,16 @@ struct ImageReplayer<librbd::MockTestImageCtx> {
   MOCK_METHOD0(restart, void());
   MOCK_METHOD0(flush, void());
   MOCK_METHOD2(print_status, void(Formatter *, stringstream *));
-  MOCK_METHOD3(add_remote_image, void(const std::string &,
-                                      const std::string &,
-                                      librados::IoCtx &));
-  MOCK_METHOD3(remove_remote_image, void(const std::string &,
-                                         const std::string &,
-                                         bool));
-  MOCK_METHOD0(remote_images_empty, bool());
+  MOCK_METHOD2(add_peer, void(const std::string &, librados::IoCtx &));
   MOCK_METHOD0(get_global_image_id, const std::string &());
   MOCK_METHOD0(get_local_image_id, const std::string &());
   MOCK_METHOD0(is_running, bool());
   MOCK_METHOD0(is_stopped, bool());
   MOCK_METHOD0(is_blacklisted, bool());
 
+  MOCK_CONST_METHOD0(is_finished, bool());
+  MOCK_METHOD1(set_finished, void(bool));
+
   MOCK_CONST_METHOD0(get_health_state, image_replayer::HealthState());
 };
 
@@ -127,73 +129,79 @@ using ::testing::WithArg;
 
 class TestMockInstanceReplayer : public TestMockFixture {
 public:
+  typedef Threads<librbd::MockTestImageCtx> MockThreads;
   typedef ImageDeleter<librbd::MockTestImageCtx> MockImageDeleter;
   typedef ImageReplayer<librbd::MockTestImageCtx> MockImageReplayer;
   typedef InstanceReplayer<librbd::MockTestImageCtx> MockInstanceReplayer;
   typedef InstanceWatcher<librbd::MockTestImageCtx> MockInstanceWatcher;
   typedef ServiceDaemon<librbd::MockTestImageCtx> MockServiceDaemon;
-  typedef Threads<librbd::MockTestImageCtx> MockThreads;
-
-  void SetUp() override {
-    TestMockFixture::SetUp();
 
-    m_mock_threads = new MockThreads(m_threads);
+  void expect_work_queue(MockThreads &mock_threads) {
+    EXPECT_CALL(*mock_threads.work_queue, queue(_, _))
+      .WillOnce(Invoke([this](Context *ctx, int r) {
+          m_threads->work_queue->queue(ctx, r);
+        }));
   }
 
-  void TearDown() override {
-    delete m_mock_threads;
-    TestMockFixture::TearDown();
+  void expect_add_event_after(MockThreads &mock_threads,
+                              Context** timer_ctx = nullptr) {
+    EXPECT_CALL(*mock_threads.timer, add_event_after(_, _))
+      .WillOnce(WithArg<1>(
+        Invoke([this, &mock_threads, timer_ctx](Context *ctx) {
+          assert(mock_threads.timer_lock.is_locked());
+          if (timer_ctx != nullptr) {
+            *timer_ctx = ctx;
+            mock_threads.timer_cond.SignalOne();
+          } else {
+            m_threads->work_queue->queue(
+              new FunctionContext([&mock_threads, ctx](int) {
+                Mutex::Locker timer_lock(mock_threads.timer_lock);
+                ctx->complete(0);
+              }), 0);
+          }
+        })));
   }
 
-  void expect_wait_for_scheduled_deletion(MockImageDeleter& mock_image_deleter,
-                                          const std::string& global_image_id,
-                                          int r) {
-    EXPECT_CALL(mock_image_deleter,
-                wait_for_scheduled_deletion(_, global_image_id, _, false))
-      .WillOnce(WithArg<2>(Invoke([this, r](Context *ctx) {
-                             m_threads->work_queue->queue(ctx, r);
-                           })));
+  void expect_cancel_event(MockThreads &mock_threads, bool canceled) {
+    EXPECT_CALL(*mock_threads.timer, cancel_event(_))
+      .WillOnce(Return(canceled));
   }
-
-  MockThreads *m_mock_threads;
 };
 
 TEST_F(TestMockInstanceReplayer, AcquireReleaseImage) {
+  MockThreads mock_threads(m_threads);
   MockServiceDaemon mock_service_daemon;
   MockImageDeleter mock_image_deleter;
   MockInstanceWatcher mock_instance_watcher;
   MockImageReplayer mock_image_replayer;
   MockInstanceReplayer instance_replayer(
-    m_mock_threads, &mock_service_daemon, &mock_image_deleter,
+    &mock_threads, &mock_service_daemon, &mock_image_deleter,
     rbd::mirror::RadosRef(new librados::Rados(m_local_io_ctx)),
     "local_mirror_uuid", m_local_io_ctx.get_id());
-
   std::string global_image_id("global_image_id");
 
   EXPECT_CALL(mock_image_replayer, get_global_image_id())
     .WillRepeatedly(ReturnRef(global_image_id));
-  EXPECT_CALL(mock_image_replayer, is_blacklisted())
-    .WillRepeatedly(Return(false));
-
-  expect_wait_for_scheduled_deletion(mock_image_deleter, "global_image_id", 0);
 
   InSequence seq;
-
+  expect_work_queue(mock_threads);
+  Context *timer_ctx = nullptr;
+  expect_add_event_after(mock_threads, &timer_ctx);
   instance_replayer.init();
-  instance_replayer.add_peer("remote_mirror_uuid", m_remote_io_ctx);
+  instance_replayer.add_peer("peer_uuid", m_remote_io_ctx);
 
   // Acquire
 
   C_SaferCond on_acquire;
-
-  EXPECT_CALL(mock_image_replayer, add_remote_image("remote_mirror_uuid",
-                                                    "remote_image_id", _));
-  EXPECT_CALL(mock_image_replayer, is_stopped())
-    .WillOnce(Return(true));
+  EXPECT_CALL(mock_image_replayer, add_peer("peer_uuid", _));
+  EXPECT_CALL(mock_image_replayer, set_finished(false));
+  EXPECT_CALL(mock_image_replayer, is_stopped()).WillOnce(Return(true));
+  EXPECT_CALL(mock_image_replayer, is_blacklisted()).WillOnce(Return(false));
+  EXPECT_CALL(mock_image_replayer, is_finished()).WillOnce(Return(false));
   EXPECT_CALL(mock_image_replayer, start(nullptr, false));
+  expect_work_queue(mock_threads);
 
   instance_replayer.acquire_image(&mock_instance_watcher, global_image_id,
-                                  "remote_mirror_uuid", "remote_image_id",
                                   &on_acquire);
   ASSERT_EQ(0, on_acquire.wait());
 
@@ -201,31 +209,108 @@ TEST_F(TestMockInstanceReplayer, AcquireReleaseImage) {
 
   C_SaferCond on_release;
 
-  EXPECT_CALL(mock_image_replayer,
-              remove_remote_image("remote_mirror_uuid", "remote_image_id",
-                                  false));
-  EXPECT_CALL(mock_image_replayer, remote_images_empty())
-    .WillOnce(Return(true));
   EXPECT_CALL(mock_image_replayer, is_stopped())
     .WillOnce(Return(false));
   EXPECT_CALL(mock_image_replayer, is_running())
     .WillOnce(Return(false));
+  expect_work_queue(mock_threads);
+  expect_add_event_after(mock_threads);
+  expect_work_queue(mock_threads);
   EXPECT_CALL(mock_image_replayer, is_stopped())
     .WillOnce(Return(false));
   EXPECT_CALL(mock_image_replayer, is_running())
     .WillOnce(Return(true));
   EXPECT_CALL(mock_image_replayer, stop(_, false))
     .WillOnce(CompleteContext(0));
+  expect_work_queue(mock_threads);
   EXPECT_CALL(mock_image_replayer, is_stopped())
     .WillOnce(Return(true));
+  expect_work_queue(mock_threads);
   EXPECT_CALL(mock_image_replayer, destroy());
 
-  instance_replayer.release_image("global_image_id", "remote_mirror_uuid",
-                                  "remote_image_id", false, &on_release);
+  instance_replayer.release_image("global_image_id", &on_release);
   ASSERT_EQ(0, on_release.wait());
 
+  expect_work_queue(mock_threads);
+  expect_cancel_event(mock_threads, true);
+  expect_work_queue(mock_threads);
   instance_replayer.shut_down();
+  ASSERT_TRUE(timer_ctx != nullptr);
+  delete timer_ctx;
 }
 
+TEST_F(TestMockInstanceReplayer, RemoveFinishedImage) {  // expects destroy() on the replayer once is_finished() returns true
+  MockThreads mock_threads(m_threads);
+  MockServiceDaemon mock_service_daemon;
+  MockImageDeleter mock_image_deleter;
+  MockInstanceWatcher mock_instance_watcher;
+  MockImageReplayer mock_image_replayer;
+  MockInstanceReplayer instance_replayer(
+    &mock_threads, &mock_service_daemon, &mock_image_deleter,
+    rbd::mirror::RadosRef(new librados::Rados(m_local_io_ctx)),
+    "local_mirror_uuid", m_local_io_ctx.get_id());
+  std::string global_image_id("global_image_id");
+
+  EXPECT_CALL(mock_image_replayer, get_global_image_id())
+    .WillRepeatedly(ReturnRef(global_image_id));  // declared before InSequence, so not order-constrained
+
+  InSequence seq;  // expectations declared below must be satisfied in declaration order
+  expect_work_queue(mock_threads);
+  Context *timer_ctx1 = nullptr;  // receives the context handed to the first add_event_after() call
+  expect_add_event_after(mock_threads, &timer_ctx1);
+  instance_replayer.init();
+  instance_replayer.add_peer("peer_uuid", m_remote_io_ctx);
+
+  // Acquire
+
+  C_SaferCond on_acquire;
+  EXPECT_CALL(mock_image_replayer, add_peer("peer_uuid", _));
+  EXPECT_CALL(mock_image_replayer, set_finished(false));
+  EXPECT_CALL(mock_image_replayer, is_stopped()).WillOnce(Return(true));
+  EXPECT_CALL(mock_image_replayer, is_blacklisted()).WillOnce(Return(false));
+  EXPECT_CALL(mock_image_replayer, is_finished()).WillOnce(Return(false));
+  EXPECT_CALL(mock_image_replayer, start(nullptr, false));
+  expect_work_queue(mock_threads);
+
+  instance_replayer.acquire_image(&mock_instance_watcher, global_image_id,
+                                  &on_acquire);
+  ASSERT_EQ(0, on_acquire.wait());
+
+  // periodic start timer
+  Context *timer_ctx2 = nullptr;  // captured from the second add_event_after(); not completed by this test
+  expect_add_event_after(mock_threads, &timer_ctx2);
+  // capture the queued work item instead of running it, so the test decides when it executes
+  Context *start_image_replayers_ctx = nullptr;
+  EXPECT_CALL(*mock_threads.work_queue, queue(_, 0))
+    .WillOnce(Invoke([&start_image_replayers_ctx](Context *ctx, int r) {
+                start_image_replayers_ctx = ctx;
+              }));
+  // fire the first timer event by hand; timer_lock is held while it completes
+  ASSERT_TRUE(timer_ctx1 != nullptr);
+  {
+    Mutex::Locker timer_locker(mock_threads.timer_lock);
+    timer_ctx1->complete(0);
+  }
+
+  // remove finished image replayer
+  EXPECT_CALL(mock_image_replayer, get_health_state()).WillOnce(
+    Return(image_replayer::HEALTH_STATE_OK));
+  EXPECT_CALL(mock_image_replayer, is_stopped()).WillOnce(Return(true));
+  EXPECT_CALL(mock_image_replayer, is_blacklisted()).WillOnce(Return(false));
+  EXPECT_CALL(mock_image_replayer, is_finished()).WillOnce(Return(true));  // differs from the acquire step, where it returned false
+  EXPECT_CALL(mock_image_replayer, destroy());
+  EXPECT_CALL(mock_service_daemon,add_or_update_attribute(_, _, _)).Times(3);
+
+  ASSERT_TRUE(start_image_replayers_ctx != nullptr);
+  start_image_replayers_ctx->complete(0);  // run the work item captured from the mocked work queue
+
+  // shut down
+  expect_work_queue(mock_threads);
+  expect_cancel_event(mock_threads, true);
+  expect_work_queue(mock_threads);
+  instance_replayer.shut_down();
+  ASSERT_TRUE(timer_ctx2 != nullptr);
+  delete timer_ctx2;  // mocked cancel_event() only returns a bool; NOTE(review): presumably the test owns this context, confirm
+}
 } // namespace mirror
 } // namespace rbd
index bdd56bfc39dfc3b85fffec9699972f552a083cab..69497d3918cebc18959085f1a24de43df67b1394 100644 (file)
@@ -76,11 +76,11 @@ struct Threads<librbd::MockTestImageCtx> {
 
 template <>
 struct InstanceReplayer<librbd::MockTestImageCtx> {
-  MOCK_METHOD5(acquire_image, void(InstanceWatcher<librbd::MockTestImageCtx> *,
-                                   const std::string &, const std::string &,
+  MOCK_METHOD3(acquire_image, void(InstanceWatcher<librbd::MockTestImageCtx> *,
                                    const std::string &, Context *));
-  MOCK_METHOD5(release_image, void(const std::string &, const std::string &,
-                                   const std::string &, bool, Context *));
+  MOCK_METHOD2(release_image, void(const std::string &, Context *));
+  MOCK_METHOD3(remove_peer_image, void(const std::string&, const std::string&,
+                                       Context *));
 };
 
 template <>
@@ -365,38 +365,32 @@ TEST_F(TestMockInstanceWatcher, ImageAcquireRelease) {
 
   // Acquire Image on the the same instance
   EXPECT_CALL(mock_instance_replayer1, acquire_image(instance_watcher1, "gid",
-                                                     "uuid", "id", _))
-      .WillOnce(WithArg<4>(CompleteContext(0)));
+                                                     _))
+      .WillOnce(WithArg<2>(CompleteContext(0)));
   C_SaferCond on_acquire1;
-  instance_watcher1->notify_image_acquire(instance_id1, "gid", "uuid", "id",
-                                          &on_acquire1);
+  instance_watcher1->notify_image_acquire(instance_id1, "gid", &on_acquire1);
   ASSERT_EQ(0, on_acquire1.wait());
 
   // Acquire Image on the other instance
   EXPECT_CALL(mock_instance_replayer2, acquire_image(instance_watcher2, "gid",
-                                                     "uuid", "id", _))
-      .WillOnce(WithArg<4>(CompleteContext(0)));
+                                                     _))
+      .WillOnce(WithArg<2>(CompleteContext(0)));
   C_SaferCond on_acquire2;
-  instance_watcher1->notify_image_acquire(instance_id2, "gid", "uuid", "id",
-                                          &on_acquire2);
+  instance_watcher1->notify_image_acquire(instance_id2, "gid", &on_acquire2);
   ASSERT_EQ(0, on_acquire2.wait());
 
   // Release Image on the the same instance
-  EXPECT_CALL(mock_instance_replayer1, release_image("gid", "uuid", "id", true,
-                                                     _))
-      .WillOnce(WithArg<4>(CompleteContext(0)));
+  EXPECT_CALL(mock_instance_replayer1, release_image("gid", _))
+      .WillOnce(WithArg<1>(CompleteContext(0)));
   C_SaferCond on_release1;
-  instance_watcher1->notify_image_release(instance_id1, "gid", "uuid", "id",
-                                          true, &on_release1);
+  instance_watcher1->notify_image_release(instance_id1, "gid", &on_release1);
   ASSERT_EQ(0, on_release1.wait());
 
   // Release Image on the other instance
-  EXPECT_CALL(mock_instance_replayer2, release_image("gid", "uuid", "id", true,
-                                                     _))
-      .WillOnce(WithArg<4>(CompleteContext(0)));
+  EXPECT_CALL(mock_instance_replayer2, release_image("gid", _))
+      .WillOnce(WithArg<1>(CompleteContext(0)));
   C_SaferCond on_release2;
-  instance_watcher1->notify_image_release(instance_id2, "gid", "uuid", "id",
-                                          true, &on_release2);
+  instance_watcher1->notify_image_release(instance_id2, "gid", &on_release2);
   ASSERT_EQ(0, on_release2.wait());
 
   // Shutdown instance watcher 1
@@ -418,6 +412,75 @@ TEST_F(TestMockInstanceWatcher, ImageAcquireRelease) {
   delete instance_watcher2;
 }
 
+TEST_F(TestMockInstanceWatcher, PeerImageRemoved) {  // peer-image-removed notifications invoke remove_peer_image() on the addressed instance's replayer
+  MockManagedLock mock_managed_lock;
+
+  librados::IoCtx& io_ctx1 = m_local_io_ctx;
+  std::string instance_id1 = m_instance_id;
+  librados::MockTestMemIoCtxImpl &mock_io_ctx1(get_mock_io_ctx(io_ctx1));
+  MockInstanceReplayer mock_instance_replayer1;
+  auto instance_watcher1 = MockInstanceWatcher::create(
+      io_ctx1, m_mock_threads->work_queue, &mock_instance_replayer1);
+
+  librados::Rados cluster;  // second connection; instance_id2 below is derived from its io_ctx
+  librados::IoCtx io_ctx2;
+  EXPECT_EQ("", connect_cluster_pp(cluster));
+  EXPECT_EQ(0, cluster.ioctx_create(_local_pool_name.c_str(), io_ctx2));
+  std::string instance_id2 = stringify(io_ctx2.get_instance_id());
+  librados::MockTestMemIoCtxImpl &mock_io_ctx2(get_mock_io_ctx(io_ctx2));
+  MockInstanceReplayer mock_instance_replayer2;
+  auto instance_watcher2 = MockInstanceWatcher::create(
+    io_ctx2, m_mock_threads->work_queue, &mock_instance_replayer2);
+
+  InSequence seq;  // expectations declared below must be satisfied in declaration order
+
+  // Init instance watcher 1
+  expect_register_instance(mock_io_ctx1, 0);
+  expect_register_watch(mock_io_ctx1, instance_id1);
+  expect_acquire_lock(mock_managed_lock, 0);
+  ASSERT_EQ(0, instance_watcher1->init());
+
+  // Init instance watcher 2
+  expect_register_instance(mock_io_ctx2, 0);
+  expect_register_watch(mock_io_ctx2, instance_id2);
+  expect_acquire_lock(mock_managed_lock, 0);
+  ASSERT_EQ(0, instance_watcher2->init());
+
+  // Peer Image Removed on the same instance (watcher 1 notifies its own instance id)
+  EXPECT_CALL(mock_instance_replayer1, remove_peer_image("gid", "uuid", _))
+      .WillOnce(WithArg<2>(CompleteContext(0)));
+  C_SaferCond on_removed1;
+  instance_watcher1->notify_peer_image_removed(instance_id1, "gid", "uuid",
+                                               &on_removed1);
+  ASSERT_EQ(0, on_removed1.wait());
+
+  // Peer Image Removed on the other instance (sent via watcher 1, addressed to instance_id2)
+  EXPECT_CALL(mock_instance_replayer2, remove_peer_image("gid", "uuid", _))
+      .WillOnce(WithArg<2>(CompleteContext(0)));
+  C_SaferCond on_removed2;
+  instance_watcher1->notify_peer_image_removed(instance_id2, "gid", "uuid",
+                                               &on_removed2);
+  ASSERT_EQ(0, on_removed2.wait());
+
+  // Shutdown instance watcher 1
+  expect_release_lock(mock_managed_lock, 0);
+  expect_unregister_watch(mock_io_ctx1);
+  expect_unregister_instance(mock_io_ctx1, 0);
+  instance_watcher1->shut_down();
+
+  expect_destroy_lock(mock_managed_lock);
+  delete instance_watcher1;
+
+  // Shutdown instance watcher 2
+  expect_release_lock(mock_managed_lock, 0);
+  expect_unregister_watch(mock_io_ctx2);
+  expect_unregister_instance(mock_io_ctx2, 0);
+  instance_watcher2->shut_down();
+
+  expect_destroy_lock(mock_managed_lock);
+  delete instance_watcher2;
+}
+
 TEST_F(TestMockInstanceWatcher, ImageAcquireReleaseCancel) {
   MockManagedLock mock_managed_lock;
   librados::MockTestMemIoCtxImpl &mock_io_ctx(get_mock_io_ctx(m_local_io_ctx));
@@ -450,8 +513,7 @@ TEST_F(TestMockInstanceWatcher, ImageAcquireReleaseCancel) {
                   }));
 
   C_SaferCond on_acquire;
-  instance_watcher->notify_image_acquire("other", "gid", "uuid", "id",
-                                         &on_acquire);
+  instance_watcher->notify_image_acquire("other", "gid", &on_acquire);
   ASSERT_EQ(-ECANCELED, on_acquire.wait());
 
   // Send Release Image and cancel
@@ -472,8 +534,7 @@ TEST_F(TestMockInstanceWatcher, ImageAcquireReleaseCancel) {
                   }));
 
   C_SaferCond on_release;
-  instance_watcher->notify_image_release("other", "gid", "uuid", "id",
-                                         true, &on_release);
+  instance_watcher->notify_image_release("other", "gid", &on_release);
   ASSERT_EQ(-ECANCELED, on_release.wait());
 
   // Shutdown
@@ -486,6 +547,53 @@ TEST_F(TestMockInstanceWatcher, ImageAcquireReleaseCancel) {
   delete instance_watcher;
 }
 
+TEST_F(TestMockInstanceWatcher, PeerImageRemovedCancel) {  // a cancelled peer-image-removed notify completes its callback with -ECANCELED
+  MockManagedLock mock_managed_lock;
+  librados::MockTestMemIoCtxImpl &mock_io_ctx(get_mock_io_ctx(m_local_io_ctx));
+
+  auto instance_watcher = new MockInstanceWatcher(
+    m_local_io_ctx, m_mock_threads->work_queue, nullptr, m_instance_id);  // NOTE(review): nullptr is presumably the instance replayer, unused here; confirm
+  InSequence seq;
+
+  // Init
+  expect_register_instance(mock_io_ctx, 0);
+  expect_register_watch(mock_io_ctx);
+  expect_acquire_lock(mock_managed_lock, 0);
+  ASSERT_EQ(0, instance_watcher->init());
+
+  // Send Peer Image Removed and cancel
+  EXPECT_CALL(mock_io_ctx, aio_notify(_, _, _, _, _))
+    .WillOnce(Invoke(
+                  [this, instance_watcher, &mock_io_ctx](
+                    const std::string& o, librados::AioCompletionImpl *c,
+                    bufferlist& bl, uint64_t timeout_ms, bufferlist *pbl) {
+                    c->get();  // presumably takes a reference so the completion outlives this call; it is finished in the deferred callback
+                    auto ctx = new FunctionContext(
+                      [instance_watcher, &mock_io_ctx, c, pbl](int r) {
+                        instance_watcher->cancel_notify_requests("other");  // cancel first, then let the notify finish
+                        ::encode(librbd::watcher::NotifyResponse(), *pbl);
+                        mock_io_ctx.get_mock_rados_client()->
+                            finish_aio_completion(c, -ETIMEDOUT);  // notify ends with -ETIMEDOUT; the caller is asserted to see -ECANCELED below
+                      });
+                    m_threads->work_queue->queue(ctx, 0);  // deferred to the real work queue rather than run inline
+                  }));
+
+  C_SaferCond on_acquire;  // NOTE(review): name looks carried over from the acquire/release cancel test; it waits on the removal notify
+  instance_watcher->notify_peer_image_removed("other", "gid", "uuid",
+                                              &on_acquire);
+  ASSERT_EQ(-ECANCELED, on_acquire.wait());
+
+  // Shutdown
+  expect_release_lock(mock_managed_lock, 0);
+  expect_unregister_watch(mock_io_ctx);
+  expect_unregister_instance(mock_io_ctx, 0);
+  instance_watcher->shut_down();
+
+  expect_destroy_lock(mock_managed_lock);
+  delete instance_watcher;
+}
+
+
 class TestMockInstanceWatcher_NotifySync : public TestMockInstanceWatcher {
 public:
   typedef ImageSyncThrottler<librbd::MockTestImageCtx> MockImageSyncThrottler;
index ca409aeaca2e5fdf0ec233512840f491788710fe..575b394f14887c5382980f60655b9bbbf4dda2fb 100644 (file)
@@ -108,11 +108,9 @@ target_link_libraries(unittest_rgw_crypto
 set_target_properties(unittest_rgw_crypto PROPERTIES COMPILE_FLAGS$ {UNITTEST_CXX_FLAGS})
 
 # ceph_test_rgw_iam_policy
-set(test_rgw_iam_policy_srcs test_rgw_iam_policy.cc)
-add_executable(ceph_test_rgw_iam_policy
-  ${test_rgw_iam_policy_srcs}
-  )
-target_link_libraries(ceph_test_rgw_iam_policy
+add_executable(unittest_rgw_iam_policy test_rgw_iam_policy.cc)
+add_ceph_unittest(unittest_rgw_iam_policy ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/unittest_rgw_iam_policy)
+target_link_libraries(unittest_rgw_iam_policy
   rgw_a
   cls_rgw_client
   cls_lock_client
@@ -130,7 +128,7 @@ target_link_libraries(ceph_test_rgw_iam_policy
   ${UNITTEST_LIBS}
   ${CRYPTO_LIBS}
   )
-set_target_properties(ceph_test_rgw_iam_policy PROPERTIES COMPILE_FLAGS ${UNITTEST_CXX_FLAGS})
+set_target_properties(unittest_rgw_iam_policy PROPERTIES COMPILE_FLAGS ${UNITTEST_CXX_FLAGS})
 
 # unitttest_rgw_string
 add_executable(unittest_rgw_string test_rgw_string.cc)
index cc512e0b0c3ff00a5d086db7768124336472165a..7751b086898bce2ffe9a81ed4d79502e0f047749 100644 (file)
@@ -61,7 +61,9 @@ using rgw::IAM::s3GetObject;
 using rgw::IAM::s3GetObjectAcl;
 using rgw::IAM::s3GetObjectVersionAcl;
 using rgw::IAM::s3GetObjectTorrent;
+using rgw::IAM::s3GetObjectTagging;
 using rgw::IAM::s3GetObjectVersion;
+using rgw::IAM::s3GetObjectVersionTagging;
 using rgw::IAM::s3GetObjectVersionTorrent;
 using rgw::IAM::s3GetReplicationConfiguration;
 using rgw::IAM::s3ListAllMyBuckets;
@@ -324,7 +326,9 @@ TEST_F(PolicyTest, Parse3) {
                                      s3GetBucketTagging |
                                      s3GetBucketWebsite |
                                      s3GetLifecycleConfiguration |
-                                     s3GetReplicationConfiguration));
+                                     s3GetReplicationConfiguration |
+                                     s3GetObjectTagging |
+                                     s3GetObjectVersionTagging));
   EXPECT_EQ(p->statements[2].notaction, s3None);
   ASSERT_FALSE(p->statements[2].resource.empty());
   ASSERT_EQ(p->statements[2].resource.size(), 2U);
@@ -370,7 +374,8 @@ TEST_F(PolicyTest, Eval3) {
                  s3GetBucketPolicy | s3GetBucketNotification |
                  s3GetBucketLogging | s3GetBucketTagging |
                  s3GetBucketWebsite | s3GetLifecycleConfiguration |
-                 s3GetReplicationConfiguration);
+                 s3GetReplicationConfiguration |
+                 s3GetObjectTagging | s3GetObjectVersionTagging);
 
   EXPECT_EQ(p.eval(em, none, s3PutBucketPolicy,
                   ARN(Partition::aws, Service::s3,
@@ -505,3 +510,120 @@ string PolicyTest::example3 = R"(
   ]
 }
 )";
+
+TEST(MatchWildcards, Simple)
+{
+  EXPECT_TRUE(match_wildcards("", ""));
+  EXPECT_TRUE(match_wildcards("", "", MATCH_CASE_INSENSITIVE));
+  EXPECT_FALSE(match_wildcards("", "abc"));
+  EXPECT_FALSE(match_wildcards("", "abc", MATCH_CASE_INSENSITIVE));
+  EXPECT_FALSE(match_wildcards("abc", ""));
+  EXPECT_FALSE(match_wildcards("abc", "", MATCH_CASE_INSENSITIVE));
+  EXPECT_TRUE(match_wildcards("abc", "abc"));
+  EXPECT_TRUE(match_wildcards("abc", "abc", MATCH_CASE_INSENSITIVE));
+  EXPECT_FALSE(match_wildcards("abc", "abC"));
+  EXPECT_TRUE(match_wildcards("abc", "abC", MATCH_CASE_INSENSITIVE));
+  EXPECT_FALSE(match_wildcards("abC", "abc"));
+  EXPECT_TRUE(match_wildcards("abC", "abc", MATCH_CASE_INSENSITIVE));
+  EXPECT_FALSE(match_wildcards("abc", "abcd"));
+  EXPECT_FALSE(match_wildcards("abc", "abcd", MATCH_CASE_INSENSITIVE));
+  EXPECT_FALSE(match_wildcards("abcd", "abc"));
+  EXPECT_FALSE(match_wildcards("abcd", "abc", MATCH_CASE_INSENSITIVE));
+}
+
+TEST(MatchWildcards, QuestionMark)
+{
+  EXPECT_FALSE(match_wildcards("?", ""));
+  EXPECT_FALSE(match_wildcards("?", "", MATCH_CASE_INSENSITIVE));
+  EXPECT_TRUE(match_wildcards("?", "a"));
+  EXPECT_TRUE(match_wildcards("?", "a", MATCH_CASE_INSENSITIVE));
+  EXPECT_TRUE(match_wildcards("?bc", "abc"));
+  EXPECT_TRUE(match_wildcards("?bc", "abc", MATCH_CASE_INSENSITIVE));
+  EXPECT_TRUE(match_wildcards("a?c", "abc"));
+  EXPECT_TRUE(match_wildcards("a?c", "abc", MATCH_CASE_INSENSITIVE));
+  EXPECT_FALSE(match_wildcards("abc", "a?c"));
+  EXPECT_FALSE(match_wildcards("abc", "a?c", MATCH_CASE_INSENSITIVE));
+  EXPECT_FALSE(match_wildcards("a?c", "abC"));
+  EXPECT_TRUE(match_wildcards("a?c", "abC", MATCH_CASE_INSENSITIVE));
+  EXPECT_TRUE(match_wildcards("ab?", "abc"));
+  EXPECT_TRUE(match_wildcards("ab?", "abc", MATCH_CASE_INSENSITIVE));
+  EXPECT_TRUE(match_wildcards("a?c?e", "abcde"));
+  EXPECT_TRUE(match_wildcards("a?c?e", "abcde", MATCH_CASE_INSENSITIVE));
+  EXPECT_TRUE(match_wildcards("???", "abc"));
+  EXPECT_TRUE(match_wildcards("???", "abc", MATCH_CASE_INSENSITIVE));
+  EXPECT_FALSE(match_wildcards("???", "abcd"));
+  EXPECT_FALSE(match_wildcards("???", "abcd", MATCH_CASE_INSENSITIVE));
+}
+
+TEST(MatchWildcards, Asterisk)
+{
+  EXPECT_TRUE(match_wildcards("*", ""));
+  EXPECT_TRUE(match_wildcards("*", "", MATCH_CASE_INSENSITIVE));
+  EXPECT_FALSE(match_wildcards("", "*"));
+  EXPECT_FALSE(match_wildcards("", "*", MATCH_CASE_INSENSITIVE));
+  EXPECT_FALSE(match_wildcards("*a", ""));
+  EXPECT_FALSE(match_wildcards("*a", "", MATCH_CASE_INSENSITIVE));
+  EXPECT_TRUE(match_wildcards("*a", "a"));
+  EXPECT_TRUE(match_wildcards("*a", "a", MATCH_CASE_INSENSITIVE));
+  EXPECT_TRUE(match_wildcards("a*", "a"));
+  EXPECT_TRUE(match_wildcards("a*", "a", MATCH_CASE_INSENSITIVE));
+  EXPECT_TRUE(match_wildcards("a*c", "ac"));
+  EXPECT_TRUE(match_wildcards("a*c", "ac", MATCH_CASE_INSENSITIVE));
+  EXPECT_TRUE(match_wildcards("a*c", "abbc"));
+  EXPECT_TRUE(match_wildcards("a*c", "abbc", MATCH_CASE_INSENSITIVE));
+  EXPECT_FALSE(match_wildcards("a*c", "abbC"));
+  EXPECT_TRUE(match_wildcards("a*c", "abbC", MATCH_CASE_INSENSITIVE));
+  EXPECT_TRUE(match_wildcards("a*c*e", "abBce"));
+  EXPECT_TRUE(match_wildcards("a*c*e", "abBce", MATCH_CASE_INSENSITIVE));
+  EXPECT_TRUE(match_wildcards("http://*.example.com",
+                              "http://www.example.com"));
+  EXPECT_TRUE(match_wildcards("http://*.example.com",
+                              "http://www.example.com", MATCH_CASE_INSENSITIVE));
+  EXPECT_FALSE(match_wildcards("http://*.example.com",
+                               "http://www.Example.com"));
+  EXPECT_TRUE(match_wildcards("http://*.example.com",
+                              "http://www.Example.com", MATCH_CASE_INSENSITIVE));
+  EXPECT_TRUE(match_wildcards("http://example.com/*",
+                              "http://example.com/index.html"));
+  EXPECT_TRUE(match_wildcards("http://example.com/*/*.jpg",
+                              "http://example.com/fun/smiley.jpg"));
+  // note: parsing of * is not greedy, so * does not match 'bc' here
+  EXPECT_FALSE(match_wildcards("a*c", "abcc"));
+  EXPECT_FALSE(match_wildcards("a*c", "abcc", MATCH_CASE_INSENSITIVE));
+}
+
+TEST(MatchPolicy, Action)
+{
+  constexpr auto flag = MATCH_POLICY_ACTION;
+  EXPECT_TRUE(match_policy("a:b:c", "a:b:c", flag));
+  EXPECT_TRUE(match_policy("a:b:c", "A:B:C", flag)); // case insensitive
+  EXPECT_TRUE(match_policy("a:*:e", "a:bcd:e", flag));
+  EXPECT_FALSE(match_policy("a:*", "a:b:c", flag)); // cannot span segments
+}
+
+TEST(MatchPolicy, Resource)
+{
+  constexpr auto flag = MATCH_POLICY_RESOURCE;
+  EXPECT_TRUE(match_policy("a:b:c", "a:b:c", flag));
+  EXPECT_FALSE(match_policy("a:b:c", "A:B:C", flag)); // case sensitive
+  EXPECT_TRUE(match_policy("a:*:e", "a:bcd:e", flag));
+  EXPECT_FALSE(match_policy("a:*", "a:b:c", flag)); // cannot span segments
+}
+
+TEST(MatchPolicy, ARN)
+{
+  constexpr auto flag = MATCH_POLICY_ARN;
+  EXPECT_TRUE(match_policy("a:b:c", "a:b:c", flag));
+  EXPECT_TRUE(match_policy("a:b:c", "A:B:C", flag)); // case insensitive
+  EXPECT_TRUE(match_policy("a:*:e", "a:bcd:e", flag));
+  EXPECT_FALSE(match_policy("a:*", "a:b:c", flag)); // cannot span segments
+}
+
+TEST(MatchPolicy, String)
+{
+  constexpr auto flag = MATCH_POLICY_STRING;
+  EXPECT_TRUE(match_policy("a:b:c", "a:b:c", flag));
+  EXPECT_FALSE(match_policy("a:b:c", "A:B:C", flag)); // case sensitive
+  EXPECT_TRUE(match_policy("a:*:e", "a:bcd:e", flag));
+  EXPECT_FALSE(match_policy("a:*", "a:b:c", flag)); // cannot span segments
+}
index ea241cceef2ae9b66842a1573b4ad43851ad22fb..ed19c63bc3c7da87dfb9ee6fa043d95683f1dc0b 100644 (file)
@@ -100,4 +100,7 @@ if(WITH_RBD)
   if(LINUX)
     add_subdirectory(rbd_nbd)
   endif()
+  if(FREEBSD)
+    add_subdirectory(rbd_ggate)
+  endif()
 endif(WITH_RBD)
index d17a74dbcbb0ec6c18864361365a84d75b5099b9..5853f6a04608b022817f71215e839ff5e6640021 100644 (file)
@@ -328,7 +328,9 @@ int get_log(ObjectStore *fs, __u8 struct_ver,
     PGLog::read_log_and_missing(fs, coll,
                    struct_ver >= 8 ? coll : coll_t::meta(),
                    struct_ver >= 8 ? pgid.make_pgmeta_oid() : log_oid,
-                   info, log, missing, oss,
+                   info, log, missing,
+                               struct_ver < 9,
+                               oss,
                    g_ceph_context->_conf->osd_ignore_stale_divergent_priors);
     if (debug && oss.str().size())
       cerr << oss.str() << std::endl;
index 73054c335ff4520f5d72d93748709334829b1c01..9c94a7d1442c169b96e2645ff53108e994075131 100644 (file)
@@ -63,7 +63,7 @@ int Dumper::recover_journal(Journaler *journaler)
   lock.Lock();
   journaler->recover(&cond);
   lock.Unlock();
-  int const r = cond.wait();
+  const int r = cond.wait();
 
   if (r < 0) { // Error
     derr << "error on recovery: " << cpp_strerror(r) << dendl;
index eb0218df294d63c91f06988881265c76dda2acf7..e6aaa05e11e24349b981f077e28084dddbcce5ec 100644 (file)
@@ -35,7 +35,7 @@ class JournalScanner
   librados::IoCtx &io;
 
   // Input constraints
-  int const rank;
+  const int rank;
   JournalFilter const filter;
 
   void gap_advance();
index ad4005ebf0aec88c07037d5cbf422f7d5f09ac02..7aa42e9efdbdb6d671b5a51bae18cc6738bc7048 100644 (file)
@@ -15,7 +15,6 @@ set(rbd_srcs
   action/Export.cc
   action/Feature.cc
   action/Flatten.cc
-  action/Group.cc
   action/ImageMeta.cc
   action/Import.cc
   action/Info.cc
@@ -36,6 +35,10 @@ set(rbd_srcs
   action/Status.cc
   action/Trash.cc
   action/Watch.cc)
+if(FREEBSD)
+  list(APPEND rbd_srcs action/Ggate.cc)
+endif()
+
 add_executable(rbd ${rbd_srcs}
   $<TARGET_OBJECTS:common_texttable_obj>)
 set_target_properties(rbd PROPERTIES OUTPUT_NAME rbd)
diff --git a/ceph/src/tools/rbd/action/Ggate.cc b/ceph/src/tools/rbd/action/Ggate.cc
new file mode 100644 (file)
index 0000000..a87751b
--- /dev/null
@@ -0,0 +1,187 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <sys/param.h>
+#include <errno.h>
+#include <unistd.h>
+
+#include "include/stringify.h"
+#include "common/SubProcess.h"
+
+#include "tools/rbd/ArgumentTypes.h"
+#include "tools/rbd/Shell.h"
+#include "tools/rbd/Utils.h"
+
+#include <boost/algorithm/string/predicate.hpp>
+#include <boost/scope_exit.hpp>
+#include <boost/program_options.hpp>
+
+#include <iostream>
+
+namespace rbd {
+namespace action {
+namespace ggate {
+
+namespace at = argument_types;
+namespace po = boost::program_options;
+
+static int call_ggate_cmd(const po::variables_map &vm,
+                          const std::vector<const char*> &args)
+{
+  SubProcess process("rbd-ggate", SubProcess::KEEP, SubProcess::KEEP,
+                     SubProcess::KEEP);
+
+  if (vm.count("conf")) {
+    process.add_cmd_arg("--conf");
+    process.add_cmd_arg(vm["conf"].as<std::string>().c_str());
+  }
+  if (vm.count("cluster")) {
+    process.add_cmd_arg("--cluster");
+    process.add_cmd_arg(vm["cluster"].as<std::string>().c_str());
+  }
+  if (vm.count("id")) {
+    process.add_cmd_arg("--id");
+    process.add_cmd_arg(vm["id"].as<std::string>().c_str());
+  }
+  if (vm.count("name")) {
+    process.add_cmd_arg("--name");
+    process.add_cmd_arg(vm["name"].as<std::string>().c_str());
+  }
+  if (vm.count("mon_host")) {
+    process.add_cmd_arg("--mon_host");
+    process.add_cmd_arg(vm["mon_host"].as<std::string>().c_str());
+  }
+  if (vm.count("keyfile")) {
+    process.add_cmd_arg("--keyfile");
+    process.add_cmd_arg(vm["keyfile"].as<std::string>().c_str());
+  }
+  if (vm.count("keyring")) {
+    process.add_cmd_arg("--keyring");
+    process.add_cmd_arg(vm["keyring"].as<std::string>().c_str());
+  }
+
+  for (std::vector<const char*>::const_iterator p = args.begin();
+       p != args.end(); ++p)
+    process.add_cmd_arg(*p);
+
+  if (process.spawn()) {
+    std::cerr << "rbd: failed to run rbd-ggate: " << process.err() << std::endl;
+    return -EINVAL;
+  } else if (process.join()) {
+    std::cerr << "rbd: rbd-ggate failed with error: " << process.err()
+              << std::endl;
+    return -EINVAL;
+  }
+
+  return 0;
+}
+
+void get_list_arguments(po::options_description *positional,
+                        po::options_description *options)
+{ }
+
+int execute_list(const po::variables_map &vm)
+{
+  std::vector<const char*> args;
+
+  args.push_back("list");
+
+  return call_ggate_cmd(vm, args);
+}
+
+void get_map_arguments(po::options_description *positional,
+                       po::options_description *options)
+{
+  at::add_image_or_snap_spec_options(positional, options,
+                                     at::ARGUMENT_MODIFIER_NONE);
+  options->add_options()
+    ("read-only", po::bool_switch(), "map read-only")
+    ("exclusive", po::bool_switch(), "forbid writes by other clients")
+    ("device", po::value<std::string>(), "specify ggate device");
+}
+
+int execute_map(const po::variables_map &vm)
+{
+  size_t arg_index = 0;
+  std::string pool_name;
+  std::string image_name;
+  std::string snap_name;
+  int r = utils::get_pool_image_snapshot_names(
+    vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &image_name,
+    &snap_name, utils::SNAPSHOT_PRESENCE_PERMITTED,
+    utils::SPEC_VALIDATION_NONE);
+  if (r < 0) {
+    return r;
+  }
+
+  std::vector<const char*> args;
+
+  args.push_back("map");
+  std::string img;
+  img.append(pool_name);
+  img.append("/");
+  img.append(image_name);
+  if (!snap_name.empty()) {
+    img.append("@");
+    img.append(snap_name);
+  }
+  args.push_back(img.c_str());
+
+  if (vm["read-only"].as<bool>())
+    args.push_back("--read-only");
+
+  if (vm["exclusive"].as<bool>())
+    args.push_back("--exclusive");
+
+  if (vm.count("device")) {
+    args.push_back("--device");
+    args.push_back(vm["device"].as<std::string>().c_str());
+  }
+
+  return call_ggate_cmd(vm, args);
+}
+
+void get_unmap_arguments(po::options_description *positional,
+                         po::options_description *options)
+{
+  positional->add_options()
+    ("device-spec", "specify ggate device");
+}
+
+int execute_unmap(const po::variables_map &vm)
+{
+  std::string device_name = utils::get_positional_argument(vm, 0);
+  if (!boost::starts_with(device_name, "/dev/")) {
+    device_name.clear();
+  }
+
+  if (device_name.empty()) {
+    std::cerr << "rbd: ggate unmap requires device path" << std::endl;
+    return -EINVAL;
+  }
+
+  std::vector<const char*> args;
+
+  args.push_back("unmap");
+  args.push_back(device_name.c_str());
+
+  return call_ggate_cmd(vm, args);
+}
+
+Shell::SwitchArguments switched_arguments({"read-only", "exclusive"});
+
+Shell::Action action_list(
+  {"ggate", "list"}, {"ggate", "ls"}, "List mapped ggate devices.", "",
+  &get_list_arguments, &execute_list);
+
+Shell::Action action_map(
+  {"ggate", "map"}, {}, "Map an image to a ggate device.", "",
+  &get_map_arguments, &execute_map);
+
+Shell::Action action_unmap(
+  {"ggate", "unmap"}, {}, "Unmap a ggate device.", "",
+  &get_unmap_arguments, &execute_unmap);
+
+} // namespace ggate
+} // namespace action
+} // namespace rbd
diff --git a/ceph/src/tools/rbd/action/Group.cc b/ceph/src/tools/rbd/action/Group.cc
deleted file mode 100644 (file)
index 40ebd4d..0000000
+++ /dev/null
@@ -1,401 +0,0 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-
-#include <iostream>
-
-#include "tools/rbd/ArgumentTypes.h"
-#include "tools/rbd/Shell.h"
-#include "tools/rbd/Utils.h"
-#include "include/rbd_types.h"
-#include "cls/rbd/cls_rbd_types.h"
-#include "common/errno.h"
-#include "common/Formatter.h"
-
-namespace rbd {
-namespace action {
-namespace consgrp {
-
-namespace at = argument_types;
-namespace po = boost::program_options;
-
-int execute_create(const po::variables_map &vm) {
-  size_t arg_index = 0;
-
-  std::string group_name;
-  std::string pool_name;
-
-  int r = utils::get_pool_group_names(vm, at::ARGUMENT_MODIFIER_NONE,
-                                      &arg_index, &pool_name, &group_name);
-  if (r < 0) {
-    return r;
-  }
-
-  librados::Rados rados;
-  librados::IoCtx io_ctx;
-
-  r = utils::init(pool_name, &rados, &io_ctx);
-  if (r < 0) {
-    return r;
-  }
-  librbd::RBD rbd;
-  r = rbd.group_create(io_ctx, group_name.c_str());
-  if (r < 0) {
-    std::cerr << "rbd: create error: " << cpp_strerror(r) << std::endl;
-    return r;
-  }
-
-  return 0;
-}
-
-int execute_list(const po::variables_map &vm) {
-
-  size_t arg_index = 0;
-  std::string pool_name = utils::get_pool_name(vm, &arg_index);
-
-  at::Format::Formatter formatter;
-  int r = utils::get_formatter(vm, &formatter);
-  if (r < 0) {
-    return r;
-  }
-  Formatter *f = formatter.get();
-
-  librados::Rados rados;
-  librados::IoCtx io_ctx;
-  r = utils::init(pool_name, &rados, &io_ctx);
-  if (r < 0) {
-    return r;
-  }
-
-  librbd::RBD rbd;
-  std::vector<std::string> names;
-  r = rbd.group_list(io_ctx, &names);
-  if (r < 0)
-    return r;
-
-  if (f)
-    f->open_array_section("consistency_groups");
-  for (auto i : names) {
-    if (f)
-      f->dump_string("name", i);
-    else
-      std::cout << i << std::endl;
-  }
-  if (f) {
-    f->close_section();
-    f->flush(std::cout);
-  }
-
-  return 0;
-}
-
-int execute_remove(const po::variables_map &vm) {
-  size_t arg_index = 0;
-
-  std::string group_name;
-  std::string pool_name;
-
-  int r = utils::get_pool_group_names(vm, at::ARGUMENT_MODIFIER_NONE,
-                                      &arg_index, &pool_name, &group_name);
-  if (r < 0) {
-    return r;
-  }
-
-  librados::Rados rados;
-  librados::IoCtx io_ctx;
-
-  r = utils::init(pool_name, &rados, &io_ctx);
-  if (r < 0) {
-    return r;
-  }
-  librbd::RBD rbd;
-
-  r = rbd.group_remove(io_ctx, group_name.c_str());
-  if (r < 0) {
-    std::cerr << "rbd: remove error: " << cpp_strerror(r) << std::endl;
-    return r;
-  }
-
-  return 0;
-}
-
-int execute_add(const po::variables_map &vm) {
-  size_t arg_index = 0;
-  // Parse group data.
-  std::string group_name;
-  std::string group_pool_name;
-
-  int r = utils::get_special_pool_group_names(vm, &arg_index,
-                                             &group_pool_name,
-                                             &group_name);
-  if (r < 0) {
-    std::cerr << "rbd: image add error: " << cpp_strerror(r) << std::endl;
-    return r;
-  }
-
-  std::string image_name;
-  std::string image_pool_name;
-
-  r = utils::get_special_pool_image_names(vm, &arg_index,
-                                         &image_pool_name,
-                                         &image_name);
-
-  if (r < 0) {
-    std::cerr << "rbd: image add error: " << cpp_strerror(r) << std::endl;
-    return r;
-  }
-
-  librados::Rados rados;
-
-  librados::IoCtx cg_io_ctx;
-  r = utils::init(group_pool_name, &rados, &cg_io_ctx);
-  if (r < 0) {
-    return r;
-  }
-
-  librados::IoCtx image_io_ctx;
-  r = utils::init(image_pool_name, &rados, &image_io_ctx);
-  if (r < 0) {
-    return r;
-  }
-
-  librbd::RBD rbd;
-  r = rbd.group_image_add(cg_io_ctx, group_name.c_str(),
-                         image_io_ctx, image_name.c_str());
-  if (r < 0) {
-    std::cerr << "rbd: add image error: " << cpp_strerror(r) << std::endl;
-    return r;
-  }
-
-  return 0;
-}
-
-int execute_remove_image(const po::variables_map &vm) {
-  size_t arg_index = 0;
-
-  std::string group_name;
-  std::string group_pool_name;
-
-  int r = utils::get_special_pool_group_names(vm, &arg_index,
-                                             &group_pool_name,
-                                             &group_name);
-  if (r < 0) {
-    std::cerr << "rbd: image remove error: " << cpp_strerror(r) << std::endl;
-    return r;
-  }
-
-  std::string image_name;
-  std::string image_pool_name;
-  std::string image_id;
-
-  if (vm.count(at::IMAGE_ID)) {
-    image_id = vm[at::IMAGE_ID].as<std::string>();
-  }
-
-  bool has_image_spec = utils::check_if_image_spec_present(
-      vm, at::ARGUMENT_MODIFIER_NONE, arg_index);
-
-  if (!image_id.empty() && has_image_spec) {
-    std::cerr << "rbd: trying to access image using both name and id. "
-              << std::endl;
-    return -EINVAL;
-  }
-
-  if (image_id.empty()) {
-    r = utils::get_special_pool_image_names(vm, &arg_index, &image_pool_name,
-                                            &image_name);
-  } else {
-    image_pool_name = utils::get_pool_name(vm, &arg_index);
-  }
-
-  if (r < 0) {
-    std::cerr << "rbd: image remove error: " << cpp_strerror(r) << std::endl;
-    return r;
-  }
-
-  librados::Rados rados;
-
-  librados::IoCtx cg_io_ctx;
-  r = utils::init(group_pool_name, &rados, &cg_io_ctx);
-  if (r < 0) {
-    return r;
-  }
-
-  librados::IoCtx image_io_ctx;
-  r = utils::init(image_pool_name, &rados, &image_io_ctx);
-  if (r < 0) {
-    return r;
-  }
-
-  librbd::RBD rbd;
-  if (image_id.empty()) {
-    r = rbd.group_image_remove(cg_io_ctx, group_name.c_str(),
-                               image_io_ctx, image_name.c_str());
-  } else {
-    r = rbd.group_image_remove_by_id(cg_io_ctx, group_name.c_str(),
-                                     image_io_ctx, image_id.c_str());
-  }
-  if (r < 0) {
-    std::cerr << "rbd: remove image error: " << cpp_strerror(r) << std::endl;
-    return r;
-  }
-
-  return 0;
-}
-
-int execute_list_images(const po::variables_map &vm) {
-  size_t arg_index = 0;
-  std::string group_name;
-  std::string pool_name;
-
-  int r = utils::get_pool_group_names(vm, at::ARGUMENT_MODIFIER_NONE,
-                                      &arg_index, &pool_name, &group_name);
-  if (r < 0) {
-    return r;
-  }
-
-  if (group_name.empty()) {
-    std::cerr << "rbd: "
-              << "consistency group name was not specified" << std::endl;
-    return -EINVAL;
-  }
-
-  at::Format::Formatter formatter;
-  r = utils::get_formatter(vm, &formatter);
-  if (r < 0) {
-    return r;
-  }
-  Formatter *f = formatter.get();
-
-  librados::Rados rados;
-  librados::IoCtx io_ctx;
-  r = utils::init(pool_name, &rados, &io_ctx);
-  if (r < 0) {
-    return r;
-  }
-
-  librbd::RBD rbd;
-  std::vector<librbd::group_image_status_t> images;
-
-  r = rbd.group_image_list(io_ctx, group_name.c_str(), &images);
-
-  if (r == -ENOENT)
-    r = 0;
-
-  if (r < 0)
-    return r;
-
-  if (f)
-    f->open_array_section("consistency_groups");
-
-  for (auto i : images) {
-    std::string image_name = i.name;
-    int64_t pool_id = i.pool;
-    int state = i.state;
-    std::string state_string;
-    if (cls::rbd::GROUP_IMAGE_LINK_STATE_INCOMPLETE == state) {
-      state_string = "incomplete";
-    }
-    if (f) {
-      f->dump_string("image name", image_name);
-      f->dump_int("pool id", pool_id);
-      f->dump_int("state", state);
-    } else
-      std::cout << pool_id << "." << image_name << " " << state_string << std::endl;
-  }
-
-  if (f) {
-    f->close_section();
-    f->flush(std::cout);
-  }
-
-  return 0;
-}
-
-void get_create_arguments(po::options_description *positional,
-                          po::options_description *options) {
-  at::add_group_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
-}
-
-void get_remove_arguments(po::options_description *positional,
-                          po::options_description *options) {
-  at::add_group_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
-}
-
-void get_list_arguments(po::options_description *positional,
-                        po::options_description *options) {
-  add_pool_option(options, at::ARGUMENT_MODIFIER_NONE);
-  at::add_format_options(options);
-}
-
-void get_add_arguments(po::options_description *positional,
-                       po::options_description *options) {
-  positional->add_options()
-    (at::GROUP_SPEC.c_str(),
-     "group specification\n"
-     "(example: [<pool-name>/]<group-name>)");
-
-  at::add_special_pool_option(options, "group");
-  at::add_group_option(options, at::ARGUMENT_MODIFIER_NONE);
-
-  positional->add_options()
-    (at::IMAGE_SPEC.c_str(),
-     "image specification\n"
-     "(example: [<pool-name>/]<image-name>)");
-
-  at::add_special_pool_option(options, "image");
-  at::add_image_option(options, at::ARGUMENT_MODIFIER_NONE);
-
-  at::add_pool_option(options, at::ARGUMENT_MODIFIER_NONE,
-              " unless overridden");
-}
-
-void get_remove_image_arguments(po::options_description *positional,
-                                po::options_description *options) {
-  positional->add_options()
-    (at::GROUP_SPEC.c_str(),
-     "group specification\n"
-     "(example: [<pool-name>/]<group-name>)");
-
-  at::add_special_pool_option(options, "group");
-  at::add_group_option(options, at::ARGUMENT_MODIFIER_NONE);
-
-  positional->add_options()
-    (at::IMAGE_SPEC.c_str(),
-     "image specification\n"
-     "(example: [<pool-name>/]<image-name>)");
-
-  at::add_special_pool_option(options, "image");
-  at::add_image_option(options, at::ARGUMENT_MODIFIER_NONE);
-
-  at::add_pool_option(options, at::ARGUMENT_MODIFIER_NONE,
-              " unless overridden");
-  at::add_image_id_option(options);
-}
-
-void get_list_images_arguments(po::options_description *positional,
-                               po::options_description *options) {
-  at::add_format_options(options);
-  at::add_group_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE);
-}
-
-Shell::Action action_create(
-  {"group", "create"}, {}, "Create a consistency group.",
-  "", &get_create_arguments, &execute_create);
-Shell::Action action_remove(
-  {"group", "remove"}, {"group", "rm"}, "Delete a consistency group.",
-  "", &get_remove_arguments, &execute_remove);
-Shell::Action action_list(
-  {"group", "list"}, {"group", "ls"}, "List rbd consistency groups.",
-  "", &get_list_arguments, &execute_list);
-Shell::Action action_add(
-  {"group", "image", "add"}, {}, "Add an image to a consistency group.",
-  "", &get_add_arguments, &execute_add);
-Shell::Action action_remove_image(
-  {"group", "image", "remove"}, {}, "Remove an image from a consistency group.",
-  "", &get_remove_image_arguments, &execute_remove_image);
-Shell::Action action_list_images(
-  {"group", "image", "list"}, {}, "List images in a consistency group.",
-  "", &get_list_images_arguments, &execute_list_images);
-} // namespace group
-} // namespace action
-} // namespace rbd
index d3147ab0e9ca95b7a622798f11a7133cc73d5363..d6dec85f6a99b9c14cab4ee2db58ce94baed211a 100644 (file)
@@ -97,9 +97,12 @@ static int do_metadata_set(librbd::Image& image, const char *key,
 static int do_metadata_remove(librbd::Image& image, const char *key)
 {
   int r = image.metadata_remove(key);
-  if (r < 0) {
-    std::cerr << "failed to remove metadata " << key << " of image : "
-              << cpp_strerror(r) << std::endl;
+  if (r == -ENOENT) {
+      std::cerr << "rbd: no existing metadata key " << key << " of image : "
+                << cpp_strerror(r) << std::endl;
+  } else if(r < 0) {
+      std::cerr << "failed to remove metadata " << key << " of image : "
+                << cpp_strerror(r) << std::endl;
   }
   return r;
 }
index d415c9a5859603e7c0e501b301aa626bf5cf7f4e..7944c0cf74ac34d9711f854edd92a9f75fa0230b 100644 (file)
@@ -130,16 +130,6 @@ static int do_show_info(librados::IoCtx &io_ctx, librbd::Image& image,
 
   std::string prefix = image.get_block_name_prefix();
 
-  librbd::group_spec_t group_spec;
-  r = image.get_group(&group_spec);
-  if (r < 0) {
-    return r;
-  }
-
-  std::string group_string = "";
-  if (-1 != group_spec.pool)
-    group_string = stringify(group_spec.pool) + "." + group_spec.name;
-
   struct timespec create_timestamp;
   image.get_create_timestamp(&create_timestamp);
 
@@ -189,15 +179,6 @@ static int do_show_info(librados::IoCtx &io_ctx, librbd::Image& image,
     format_flags(f, flags);
   }
 
-  if (!group_string.empty()) {
-    if (f) {
-      f->dump_string("group", group_string);
-    } else {
-      std::cout << "\tconsistency group: " << group_string
-               << std::endl;
-    }
-  }
-
   if (!create_timestamp_str.empty()) {
     if (f) {
       f->dump_string("create_timestamp", create_timestamp_str);
index 53bebd83d8c2f802372dec646be0dcd28190a8fa..4c4babe99489d57a057046cc358603da7e55ad0e 100644 (file)
@@ -4,6 +4,7 @@
 #include "tools/rbd/ArgumentTypes.h"
 #include "tools/rbd/Shell.h"
 #include "tools/rbd/Utils.h"
+#include "include/Context.h"
 #include "include/stringify.h"
 #include "include/types.h"
 #include "common/errno.h"
 #include "common/TextTable.h"
 #include <iostream>
 #include <boost/program_options.hpp>
+#include "global/global_context.h"
 
 namespace rbd {
+
 namespace action {
 namespace list {
 
 namespace at = argument_types;
 namespace po = boost::program_options;
 
-int do_list(librbd::RBD &rbd, librados::IoCtx& io_ctx, bool lflag,
-                   Formatter *f) {
+enum WorkerState {
+  STATE_IDLE = 0,
+  STATE_OPENED,
+  STATE_DONE
+} ;
+
+struct WorkerEntry {
+  librbd::Image img;
+  librbd::RBD::AioCompletion* completion;
+  WorkerState state;
+  string name;
+
+  WorkerEntry() {
+    state = STATE_IDLE;
+    completion = nullptr;
+  }
+};
+
+
+int list_process_image(librados::Rados* rados, WorkerEntry* w, bool lflag, Formatter *f, TextTable &tbl)
+{
+  int r = 0;
+  librbd::image_info_t info;
+  std::string pool, image, snap, parent;
+
+  // handle second-nth trips through loop
+  r = w->img.parent_info(&pool, &image, &snap);
+  if (r < 0 && r != -ENOENT)
+    return r;
+  bool has_parent = false;
+  if (r != -ENOENT) {
+    parent = pool + "/" + image + "@" + snap;
+    has_parent = true;
+  }
+
+  if (w->img.stat(info, sizeof(info)) < 0) {
+    return -EINVAL;
+  }
+
+  uint8_t old_format;
+  w->img.old_format(&old_format);
+
+  std::list<librbd::locker_t> lockers;
+  bool exclusive;
+  r = w->img.list_lockers(&lockers, &exclusive, NULL);
+  if (r < 0)
+    return r;
+  std::string lockstr;
+  if (!lockers.empty()) {
+    lockstr = (exclusive) ? "excl" : "shr";
+  }
+
+  if (f) {
+    f->open_object_section("image");
+    f->dump_string("image", w->name);
+    f->dump_unsigned("size", info.size);
+    if (has_parent) {
+      f->open_object_section("parent");
+      f->dump_string("pool", pool);
+      f->dump_string("image", image);
+      f->dump_string("snapshot", snap);
+      f->close_section();
+    }
+    f->dump_int("format", old_format ? 1 : 2);
+    if (!lockers.empty())
+      f->dump_string("lock_type", exclusive ? "exclusive" : "shared");
+    f->close_section();
+  } else {
+    tbl << w->name
+        << stringify(si_t(info.size))
+        << parent
+        << ((old_format) ? '1' : '2')
+        << ""                         // protect doesn't apply to images
+        << lockstr
+        << TextTable::endrow;
+  }
+
+  std::vector<librbd::snap_info_t> snaplist;
+  if (w->img.snap_list(snaplist) >= 0 && !snaplist.empty()) {
+    for (std::vector<librbd::snap_info_t>::iterator s = snaplist.begin();
+         s != snaplist.end(); ++s) {
+      bool is_protected;
+      bool has_parent = false;
+      parent.clear();
+      w->img.snap_set(s->name.c_str());
+      r = w->img.snap_is_protected(s->name.c_str(), &is_protected);
+      if (r < 0)
+        return r;
+      if (w->img.parent_info(&pool, &image, &snap) >= 0) {
+        parent = pool + "/" + image + "@" + snap;
+        has_parent = true;
+      }
+      if (f) {
+        f->open_object_section("snapshot");
+        f->dump_string("image", w->name);
+        f->dump_string("snapshot", s->name);
+        f->dump_unsigned("size", s->size);
+        if (has_parent) {
+          f->open_object_section("parent");
+          f->dump_string("pool", pool);
+          f->dump_string("image", image);
+          f->dump_string("snapshot", snap);
+          f->close_section();
+        }
+        f->dump_int("format", old_format ? 1 : 2);
+        f->dump_string("protected", is_protected ? "true" : "false");
+        f->close_section();
+      } else {
+        tbl << w->name + "@" + s->name
+            << stringify(si_t(s->size))
+            << parent
+            << ((old_format) ? '1' : '2')
+            << (is_protected ? "yes" : "")
+            << ""                     // locks don't apply to snaps
+            << TextTable::endrow;
+      }
+    }
+  }
+
+  return r < 0 ? r : 0;
+}
+
+int do_list(std::string &pool_name, bool lflag, int threads, Formatter *f) {
+  std::vector<WorkerEntry*> workers;
   std::vector<std::string> names;
-  int r = rbd.list(io_ctx, names);
+  librados::Rados rados;
+  librbd::RBD rbd;
+  librados::IoCtx ioctx;
+
+  if (threads < 1) {
+    threads = 1;
+  }
+  if (threads > 32) {
+    threads = 32;
+  }
+
+  int r = utils::init(pool_name, &rados, &ioctx);
+  if (r < 0) {
+    return r;
+  }
+
+  r = rbd.list(ioctx, names);
   if (r < 0)
     return r;
 
@@ -32,9 +173,9 @@ int do_list(librbd::RBD &rbd, librados::IoCtx& io_ctx, bool lflag,
     for (std::vector<std::string>::const_iterator i = names.begin();
        i != names.end(); ++i) {
        if (f)
-         f->dump_string("name", *i);
+        f->dump_string("name", *i);
        else
-         std::cout << *i << std::endl;
+        std::cout << *i << std::endl;
     }
     if (f) {
       f->close_section();
@@ -56,125 +197,65 @@ int do_list(librbd::RBD &rbd, librados::IoCtx& io_ctx, bool lflag,
     tbl.define_column("LOCK", TextTable::LEFT, TextTable::LEFT);
   }
 
-  std::string pool, image, snap, parent;
-
-  for (std::vector<std::string>::const_iterator i = names.begin();
-       i != names.end(); ++i) {
-    librbd::image_info_t info;
-    librbd::Image im;
-
-    r = rbd.open_read_only(io_ctx, im, i->c_str(), NULL);
-    // image might disappear between rbd.list() and rbd.open(); ignore
-    // that, warn about other possible errors (EPERM, say, for opening
-    // an old-format image, because you need execute permission for the
-    // class method)
-    if (r < 0) {
-      if (r != -ENOENT) {
-        std::cerr << "rbd: error opening " << *i << ": " << cpp_strerror(r)
-                  << std::endl;
-      }
-      // in any event, continue to next image
-      continue;
-    }
-
-    // handle second-nth trips through loop
-    parent.clear();
-    r = im.parent_info(&pool, &image, &snap);
-    if (r < 0 && r != -ENOENT)
-      goto out;
-    bool has_parent = false;
-    if (r != -ENOENT) {
-      parent = pool + "/" + image + "@" + snap;
-      has_parent = true;
-    }
-
-    if (im.stat(info, sizeof(info)) < 0) {
-      r = -EINVAL;
-      goto out;
-    }
-
-    uint8_t old_format;
-    im.old_format(&old_format);
-
-    std::list<librbd::locker_t> lockers;
-    bool exclusive;
-    r = im.list_lockers(&lockers, &exclusive, NULL);
-    if (r < 0)
-      goto out;
-    std::string lockstr;
-    if (!lockers.empty()) {
-      lockstr = (exclusive) ? "excl" : "shr";
-    }
+  for (int left = 0; left < std::min(threads, (int)names.size()); left++) {
+    workers.push_back(new WorkerEntry());
+  }
 
-    if (f) {
-      f->open_object_section("image");
-      f->dump_string("image", *i);
-      f->dump_unsigned("size", info.size);
-      if (has_parent) {
-        f->open_object_section("parent");
-        f->dump_string("pool", pool);
-        f->dump_string("image", image);
-        f->dump_string("snapshot", snap);
-        f->close_section();
+  auto i = names.begin();
+  while (true) {
+    size_t workers_idle = 0;
+    for (auto comp : workers) {
+      switch (comp->state) {
+       case STATE_DONE:
+         comp->completion->wait_for_complete();
+         comp->state = STATE_IDLE;
+         comp->completion->release();
+         comp->completion = nullptr;
+         // we want it to fall through in this case
+       case STATE_IDLE:
+         if (i == names.end()) {
+           workers_idle++;
+           continue;
+         }
+         comp->name = *i;
+         comp->completion = new librbd::RBD::AioCompletion(nullptr, nullptr);
+         r = rbd.aio_open_read_only(ioctx, comp->img, i->c_str(), NULL, comp->completion);
+         i++;
+         comp->state = STATE_OPENED;
+         break;
+       case STATE_OPENED:
+         comp->completion->wait_for_complete();
+         // image might disappear between rbd.list() and rbd.open(); ignore
+         // that, warn about other possible errors (EPERM, say, for opening
+         // an old-format image, because you need execute permission for the
+         // class method)
+         r = comp->completion->get_return_value();
+         comp->completion->release();
+         if (r < 0) {
+           if (r != -ENOENT) {
+             std::cerr << "rbd: error opening " << *i << ": " << cpp_strerror(r)
+                       << std::endl;
+           }
+           // in any event, continue to next image
+           comp->state = STATE_IDLE;
+           continue;
+         }
+         r = list_process_image(&rados, comp, lflag, f, tbl);
+         if (r < 0) {
+             std::cerr << "rbd: error processing image  " << comp->name << ": " << cpp_strerror(r)
+                       << std::endl;
+         }
+         comp->completion = new librbd::RBD::AioCompletion(nullptr, nullptr);
+         r = comp->img.aio_close(comp->completion);
+         comp->state = STATE_DONE;
+         break;
       }
-      f->dump_int("format", old_format ? 1 : 2);
-      if (!lockers.empty())
-        f->dump_string("lock_type", exclusive ? "exclusive" : "shared");
-      f->close_section();
-    } else {
-      tbl << *i
-          << stringify(si_t(info.size))
-          << parent
-          << ((old_format) ? '1' : '2')
-          << ""                         // protect doesn't apply to images
-          << lockstr
-          << TextTable::endrow;
     }
-
-    std::vector<librbd::snap_info_t> snaplist;
-    if (im.snap_list(snaplist) >= 0 && !snaplist.empty()) {
-      for (std::vector<librbd::snap_info_t>::iterator s = snaplist.begin();
-           s != snaplist.end(); ++s) {
-        bool is_protected;
-        bool has_parent = false;
-        parent.clear();
-        im.snap_set(s->name.c_str());
-        r = im.snap_is_protected(s->name.c_str(), &is_protected);
-        if (r < 0)
-          goto out;
-        if (im.parent_info(&pool, &image, &snap) >= 0) {
-          parent = pool + "/" + image + "@" + snap;
-          has_parent = true;
-        }
-        if (f) {
-          f->open_object_section("snapshot");
-          f->dump_string("image", *i);
-          f->dump_string("snapshot", s->name);
-          f->dump_unsigned("size", s->size);
-          if (has_parent) {
-            f->open_object_section("parent");
-            f->dump_string("pool", pool);
-            f->dump_string("image", image);
-            f->dump_string("snapshot", snap);
-            f->close_section();
-          }
-          f->dump_int("format", old_format ? 1 : 2);
-          f->dump_string("protected", is_protected ? "true" : "false");
-          f->close_section();
-        } else {
-          tbl << *i + "@" + s->name
-              << stringify(si_t(s->size))
-              << parent
-              << ((old_format) ? '1' : '2')
-              << (is_protected ? "yes" : "")
-              << ""                     // locks don't apply to snaps
-              << TextTable::endrow;
-        }
-      }
+    if (workers_idle == workers.size()) {
+       break;
     }
   }
 
-out:
   if (f) {
     f->close_section();
     f->flush(std::cout);
@@ -182,6 +263,12 @@ out:
     std::cout << tbl;
   }
 
+  rados.shutdown();
+
+  for (auto comp : workers) {
+    delete comp;
+  }
+
   return r < 0 ? r : 0;
 }
 
@@ -203,15 +290,7 @@ int execute(const po::variables_map &vm) {
     return r;
   }
 
-  librados::Rados rados;
-  librados::IoCtx io_ctx;
-  r = utils::init(pool_name, &rados, &io_ctx);
-  if (r < 0) {
-    return r;
-  }
-
-  librbd::RBD rbd;
-  r = do_list(rbd, io_ctx, vm["long"].as<bool>(), formatter.get());
+  r = do_list(pool_name, vm["long"].as<bool>(), g_conf->rbd_concurrent_management_ops, formatter.get());
   if (r < 0) {
     std::cerr << "rbd: list: " << cpp_strerror(r) << std::endl;
     return r;
index b407534252b95d94b69cf428aad89e795cceeb21..10e601499f70ec3f215f1426542eb1a874eb0ab7 100644 (file)
@@ -70,23 +70,6 @@ int execute(const po::variables_map &vm) {
                 << "it crashed. Try again after closing/unmapping it or "
                 << "waiting 30s for the crashed client to timeout."
                 << std::endl;
-    } else if (r == -EMLINK) {
-      librbd::Image image;
-      int image_r = utils::open_image(io_ctx, image_name, true, &image);
-      librbd::group_spec_t group_spec;
-      if (image_r == 0) {
-       image_r = image.get_group(&group_spec);
-      }
-      if (image_r == 0)
-       std::cerr << "rbd: error: image belongs to a consistency group "
-                 << group_spec.pool << "." << group_spec.name;
-      else
-       std::cerr << "rbd: error: image belongs to a consistency group";
-
-      std::cerr << std::endl
-               << "Remove the image from the consistency group and try again."
-               << std::endl;
-      image.close();
     } else {
       std::cerr << "rbd: delete error: " << cpp_strerror(r) << std::endl;
     }
diff --git a/ceph/src/tools/rbd_ggate/CMakeLists.txt b/ceph/src/tools/rbd_ggate/CMakeLists.txt
new file mode 100644 (file)
index 0000000..5c5572c
--- /dev/null
@@ -0,0 +1,9 @@
+add_executable(rbd-ggate
+  Driver.cc
+  Server.cc
+  Watcher.cc
+  debug.cc
+  ggate_drv.c
+  main.cc)
+target_link_libraries(rbd-ggate geom librbd librados global)
+install(TARGETS rbd-ggate DESTINATION bin)
diff --git a/ceph/src/tools/rbd_ggate/Driver.cc b/ceph/src/tools/rbd_ggate/Driver.cc
new file mode 100644 (file)
index 0000000..cf63fc6
--- /dev/null
@@ -0,0 +1,166 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <stdlib.h>
+
+#include "common/debug.h"
+#include "common/errno.h"
+#include "Driver.h"
+#include "Request.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::ggate::Driver: " << this \
+                           << " " << __func__ << ": "
+
+namespace rbd {
+namespace ggate {
+
+// Load the ggate driver by delegating to ggate_drv_load(); its result is
+// handed back to the caller unchanged.
+int Driver::load() {
+  const int r = ggate_drv_load();
+  return r;
+}
+
+// Ask the ggate driver to kill the device called devname; the result of
+// ggate_drv_kill() is handed back unchanged.
+int Driver::kill(const std::string &devname) {
+  return ggate_drv_kill(devname.c_str());
+}
+
+int Driver::list(std::list<std::string> &devs) {
+  size_t size = 1024;
+  char **devs_ = nullptr;
+  int r;
+
+  while (size <= 1024 * 1024) {
+    devs_ = static_cast<char **>(
+        realloc(static_cast<void *>(devs_), size * sizeof(*devs_)));
+    r = ggate_drv_list(devs_, &size);
+    if (r != -ERANGE) {
+      break;
+    }
+    size *= 2;
+  }
+  if (r < 0) {
+    goto free;
+  }
+
+  devs.clear();
+  for (size_t i = 0; i < size; i++) {
+    devs.push_back(devs_[i]);
+  }
+
+  ggate_drv_list_free(devs_, size);
+free:
+  free(devs_);
+
+  return r;
+}
+
+// Record the device parameters; nothing is created until init() is called.
+Driver::Driver(const std::string &devname, size_t sectorsize, size_t mediasize,
+               bool readonly, const std::string &info)
+  : m_devname(devname),
+    m_sectorsize(sectorsize),
+    m_mediasize(mediasize),
+    m_readonly(readonly),
+    m_info(info) {
+}
+
+int Driver::init() {
+  dout(20) << dendl;
+
+  char name[PATH_MAX];
+  size_t namelen;
+
+  if (m_devname.empty()) {
+    name[0] = '\0';
+    namelen = PATH_MAX;
+  } else {
+    namelen = m_devname.size();
+    if (namelen >= PATH_MAX) {
+      return -ENAMETOOLONG;
+    }
+    strncpy(name, m_devname.c_str(), namelen + 1);
+  }
+
+  int r = ggate_drv_create(name, namelen, m_sectorsize, m_mediasize, m_readonly,
+                           m_info.c_str(), &m_drv);
+  if (r < 0) {
+    return r;
+  }
+
+  if (m_devname.empty()) {
+    m_devname = name;
+  }
+
+  return 0;
+}
+
+// Return a copy of the device name, logging it at debug level 30.
+std::string Driver::get_devname() const {
+  dout(30) << m_devname << dendl;
+  return m_devname;
+}
+
+// Hand the driver handle to ggate_drv_destroy().
+void Driver::shut_down() {
+  dout(20) << dendl;
+  ggate_drv_destroy(m_drv);
+}
+
+// Resize the device to newsize via ggate_drv_resize().
+//
+// m_mediasize is updated only when the driver call succeeds. Returns 0 on
+// success or the driver's negative result.
+int Driver::resize(size_t newsize) {
+  dout(20) << "newsize=" << newsize << dendl;
+
+  const int r = ggate_drv_resize(m_drv, newsize);
+  if (r >= 0) {
+    m_mediasize = newsize;
+    return 0;
+  }
+
+  return r;
+}
+
+int Driver::recv(Request **req) {
+  dout(20) << dendl;
+
+  ggate_drv_req_t req_;
+
+  int r = ggate_drv_recv(m_drv, &req_);
+  if (r < 0) {
+    return r;
+  }
+
+  *req = new Request(req_);
+
+  dout(20) << "req=" << *req << dendl;
+
+  if (ggate_drv_req_cmd(req_) == GGATE_DRV_CMD_WRITE) {
+    bufferptr ptr(buffer::claim_malloc(
+                    ggate_drv_req_length(req_),
+                    static_cast<char *>(ggate_drv_req_release_buf(req_))));
+    (*req)->bl.push_back(ptr);
+  }
+
+  return 0;
+}
+
+int Driver::send(Request *req) {
+  dout(20) << "req=" << req << dendl;
+
+  if (ggate_drv_req_cmd(req->req) == GGATE_DRV_CMD_READ &&
+      ggate_drv_req_error(req->req) == 0) {
+    assert(req->bl.length() == ggate_drv_req_length(req->req));
+    // TODO: avoid copying?
+    req->bl.copy(0, ggate_drv_req_length(req->req),
+                 static_cast<char *>(ggate_drv_req_buf(req->req)));
+    dout(20) << "copied resulting " << req->bl.length() << " bytes to "
+             << ggate_drv_req_buf(req->req) << dendl;
+  }
+
+  int r = ggate_drv_send(m_drv, req->req);
+
+  delete req;
+  return r;
+}
+
+} // namespace ggate
+} // namespace rbd
diff --git a/ceph/src/tools/rbd_ggate/Driver.h b/ceph/src/tools/rbd_ggate/Driver.h
new file mode 100644 (file)
index 0000000..b52b48a
--- /dev/null
@@ -0,0 +1,49 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_GGATE_DRIVER_H
+#define CEPH_RBD_GGATE_DRIVER_H
+
+#include <list>
+#include <string>
+
+#include "ggate_drv.h"
+
+namespace rbd {
+namespace ggate {
+
+struct Request;
+
+class Driver {  // C++ front end to the ggate_drv_* functions for one ggate device
+public:
+  static int load();  // forwards to ggate_drv_load()
+  static int kill(const std::string &devname);  // forwards to ggate_drv_kill()
+  static int list(std::list<std::string> &devs);  // replaces devs with the names from ggate_drv_list()
+
+  Driver(const std::string &devname, size_t sectorsize, size_t mediasize,
+         bool readonly, const std::string &info);  // only stores the parameters; init() creates the device
+
+  int init();  // creates the device; 0 or a negative error code
+  void shut_down();  // forwards m_drv to ggate_drv_destroy()
+
+  std::string get_devname() const;
+
+  int recv(Request **req);  // on success *req is a new Request
+  int send(Request *req);  // deletes req
+
+  int resize(size_t newsize);  // updates m_mediasize on success
+
+private:
+  std::string m_devname;  // may be empty until init() fills it in
+  size_t m_sectorsize;
+  size_t m_mediasize;
+  bool m_readonly;
+  std::string m_info;
+  ggate_drv_t m_drv = 0;  // driver handle, set by ggate_drv_create() in init()
+};
+
+} // namespace ggate
+} // namespace rbd
+
+#endif // CEPH_RBD_GGATE_DRIVER_H
+
diff --git a/ceph/src/tools/rbd_ggate/Request.h b/ceph/src/tools/rbd_ggate/Request.h
new file mode 100644 (file)
index 0000000..66f2198
--- /dev/null
@@ -0,0 +1,55 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_GGATE_REQUEST_H
+#define CEPH_RBD_GGATE_REQUEST_H
+
+#include "ggate_drv.h"
+
+namespace rbd {
+namespace ggate {
+
+struct Request {  // a driver request handle plus the bufferlist carrying its data
+  enum Command {  // NOTE(review): get_cmd() casts ggate_drv_req_cmd() to this enum, so the values presumably mirror GGATE_DRV_CMD_* - confirm
+    Unknown = 0,
+    Write = 1,
+    Read = 2,
+    Flush = 3,
+    Discard = 4,
+  };
+
+  ggate_drv_req_t req;  // underlying driver request handle
+  bufferlist bl;  // request payload
+
+  Request(ggate_drv_req_t req) : req(req) {
+  }
+
+  uint64_t get_id() {  // forwards to ggate_drv_req_id()
+    return ggate_drv_req_id(req);
+  }
+
+  Command get_cmd() {
+    return static_cast<Command>(ggate_drv_req_cmd(req));
+  }
+
+  size_t get_length() {  // forwards to ggate_drv_req_length()
+    return ggate_drv_req_length(req);
+  }
+
+  uint64_t get_offset() {  // forwards to ggate_drv_req_offset()
+    return ggate_drv_req_offset(req);
+  }
+
+  uint64_t get_error() {  // forwards to ggate_drv_req_error(); the result is converted to uint64_t
+    return ggate_drv_req_error(req);
+  }
+
+  void set_error(int error) {  // forwards to ggate_drv_req_set_error()
+    ggate_drv_req_set_error(req, error);
+  }
+};
+
+} // namespace ggate
+} // namespace rbd
+
+#endif // CEPH_RBD_GGATE_REQUEST_H
diff --git a/ceph/src/tools/rbd_ggate/Server.cc b/ceph/src/tools/rbd_ggate/Server.cc
new file mode 100644 (file)
index 0000000..6fde848
--- /dev/null
@@ -0,0 +1,270 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/debug.h"
+#include "common/errno.h"
+#include "Driver.h"
+#include "Server.h"
+#include "Request.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::ggate::Server: " << this \
+                           << " " << __func__ << ": "
+
+namespace rbd {
+namespace ggate {
+
+// Bind the server to a driver and an image. The reader and writer thread
+// helpers are only set up here; they are started by start().
+Server::Server(Driver *drv, librbd::Image& image)
+  : m_drv(drv),
+    m_image(image),
+    m_lock("rbd::ggate::Server::m_lock"),
+    m_reader_thread(this, &Server::reader_entry),
+    m_writer_thread(this, &Server::writer_entry) {
+}
+
+// Start the worker threads, block until m_stopping is set, then shut the
+// server down via stop().
+void Server::run() {
+  dout(10) << dendl;
+
+  int r = start();
+  assert(r == 0);
+
+  dout(20) << "entering run loop" << dendl;
+
+  {
+    Mutex::Locker locker(m_lock);
+    for (;;) {
+      if (m_stopping) {
+        break;
+      }
+      // timed wait: m_stopping is re-checked at least once per second
+      m_cond.WaitInterval(m_lock, utime_t(1, 0));
+    }
+  }
+
+  dout(20) << "exiting run loop" << dendl;
+
+  stop();
+}
+
+// Create the reader thread and then the writer thread. Always returns 0.
+int Server::start() {
+  dout(10) << dendl;
+
+  m_reader_thread.create("rbd_reader");
+  m_writer_thread.create("rbd_writer");
+
+  return 0;
+}
+
+// Join both worker threads and wait for outstanding I/O to drain.
+// Must only be called once m_stopping has been set (asserted).
+void Server::stop() {
+  dout(10) << dendl;
+
+  {
+    Mutex::Locker locker(m_lock);
+    assert(m_stopping);
+  }
+
+  // reader first, then writer
+  m_reader_thread.join();
+  m_writer_thread.join();
+
+  wait_clean();
+}
+
+// Put ctx on the pending list, under m_lock.
+void Server::io_start(IOContext *ctx) {
+  dout(20) << ctx << dendl;
+
+  Mutex::Locker locker(m_lock);
+
+  m_io_pending.push_back(&ctx->item);
+}
+
+// Move ctx from whichever list it is on to the finished list and signal
+// m_cond, all under m_lock. ctx must currently be on a list (asserted).
+void Server::io_finish(IOContext *ctx) {
+  dout(20) << ctx << dendl;
+
+  Mutex::Locker locker(m_lock);
+
+  xlist<IOContext*>::item &entry = ctx->item;
+  assert(entry.is_on_list());
+
+  entry.remove_myself();
+  m_io_finished.push_back(&entry);
+
+  m_cond.Signal();
+}
+
+// Block until a finished I/O context is available or the server is stopping.
+//
+// Returns the oldest finished context (removed from the finished list), or
+// nullptr when the list is empty and m_stopping is set.
+Server::IOContext *Server::wait_io_finish() {
+  dout(20) << dendl;
+
+  Mutex::Locker locker(m_lock);
+
+  for (;;) {
+    if (!m_io_finished.empty()) {
+      IOContext *done = m_io_finished.front();
+      m_io_finished.pop_front();
+      return done;
+    }
+    if (m_stopping) {
+      return nullptr;
+    }
+    m_cond.Wait(m_lock);
+  }
+}
+
+// Wait until no I/O is pending, then free every context left on the finished
+// list. The reader thread must no longer be running (asserted).
+void Server::wait_clean() {
+  dout(20) << dendl;
+
+  assert(!m_reader_thread.is_started());
+
+  Mutex::Locker locker(m_lock);
+
+  // io_finish() signals m_cond each time a context leaves the pending list
+  while (!m_io_pending.empty()) {
+    m_cond.Wait(m_lock);
+  }
+
+  while (!m_io_finished.empty()) {
+    IOContext *leftover = m_io_finished.front();
+    m_io_finished.pop_front();
+    delete leftover;
+  }
+}
+
+// Completion callback registered with each AioCompletion: arg is the
+// IOContext of the request, cb the completion itself. The result is passed to
+// the owning server's handle_aio() and the completion is then released.
+void Server::aio_callback(librbd::completion_t cb, void *arg) {
+  IOContext *ctx = reinterpret_cast<IOContext *>(arg);
+  librbd::RBD::AioCompletion *completion =
+    reinterpret_cast<librbd::RBD::AioCompletion*>(cb);
+
+  const int result = completion->get_return_value();
+  ctx->server->handle_aio(ctx, result);
+
+  completion->release();
+}
+
+void Server::handle_aio(IOContext *ctx, int r) {
+  dout(20) << ctx << ": r=" << r << dendl;
+
+  if (r == -EINVAL) {
+    // if shrinking an image, a pagecache writeback might reference
+    // extents outside of the range of the new image extents
+    dout(5) << "masking IO out-of-bounds error" << dendl;
+    ctx->req->bl.clear();
+    r = 0;
+  }
+
+  if (r < 0) {
+    ctx->req->set_error(-r);
+  } else if ((ctx->req->get_cmd() == Request::Read) &&
+             r != static_cast<int>(ctx->req->get_length())) {
+    int pad_byte_count = static_cast<int> (ctx->req->get_length()) - r;
+    ctx->req->bl.append_zero(pad_byte_count);
+    dout(20) << ctx << ": pad byte count: " << pad_byte_count << dendl;
+    ctx->req->set_error(0);
+  } else {
+    ctx->req->set_error(0);
+  }
+  io_finish(ctx);
+}
+
+void Server::reader_entry() {
+  dout(20) << dendl;
+
+  while (!m_stopping) {
+    ceph::unique_ptr<IOContext> ctx(new IOContext(this));
+
+    dout(20) << "waiting for ggate request" << dendl;
+
+    int r = m_drv->recv(&ctx->req);
+    if (r < 0) {
+      if (r != -ECANCELED) {
+        derr << "recv: " << cpp_strerror(r) << dendl;
+      }
+      Mutex::Locker locker(m_lock);
+      m_stopping = true;
+      m_cond.Signal();
+      return;
+    }
+
+    IOContext *pctx = ctx.release();
+
+    dout(20) << pctx << ": start: " << *pctx << dendl;
+
+    io_start(pctx);
+    librbd::RBD::AioCompletion *c =
+      new librbd::RBD::AioCompletion(pctx, aio_callback);
+    switch (pctx->req->get_cmd())
+    {
+    case rbd::ggate::Request::Write:
+      m_image.aio_write(pctx->req->get_offset(), pctx->req->get_length(),
+                        pctx->req->bl, c);
+      break;
+    case rbd::ggate::Request::Read:
+      m_image.aio_read(pctx->req->get_offset(), pctx->req->get_length(),
+                       pctx->req->bl, c);
+      break;
+    case rbd::ggate::Request::Flush:
+      m_image.aio_flush(c);
+      break;
+    case rbd::ggate::Request::Discard:
+      m_image.aio_discard(pctx->req->get_offset(), pctx->req->get_length(), c);
+      break;
+    default:
+      derr << pctx << ": invalid request command: " << pctx->req->get_cmd()
+           << dendl;
+      c->release();
+      Mutex::Locker locker(m_lock);
+      m_stopping = true;
+      m_cond.Signal();
+      return;
+    }
+  }
+  dout(20) << "terminated" << dendl;
+}
+
+void Server::writer_entry() {
+  dout(20) << dendl;
+
+  while (!m_stopping) {
+    dout(20) << "waiting for io request" << dendl;
+
+    ceph::unique_ptr<IOContext> ctx(wait_io_finish());
+    if (!ctx) {
+      dout(20) << "no io requests, terminating" << dendl;
+      return;
+    }
+
+    dout(20) << ctx.get() << ": got: " << *ctx << dendl;
+
+    int r = m_drv->send(ctx->req);
+    if (r < 0) {
+      derr << ctx.get() << ": send: " << cpp_strerror(r) << dendl;
+      Mutex::Locker locker(m_lock);
+      m_stopping = true;
+      m_cond.Signal();
+      return;
+    }
+    dout(20) << ctx.get() << " finish" << dendl;
+  }
+  dout(20) << "terminated" << dendl;
+}
+
+std::ostream &operator<<(std::ostream &os, const Server::IOContext &ctx) {
+
+  os << "[" << ctx.req->get_id();
+
+  switch (ctx.req->get_cmd())
+  {
+  case rbd::ggate::Request::Write:
+    os << " Write ";
+    break;
+  case rbd::ggate::Request::Read:
+    os << " Read ";
+    break;
+  case rbd::ggate::Request::Flush:
+    os << " Flush ";
+    break;
+  case rbd::ggate::Request::Discard:
+    os << " Discard ";
+    break;
+  default:
+    os << " Unknow(" << ctx.req->get_cmd() << ") ";
+    break;
+  }
+
+  os << ctx.req->get_offset() << "~" << ctx.req->get_length() << " "
+     << ctx.req->get_error() << "]";
+
+  return os;
+}
+
+} // namespace ggate
+} // namespace rbd
+
diff --git a/ceph/src/tools/rbd_ggate/Server.h b/ceph/src/tools/rbd_ggate/Server.h
new file mode 100644 (file)
index 0000000..8ed4f51
--- /dev/null
@@ -0,0 +1,88 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_GGATE_SERVER_H
+#define CEPH_RBD_GGATE_SERVER_H
+
+#include "include/rbd/librbd.hpp"
+#include "include/xlist.h"
+#include "common/Cond.h"
+#include "common/Mutex.h"
+#include "common/Thread.h"
+
+namespace rbd {
+namespace ggate {
+
+class Driver;
+struct Request;
+
+class Server {  // moves requests between a ggate Driver and an rbd image using a reader and a writer thread
+public:
+  Server(Driver *drv, librbd::Image& image);
+
+  void run();  // starts the threads and blocks until the server is stopping
+
+private:
+  struct IOContext {  // per-request state; lives on m_io_pending or m_io_finished
+    xlist<IOContext*>::item item;  // list hook
+    Server *server;  // owning server, used by aio_callback()
+    Request *req = nullptr;  // filled in by Driver::recv()
+
+    IOContext(Server *server) : item(this), server(server) {
+    }
+  };
+
+  class ThreadHelper : public Thread {  // Thread whose entry point is a Server member function
+  public:
+    typedef void (Server::*entry_func)();
+
+    ThreadHelper(Server *server, entry_func func)
+      : server(server), func(func) {
+    }
+
+  protected:
+    virtual void* entry() {  // runs (server->*func)() and returns nullptr
+      (server->*func)();
+      return nullptr;
+    }
+
+  private:
+    Server *server;
+    entry_func func;
+  };
+
+  friend std::ostream &operator<<(std::ostream &os, const IOContext &ctx);
+
+  Driver *m_drv;
+  librbd::Image &m_image;
+
+  mutable Mutex m_lock;  // taken around updates to m_stopping and both I/O lists
+  Cond m_cond;  // signalled when an I/O finishes or m_stopping is set
+  bool m_stopping = false;
+  ThreadHelper m_reader_thread, m_writer_thread;
+  xlist<IOContext*> m_io_pending;  // submitted, not yet completed
+  xlist<IOContext*> m_io_finished;  // completed, waiting for the writer thread
+
+  static void aio_callback(librbd::completion_t cb, void *arg);  // arg is the IOContext
+
+  int start();
+  void stop();
+
+  void reader_entry();  // body of m_reader_thread
+  void writer_entry();  // body of m_writer_thread
+
+  void io_start(IOContext *ctx);  // adds ctx to m_io_pending
+  void io_finish(IOContext *ctx);  // moves ctx to m_io_finished
+
+  IOContext *wait_io_finish();  // nullptr when stopping with nothing finished
+  void wait_clean();
+
+  void handle_aio(IOContext *ctx, int r);
+};
+
+std::ostream &operator<<(std::ostream &os, const Server::IOContext &ctx);
+
+} // namespace ggate
+} // namespace rbd
+
+#endif // CEPH_RBD_GGATE_SERVER_H
diff --git a/ceph/src/tools/rbd_ggate/Watcher.cc b/ceph/src/tools/rbd_ggate/Watcher.cc
new file mode 100644 (file)
index 0000000..57b3f96
--- /dev/null
@@ -0,0 +1,48 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/debug.h"
+#include "common/errno.h"
+#include "Driver.h"
+#include "Watcher.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::ggate::Watcher: " << this \
+                           << " " << __func__ << ": "
+
+namespace rbd {
+namespace ggate {
+
+// Remember the driver, I/O context and image to act on, together with the
+// image size known at construction time.
+Watcher::Watcher(Driver *drv, librados::IoCtx &ioctx, librbd::Image &image,
+                 size_t size)
+  : m_drv(drv),
+    m_ioctx(ioctx),
+    m_image(image),
+    m_size(size) {
+}
+
+void Watcher::handle_notify() {
+  dout(20) << dendl;
+
+  librbd::image_info_t info;
+
+  if (m_image.stat(info, sizeof(info)) == 0) {
+    size_t new_size = info.size;
+
+    if (new_size != m_size) {
+      int r = m_drv->resize(new_size);
+      if (r < 0) {
+        derr << "resize failed: " << cpp_strerror(r) << dendl;
+        m_drv->shut_down();
+      }
+      r = m_image.invalidate_cache();
+      if (r < 0) {
+        derr << "invalidate rbd cache failed: " << cpp_strerror(r) << dendl;
+        m_drv->shut_down();
+      }
+      m_size = new_size;
+    }
+  }
+}
+
+} // namespace ggate
+} // namespace rbd
diff --git a/ceph/src/tools/rbd_ggate/Watcher.h b/ceph/src/tools/rbd_ggate/Watcher.h
new file mode 100644 (file)
index 0000000..8f524b4
--- /dev/null
@@ -0,0 +1,34 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_GGATE_WATCHER_H
+#define CEPH_RBD_GGATE_WATCHER_H
+
+#include "include/rbd/librbd.hpp"
+
+namespace rbd {
+namespace ggate {
+
+class Driver;
+
+// Update watcher for an rbd image that is exported through a ggate Driver.
+// handle_notify() compares the image size with the last size seen and, when
+// it changed, resizes the device.
+class Watcher : public librbd::UpdateWatchCtx
+{
+public:
+  // size is the image size known when the watcher is created. The parameter
+  // is called drv (as in the definition) so it no longer shadows m_drv.
+  Watcher(Driver *drv, librados::IoCtx &ioctx, librbd::Image &image,
+          size_t size);
+
+  void handle_notify() override;
+
+private:
+  Driver *m_drv;
+  librados::IoCtx &m_ioctx;
+  librbd::Image &m_image;
+  size_t m_size;        // last image size seen
+};
+
+
+} // namespace ggate
+} // namespace rbd
+
+#endif // CEPH_RBD_GGATE_WATCHER_H
+
diff --git a/ceph/src/tools/rbd_ggate/debug.cc b/ceph/src/tools/rbd_ggate/debug.cc
new file mode 100644 (file)
index 0000000..8cf912c
--- /dev/null
@@ -0,0 +1,55 @@
+#include "common/debug.h"
+#include "common/errno.h"
+#include "debug.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::ggate: "
+
+// Format fmt/ap and write the message to the ceph log at the given level.
+//
+// Does nothing when g_ceph_context is not set or when the message cannot be
+// formatted. errno is preserved across the call.
+extern "C" void debugv(int level, const char *fmt, va_list ap) {
+    char *msg;
+    int saved_errno = errno;
+
+    if (g_ceph_context == nullptr) {
+        return;
+    }
+
+    // On failure vasprintf() leaves msg undefined: it must be neither
+    // logged nor freed.
+    if (vasprintf(&msg, fmt, ap) < 0) {
+        errno = saved_errno;
+        return;
+    }
+
+    dout(level) << msg << dendl;
+
+    free(msg);
+    errno = saved_errno;
+}
+
+// printf-style front end to debugv(): logs the formatted message at level.
+extern "C" void debug(int level, const char *fmt, ...) {
+    va_list args;
+
+    va_start(args, fmt);
+    debugv(level, fmt, args);
+    va_end(args);
+}
+
+// Log a printf-style formatted message through debugv() at level -1.
+extern "C" void errx(const char *fmt, ...) {
+    va_list args;
+
+    va_start(args, fmt);
+    debugv(-1, fmt, args);
+    va_end(args);
+}
+
+// Log a printf-style formatted message followed by ": " and the text for the
+// errno value that was current when err() was called, via errx().
+//
+// If the message cannot be formatted, the unexpanded format string is logged
+// in its place.
+extern "C" void err(const char *fmt, ...) {
+    va_list ap;
+    char *msg;
+    int saved_errno = errno;
+
+    va_start(ap, fmt);
+    int n = vasprintf(&msg, fmt, ap);
+    va_end(ap);
+    errno = saved_errno;
+
+    if (n < 0) {
+        // msg is undefined after a failed vasprintf(); do not touch it
+        errx("%s: %s", fmt, cpp_strerror(saved_errno).c_str());
+        return;
+    }
+
+    errx("%s: %s", msg, cpp_strerror(errno).c_str());
+
+    free(msg);
+}
diff --git a/ceph/src/tools/rbd_ggate/debug.h b/ceph/src/tools/rbd_ggate/debug.h
new file mode 100644 (file)
index 0000000..da9b46a
--- /dev/null
@@ -0,0 +1,17 @@
+#ifndef CEPH_RBD_GGATE_DEBUG_H
+#define CEPH_RBD_GGATE_DEBUG_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void debug(int level, const char *fmt, ...) __printflike(2, 3);
+void debugv(int level, const char *fmt, va_list ap) __printflike(2, 0);
+void err(const char *fmt, ...) __printflike(1, 2);
+void errx(const char *fmt, ...) __printflike(1, 2);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // CEPH_RBD_GGATE_DEBUG_H
diff --git a/ceph/src/tools/rbd_ggate/ggate_drv.c b/ceph/src/tools/rbd_ggate/ggate_drv.c
new file mode 100644 (file)
index 0000000..8b02b1b
--- /dev/null
@@ -0,0 +1,375 @@
+// -*- mode:C; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <sys/param.h>
+#include <sys/bio.h>
+#include <sys/disk.h>
+#include <sys/linker.h>
+#include <sys/queue.h>
+#include <sys/stat.h>
+
+#include <geom/gate/g_gate.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <libgeom.h>
+
+#include "debug.h"
+#include "ggate_drv.h"
+
+/* Return the gctl_seq field of the request. */
+uint64_t ggate_drv_req_id(ggate_drv_req_t req) {
+  const struct g_gate_ctl_io *io = req;
+
+  return io->gctl_seq;
+}
+
+/*
+ * Map the request's BIO_* command onto the matching GGATE_DRV_CMD_* value.
+ * Any command without a mapping yields GGATE_DRV_CMD_UNKNOWN.
+ */
+int ggate_drv_req_cmd(ggate_drv_req_t req) {
+  const struct g_gate_ctl_io *io = req;
+  int cmd;
+
+  switch (io->gctl_cmd) {
+  case BIO_READ:
+    cmd = GGATE_DRV_CMD_READ;
+    break;
+  case BIO_WRITE:
+    cmd = GGATE_DRV_CMD_WRITE;
+    break;
+  case BIO_DELETE:
+    cmd = GGATE_DRV_CMD_DISCARD;
+    break;
+  case BIO_FLUSH:
+    cmd = GGATE_DRV_CMD_FLUSH;
+    break;
+  default:
+    cmd = GGATE_DRV_CMD_UNKNOWN;
+    break;
+  }
+
+  return cmd;
+}
+
+/* Return the gctl_offset field of the request. */
+uint64_t ggate_drv_req_offset(ggate_drv_req_t req) {
+  const struct g_gate_ctl_io *io = req;
+
+  return io->gctl_offset;
+}
+
+/* Return the gctl_length field of the request. */
+size_t ggate_drv_req_length(ggate_drv_req_t req) {
+  const struct g_gate_ctl_io *io = req;
+
+  return io->gctl_length;
+}
+
+/* Return the request's data buffer pointer (gctl_data) without detaching it. */
+void *ggate_drv_req_buf(ggate_drv_req_t req) {
+  const struct g_gate_ctl_io *io = req;
+
+  return io->gctl_data;
+}
+
+/* Return the gctl_error field of the request. */
+int ggate_drv_req_error(ggate_drv_req_t req) {
+  const struct g_gate_ctl_io *io = req;
+
+  return io->gctl_error;
+}
+
+void ggate_drv_req_set_error(ggate_drv_req_t req, int error) {
+  struct g_gate_ctl_io *ggio = (struct g_gate_ctl_io *)req;
+
+  ggio->gctl_error = error;
+}
+
+/*
+ * Detach the data buffer from the request and return it. The request's
+ * gctl_data is set to NULL, so the request no longer references the buffer.
+ */
+void *ggate_drv_req_release_buf(ggate_drv_req_t req) {
+  struct g_gate_ctl_io *io = req;
+  void *buf = io->gctl_data;
+
+  io->gctl_data = NULL;
+  return buf;
+}
+
+/* State behind the opaque ggate_drv_t handle filled in by ggate_drv_create(). */
+struct ggate_drv {
+  int fd;    /* descriptor of the opened /dev/ggctl control device */
+  int unit;  /* gctl_unit value reported by G_GATE_CMD_CREATE */
+};
+
+/*
+ * Make sure the g_gate kernel module is available, loading geom_gate when
+ * modfind() does not find it.
+ *
+ * Returns 0 on success (a load that fails with EEXIST is also treated as
+ * success) and a negative errno value otherwise.
+ */
+int ggate_drv_load() {
+  if (modfind("g_gate") != -1) {
+    /* Present in kernel. */
+    return 0;
+  }
+
+  if (kldload("geom_gate") == -1 || modfind("g_gate") == -1) {
+    /*
+     * Capture errno before logging so the value returned to the caller
+     * cannot be altered by anything the logging path does.
+     */
+    int error = errno;
+
+    if (error != EEXIST) {
+      err("failed to load geom_gate module");
+      return -error;
+    }
+  }
+  return 0;
+}
+
+/*
+ * Create a ggate provider and return a handle for it in *drv_.
+ *
+ * name/namelen: in/out device name buffer. If name is non-empty it is passed
+ *   to the kernel as the requested name; if it is empty the unit is chosen
+ *   automatically and the resulting "<provider><unit>" name is written back
+ *   into name (at most namelen bytes).
+ * sectorsize, mediasize, readonly, info: copied into the create request.
+ *
+ * Returns 0 on success or a negative errno value. On success the handle is
+ * heap-allocated here and freed by ggate_drv_destroy().
+ */
+int ggate_drv_create(char *name, size_t namelen, size_t sectorsize,
+    size_t mediasize, bool readonly, const char *info, ggate_drv_t *drv_) {
+  struct ggate_drv *drv;
+  struct g_gate_ctl_create ggiocreate;
+  int r;
+
+  debug(20, "%s: name=%s, sectorsize=%zu, mediasize=%zu, readonly=%d, info=%s",
+      __func__, name, sectorsize, mediasize, (int)readonly, info);
+
+  if (*name != '\0') {
+    if (namelen > sizeof(ggiocreate.gctl_name) - 1) {
+      return -ENAMETOOLONG;
+    }
+  }
+
+  /*
+   * We communicate with ggate via /dev/ggctl. Open it.
+   */
+  int fd = open("/dev/" G_GATE_CTL_NAME, O_RDWR);
+  if (fd == -1) {
+    r = -errno;
+    err("failed to open /dev/" G_GATE_CTL_NAME);
+    return r;
+  }
+
+  drv = calloc(1, sizeof(*drv));
+  if (drv == NULL) {
+    /*
+     * Keep the error in a local: the cleanup path calls close(), and the
+     * result must be a negative errno value.
+     */
+    r = -ENOMEM;
+    goto fail_close;
+  }
+
+  /*
+   * Create provider.
+   */
+  memset(&ggiocreate, 0, sizeof(ggiocreate));
+  ggiocreate.gctl_version = G_GATE_VERSION;
+  ggiocreate.gctl_mediasize = mediasize;
+  ggiocreate.gctl_sectorsize = sectorsize;
+  ggiocreate.gctl_flags = readonly ? G_GATE_FLAG_READONLY : 0;
+  ggiocreate.gctl_maxcount = 0;
+  ggiocreate.gctl_timeout = 0;
+  if (*name != '\0') {
+    ggiocreate.gctl_unit = G_GATE_NAME_GIVEN;
+    strlcpy(ggiocreate.gctl_name, name, sizeof(ggiocreate.gctl_name));
+  } else {
+    ggiocreate.gctl_unit = G_GATE_UNIT_AUTO;
+  }
+  strlcpy(ggiocreate.gctl_info, info, sizeof(ggiocreate.gctl_info));
+  if (ioctl(fd, G_GATE_CMD_CREATE, &ggiocreate) == -1) {
+    r = -errno;
+    err("failed to create " G_GATE_PROVIDER_NAME " device");
+    goto fail;
+  }
+
+  debug(20, "%s: created, unit: %d, name: %s", __func__, ggiocreate.gctl_unit,
+      ggiocreate.gctl_name);
+
+  drv->fd = fd;
+  drv->unit = ggiocreate.gctl_unit;
+  *drv_ = drv;
+
+  if (*name == '\0') {
+    /* Report the automatically assigned device name back to the caller. */
+    snprintf(name, namelen, "%s%d", G_GATE_PROVIDER_NAME, drv->unit);
+  }
+
+  return 0;
+
+fail:
+  free(drv);
+fail_close:
+  close(fd);
+  return r;
+}
+
+/*
+ * Issue a forced G_GATE_CMD_DESTROY for the handle's unit and free the
+ * handle. A failing ioctl is only logged; errno is left as it was on entry.
+ *
+ * NOTE(review): drv->fd is not closed here - confirm whether that is
+ * intentional.
+ */
+void ggate_drv_destroy(ggate_drv_t drv_) {
+  struct ggate_drv *drv = drv_;
+  struct g_gate_ctl_destroy request;
+  int saved_errno;
+
+  debug(20, "%s %p", __func__, drv);
+
+  memset(&request, 0, sizeof(request));
+  request.gctl_version = G_GATE_VERSION;
+  request.gctl_unit = drv->unit;
+  request.gctl_force = 1;
+
+  /* The caller's errno must survive both the ioctl and the logging. */
+  saved_errno = errno;
+  if (ioctl(drv->fd, G_GATE_CMD_DESTROY, &request) == -1) {
+    err("failed to destroy /dev/%s%d device", G_GATE_PROVIDER_NAME,
+        drv->unit);
+  }
+  errno = saved_errno;
+
+  free(drv);
+}
+
+/*
+ * Ask the kernel to change the media size of the handle's unit to newsize.
+ * Returns the ioctl result on success or a negative errno value on failure.
+ */
+int ggate_drv_resize(ggate_drv_t drv_, size_t newsize) {
+  struct ggate_drv *drv = drv_;
+  struct g_gate_ctl_modify request;
+  int rc;
+
+  debug(20, "%s %p: newsize=%zd", __func__, drv, newsize);
+
+  memset(&request, 0, sizeof(request));
+  request.gctl_version = G_GATE_VERSION;
+  request.gctl_unit = drv->unit;
+  request.gctl_modify = GG_MODIFY_MEDIASIZE;
+  request.gctl_mediasize = newsize;
+
+  rc = ioctl(drv->fd, G_GATE_CMD_MODIFY, &request);
+  if (rc != -1) {
+    return rc;
+  }
+
+  /* Record the error before logging it. */
+  rc = -errno;
+  err("failed to resize /dev/%s%d device", G_GATE_PROVIDER_NAME, drv->unit);
+  return rc;
+}
+
+/*
+ * Forcibly destroy the ggate device called devname, without needing a
+ * driver handle: the control device is opened and closed locally.
+ *
+ * Returns the ioctl result on success or a negative errno value on failure.
+ */
+int ggate_drv_kill(const char *devname) {
+  debug(20, "%s %s", __func__, devname);
+
+  int fd = open("/dev/" G_GATE_CTL_NAME, O_RDWR);
+  if (fd == -1) {
+    /* Record the error before logging, as the ioctl path below does. */
+    int r = -errno;
+    err("failed to open /dev/" G_GATE_CTL_NAME);
+    return r;
+  }
+
+  struct g_gate_ctl_destroy ggiodestroy;
+  memset(&ggiodestroy, 0, sizeof(ggiodestroy));
+  ggiodestroy.gctl_version = G_GATE_VERSION;
+  ggiodestroy.gctl_unit = G_GATE_NAME_GIVEN;
+  ggiodestroy.gctl_force = 1;
+
+  strlcpy(ggiodestroy.gctl_name, devname, sizeof(ggiodestroy.gctl_name));
+
+  int r = ioctl(fd, G_GATE_CMD_DESTROY, &ggiodestroy);
+  if (r == -1) {
+    r = -errno;
+    err("failed to destroy %s device", devname);
+  }
+
+  close(fd);
+  return r;
+}
+
+/*
+ * Wait for the next I/O request from the kernel for this device.
+ *
+ * A request structure and a MAXPHYS-sized data buffer are allocated and
+ * handed to G_GATE_CMD_START. On success *req receives the request, which is
+ * later released by ggate_drv_send(). On any failure both allocations are
+ * freed here and a negative errno value is returned (-ECANCELED when the
+ * kernel reports the request as canceled).
+ */
+int ggate_drv_recv(ggate_drv_t drv_, ggate_drv_req_t *req) {
+  struct ggate_drv *drv = (struct ggate_drv *)drv_;
+  struct g_gate_ctl_io *ggio;
+  int error, r;
+
+  debug(20, "%s", __func__);
+
+  ggio = calloc(1, sizeof(*ggio));
+  if (ggio == NULL) {
+    return -ENOMEM;
+  }
+
+  ggio->gctl_version = G_GATE_VERSION;
+  ggio->gctl_unit = drv->unit;
+  ggio->gctl_data = malloc(MAXPHYS);
+  if (ggio->gctl_data == NULL) {
+    /* Never hand the kernel a NULL buffer advertised as MAXPHYS bytes. */
+    r = -ENOMEM;
+    goto fail;
+  }
+  ggio->gctl_length = MAXPHYS;
+
+  debug(20, "%s: waiting for request from kernel",  __func__);
+  if (ioctl(drv->fd, G_GATE_CMD_START, ggio) == -1) {
+    /* Go through the common cleanup so the request and buffer are freed. */
+    r = -errno;
+    err("%s: G_GATE_CMD_START failed", __func__);
+    goto fail;
+  }
+
+  debug(20, "%s: got request from kernel: "
+        "unit=%d, seq=%ju, cmd=%u, offset=%ju, length=%ju, error=%d, data=%p",
+        __func__, ggio->gctl_unit, (uintmax_t)ggio->gctl_seq, ggio->gctl_cmd,
+        (uintmax_t)ggio->gctl_offset, (uintmax_t)ggio->gctl_length,
+        ggio->gctl_error, ggio->gctl_data);
+
+  error = ggio->gctl_error;
+  switch (error) {
+  case 0:
+    break;
+  case ECANCELED:
+    debug(10, "%s: canceled: exit gracefully",  __func__);
+    r = -error;
+    goto fail;
+  case ENOMEM:
+    /*
+     * Buffer too small? Impossible, we allocate MAXPHYS
+     * bytes - request can't be bigger than that.
+     */
+    /* FALLTHROUGH */
+  case ENXIO:
+  default:
+    /* err() reports errno, so publish the kernel's error code there. */
+    errno = error;
+    err("%s: G_GATE_CMD_START failed", __func__);
+    r = -error;
+    goto fail;
+  }
+
+  *req = ggio;
+  return 0;
+
+fail:
+  free(ggio->gctl_data);
+  free(ggio);
+  return r;
+}
+
+/*
+ * Report a completed request back to the kernel with G_GATE_CMD_DONE.
+ *
+ * The request and whatever data buffer is still attached to it are freed
+ * here whether or not the ioctl succeeds, so req must not be used afterwards.
+ * Returns 0 on success or a negative errno value.
+ */
+int ggate_drv_send(ggate_drv_t drv_, ggate_drv_req_t req) {
+  struct ggate_drv *drv = (struct ggate_drv *)drv_;
+  struct g_gate_ctl_io *ggio = (struct g_gate_ctl_io *)req;
+  int r = 0;
+
+  debug(20, "%s: send request to kernel: "
+        "unit=%d, seq=%ju, cmd=%u, offset=%ju, length=%ju, error=%d, data=%p",
+        __func__, ggio->gctl_unit, (uintmax_t)ggio->gctl_seq, ggio->gctl_cmd,
+        (uintmax_t)ggio->gctl_offset, (uintmax_t)ggio->gctl_length,
+        ggio->gctl_error, ggio->gctl_data);
+
+  if (ioctl(drv->fd, G_GATE_CMD_DONE, ggio) == -1) {
+    /* Record the error before logging it. */
+    r = -errno;
+    err("%s: G_GATE_CMD_DONE failed", __func__);
+  }
+
+  free(ggio->gctl_data);
+  free(ggio);
+  return r;
+}
+
+/*
+ * Collect the names of all geoms of the ggate class.
+ *
+ * devs: caller-provided array with room for *size entries.
+ * size: in: capacity of devs; out: number of geoms found.
+ *
+ * Returns 0 on success; each stored name is a strdup() copy to be released
+ * with ggate_drv_list_free(). Returns -ERANGE when devs is too small (in
+ * which case *size holds the number found and nothing is stored), -ENOMEM
+ * when a name cannot be duplicated (nothing is left stored and *size is 0),
+ * or a negative errno value when the GEOM tree cannot be read.
+ */
+int ggate_drv_list(char **devs, size_t *size) {
+  struct gmesh mesh;
+  struct gclass *class;
+  struct ggeom *gp;
+  char **next = devs;
+  int r;
+  size_t max_size;
+
+  r = geom_gettree(&mesh);
+  if (r != 0) {
+    return -errno;
+  }
+
+  max_size = *size;
+  *size = 0;
+
+  LIST_FOREACH(class, &mesh.lg_class, lg_class) {
+    if (strcmp(class->lg_name, G_GATE_CLASS_NAME) == 0) {
+      /* Count first so nothing is stored unless everything fits. */
+      LIST_FOREACH(gp, &class->lg_geom, lg_geom) {
+        (*size)++;
+      }
+      if (*size > max_size) {
+        r = -ERANGE;
+        goto done;
+      }
+      LIST_FOREACH(gp, &class->lg_geom, lg_geom) {
+        *next = strdup(gp->lg_name);
+        if (*next == NULL) {
+          /* Undo the partial result rather than expose a NULL entry. */
+          while (next != devs) {
+            next--;
+            free(*next);
+          }
+          *size = 0;
+          r = -ENOMEM;
+          goto done;
+        }
+        next++;
+      }
+    }
+  }
+
+done:
+  geom_deletetree(&mesh);
+  return r;
+}
+
+/*
+ * Free the first size strings stored in devs. The devs array itself is not
+ * freed.
+ */
+void ggate_drv_list_free(char **devs, size_t size) {
+  char **entry = devs;
+  size_t remaining = size;
+
+  while (remaining > 0) {
+    free(*entry);
+    entry++;
+    remaining--;
+  }
+}
diff --git a/ceph/src/tools/rbd_ggate/ggate_drv.h b/ceph/src/tools/rbd_ggate/ggate_drv.h
new file mode 100644 (file)
index 0000000..5ea5f32
--- /dev/null
@@ -0,0 +1,57 @@
+// -*- mode:C; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_RBD_GGATE_GGATE_DRV_H
+#define CEPH_RBD_GGATE_GGATE_DRV_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdbool.h>
+#include <stddef.h>  /* size_t, used throughout the prototypes below */
+#include <stdint.h>
+
+/* Opaque handles for a driver instance and for a single I/O request. */
+typedef void *ggate_drv_t;
+typedef void *ggate_drv_req_t;
+
+/*
+ * GGATE driver commands. They are mapped to GgateReq::Command.
+ */
+enum {
+  GGATE_DRV_CMD_UNKNOWN = 0,
+  GGATE_DRV_CMD_WRITE = 1,
+  GGATE_DRV_CMD_READ = 2,
+  GGATE_DRV_CMD_FLUSH = 3,
+  GGATE_DRV_CMD_DISCARD = 4,
+};
+
+/* Read-only accessors for the fields of a request. */
+uint64_t ggate_drv_req_id(ggate_drv_req_t req);
+int ggate_drv_req_cmd(ggate_drv_req_t req);  /* one of GGATE_DRV_CMD_* */
+void *ggate_drv_req_buf(ggate_drv_req_t req);
+size_t ggate_drv_req_length(ggate_drv_req_t req);
+uint64_t ggate_drv_req_offset(ggate_drv_req_t req);
+int ggate_drv_req_error(ggate_drv_req_t req);
+
+/* Store an error code in the request. */
+void ggate_drv_req_set_error(ggate_drv_req_t req, int error);
+/* Detach and return the request's data buffer; the request then holds NULL. */
+void *ggate_drv_req_release_buf(ggate_drv_req_t req);
+
+/* Make sure the g_gate kernel module is available. */
+int ggate_drv_load();
+
+/*
+ * Create a ggate device and return its handle in *drv. An empty name asks
+ * for an automatically chosen unit, whose device name is written back into
+ * name. ggate_drv_destroy() destroys the device and frees the handle.
+ */
+int ggate_drv_create(char *name, size_t namelen, size_t sectorsize,
+    size_t mediasize, bool readonly, const char *info, ggate_drv_t *drv);
+void ggate_drv_destroy(ggate_drv_t drv);
+
+/*
+ * ggate_drv_recv() waits for the next request from the kernel and returns it
+ * in *req; ggate_drv_send() reports it as done and frees it.
+ */
+int ggate_drv_recv(ggate_drv_t drv, ggate_drv_req_t *req);
+int ggate_drv_send(ggate_drv_t drv, ggate_drv_req_t req);
+
+/* Change the media size of the device. */
+int ggate_drv_resize(ggate_drv_t drv, size_t newsize);
+
+/* Forcibly destroy the device called devname; no handle required. */
+int ggate_drv_kill(const char *devname);
+/*
+ * Fill devs (capacity *size) with newly allocated names of the existing
+ * ggate devices and store their count in *size; -ERANGE if devs is too
+ * small. ggate_drv_list_free() frees the names but not the array.
+ */
+int ggate_drv_list(char **devs, size_t *size);
+void ggate_drv_list_free(char **devs, size_t size);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // CEPH_RBD_GGATE_GGATE_DRV_H
diff --git a/ceph/src/tools/rbd_ggate/main.cc b/ceph/src/tools/rbd_ggate/main.cc
new file mode 100644 (file)
index 0000000..1a0e314
--- /dev/null
@@ -0,0 +1,364 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "include/int_types.h"
+
+#include <sys/types.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <errno.h>
+#include <string.h>
+#include <assert.h>
+
+#include <iostream>
+#include <boost/regex.hpp>
+
+#include "common/Preforker.h"
+#include "common/ceph_argparse.h"
+#include "common/config.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "global/global_init.h"
+#include "global/signal_handler.h"
+
+#include "include/rados/librados.hpp"
+#include "include/rbd/librbd.hpp"
+
+#include "Driver.h"
+#include "Server.h"
+#include "Watcher.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd-ggate: " << __func__ << ": "
+
+static void usage() {
+  std::cout << "Usage: rbd-ggate [options] map <image-or-snap-spec>  Map an image to ggate device\n"
+            << "                           unmap <device path>       Unmap ggate device\n"
+            << "                           list                      List mapped ggate devices\n"
+            << "Options:\n"
+            << "  --device <device path>  Specify ggate device path\n"
+            << "  --read-only             Map readonly\n"
+            << "  --exclusive             Forbid writes by other clients\n"
+            << std::endl;
+  generic_server_usage();
+}
+
+// Command line state: filled in by main() and parse_imgpath(), consumed by
+// do_map() and do_unmap(). The pool name defaults to "rbd".
+static std::string devpath, poolname("rbd"), imgname, snapname;
+// Set by --read-only; do_map() also forces it when a snapshot is mapped.
+static bool readonly = false;
+// Set by --exclusive; makes do_map() acquire the exclusive image lock.
+static bool exclusive = false;
+
+// Driver for the mapped device. Created in do_map() and also used by
+// handle_signal() to shut the device down.
+static std::unique_ptr<rbd::ggate::Driver> drv;
+
+// Handler registered for SIGINT and SIGTERM: log the signal and shut the
+// driver down.
+static void handle_signal(int signum)
+{
+  derr << "*** Got signal " << sig_str(signum) << " ***" << dendl;
+
+  // Only registered for these two signals, and only once drv exists.
+  assert(signum == SIGINT || signum == SIGTERM);
+  assert(drv != nullptr);
+
+  drv->shut_down();
+}
+
+// Map the image selected on the command line to a ggate device and serve its
+// I/O until the server loop returns.
+//
+// The process is pre-forked: the parent only waits for the child to report
+// its status and returns, while the child opens the image, creates the
+// device, prints its path and runs the server. Every path in the child ends
+// in forker.exit(), so the final return is not reached there.
+static int do_map(int argc, const char *argv[])
+{
+  int r;
+
+  librados::Rados rados;
+  librbd::RBD rbd;
+  librados::IoCtx io_ctx;
+  librbd::Image image;
+
+  librbd::image_info_t info;
+  std::string desc;
+
+  Preforker forker;
+
+  // Re-parse the original command line (plus environment) for global init.
+  vector<const char*> args;
+  argv_to_vec(argc, argv, args);
+  env_to_vec(args);
+
+  auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT,
+                         CODE_ENVIRONMENT_DAEMON,
+                         CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS);
+  // Clear the pid_file setting.
+  g_ceph_context->_conf->set_val_or_die("pid_file", "");
+
+  if (global_init_prefork(g_ceph_context) >= 0) {
+    std::string err;
+    r = forker.prefork(err);
+    if (r < 0) {
+      cerr << err << std::endl;
+      return r;
+    }
+
+    if (forker.is_parent()) {
+      // Parent: wait for the child's status and return it to main().
+      global_init_postfork_start(g_ceph_context);
+      if (forker.parent_wait(err) != 0) {
+        return -ENXIO;
+      }
+      return 0;
+    }
+  }
+
+  common_init_finish(g_ceph_context);
+  global_init_chdir(g_ceph_context);
+
+  // The driver takes the device name without the "/dev/" prefix.
+  std::string devname = (devpath.compare(0, 5, "/dev/") == 0) ?
+    devpath.substr(5) : devpath;
+  std::unique_ptr<rbd::ggate::Watcher> watcher;
+  uint64_t handle;
+
+  r = rados.init_with_context(g_ceph_context);
+  if (r < 0) {
+    goto done;
+  }
+
+  r = rados.connect();
+  if (r < 0) {
+    goto done;
+  }
+
+  r = rados.ioctx_create(poolname.c_str(), io_ctx);
+  if (r < 0) {
+    goto done;
+  }
+
+  r = rbd.open(io_ctx, image, imgname.c_str());
+  if (r < 0) {
+    goto done;
+  }
+
+  if (exclusive) {
+    r = image.lock_acquire(RBD_LOCK_MODE_EXCLUSIVE);
+    if (r < 0) {
+      cerr << "rbd-ggate: failed to acquire exclusive lock: " << cpp_strerror(r)
+           << std::endl;
+      goto done;
+    }
+  }
+
+  // Description string handed to the driver: "RBD pool/image[@snap]".
+  desc = "RBD " + poolname + "/" + imgname;
+
+  if (!snapname.empty()) {
+    r = image.snap_set(snapname.c_str());
+    if (r < 0) {
+      goto done;
+    }
+    // A snapshot is always mapped read-only.
+    readonly = true;
+    desc += "@" + snapname;
+  }
+
+  r = image.stat(info, sizeof(info));
+  if (r < 0) {
+    goto done;
+  }
+
+  // NOTE(review): the result of Driver::load() is not checked here.
+  rbd::ggate::Driver::load();
+  // The device is created with a 512 byte sector size and the image size.
+  drv.reset(new rbd::ggate::Driver(devname, 512, info.size, readonly, desc));
+  r = drv->init();
+  if (r < 0) {
+    // NOTE(review): init()'s return value is replaced by -errno - confirm
+    // that errno still holds the failure reason at this point.
+    r = -errno;
+    goto done;
+  }
+
+  // Register for image update notifications.
+  watcher.reset(new rbd::ggate::Watcher(drv.get(), io_ctx, image, info.size));
+  r = image.update_watch(watcher.get(), &handle);
+  if (r < 0) {
+    drv->shut_down();
+    goto done;
+  }
+
+  // Tell the user which device was created.
+  std::cout << "/dev/" << drv->get_devname() << std::endl;
+
+  if (g_conf->daemonize) {
+    forker.daemonize();
+    global_init_postfork_start(g_ceph_context);
+    global_init_postfork_finish(g_ceph_context);
+  }
+
+  init_async_signal_handler();
+  register_async_signal_handler(SIGHUP, sighup_handler);
+  register_async_signal_handler_oneshot(SIGINT, handle_signal);
+  register_async_signal_handler_oneshot(SIGTERM, handle_signal);
+
+  // Serve requests; the handlers are torn down once run() returns.
+  rbd::ggate::Server(drv.get(), image).run();
+
+  unregister_async_signal_handler(SIGHUP, sighup_handler);
+  unregister_async_signal_handler(SIGINT, handle_signal);
+  unregister_async_signal_handler(SIGTERM, handle_signal);
+  shutdown_async_signal_handler();
+
+  r = image.update_unwatch(handle);
+  assert(r == 0);
+
+done:
+  // Common cleanup for both the success path and every failure above.
+  image.close();
+  io_ctx.close();
+  rados.shutdown();
+
+  forker.exit(r < 0 ? EXIT_FAILURE : 0);
+  // Unreachable;
+  return r;
+}
+
+// Destroy the ggate device named by devpath (with or without a leading
+// "/dev/"). Returns 0 on success or the negative error from Driver::kill().
+static int do_unmap()
+{
+  std::string devname = devpath;
+  if (devname.compare(0, 5, "/dev/") == 0) {
+    // The driver expects the bare device name.
+    devname.erase(0, 5);
+  }
+
+  int r = rbd::ggate::Driver::kill(devname);
+  if (r >= 0) {
+    return 0;
+  }
+
+  cerr << "rbd-ggate: failed to destroy " << devname << ": "
+       << cpp_strerror(r) << std::endl;
+  return r;
+}
+
+// Split an image spec of the form [pool/]image[@snap] into the poolname,
+// imgname and snapname globals. poolname and snapname are left untouched
+// when their part is absent. Returns 0 on success, -EINVAL for a spec that
+// does not match.
+static int parse_imgpath(const std::string &imgpath)
+{
+  // Capture groups: 1 = pool (optional), 2 = image, 3 = snapshot (optional).
+  boost::regex spec_re("^(?:([^/@]+)/)?([^/@]+)(?:@([^/@]+))?$");
+  boost::smatch parts;
+
+  bool matched = boost::regex_match(imgpath, parts, spec_re);
+  if (!matched) {
+    std::cerr << "rbd-ggate: invalid spec '" << imgpath << "'" << std::endl;
+    return -EINVAL;
+  }
+
+  const auto &pool_part = parts[1];
+  const auto &snap_part = parts[3];
+
+  if (pool_part.matched) {
+    poolname = pool_part;
+  }
+  imgname = parts[2];
+  if (snap_part.matched) {
+    snapname = snap_part;
+  }
+
+  return 0;
+}
+
+// Print the /dev path of every ggate device reported by the driver.
+// Returns 0 on success or the negative error from Driver::list(); main()
+// treats a negative result as failure.
+static int do_list()
+{
+  rbd::ggate::Driver::load();
+
+  std::list<std::string> devs;
+  int r = rbd::ggate::Driver::list(devs);
+  if (r < 0) {
+    // Pass the error through with its sign intact: negating it would make
+    // the failure look like success to the caller's "r < 0" check.
+    cerr << "rbd-ggate: failed to list devices: " << cpp_strerror(r)
+         << std::endl;
+    return r;
+  }
+
+  for (auto &devname : devs) {
+    cout << "/dev/" << devname << std::endl;
+  }
+  return 0;
+}
+
+// Entry point: parse the rbd-ggate options, then the command (map, unmap or
+// list) and its argument, and dispatch to the matching do_*() helper.
+// Returns 0 on success and EXIT_FAILURE on any usage or runtime error.
+int main(int argc, const char *argv[]) {
+  int r;
+  enum {
+    None,
+    Connect,
+    Disconnect,
+    List
+  } cmd = None;
+
+  vector<const char*> args;
+
+  argv_to_vec(argc, argv, args);
+  md_config_t().parse_argv(args);
+
+  std::vector<const char*>::iterator i;
+
+  // First pass: pick out the options handled here. Anything not recognized
+  // is skipped and stays in args for the command parsing below.
+  for (i = args.begin(); i != args.end(); ) {
+    if (ceph_argparse_flag(args, i, "-h", "--help", (char*)NULL)) {
+      usage();
+      return 0;
+    } else if (ceph_argparse_witharg(args, i, &devpath, "--device",
+                                     (char *)NULL)) {
+    } else if (ceph_argparse_flag(args, i, "--read-only", (char *)NULL)) {
+      readonly = true;
+    } else if (ceph_argparse_flag(args, i, "--exclusive", (char *)NULL)) {
+      exclusive = true;
+    } else {
+      ++i;
+    }
+  }
+
+  // The first remaining argument is the command name.
+  if (args.begin() != args.end()) {
+    if (strcmp(*args.begin(), "map") == 0) {
+      cmd = Connect;
+    } else if (strcmp(*args.begin(), "unmap") == 0) {
+      cmd = Disconnect;
+    } else if (strcmp(*args.begin(), "list") == 0) {
+      cmd = List;
+    } else {
+      cerr << "rbd-ggate: unknown command: " << *args.begin() << std::endl;
+      return EXIT_FAILURE;
+    }
+    args.erase(args.begin());
+  }
+
+  if (cmd == None) {
+    cerr << "rbd-ggate: must specify command" << std::endl;
+    return EXIT_FAILURE;
+  }
+
+  // Consume the command's positional argument, if it takes one.
+  switch (cmd) {
+    case Connect:
+      if (args.begin() == args.end()) {
+        cerr << "rbd-ggate: must specify image-or-snap-spec" << std::endl;
+        return EXIT_FAILURE;
+      }
+      if (parse_imgpath(string(*args.begin())) < 0)
+        return EXIT_FAILURE;
+      args.erase(args.begin());
+      break;
+    case Disconnect:
+      if (args.begin() == args.end()) {
+        cerr << "rbd-ggate: must specify ggate device path" << std::endl;
+        return EXIT_FAILURE;
+      }
+      // For unmap the positional argument replaces any --device value.
+      devpath = *args.begin();
+      args.erase(args.begin());
+      break;
+    default:
+      break;
+  }
+
+  // Anything still left over was not understood.
+  if (args.begin() != args.end()) {
+    cerr << "rbd-ggate: unknown args: " << *args.begin() << std::endl;
+    return EXIT_FAILURE;
+  }
+
+  // Run the command; each helper reports failure with a negative value.
+  switch (cmd) {
+    case Connect:
+      if (imgname.empty()) {
+        cerr << "rbd-ggate: image name was not specified" << std::endl;
+        return EXIT_FAILURE;
+      }
+
+      // do_map() re-parses the original argc/argv for global init.
+      r = do_map(argc, argv);
+      if (r < 0)
+        return EXIT_FAILURE;
+      break;
+    case Disconnect:
+      r = do_unmap();
+      if (r < 0)
+        return EXIT_FAILURE;
+      break;
+    case List:
+      r = do_list();
+      if (r < 0)
+        return EXIT_FAILURE;
+      break;
+    default:
+      usage();
+      return EXIT_FAILURE;
+  }
+
+  return 0;
+}
index 88ff456419c61cc591748780df61362557cd765d..5538fc1945a2cd065a629aeaed402118f9fc21eb 100644 (file)
@@ -23,10 +23,12 @@ set(rbd_mirror_internal
   image_replayer/CloseImageRequest.cc
   image_replayer/CreateImageRequest.cc
   image_replayer/EventPreprocessor.cc
+  image_replayer/GetMirrorImageIdRequest.cc
   image_replayer/IsPrimaryRequest.cc
   image_replayer/OpenImageRequest.cc
   image_replayer/OpenLocalImageRequest.cc
   image_replayer/PrepareLocalImageRequest.cc
+  image_replayer/PrepareRemoteImageRequest.cc
   image_replayer/ReplayStatusFormatter.cc
   image_sync/ImageCopyRequest.cc
   image_sync/ObjectCopyRequest.cc
index f68d51d40d8d1aebb1f2f6aaadd03e4b8be43916..a84199968a253657c44be60dc252ba242bfa6a3d 100644 (file)
 #include "librbd/Operations.h"
 #include "librbd/Utils.h"
 #include "librbd/journal/Replay.h"
+#include "ImageDeleter.h"
 #include "ImageReplayer.h"
 #include "Threads.h"
 #include "tools/rbd_mirror/image_replayer/BootstrapRequest.h"
 #include "tools/rbd_mirror/image_replayer/CloseImageRequest.h"
 #include "tools/rbd_mirror/image_replayer/EventPreprocessor.h"
 #include "tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.h"
+#include "tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.h"
 #include "tools/rbd_mirror/image_replayer/ReplayStatusFormatter.h"
 
 #define dout_context g_ceph_context
@@ -159,46 +161,56 @@ class ImageReplayerAdminSocketHook : public AdminSocketHook {
 public:
   ImageReplayerAdminSocketHook(CephContext *cct, const std::string &name,
                               ImageReplayer<I> *replayer)
-    : admin_socket(cct->get_admin_socket()),
+    : admin_socket(cct->get_admin_socket()), name(name), replayer(replayer),
       lock("ImageReplayerAdminSocketHook::lock " +
              replayer->get_global_image_id()) {
+  }
+
+  int register_commands() {
     std::string command;
     int r;
 
     command = "rbd mirror status " + name;
     r = admin_socket->register_command(command, command, this,
                                       "get status for rbd mirror " + name);
-    if (r == 0) {
-      commands[command] = new StatusCommand<I>(replayer);
+    if (r < 0) {
+      return r;
     }
+    commands[command] = new StatusCommand<I>(replayer);
 
     command = "rbd mirror start " + name;
     r = admin_socket->register_command(command, command, this,
                                       "start rbd mirror " + name);
-    if (r == 0) {
-      commands[command] = new StartCommand<I>(replayer);
+    if (r < 0) {
+      return r;
     }
+    commands[command] = new StartCommand<I>(replayer);
 
     command = "rbd mirror stop " + name;
     r = admin_socket->register_command(command, command, this,
                                       "stop rbd mirror " + name);
-    if (r == 0) {
-      commands[command] = new StopCommand<I>(replayer);
+    if (r < 0) {
+      return r;
     }
+    commands[command] = new StopCommand<I>(replayer);
 
     command = "rbd mirror restart " + name;
     r = admin_socket->register_command(command, command, this,
                                       "restart rbd mirror " + name);
-    if (r == 0) {
-      commands[command] = new RestartCommand<I>(replayer);
+    if (r < 0) {
+      return r;
     }
+    commands[command] = new RestartCommand<I>(replayer);
 
     command = "rbd mirror flush " + name;
     r = admin_socket->register_command(command, command, this,
                                       "flush rbd mirror " + name);
-    if (r == 0) {
-      commands[command] = new FlushCommand<I>(replayer);
+    if (r < 0) {
+      return r;
     }
+    commands[command] = new FlushCommand<I>(replayer);
+
+    return 0;
   }
 
   ~ImageReplayerAdminSocketHook() override {
@@ -228,6 +240,8 @@ private:
   typedef std::map<std::string, ImageReplayerAdminSocketCommand*> Commands;
 
   AdminSocket *admin_socket;
+  std::string name;
+  ImageReplayer<I> *replayer;
   Mutex lock;
   Commands commands;
 };
@@ -270,7 +284,7 @@ void ImageReplayer<I>::RemoteJournalerListener::handle_update(
 }
 
 template <typename I>
-ImageReplayer<I>::ImageReplayer(Threads<librbd::ImageCtx> *threads,
+ImageReplayer<I>::ImageReplayer(Threads<I> *threads,
                                 ImageDeleter<I>* image_deleter,
                                 InstanceWatcher<I> *instance_watcher,
                                 RadosRef local,
@@ -303,14 +317,13 @@ ImageReplayer<I>::ImageReplayer(Threads<librbd::ImageCtx> *threads,
   }
 
   m_name = pool_name + "/" + m_global_image_id;
-  dout(20) << "registered asok hook: " << m_name << dendl;
-  m_asok_hook = new ImageReplayerAdminSocketHook<I>(g_ceph_context, m_name,
-                                                    this);
+  register_admin_socket_hook();
 }
 
 template <typename I>
 ImageReplayer<I>::~ImageReplayer()
 {
+  unregister_admin_socket_hook();
   assert(m_event_preprocessor == nullptr);
   assert(m_replay_status_formatter == nullptr);
   assert(m_local_image_ctx == nullptr);
@@ -323,7 +336,6 @@ ImageReplayer<I>::~ImageReplayer()
   assert(m_in_flight_status_updates == 0);
 
   delete m_journal_listener;
-  delete m_asok_hook;
 }
 
 template <typename I>
@@ -342,32 +354,15 @@ image_replayer::HealthState ImageReplayer<I>::get_health_state() const {
 }
 
 template <typename I>
-void ImageReplayer<I>::add_remote_image(const std::string &mirror_uuid,
-                                        const std::string &image_id,
-                                        librados::IoCtx &io_ctx) {
+void ImageReplayer<I>::add_peer(const std::string &peer_uuid,
+                                librados::IoCtx &io_ctx) {
   Mutex::Locker locker(m_lock);
-
-  RemoteImage remote_image(mirror_uuid, image_id, io_ctx);
-  auto it = m_remote_images.find(remote_image);
-  if (it == m_remote_images.end()) {
-    m_remote_images.insert(remote_image);
+  auto it = m_peers.find({peer_uuid});
+  if (it == m_peers.end()) {
+    m_peers.insert({peer_uuid, io_ctx});
   }
 }
 
-template <typename I>
-void ImageReplayer<I>::remove_remote_image(const std::string &mirror_uuid,
-                                           const std::string &image_id,
-                                          bool schedule_delete) {
-  Mutex::Locker locker(m_lock);
-  m_remote_images.erase({mirror_uuid, image_id});
-}
-
-template <typename I>
-bool ImageReplayer<I>::remote_images_empty() const {
-  Mutex::Locker locker(m_lock);
-  return m_remote_images.empty();
-}
-
 template <typename I>
 void ImageReplayer<I>::set_state_description(int r, const std::string &desc) {
   dout(20) << r << " " << desc << dendl;
@@ -397,6 +392,7 @@ void ImageReplayer<I>::start(Context *on_finish, bool manual)
       m_last_r = 0;
       m_state_desc.clear();
       m_manual_stop = false;
+      m_delete_requested = false;
 
       if (on_finish != nullptr) {
         assert(m_on_start_finish == nullptr);
@@ -421,6 +417,31 @@ void ImageReplayer<I>::start(Context *on_finish, bool manual)
     return;
   }
 
+  wait_for_deletion();
+}
+
+template <typename I>
+void ImageReplayer<I>::wait_for_deletion() {
+  dout(20) << dendl;
+
+  Context *ctx = create_context_callback<
+    ImageReplayer, &ImageReplayer<I>::handle_wait_for_deletion>(this);
+  m_image_deleter->wait_for_scheduled_deletion(
+    m_local_pool_id, m_global_image_id, ctx, false);
+}
+
+template <typename I>
+void ImageReplayer<I>::handle_wait_for_deletion(int r) {
+  dout(20) << "r=" << r << dendl;
+
+  if (r == -ECANCELED) {
+    on_start_fail(0, "");
+    return;
+  } else if (r < 0) {
+    on_start_fail(r, "error waiting for image deletion");
+    return;
+  }
+
   prepare_local_image();
 }
 
@@ -428,6 +449,7 @@ template <typename I>
 void ImageReplayer<I>::prepare_local_image() {
   dout(20) << dendl;
 
+  m_local_image_id = "";
   Context *ctx = create_context_callback<
     ImageReplayer, &ImageReplayer<I>::handle_prepare_local_image>(this);
   auto req = PrepareLocalImageRequest<I>::create(
@@ -452,20 +474,55 @@ void ImageReplayer<I>::handle_prepare_local_image(int r) {
   }
 
   // local image doesn't exist or is non-primary
-  bootstrap();
+  prepare_remote_image();
 }
 
 template <typename I>
-void ImageReplayer<I>::bootstrap() {
+void ImageReplayer<I>::prepare_remote_image() {
   dout(20) << dendl;
 
-  if (m_remote_images.empty()) {
-    on_start_fail(-EREMOTEIO, "waiting for primary remote image");
+  // TODO need to support multiple remote images
+  assert(!m_peers.empty());
+  m_remote_image = {*m_peers.begin()};
+
+  Context *ctx = create_context_callback<
+    ImageReplayer, &ImageReplayer<I>::handle_prepare_remote_image>(this);
+  auto req = PrepareRemoteImageRequest<I>::create(
+    m_remote_image.io_ctx, m_global_image_id, &m_remote_image.mirror_uuid,
+    &m_remote_image.image_id, ctx);
+  req->send();
+}
+
+template <typename I>
+void ImageReplayer<I>::handle_prepare_remote_image(int r) {
+  dout(20) << "r=" << r << dendl;
+
+  if (r == -ENOENT) {
+    dout(20) << "remote image does not exist" << dendl;
+
+    // TODO need to support multiple remote images
+    if (!m_local_image_id.empty() &&
+        m_local_image_tag_owner == m_remote_image.mirror_uuid) {
+      // local image exists and is non-primary and linked to the missing
+      // remote image
+
+      m_delete_requested = true;
+      on_start_fail(0, "remote image no longer exists");
+    } else {
+      on_start_fail(-ENOENT, "remote image does not exist");
+    }
+    return;
+  } else if (r < 0) {
+    on_start_fail(r, "error retrieving remote image id");
     return;
   }
 
-  // TODO bootstrap will need to support multiple remote images
-  m_remote_image = *m_remote_images.begin();
+  bootstrap();
+}
+
+template <typename I>
+void ImageReplayer<I>::bootstrap() {
+  dout(20) << dendl;
 
   CephContext *cct = static_cast<CephContext *>(m_local->cct());
   journal::Settings settings;
@@ -487,7 +544,8 @@ void ImageReplayer<I>::bootstrap() {
     &m_local_image_ctx, m_local_image_id, m_remote_image.image_id,
     m_global_image_id, m_threads->work_queue, m_threads->timer,
     &m_threads->timer_lock, m_local_mirror_uuid, m_remote_image.mirror_uuid,
-    m_remote_journaler, &m_client_meta, ctx, &m_do_resync, &m_progress_cxt);
+    m_remote_journaler, &m_client_meta, ctx, &m_resync_requested,
+    &m_progress_cxt);
 
   {
     Mutex::Locker locker(m_lock);
@@ -527,6 +585,9 @@ void ImageReplayer<I>::handle_bootstrap(int r) {
     return;
   } else if (on_start_interrupted()) {
     return;
+  } else if (m_resync_requested) {
+    on_start_fail(0, "resync requested");
+    return;
   }
 
   assert(m_local_journal == nullptr);
@@ -545,22 +606,6 @@ void ImageReplayer<I>::handle_bootstrap(int r) {
 
   {
     Mutex::Locker locker(m_lock);
-
-    if (m_do_resync) {
-      Context *on_finish = m_on_start_finish;
-      m_stopping_for_resync = true;
-      FunctionContext *ctx = new FunctionContext([this, on_finish](int r) {
-         if (r < 0) {
-           if (on_finish) {
-             on_finish->complete(r);
-           }
-           return;
-         }
-          resync_image(on_finish);
-        });
-      m_on_start_finish = ctx;
-    }
-
     std::string name = m_local_ioctx.get_pool_name() + "/" +
                        m_local_image_ctx->name;
     if (m_name != name) {
@@ -571,11 +616,7 @@ void ImageReplayer<I>::handle_bootstrap(int r) {
        m_asok_hook = nullptr;
       }
     }
-    if (!m_asok_hook) {
-      dout(20) << "registered asok hook: " << m_name << dendl;
-      m_asok_hook = new ImageReplayerAdminSocketHook<I>(g_ceph_context, m_name,
-                                                        this);
-    }
+    register_admin_socket_hook();
   }
 
   update_mirror_image_status(false, boost::none);
@@ -614,13 +655,18 @@ void ImageReplayer<I>::handle_init_remote_journaler(int r) {
     return;
   }
 
-  if (client.state != cls::journal::CLIENT_STATE_CONNECTED) {
+  derr << "image_id=" << m_local_image_id << ", "
+       << "m_client_meta.image_id=" << m_client_meta.image_id << ", "
+       << "client.state=" << client.state << dendl;
+  if (m_client_meta.image_id == m_local_image_id &&
+      client.state != cls::journal::CLIENT_STATE_CONNECTED) {
     dout(5) << "client flagged disconnected, stopping image replay" << dendl;
     if (m_local_image_ctx->mirroring_resync_after_disconnect) {
-      Mutex::Locker locker(m_lock);
-      m_stopping_for_resync = true;
+      m_resync_requested = true;
+      on_start_fail(-ENOTCONN, "disconnected: automatic resync");
+    } else {
+      on_start_fail(-ENOTCONN, "disconnected");
     }
-    on_start_fail(-ENOTCONN, "disconnected");
     return;
   }
 
@@ -665,12 +711,6 @@ void ImageReplayer<I>::handle_start_replay(int r) {
   update_mirror_image_status(true, boost::none);
   reschedule_update_status_task(30);
 
-  dout(20) << "start succeeded" << dendl;
-  if (on_finish != nullptr) {
-    dout(20) << "on finish complete, r=" << r << dendl;
-    on_finish->complete(r);
-  }
-
   if (on_replay_interrupted()) {
     return;
   }
@@ -686,6 +726,11 @@ void ImageReplayer<I>::handle_start_replay(int r) {
     dout(20) << "m_remote_journaler=" << *m_remote_journaler << dendl;
   }
 
+  dout(20) << "start succeeded" << dendl;
+  if (on_finish != nullptr) {
+    dout(20) << "on finish complete, r=" << r << dendl;
+    on_finish->complete(r);
+  }
 }
 
 template <typename I>
@@ -697,7 +742,7 @@ void ImageReplayer<I>::on_start_fail(int r, const std::string &desc)
         Mutex::Locker locker(m_lock);
         assert(m_state == STATE_STARTING);
         m_state = STATE_STOPPING;
-        if (r < 0 && r != -ECANCELED && r != -EREMOTEIO) {
+        if (r < 0 && r != -ECANCELED && r != -EREMOTEIO && r != -ENOENT) {
           derr << "start failed: " << cpp_strerror(r) << dendl;
         } else {
           dout(20) << "start canceled" << dendl;
@@ -732,6 +777,8 @@ void ImageReplayer<I>::stop(Context *on_finish, bool manual, int r,
   dout(20) << "on_finish=" << on_finish << ", manual=" << manual
           << ", desc=" << desc << dendl;
 
+  m_image_deleter->cancel_waiter(m_local_pool_id, m_global_image_id);
+
   image_replayer::BootstrapRequest<I> *bootstrap_request = nullptr;
   bool shut_down_replay = false;
   bool running = true;
@@ -1544,11 +1591,6 @@ void ImageReplayer<I>::shut_down(int r) {
        m_remote_journaler->remove_listener(&m_remote_listener);
         m_remote_journaler->shut_down(ctx);
       });
-    if (m_stopping_for_resync) {
-      ctx = new FunctionContext([this, ctx](int r) {
-          m_remote_journaler->unregister_client(ctx);
-        });
-    }
   }
 
   // stop the replay of remote journal events
@@ -1636,12 +1678,29 @@ void ImageReplayer<I>::handle_shut_down(int r) {
       return;
     }
 
-    if (m_stopping_for_resync) {
+    bool delete_requested = false;
+    if (m_delete_requested && !m_local_image_id.empty()) {
+      assert(m_remote_image.image_id.empty());
+      dout(0) << "remote image no longer exists: scheduling deletion" << dendl;
+      delete_requested = true;
+    }
+    if (delete_requested || m_resync_requested) {
       m_image_deleter->schedule_image_delete(m_local,
                                              m_local_pool_id,
                                              m_global_image_id,
-                                             true);
-      m_stopping_for_resync = false;
+                                             m_resync_requested);
+
+      m_local_image_id = "";
+      m_resync_requested = false;
+      if (m_delete_requested) {
+        unregister_admin_socket_hook();
+        m_delete_requested = false;
+      }
+    } else if (m_last_r == -ENOENT &&
+               m_local_image_id.empty() && m_remote_image.image_id.empty()) {
+      dout(0) << "mirror image no longer exists" << dendl;
+      unregister_admin_socket_hook();
+      m_finished = true;
     }
   }
 
@@ -1721,12 +1780,36 @@ template <typename I>
 void ImageReplayer<I>::resync_image(Context *on_finish) {
   dout(20) << dendl;
 
-  {
-    Mutex::Locker l(m_lock);
-    m_stopping_for_resync = true;
+  m_resync_requested = true;
+  stop(on_finish);
+}
+
+template <typename I>
+void ImageReplayer<I>::register_admin_socket_hook() {
+  if (m_asok_hook != nullptr) {
+    return;
   }
 
-  stop(on_finish);
+  dout(20) << "registered asok hook: " << m_name << dendl;
+  auto asok_hook = new ImageReplayerAdminSocketHook<I>(g_ceph_context, m_name,
+                                                       this);
+  int r = asok_hook->register_commands();
+  if (r < 0) {
+    derr << "error registering admin socket commands" << dendl;
+    delete asok_hook;
+    asok_hook = nullptr;
+    return;
+  }
+
+  m_asok_hook = asok_hook;
+}
+
+template <typename I>
+void ImageReplayer<I>::unregister_admin_socket_hook() {
+  dout(20) << dendl;
+
+  delete m_asok_hook;
+  m_asok_hook = nullptr;
 }
 
 template <typename I>
index 3ea41dc5378f50abc88d9c7b26272b27ac974811..3f2ab2fca74e8985f4ce5871a92ebdca6b566b0f 100644 (file)
@@ -15,7 +15,6 @@
 #include "librbd/ImageCtx.h"
 #include "librbd/journal/Types.h"
 #include "librbd/journal/TypeTraits.h"
-#include "ImageDeleter.h"
 #include "ProgressContext.h"
 #include "types.h"
 #include "tools/rbd_mirror/image_replayer/Types.h"
@@ -48,6 +47,7 @@ namespace journal { template <typename> class Replay; }
 namespace rbd {
 namespace mirror {
 
+template <typename> struct ImageDeleter;
 template <typename> struct InstanceWatcher;
 template <typename> struct Threads;
 
@@ -62,7 +62,7 @@ template <typename ImageCtxT = librbd::ImageCtx>
 class ImageReplayer {
 public:
   static ImageReplayer *create(
-    Threads<librbd::ImageCtx> *threads, ImageDeleter<ImageCtxT>* image_deleter,
+    Threads<ImageCtxT> *threads, ImageDeleter<ImageCtxT>* image_deleter,
     InstanceWatcher<ImageCtxT> *instance_watcher,
     RadosRef local, const std::string &local_mirror_uuid, int64_t local_pool_id,
     const std::string &global_image_id) {
@@ -74,7 +74,7 @@ public:
     delete this;
   }
 
-  ImageReplayer(Threads<librbd::ImageCtx> *threads,
+  ImageReplayer(Threads<ImageCtxT> *threads,
                 ImageDeleter<ImageCtxT>* image_deleter,
                 InstanceWatcher<ImageCtxT> *instance_watcher,
                 RadosRef local, const std::string &local_mirror_uuid,
@@ -90,6 +90,16 @@ public:
   std::string get_name() { Mutex::Locker l(m_lock); return m_name; };
   void set_state_description(int r, const std::string &desc);
 
+  // TODO temporary until policy handles release of image replayers
+  inline bool is_finished() const {
+    Mutex::Locker locker(m_lock);
+    return m_finished;
+  }
+  inline void set_finished(bool finished) {
+    Mutex::Locker locker(m_lock);
+    m_finished = finished;
+  }
+
   inline bool is_blacklisted() const {
     Mutex::Locker locker(m_lock);
     return (m_last_r == -EBLACKLISTED);
@@ -97,13 +107,7 @@ public:
 
   image_replayer::HealthState get_health_state() const;
 
-  void add_remote_image(const std::string &remote_mirror_uuid,
-                        const std::string &remote_image_id,
-                        librados::IoCtx &remote_io_ctx);
-  void remove_remote_image(const std::string &remote_mirror_uuid,
-                           const std::string &remote_image_id,
-                          bool schedule_delete);
-  bool remote_images_empty() const;
+  void add_peer(const std::string &peer_uuid, librados::IoCtx &remote_io_ctx);
 
   inline int64_t get_local_pool_id() const {
     return m_local_pool_id;
@@ -134,10 +138,16 @@ protected:
    *    v                                                   *
    * <starting>                                             *
    *    |                                                   *
+   *    v                                                   *
+   * WAIT_FOR_DELETION                                      *
+   *    |                                                   *
    *    v                                           (error) *
    * PREPARE_LOCAL_IMAGE  * * * * * * * * * * * * * * * * * *
    *    |                                                   *
    *    v                                           (error) *
+   * PREPARE_REMOTE_IMAGE * * * * * * * * * * * * * * * * * *
+   *    |                                                   *
+   *    v                                           (error) *
    * BOOTSTRAP_IMAGE  * * * * * * * * * * * * * * * * * * * *
    *    |                                                   *
    *    v                                           (error) *
@@ -224,30 +234,10 @@ private:
 
     RemoteImage() {
     }
-    RemoteImage(const std::string &mirror_uuid,
-                const std::string &image_id)
-      : mirror_uuid(mirror_uuid), image_id(image_id) {
-    }
-    RemoteImage(const std::string &mirror_uuid,
-                const std::string &image_id,
-                librados::IoCtx &io_ctx)
-      : mirror_uuid(mirror_uuid), image_id(image_id), io_ctx(io_ctx) {
-    }
-
-    inline bool operator<(const RemoteImage &rhs) const {
-      if (mirror_uuid != rhs.mirror_uuid) {
-        return mirror_uuid < rhs.mirror_uuid;
-      } else {
-        return image_id < rhs.image_id;
-      }
-    }
-    inline bool operator==(const RemoteImage &rhs) const {
-      return (mirror_uuid == rhs.mirror_uuid && image_id == rhs.image_id);
+    RemoteImage(const Peer& peer) : io_ctx(peer.io_ctx) {
     }
   };
 
-  typedef std::set<RemoteImage> RemoteImages;
-
   typedef typename librbd::journal::TypeTraits<ImageCtxT>::Journaler Journaler;
   typedef boost::optional<State> OptionalState;
   typedef boost::optional<cls::rbd::MirrorImageStatusState>
@@ -285,11 +275,11 @@ private:
     ImageReplayer<ImageCtxT> *replayer;
   };
 
-  Threads<librbd::ImageCtx> *m_threads;
+  Threads<ImageCtxT> *m_threads;
   ImageDeleter<ImageCtxT>* m_image_deleter;
   InstanceWatcher<ImageCtxT> *m_instance_watcher;
 
-  RemoteImages m_remote_images;
+  Peers m_peers;
   RemoteImage m_remote_image;
 
   RadosRef m_local;
@@ -298,6 +288,7 @@ private:
   std::string m_local_image_id;
   std::string m_global_image_id;
   std::string m_name;
+
   mutable Mutex m_lock;
   State m_state = STATE_STOPPED;
   std::string m_state_desc;
@@ -306,7 +297,11 @@ private:
   int m_last_r = 0;
 
   BootstrapProgressContext m_progress_cxt;
-  bool m_do_resync{false};
+
+  bool m_finished = false;
+  bool m_delete_requested = false;
+  bool m_resync_requested = false;
+
   image_replayer::EventPreprocessor<ImageCtxT> *m_event_preprocessor = nullptr;
   image_replayer::ReplayStatusFormatter<ImageCtxT> *m_replay_status_formatter =
     nullptr;
@@ -319,7 +314,6 @@ private:
   Journaler* m_remote_journaler = nullptr;
   ::journal::ReplayHandler *m_replay_handler = nullptr;
   librbd::journal::Listener *m_journal_listener;
-  bool m_stopping_for_resync = false;
 
   Context *m_on_start_finish = nullptr;
   Context *m_on_stop_finish = nullptr;
@@ -394,9 +388,15 @@ private:
   void handle_shut_down(int r);
   void handle_remote_journal_metadata_updated();
 
+  void wait_for_deletion();
+  void handle_wait_for_deletion(int r);
+
   void prepare_local_image();
   void handle_prepare_local_image(int r);
 
+  void prepare_remote_image();
+  void handle_prepare_remote_image(int r);
+
   void bootstrap();
   void handle_bootstrap(int r);
 
@@ -423,6 +423,8 @@ private:
   void handle_process_entry_ready(int r);
   void handle_process_entry_safe(const ReplayEntry& replay_entry, int r);
 
+  void register_admin_socket_hook();
+  void unregister_admin_socket_hook();
 };
 
 } // namespace mirror
index 40a8c1b43205a6f7d6e79043ff4437050ee9c619..52e60605c758eac5dffa87f4422052f7e902a4e8 100644 (file)
@@ -99,24 +99,15 @@ void InstanceReplayer<I>::shut_down(Context *on_finish) {
 }
 
 template <typename I>
-void InstanceReplayer<I>::add_peer(std::string mirror_uuid,
+void InstanceReplayer<I>::add_peer(std::string peer_uuid,
                                    librados::IoCtx io_ctx) {
-  dout(20) << mirror_uuid << dendl;
+  dout(20) << peer_uuid << dendl;
 
   Mutex::Locker locker(m_lock);
-  auto result = m_peers.insert(Peer(mirror_uuid, io_ctx)).second;
+  auto result = m_peers.insert(Peer(peer_uuid, io_ctx)).second;
   assert(result);
 }
 
-template <typename I>
-void InstanceReplayer<I>::remove_peer(std::string mirror_uuid) {
-  dout(20) << mirror_uuid << dendl;
-
-  Mutex::Locker locker(m_lock);
-  auto result = m_peers.erase(Peer(mirror_uuid));
-  assert(result > 0);
-}
-
 template <typename I>
 void InstanceReplayer<I>::release_all(Context *on_finish) {
   dout(20) << dendl;
@@ -141,18 +132,14 @@ void InstanceReplayer<I>::release_all(Context *on_finish) {
 template <typename I>
 void InstanceReplayer<I>::acquire_image(InstanceWatcher<I> *instance_watcher,
                                         const std::string &global_image_id,
-                                        const std::string &peer_mirror_uuid,
-                                        const std::string &peer_image_id,
                                         Context *on_finish) {
-  dout(20) << "global_image_id=" << global_image_id << ", peer_mirror_uuid="
-           << peer_mirror_uuid << ", peer_image_id=" << peer_image_id << dendl;
+  dout(20) << "global_image_id=" << global_image_id << dendl;
 
   Mutex::Locker locker(m_lock);
 
   assert(m_on_shut_down == nullptr);
 
   auto it = m_image_replayers.find(global_image_id);
-
   if (it == m_image_replayers.end()) {
     auto image_replayer = ImageReplayer<I>::create(
         m_threads, m_image_deleter, instance_watcher, m_local_rados,
@@ -163,36 +150,30 @@ void InstanceReplayer<I>::acquire_image(InstanceWatcher<I> *instance_watcher,
 
     it = m_image_replayers.insert(std::make_pair(global_image_id,
                                                  image_replayer)).first;
+
+    // TODO only a single peer is currently supported
+    assert(m_peers.size() == 1);
+    auto peer = *m_peers.begin();
+    image_replayer->add_peer(peer.peer_uuid, peer.io_ctx);
   }
 
-  auto image_replayer = it->second;
-  if (!peer_mirror_uuid.empty()) {
-    auto iter = m_peers.find(Peer(peer_mirror_uuid));
-    assert(iter != m_peers.end());
-    auto io_ctx = iter->io_ctx;
+  auto& image_replayer = it->second;
+  // TODO temporary until policy integrated
+  image_replayer->set_finished(false);
 
-    image_replayer->add_remote_image(peer_mirror_uuid, peer_image_id, io_ctx);
-  }
   start_image_replayer(image_replayer);
-
   m_threads->work_queue->queue(on_finish, 0);
 }
 
 template <typename I>
 void InstanceReplayer<I>::release_image(const std::string &global_image_id,
-                                        const std::string &peer_mirror_uuid,
-                                        const std::string &peer_image_id,
-                                        bool schedule_delete,
                                         Context *on_finish) {
-  dout(20) << "global_image_id=" << global_image_id << ", peer_mirror_uuid="
-           << peer_mirror_uuid << ", peer_image_id=" << peer_image_id << dendl;
+  dout(20) << "global_image_id=" << global_image_id << dendl;
 
   Mutex::Locker locker(m_lock);
-
   assert(m_on_shut_down == nullptr);
 
   auto it = m_image_replayers.find(global_image_id);
-
   if (it == m_image_replayers.end()) {
     dout(20) << global_image_id << ": not found" << dendl;
     m_threads->work_queue->queue(on_finish, 0);
@@ -200,17 +181,6 @@ void InstanceReplayer<I>::release_image(const std::string &global_image_id,
   }
 
   auto image_replayer = it->second;
-  if (!peer_mirror_uuid.empty()) {
-    image_replayer->remove_remote_image(peer_mirror_uuid, peer_image_id,
-                                       schedule_delete);
-  }
-
-  if (!image_replayer->remote_images_empty()) {
-    dout(20) << global_image_id << ": still has peer images" << dendl;
-    m_threads->work_queue->queue(on_finish, 0);
-    return;
-  }
-
   m_image_replayers.erase(it);
 
   on_finish = new FunctionContext(
@@ -218,18 +188,29 @@ void InstanceReplayer<I>::release_image(const std::string &global_image_id,
       image_replayer->destroy();
       on_finish->complete(0);
     });
+  stop_image_replayer(image_replayer, on_finish);
+}
 
-  if (schedule_delete) {
-    on_finish = new FunctionContext(
-      [this, image_replayer, on_finish] (int r) {
-        auto global_image_id = image_replayer->get_global_image_id();
-        m_image_deleter->schedule_image_delete(
-          m_local_rados, m_local_pool_id, global_image_id, false);
-        on_finish->complete(0);
-      });
-  }
+template <typename I>
+void InstanceReplayer<I>::remove_peer_image(const std::string &global_image_id,
+                                            const std::string &peer_mirror_uuid,
+                                            Context *on_finish) {
+  dout(20) << "global_image_id=" << global_image_id << ", "
+           << "peer_mirror_uuid=" << peer_mirror_uuid << dendl;
 
-  stop_image_replayer(image_replayer, on_finish);
+  Mutex::Locker locker(m_lock);
+  assert(m_on_shut_down == nullptr);
+
+  auto it = m_image_replayers.find(global_image_id);
+  if (it != m_image_replayers.end()) {
+    // TODO only a single peer is currently supported, therefore
+    // we can just interrupt the current image replayer and
+    // it will eventually detect that the peer image is missing and
+    // determine if a delete propagation is required.
+    auto image_replayer = it->second;
+    image_replayer->restart();
+  }
+  m_threads->work_queue->queue(on_finish, 0);
 }
 
 template <typename I>
@@ -321,36 +302,16 @@ void InstanceReplayer<I>::start_image_replayer(
   } else if (image_replayer->is_blacklisted()) {
     derr << "blacklisted detected during image replay" << dendl;
     return;
+  } else if (image_replayer->is_finished()) {
+    // TODO temporary until policy integrated
+    dout(5) << "removing image replayer for global_image_id="
+            << global_image_id << dendl;
+    m_image_replayers.erase(image_replayer->get_global_image_id());
+    image_replayer->destroy();
+    return;
   }
 
-  FunctionContext *ctx = new FunctionContext(
-    [this, global_image_id] (int r) {
-      dout(20) << "image deleter result: r=" << r << ", "
-               << "global_image_id=" << global_image_id << dendl;
-
-      Mutex::Locker locker(m_lock);
-      m_async_op_tracker.finish_op();
-
-      if (r == -ESTALE || r == -ECANCELED) {
-        return;
-      }
-
-      auto it = m_image_replayers.find(global_image_id);
-      if (it == m_image_replayers.end()) {
-        return;
-      }
-
-      auto image_replayer = it->second;
-      if (r >= 0) {
-        image_replayer->start(nullptr, false);
-      } else {
-        start_image_replayer(image_replayer);
-      }
-    });
-
-  m_async_op_tracker.start_op();
-  m_image_deleter->wait_for_scheduled_deletion(
-    m_local_pool_id, image_replayer->get_global_image_id(), ctx, false);
+  image_replayer->start(nullptr, false);
 }
 
 template <typename I>
@@ -372,19 +333,23 @@ void InstanceReplayer<I>::start_image_replayers(int r) {
     return;
   }
 
-  size_t image_count = 0;
-  size_t warning_count = 0;
-  size_t error_count = 0;
-  for (auto &it : m_image_replayers) {
+  uint64_t image_count = 0;
+  uint64_t warning_count = 0;
+  uint64_t error_count = 0;
+  for (auto it = m_image_replayers.begin();
+       it != m_image_replayers.end();) {
+    auto current_it(it);
+    ++it;
+
     ++image_count;
-    auto health_state = it.second->get_health_state();
+    auto health_state = current_it->second->get_health_state();
     if (health_state == image_replayer::HEALTH_STATE_WARNING) {
       ++warning_count;
     } else if (health_state == image_replayer::HEALTH_STATE_ERROR) {
       ++error_count;
     }
 
-    start_image_replayer(it.second);
+    start_image_replayer(current_it->second);
   }
 
   m_service_daemon->add_or_update_attribute(
index 16618b84123daced34494a214f0ad46e8a039328..4721f742a3ae08405deb99d14e0c8f3736271ac1 100644 (file)
@@ -52,18 +52,15 @@ public:
   void init(Context *on_finish);
   void shut_down(Context *on_finish);
 
-  void add_peer(std::string mirror_uuid, librados::IoCtx io_ctx);
-  void remove_peer(std::string mirror_uuid);
+  void add_peer(std::string peer_uuid, librados::IoCtx io_ctx);
 
   void acquire_image(InstanceWatcher<ImageCtxT> *instance_watcher,
-                     const std::string &global_image_id,
-                     const std::string &peer_mirror_uuid,
-                     const std::string &peer_image_id,
-                     Context *on_finish);
-  void release_image(const std::string &global_image_id,
-                     const std::string &peer_mirror_uuid,
-                     const std::string &peer_image_id,
-                     bool schedule_delete, Context *on_finish);
+                     const std::string &global_image_id, Context *on_finish);
+  void release_image(const std::string &global_image_id, Context *on_finish);
+  void remove_peer_image(const std::string &global_image_id,
+                         const std::string &peer_mirror_uuid,
+                         Context *on_finish);
+
   void release_all(Context *on_finish);
 
   void print_status(Formatter *f, stringstream *ss);
@@ -87,30 +84,6 @@ private:
    * @endverbatim
    */
 
-  struct Peer {
-    std::string mirror_uuid;
-    librados::IoCtx io_ctx;
-
-    Peer() {
-    }
-
-    Peer(const std::string &mirror_uuid) : mirror_uuid(mirror_uuid) {
-    }
-
-    Peer(const std::string &mirror_uuid, librados::IoCtx &io_ctx)
-      : mirror_uuid(mirror_uuid), io_ctx(io_ctx) {
-    }
-
-    inline bool operator<(const Peer &rhs) const {
-      return mirror_uuid < rhs.mirror_uuid;
-    }
-    inline bool operator==(const Peer &rhs) const {
-      return mirror_uuid == rhs.mirror_uuid;
-    }
-  };
-
-  typedef std::set<Peer> Peers;
-
   Threads<ImageCtxT> *m_threads;
   ServiceDaemon<ImageCtxT>* m_service_daemon;
   ImageDeleter<ImageCtxT>* m_image_deleter;
index 04e0a5e342d40721510649fd806f6363cc482096..0c82c3007fc73f4af29ba3a5dd27d536799a3572 100644 (file)
@@ -156,7 +156,7 @@ struct InstanceWatcher<I>::C_NotifyInstanceRequest : public Context {
     }
 
     dout(20) << "C_NotifyInstanceRequest: " << this << " " << __func__
-             << ": sendding to " << instance_id << dendl;
+             << ": sending to " << instance_id << dendl;
     notifier->notify(bl, &response, this);
   }
 
@@ -397,8 +397,7 @@ void InstanceWatcher<I>::remove(Context *on_finish) {
 template <typename I>
 void InstanceWatcher<I>::notify_image_acquire(
     const std::string &instance_id, const std::string &global_image_id,
-    const std::string &peer_mirror_uuid, const std::string &peer_image_id,
-  Context *on_notify_ack) {
+    Context *on_notify_ack) {
   dout(20) << "instance_id=" << instance_id << ", global_image_id="
            << global_image_id << dendl;
 
@@ -407,13 +406,12 @@ void InstanceWatcher<I>::notify_image_acquire(
   assert(m_on_finish == nullptr);
 
   if (instance_id == m_instance_id) {
-    handle_image_acquire(global_image_id, peer_mirror_uuid, peer_image_id,
-                         on_notify_ack);
+    handle_image_acquire(global_image_id, on_notify_ack);
   } else {
     uint64_t request_id = ++m_request_seq;
     bufferlist bl;
-    ::encode(NotifyMessage{ImageAcquirePayload{
-        request_id, global_image_id, peer_mirror_uuid, peer_image_id}}, bl);
+    ::encode(NotifyMessage{ImageAcquirePayload{request_id, global_image_id}},
+             bl);
     auto req = new C_NotifyInstanceRequest(this, instance_id, request_id,
                                            std::move(bl), on_notify_ack);
     req->send();
@@ -422,9 +420,8 @@ void InstanceWatcher<I>::notify_image_acquire(
 
 template <typename I>
 void InstanceWatcher<I>::notify_image_release(
-  const std::string &instance_id, const std::string &global_image_id,
-  const std::string &peer_mirror_uuid, const std::string &peer_image_id,
-  bool schedule_delete, Context *on_notify_ack) {
+    const std::string &instance_id, const std::string &global_image_id,
+    Context *on_notify_ack) {
   dout(20) << "instance_id=" << instance_id << ", global_image_id="
            << global_image_id << dendl;
 
@@ -433,14 +430,36 @@ void InstanceWatcher<I>::notify_image_release(
   assert(m_on_finish == nullptr);
 
   if (instance_id == m_instance_id) {
-    handle_image_release(global_image_id, peer_mirror_uuid, peer_image_id,
-                         schedule_delete, on_notify_ack);
+    handle_image_release(global_image_id, on_notify_ack);
   } else {
     uint64_t request_id = ++m_request_seq;
     bufferlist bl;
-    ::encode(NotifyMessage{ImageReleasePayload{
-        request_id, global_image_id, peer_mirror_uuid, peer_image_id,
-        schedule_delete}}, bl);
+    ::encode(NotifyMessage{ImageReleasePayload{request_id, global_image_id}},
+             bl);
+    auto req = new C_NotifyInstanceRequest(this, instance_id, request_id,
+                                           std::move(bl), on_notify_ack);
+    req->send();
+  }
+}
+
+template <typename I>
+void InstanceWatcher<I>::notify_peer_image_removed(
+    const std::string &instance_id, const std::string &global_image_id,
+    const std::string &peer_mirror_uuid, Context *on_notify_ack) {
+  dout(20) << "instance_id=" << instance_id << ", "
+           << "global_image_id=" << global_image_id << ", "
+           << "peer_mirror_uuid=" << peer_mirror_uuid << dendl;
+
+  Mutex::Locker locker(m_lock);
+  assert(m_on_finish == nullptr);
+
+  if (instance_id == m_instance_id) {
+    handle_peer_image_removed(global_image_id, peer_mirror_uuid, on_notify_ack);
+  } else {
+    uint64_t request_id = ++m_request_seq;
+    bufferlist bl;
+    ::encode(NotifyMessage{PeerImageRemovedPayload{request_id, global_image_id,
+                                                   peer_mirror_uuid}}, bl);
     auto req = new C_NotifyInstanceRequest(this, instance_id, request_id,
                                            std::move(bl), on_notify_ack);
     req->send();
@@ -1101,16 +1120,12 @@ void InstanceWatcher<I>::handle_notify(uint64_t notify_id, uint64_t handle,
 
 template <typename I>
 void InstanceWatcher<I>::handle_image_acquire(
-  const std::string &global_image_id, const std::string &peer_mirror_uuid,
-  const std::string &peer_image_id, Context *on_finish) {
+    const std::string &global_image_id, Context *on_finish) {
   dout(20) << "global_image_id=" << global_image_id << dendl;
 
   auto ctx = new FunctionContext(
-      [this, global_image_id, peer_mirror_uuid, peer_image_id,
-       on_finish] (int r) {
-        m_instance_replayer->acquire_image(this, global_image_id,
-                                           peer_mirror_uuid, peer_image_id,
-                                           on_finish);
+      [this, global_image_id, on_finish] (int r) {
+        m_instance_replayer->acquire_image(this, global_image_id, on_finish);
         m_notify_op_tracker.finish_op();
       });
 
@@ -1120,16 +1135,30 @@ void InstanceWatcher<I>::handle_image_acquire(
 
 template <typename I>
 void InstanceWatcher<I>::handle_image_release(
-  const std::string &global_image_id,  const std::string &peer_mirror_uuid,
-  const std::string &peer_image_id, bool schedule_delete, Context *on_finish) {
+    const std::string &global_image_id, Context *on_finish) {
   dout(20) << "global_image_id=" << global_image_id << dendl;
 
   auto ctx = new FunctionContext(
-      [this, global_image_id, peer_mirror_uuid, peer_image_id, schedule_delete,
-       on_finish] (int r) {
-        m_instance_replayer->release_image(global_image_id, peer_mirror_uuid,
-                                           peer_image_id, schedule_delete,
-                                           on_finish);
+      [this, global_image_id, on_finish] (int r) {
+        m_instance_replayer->release_image(global_image_id, on_finish);
+        m_notify_op_tracker.finish_op();
+      });
+
+  m_notify_op_tracker.start_op();
+  m_work_queue->queue(ctx, 0);
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_peer_image_removed(
+    const std::string &global_image_id, const std::string &peer_mirror_uuid,
+    Context *on_finish) {
+  dout(20) << "global_image_id=" << global_image_id << ", "
+           << "peer_mirror_uuid=" << peer_mirror_uuid << dendl;
+
+  auto ctx = new FunctionContext(
+      [this, peer_mirror_uuid, global_image_id, on_finish] (int r) {
+        m_instance_replayer->remove_peer_image(global_image_id,
+                                               peer_mirror_uuid, on_finish);
         m_notify_op_tracker.finish_op();
       });
 
@@ -1199,8 +1228,7 @@ void InstanceWatcher<I>::handle_payload(const std::string &instance_id,
   auto on_finish = prepare_request(instance_id, payload.request_id,
                                    on_notify_ack);
   if (on_finish != nullptr) {
-    handle_image_acquire(payload.global_image_id, payload.peer_mirror_uuid,
-                         payload.peer_image_id, on_finish);
+    handle_image_acquire(payload.global_image_id, on_finish);
   }
 }
 
@@ -1214,9 +1242,22 @@ void InstanceWatcher<I>::handle_payload(const std::string &instance_id,
   auto on_finish = prepare_request(instance_id, payload.request_id,
                                    on_notify_ack);
   if (on_finish != nullptr) {
-    handle_image_release(payload.global_image_id, payload.peer_mirror_uuid,
-                         payload.peer_image_id, payload.schedule_delete,
-                         on_finish);
+    handle_image_release(payload.global_image_id, on_finish);
+  }
+}
+
+template <typename I>
+void InstanceWatcher<I>::handle_payload(const std::string &instance_id,
+                                        const PeerImageRemovedPayload &payload,
+                                        C_NotifyAck *on_notify_ack) {
+  dout(20) << "remove_peer_image: instance_id=" << instance_id << ", "
+           << "request_id=" << payload.request_id << dendl;
+
+  auto on_finish = prepare_request(instance_id, payload.request_id,
+                                   on_notify_ack);
+  if (on_finish != nullptr) {
+    handle_peer_image_removed(payload.global_image_id, payload.peer_mirror_uuid,
+                              on_finish);
   }
 }
 
index 79c2d1c0714a295d8b28f5bb57298ba3ad04fbd4..be90f10e213736dd635bfbd5e51d64c60760a033 100644 (file)
@@ -65,14 +65,14 @@ public:
 
   void notify_image_acquire(const std::string &instance_id,
                             const std::string &global_image_id,
-                            const std::string &peer_mirror_uuid,
-                            const std::string &peer_image_id,
                             Context *on_notify_ack);
   void notify_image_release(const std::string &instance_id,
                             const std::string &global_image_id,
-                            const std::string &peer_mirror_uuid,
-                            const std::string &peer_image_id,
-                           bool schedule_delete, Context *on_notify_ack);
+                           Context *on_notify_ack);
+  void notify_peer_image_removed(const std::string &instance_id,
+                                 const std::string &global_image_id,
+                                 const std::string &peer_mirror_uuid,
+                                 Context *on_notify_ack);
 
   void notify_sync_request(const std::string &sync_id, Context *on_sync_start);
   bool cancel_sync_request(const std::string &sync_id);
@@ -225,13 +225,12 @@ private:
                      uint64_t notifier_id, bufferlist &bl) override;
 
   void handle_image_acquire(const std::string &global_image_id,
-                            const std::string &peer_mirror_uuid,
-                            const std::string &peer_image_id,
                             Context *on_finish);
   void handle_image_release(const std::string &global_image_id,
-                            const std::string &peer_mirror_uuid,
-                            const std::string &peer_image_id,
-                            bool schedule_delete, Context *on_finish);
+                            Context *on_finish);
+  void handle_peer_image_removed(const std::string &global_image_id,
+                                 const std::string &peer_mirror_uuid,
+                                 Context *on_finish);
 
   void handle_sync_request(const std::string &instance_id,
                            const std::string &sync_id, Context *on_finish);
@@ -244,6 +243,9 @@ private:
   void handle_payload(const std::string &instance_id,
                       const instance_watcher::ImageReleasePayload &payload,
                       C_NotifyAck *on_notify_ack);
+  void handle_payload(const std::string &instance_id,
+                      const instance_watcher::PeerImageRemovedPayload &payload,
+                      C_NotifyAck *on_notify_ack);
   void handle_payload(const std::string &instance_id,
                       const instance_watcher::SyncRequestPayload &payload,
                       C_NotifyAck *on_notify_ack);
index fd0b21b1847d84fa67b84a5e1c4b3186d09444c7..6c3b228dd58aa55a8e2e090ca42f44542a2f6810 100644 (file)
@@ -357,12 +357,15 @@ void PoolReplayer::shut_down() {
   }
   if (m_leader_watcher) {
     m_leader_watcher->shut_down();
+    m_leader_watcher.reset();
   }
   if (m_instance_watcher) {
     m_instance_watcher->shut_down();
+    m_instance_watcher.reset();
   }
   if (m_instance_replayer) {
     m_instance_replayer->shut_down();
+    m_instance_replayer.reset();
   }
 
   assert(!m_local_pool_watcher);
@@ -471,7 +474,9 @@ void PoolReplayer::run()
       break;
     }
 
-    m_cond.WaitInterval(m_lock, utime_t(1, 0));
+    if (!m_stopping) {
+      m_cond.WaitInterval(m_lock, utime_t(1, 0));
+    }
   }
 }
 
@@ -615,37 +620,6 @@ void PoolReplayer::handle_update(const std::string &mirror_uuid,
       m_remote_pool_watcher->get_image_count());
   }
 
-  std::string removed_remote_peer_id;
-  ImageIds removed_remote_image_ids;
-  if (m_initial_mirror_image_ids.find(mirror_uuid) ==
-        m_initial_mirror_image_ids.end() &&
-      m_initial_mirror_image_ids.size() < 2) {
-    m_initial_mirror_image_ids[mirror_uuid] = added_image_ids;
-
-    if (m_initial_mirror_image_ids.size() == 2) {
-      dout(10) << "local and remote pools refreshed" << dendl;
-
-      // both local and remote initial pool listing received. derive
-      // removal notifications for the remote pool
-      auto &local_image_ids = m_initial_mirror_image_ids.begin()->second;
-      auto &remote_image_ids = m_initial_mirror_image_ids.rbegin()->second;
-      removed_remote_peer_id = m_initial_mirror_image_ids.rbegin()->first;
-      for (auto &local_image_id : local_image_ids) {
-        if (remote_image_ids.find(local_image_id) == remote_image_ids.end()) {
-          removed_remote_image_ids.emplace(local_image_id.global_id, "");
-        }
-      }
-      local_image_ids.clear();
-      remote_image_ids.clear();
-    }
-  }
-
-  if (!mirror_uuid.empty() && m_peer.uuid != mirror_uuid) {
-    m_instance_replayer->remove_peer(m_peer.uuid);
-    m_instance_replayer->add_peer(mirror_uuid, m_remote_io_ctx);
-    m_peer.uuid = mirror_uuid;
-  }
-
   m_update_op_tracker.start_op();
   Context *ctx = new FunctionContext([this](int r) {
       dout(20) << "complete handle_update: r=" << r << dendl;
@@ -658,26 +632,18 @@ void PoolReplayer::handle_update(const std::string &mirror_uuid,
     // for now always send to myself (the leader)
     std::string &instance_id = m_instance_watcher->get_instance_id();
     m_instance_watcher->notify_image_acquire(instance_id, image_id.global_id,
-                                             mirror_uuid, image_id.id,
-                                             gather_ctx->new_sub());
-  }
-
-  for (auto &image_id : removed_image_ids) {
-    // for now always send to myself (the leader)
-    std::string &instance_id = m_instance_watcher->get_instance_id();
-    m_instance_watcher->notify_image_release(instance_id, image_id.global_id,
-                                             mirror_uuid, image_id.id, true,
                                              gather_ctx->new_sub());
   }
 
-  // derived removal events for remote after initial image listing
-  for (auto& image_id : removed_remote_image_ids) {
-    // for now always send to myself (the leader)
-    std::string &instance_id = m_instance_watcher->get_instance_id();
-    m_instance_watcher->notify_image_release(instance_id, image_id.global_id,
-                                             removed_remote_peer_id,
-                                             image_id.id, true,
-                                             gather_ctx->new_sub());
+  if (!mirror_uuid.empty()) {
+    for (auto &image_id : removed_image_ids) {
+      // for now always send to myself (the leader)
+      std::string &instance_id = m_instance_watcher->get_instance_id();
+      m_instance_watcher->notify_peer_image_removed(instance_id,
+                                                    image_id.global_id,
+                                                    mirror_uuid,
+                                                    gather_ctx->new_sub());
+    }
   }
 
   gather_ctx->activate();
@@ -707,7 +673,6 @@ void PoolReplayer::init_local_pool_watcher(Context *on_finish) {
   assert(!m_local_pool_watcher);
   m_local_pool_watcher.reset(new PoolWatcher<>(
     m_threads, m_local_io_ctx, m_local_pool_watcher_listener));
-  m_initial_mirror_image_ids.clear();
 
   // ensure the initial set of local images is up-to-date
   // after acquiring the leader role
index 73dd663643ff7e5935068ebe39c8f4bcd5d626a7..ca693ef74808600f948fa132e994712ded8e85b7 100644 (file)
@@ -137,8 +137,6 @@ private:
   std::string m_asok_hook_name;
   AdminSocketHook *m_asok_hook = nullptr;
 
-  std::map<std::string, ImageIds> m_initial_mirror_image_ids;
-
   service_daemon::CalloutId m_callout_id = service_daemon::CALLOUT_ID_NONE;
 
   class PoolReplayerThread : public Thread {
index 51ee00e9add84934057116c29b3f1f7eeca5ad83..9a02bad4bd83ae4398767ff1d2761d8ed5f1f049 100644 (file)
@@ -52,7 +52,7 @@ public:
   void init(Context *on_finish = nullptr);
   void shut_down(Context *on_finish);
 
-  inline size_t get_image_count() const {
+  inline uint64_t get_image_count() const {
     Mutex::Locker locker(m_lock);
     return m_image_ids.size();
   }
index 434aa783c05055bf32c6773ef527b976a97d8abc..1b2359a7df03199b04f3bb9bd1f4fe1d46dd1231 100644 (file)
@@ -143,6 +143,35 @@ void BootstrapRequest<I>::handle_get_remote_tag_class(int r) {
   m_remote_tag_class = client_meta->tag_class;
   dout(10) << ": remote tag class=" << m_remote_tag_class << dendl;
 
+  open_remote_image();
+}
+
+template <typename I>
+void BootstrapRequest<I>::open_remote_image() {
+  dout(20) << dendl;
+
+  update_progress("OPEN_REMOTE_IMAGE");
+
+  Context *ctx = create_context_callback<
+    BootstrapRequest<I>, &BootstrapRequest<I>::handle_open_remote_image>(
+      this);
+  OpenImageRequest<I> *request = OpenImageRequest<I>::create(
+    m_remote_io_ctx, &m_remote_image_ctx, m_remote_image_id, false,
+    ctx);
+  request->send();
+}
+
+template <typename I>
+void BootstrapRequest<I>::handle_open_remote_image(int r) {
+  dout(20) << ": r=" << r << dendl;
+
+  if (r < 0) {
+    derr << ": failed to open remote image: " << cpp_strerror(r) << dendl;
+    assert(m_remote_image_ctx == nullptr);
+    finish(r);
+    return;
+  }
+
   get_client();
 }
 
@@ -166,11 +195,12 @@ void BootstrapRequest<I>::handle_get_client(int r) {
     dout(10) << ": client not registered" << dendl;
   } else if (r < 0) {
     derr << ": failed to retrieve client: " << cpp_strerror(r) << dendl;
-    finish(r);
+    m_ret_val = r;
+    close_remote_image();
     return;
   } else if (decode_client_meta()) {
     // skip registration if it already exists
-    open_remote_image();
+    is_primary();
     return;
   }
 
@@ -202,40 +232,13 @@ void BootstrapRequest<I>::handle_register_client(int r) {
   if (r < 0) {
     derr << ": failed to register with remote journal: " << cpp_strerror(r)
          << dendl;
-    finish(r);
+    m_ret_val = r;
+    close_remote_image();
     return;
   }
 
+  m_client = {};
   *m_client_meta = librbd::journal::MirrorPeerClientMeta(m_local_image_id);
-  open_remote_image();
-}
-
-template <typename I>
-void BootstrapRequest<I>::open_remote_image() {
-  dout(20) << dendl;
-
-  update_progress("OPEN_REMOTE_IMAGE");
-
-  Context *ctx = create_context_callback<
-    BootstrapRequest<I>, &BootstrapRequest<I>::handle_open_remote_image>(
-      this);
-  OpenImageRequest<I> *request = OpenImageRequest<I>::create(
-    m_remote_io_ctx, &m_remote_image_ctx, m_remote_image_id, false,
-    ctx);
-  request->send();
-}
-
-template <typename I>
-void BootstrapRequest<I>::handle_open_remote_image(int r) {
-  dout(20) << ": r=" << r << dendl;
-
-  if (r < 0) {
-    derr << ": failed to open remote image: " << cpp_strerror(r) << dendl;
-    assert(m_remote_image_ctx == nullptr);
-    finish(r);
-    return;
-  }
-
   is_primary();
 }
 
@@ -274,7 +277,7 @@ void BootstrapRequest<I>::handle_is_primary(int r) {
   }
 
   if (m_local_image_id.empty()) {
-    create_local_image();
+    update_client_image();
     return;
   }
 
@@ -311,7 +314,7 @@ void BootstrapRequest<I>::handle_update_client_state(int r) {
   if (r < 0) {
     derr << ": failed to update client: " << cpp_strerror(r) << dendl;
   } else {
-    m_client_meta->state = librbd::journal::MIRROR_PEER_STATE_REPLAYING;;
+    m_client_meta->state = librbd::journal::MIRROR_PEER_STATE_REPLAYING;
   }
 
   close_remote_image();
@@ -339,7 +342,7 @@ void BootstrapRequest<I>::handle_open_local_image(int r) {
   if (r == -ENOENT) {
     assert(*m_local_image_ctx == nullptr);
     dout(10) << ": local image missing" << dendl;
-    create_local_image();
+    unregister_client();
     return;
   } else if (r == -EREMOTEIO) {
     assert(*m_local_image_ctx == nullptr);
@@ -387,57 +390,43 @@ void BootstrapRequest<I>::handle_open_local_image(int r) {
     return;
   }
 
-  update_client_image();
+  get_remote_tags();
 }
 
 template <typename I>
-void BootstrapRequest<I>::create_local_image() {
+void BootstrapRequest<I>::unregister_client() {
   dout(20) << dendl;
+  update_progress("UNREGISTER_CLIENT");
 
   m_local_image_id = "";
-  update_progress("CREATE_LOCAL_IMAGE");
-
-  m_remote_image_ctx->snap_lock.get_read();
-  std::string image_name = m_remote_image_ctx->name;
-  m_remote_image_ctx->snap_lock.put_read();
-
   Context *ctx = create_context_callback<
-    BootstrapRequest<I>, &BootstrapRequest<I>::handle_create_local_image>(
+    BootstrapRequest<I>, &BootstrapRequest<I>::handle_unregister_client>(
       this);
-  CreateImageRequest<I> *request = CreateImageRequest<I>::create(
-    m_local_io_ctx, m_work_queue, m_global_image_id, m_remote_mirror_uuid,
-    image_name, m_remote_image_ctx, &m_local_image_id, ctx);
-  request->send();
+  m_journaler->unregister_client(ctx);
 }
 
 template <typename I>
-void BootstrapRequest<I>::handle_create_local_image(int r) {
+void BootstrapRequest<I>::handle_unregister_client(int r) {
   dout(20) << ": r=" << r << dendl;
-
   if (r < 0) {
-    derr << ": failed to create local image: " << cpp_strerror(r) << dendl;
+    derr << ": failed to unregister with remote journal: " << cpp_strerror(r)
+         << dendl;
     m_ret_val = r;
     close_remote_image();
     return;
   }
 
-  open_local_image();
+  *m_client_meta = librbd::journal::MirrorPeerClientMeta("");
+  register_client();
 }
 
 template <typename I>
 void BootstrapRequest<I>::update_client_image() {
   dout(20) << dendl;
-
   update_progress("UPDATE_CLIENT_IMAGE");
 
-  if (m_client_meta->image_id == (*m_local_image_ctx)->id) {
-    // already registered local image with remote journal
-    get_remote_tags();
-    return;
-  }
-  m_local_image_id = (*m_local_image_ctx)->id;
-
-  dout(20) << dendl;
+  assert(m_local_image_id.empty());
+  m_local_image_id = librbd::util::generate_image_id<I>(m_local_io_ctx);
 
   librbd::journal::MirrorPeerClientMeta client_meta{m_local_image_id};
   client_meta.state = librbd::journal::MIRROR_PEER_STATE_SYNCING;
@@ -459,20 +448,52 @@ void BootstrapRequest<I>::handle_update_client_image(int r) {
   if (r < 0) {
     derr << ": failed to update client: " << cpp_strerror(r) << dendl;
     m_ret_val = r;
-    close_local_image();
+    close_remote_image();
     return;
   }
 
   if (m_canceled) {
     dout(10) << ": request canceled" << dendl;
     m_ret_val = -ECANCELED;
-    close_local_image();
+    close_remote_image();
     return;
   }
 
   *m_client_meta = {m_local_image_id};
   m_client_meta->state = librbd::journal::MIRROR_PEER_STATE_SYNCING;
-  get_remote_tags();
+  create_local_image();
+}
+
+template <typename I>
+void BootstrapRequest<I>::create_local_image() {
+  dout(20) << dendl;
+  update_progress("CREATE_LOCAL_IMAGE");
+
+  m_remote_image_ctx->snap_lock.get_read();
+  std::string image_name = m_remote_image_ctx->name;
+  m_remote_image_ctx->snap_lock.put_read();
+
+  Context *ctx = create_context_callback<
+    BootstrapRequest<I>, &BootstrapRequest<I>::handle_create_local_image>(
+      this);
+  CreateImageRequest<I> *request = CreateImageRequest<I>::create(
+    m_local_io_ctx, m_work_queue, m_global_image_id, m_remote_mirror_uuid,
+    image_name, m_local_image_id, m_remote_image_ctx, ctx);
+  request->send();
+}
+
+template <typename I>
+void BootstrapRequest<I>::handle_create_local_image(int r) {
+  dout(20) << ": r=" << r << dendl;
+
+  if (r < 0) {
+    derr << ": failed to create local image: " << cpp_strerror(r) << dendl;
+    m_ret_val = r;
+    close_remote_image();
+    return;
+  }
+
+  open_local_image();
 }
 
 template <typename I>
index cebd568972ba88efaf9c21ed248d30646d54e6d8..6c67222d4abd24cf3ecb16f3a9213bccda825088 100644 (file)
@@ -92,57 +92,55 @@ private:
    * <start>
    *    |
    *    v
-   * GET_REMOTE_TAG_CLASS * * * * * * * * * * * * * * * *
-   *    |                                               *
-   *    v                                               *
-   * GET_CLIENT * * * * * * * * * * * * * * * * * * * * *
-   *    |                                               *
-   *    v (skip if not needed)                          * (error)
-   * REGISTER_CLIENT  * * * * * * * * * * * * * * * * * *
-   *    |                                               *
-   *    v                                               *
-   * OPEN_REMOTE_IMAGE  * * * * * * * * * * * * * * * * *
-   *    |                                               *
-   *    v                                               *
-   * IS_PRIMARY * * * * * * * * * * * * * * * * * * * * *
-   *    |                                               *
-   *    | (remote image primary)                        *
-   *    \----> OPEN_LOCAL_IMAGE * * * * * * * * * * * * *
-   *    |         |   .   ^                             *
-   *    |         |   .   |                             *
-   *    |         |   .   \-----------------------\     *
-   *    |         |   .                           |     *
-   *    |         |   . (image sync requested)    |     *
-   *    |         |   . . > REMOVE_LOCAL_IMAGE  * * * * *
-   *    |         |   .                   |       |     *
-   *    |         |   . (image doesn't    |       |     *
-   *    |         |   .  exist)           v       |     *
-   *    |         |   . . > CREATE_LOCAL_IMAGE  * * * * *
-   *    |         |             |                 |     *
-   *    |         |             \-----------------/     *
-   *    |         |                                     *
-   *    |         v (skip if not needed)                *
-   *    |      UPDATE_CLIENT_IMAGE  * * * * *           *
-   *    |         |                         *           *
-   *    |         v (skip if not needed)    *           *
-   *    |      GET_REMOTE_TAGS  * * * * * * *           *
-   *    |         |                         *           *
-   *    |         v (skip if not needed)    v           *
-   *    |      IMAGE_SYNC * * * > CLOSE_LOCAL_IMAGE     *
-   *    |         |                         |           *
-   *    |         \-----------------\ /-----/           *
-   *    |                            |                  *
-   *    |                            |                  *
-   *    | (skip if not needed)       |                  *
-   *    \----> UPDATE_CLIENT_STATE  *|* * * * * * * * * *
-   *                |                |                  *
-   *    /-----------/----------------/                  *
-   *    |                                               *
-   *    v                                               *
-   * CLOSE_REMOTE_IMAGE < * * * * * * * * * * * * * * * *
-   *    |
-   *    v
-   * <finish>
+   * GET_REMOTE_TAG_CLASS * * * * * * * * * * * * * * * * * *
+   *    |                                                   * (error)
+   *    v                                                   *
+   * OPEN_REMOTE_IMAGE  * * * * * * * * * * * * * * * * * * *
+   *    |                                                   *
+   *    v                                                   *
+   * GET_CLIENT * * * * * * * * * * * * * * * * * * * * *   *
+   *    |                                               *   *
+   *    |/----------------------------------------------*---*---\
+   *    v (skip if not needed)                          *   *   |
+   * REGISTER_CLIENT  * * * * * * * * * * * * * * * * * *   *   |
+   *    |                                               *   *   |
+   *    v                                               *   *   |
+   * IS_PRIMARY * * * * * * * * * * * * * * * * * * * * *   *   |
+   *    |                                               *   *   |
+   *    | (remote image primary, no local image id)     *   *   |
+   *    \----> UPDATE_CLIENT_IMAGE  * * * * * * * * * * *   *   |
+   *    |         |                                     *   *   |
+   *    |         v                                     *   *   |
+   *    \----> CREATE_LOCAL_IMAGE * * * * * * * * * * * *   *   |
+   *    |         |                                     *   *   |
+   *    |         v                                     *   *   |
+   *    | (remote image primary)                        *   *   |
+   *    \----> OPEN_LOCAL_IMAGE * * * * * * * * * * * * *   *   |
+   *    |         |   .                                 *   *   |
+   *    |         |   . (image doesn't exist)           *   *   |
+   *    |         |   . . > UNREGISTER_CLIENT * * * * * *   *   |
+   *    |         |             |                       *   *   |
+   *    |         |             \-----------------------*---*---/
+   *    |         |                                     *   *
+   *    |         v (skip if not needed)                *   *
+   *    |      GET_REMOTE_TAGS  * * * * * * *           *   *
+   *    |         |                         *           *   *
+   *    |         v (skip if not needed)    v           *   *
+   *    |      IMAGE_SYNC * * * > CLOSE_LOCAL_IMAGE     *   *
+   *    |         |                         |           *   *
+   *    |         \-----------------\ /-----/           *   *
+   *    |                            |                  *   *
+   *    |                            |                  *   *
+   *    | (skip if not needed)       |                  *   *
+   *    \----> UPDATE_CLIENT_STATE  *|* * * * * * * * * *   *
+   *                |                |                  *   *
+   *    /-----------/----------------/                  *   *
+   *    |                                               *   *
+   *    v                                               *   *
+   * CLOSE_REMOTE_IMAGE < * * * * * * * * * * * * * * * *   *
+   *    |                                                   *
+   *    v                                                   *
+   * <finish> < * * * * * * * * * * * * * * * * * * * * * * *
    *
    * @endverbatim
    */
@@ -199,8 +197,8 @@ private:
   void open_local_image();
   void handle_open_local_image(int r);
 
-  void remove_local_image();
-  void handle_remove_local_image(int r);
+  void unregister_client();
+  void handle_unregister_client(int r);
 
   void create_local_image();
   void handle_create_local_image(int r);
index 2c7a0f367e3d8597425c9f62f90672a72f21a02e..5db89b4d1cc396ce7aa5f8b4ab3ecbb038117798 100644 (file)
@@ -33,14 +33,14 @@ CreateImageRequest<I>::CreateImageRequest(librados::IoCtx &local_io_ctx,
                                           const std::string &global_image_id,
                                           const std::string &remote_mirror_uuid,
                                           const std::string &local_image_name,
+                                         const std::string &local_image_id,
                                           I *remote_image_ctx,
-                                         std::string *local_image_id,
                                           Context *on_finish)
   : m_local_io_ctx(local_io_ctx), m_work_queue(work_queue),
     m_global_image_id(global_image_id),
     m_remote_mirror_uuid(remote_mirror_uuid),
-    m_local_image_name(local_image_name), m_remote_image_ctx(remote_image_ctx),
-    m_local_image_id(local_image_id), m_on_finish(on_finish) {
+    m_local_image_name(local_image_name), m_local_image_id(local_image_id),
+    m_remote_image_ctx(remote_image_ctx), m_on_finish(on_finish) {
 }
 
 template <typename I>
@@ -75,10 +75,8 @@ void CreateImageRequest<I>::create_image() {
   image_options.set(RBD_IMAGE_OPTION_STRIPE_COUNT,
                     m_remote_image_ctx->stripe_count);
 
-  *m_local_image_id = librbd::util::generate_image_id(m_local_io_ctx);;
-
   librbd::image::CreateRequest<I> *req = librbd::image::CreateRequest<I>::create(
-    m_local_io_ctx, m_local_image_name, *m_local_image_id,
+    m_local_io_ctx, m_local_image_name, m_local_image_id,
     m_remote_image_ctx->size, image_options, m_global_image_id,
     m_remote_mirror_uuid, false, m_remote_image_ctx->op_work_queue, ctx);
   req->send();
@@ -290,14 +288,12 @@ void CreateImageRequest<I>::clone_image() {
   opts.set(RBD_IMAGE_OPTION_STRIPE_UNIT, m_remote_image_ctx->stripe_unit);
   opts.set(RBD_IMAGE_OPTION_STRIPE_COUNT, m_remote_image_ctx->stripe_count);
 
-  *m_local_image_id = librbd::util::generate_image_id(m_local_io_ctx);;
-
   using klass = CreateImageRequest<I>;
   Context *ctx = create_context_callback<klass, &klass::handle_clone_image>(this);
 
   librbd::image::CloneRequest<I> *req = librbd::image::CloneRequest<I>::create(
     m_local_parent_image_ctx, m_local_io_ctx, m_local_image_name,
-    *m_local_image_id, opts, m_global_image_id, m_remote_mirror_uuid,
+    m_local_image_id, opts, m_global_image_id, m_remote_mirror_uuid,
     m_remote_image_ctx->op_work_queue, ctx);
   req->send();
 }
index 683a6d9a2cf96d6dfd0c2690b4071df1b817e17d..b45deb66770cd4fd1e22ca0d9615b579fb36678e 100644 (file)
@@ -26,20 +26,20 @@ public:
                                     const std::string &global_image_id,
                                     const std::string &remote_mirror_uuid,
                                     const std::string &local_image_name,
+                                   const std::string &local_image_id,
                                     ImageCtxT *remote_image_ctx,
-                                   std::string *local_image_id,
                                     Context *on_finish) {
     return new CreateImageRequest(local_io_ctx, work_queue, global_image_id,
                                   remote_mirror_uuid, local_image_name,
-                                  remote_image_ctx, local_image_id, on_finish);
+                                  local_image_id, remote_image_ctx, on_finish);
   }
 
   CreateImageRequest(librados::IoCtx &local_io_ctx, ContextWQ *work_queue,
                      const std::string &global_image_id,
                      const std::string &remote_mirror_uuid,
                      const std::string &local_image_name,
+                    const std::string &local_image_id,
                      ImageCtxT *remote_image_ctx,
-                    std::string *local_image_id,
                      Context *on_finish);
 
   void send();
@@ -86,8 +86,8 @@ private:
   std::string m_global_image_id;
   std::string m_remote_mirror_uuid;
   std::string m_local_image_name;
+  std::string m_local_image_id;
   ImageCtxT *m_remote_image_ctx;
-  std::string *m_local_image_id;
   Context *m_on_finish;
 
   librados::IoCtx m_remote_parent_io_ctx;
diff --git a/ceph/src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.cc b/ceph/src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.cc
new file mode 100644 (file)
index 0000000..a6a72b4
--- /dev/null
@@ -0,0 +1,84 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h"
+#include "include/rados/librados.hpp"
+#include "cls/rbd/cls_rbd_client.h"
+#include "common/errno.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Utils.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::" \
+                           << "GetMirrorImageIdRequest: " << this << " " \
+                           << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+using librbd::util::create_rados_callback;
+
+template <typename I>
+void GetMirrorImageIdRequest<I>::send() {
+  dout(20) << dendl;
+  get_image_id();
+}
+
+template <typename I>
+void GetMirrorImageIdRequest<I>::get_image_id() {
+  dout(20) << dendl;
+
+  // attempt to cross-reference an image id by the global image id
+  librados::ObjectReadOperation op;
+  librbd::cls_client::mirror_image_get_image_id_start(&op, m_global_image_id);
+
+  librados::AioCompletion *aio_comp = create_rados_callback<
+    GetMirrorImageIdRequest<I>,
+    &GetMirrorImageIdRequest<I>::handle_get_image_id>(
+      this);
+  int r = m_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op, &m_out_bl);
+  assert(r == 0);
+  aio_comp->release();
+}
+
+template <typename I>
+void GetMirrorImageIdRequest<I>::handle_get_image_id(int r) {
+  if (r == 0) {
+    bufferlist::iterator iter = m_out_bl.begin();
+    r = librbd::cls_client::mirror_image_get_image_id_finish(
+      &iter, m_image_id);
+  }
+
+  dout(20) << "r=" << r << ", "
+           << "image_id=" << *m_image_id << dendl;
+
+  if (r < 0) {
+    if (r == -ENOENT) {
+      dout(10) << "global image " << m_global_image_id << " not registered"
+               << dendl;
+    } else {
+      derr << "failed to retrieve image id: " << cpp_strerror(r) << dendl;
+    }
+    finish(r);
+    return;
+  }
+
+  finish(0);
+}
+
+template <typename I>
+void GetMirrorImageIdRequest<I>::finish(int r) {
+  dout(20) << "r=" << r << dendl;
+
+  m_on_finish->complete(r);
+  delete this;
+}
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::GetMirrorImageIdRequest<librbd::ImageCtx>;
diff --git a/ceph/src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h b/ceph/src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h
new file mode 100644 (file)
index 0000000..9bdb778
--- /dev/null
@@ -0,0 +1,75 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_GET_MIRROR_IMAGE_ID_REQUEST_H
+#define RBD_MIRROR_IMAGE_REPLAYER_GET_MIRROR_IMAGE_ID_REQUEST_H
+
+#include "include/buffer.h"
+#include <string>
+
+namespace librados { struct IoCtx; }
+namespace librbd { struct ImageCtx; }
+
+struct Context;
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class GetMirrorImageIdRequest {
+public:
+  static GetMirrorImageIdRequest *create(librados::IoCtx &io_ctx,
+                                         const std::string &global_image_id,
+                                         std::string *image_id,
+                                         Context *on_finish) {
+    return new GetMirrorImageIdRequest(io_ctx, global_image_id, image_id,
+                                       on_finish);
+  }
+
+  GetMirrorImageIdRequest(librados::IoCtx &io_ctx,
+                           const std::string &global_image_id,
+                           std::string *image_id,
+                           Context *on_finish)
+    : m_io_ctx(io_ctx), m_global_image_id(global_image_id),
+      m_image_id(image_id), m_on_finish(on_finish) {
+  }
+
+  void send();
+
+private:
+  /**
+   * @verbatim
+   *
+   * <start>
+   *    |
+   *    v
+   * GET_IMAGE_ID
+   *    |
+   *    v
+   * <finish>
+   *
+   * @endverbatim
+   */
+
+  librados::IoCtx &m_io_ctx;
+  std::string m_global_image_id;
+  std::string *m_image_id;
+  Context *m_on_finish;
+
+  bufferlist m_out_bl;
+
+  void get_image_id();
+  void handle_get_image_id(int r);
+
+  void finish(int r);
+
+};
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::GetMirrorImageIdRequest<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_GET_MIRROR_IMAGE_ID_REQUEST_H
index b26ac05e9429fbfb55bd8262dd74935f44126b5f..4009dc10d4af6f86ea126ec2cdbf2d00ba606419 100644 (file)
@@ -9,6 +9,7 @@
 #include "librbd/Journal.h"
 #include "librbd/Utils.h"
 #include "tools/rbd_mirror/Threads.h"
+#include "tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h"
 #include <type_traits>
 
 #define dout_context g_ceph_context
@@ -35,38 +36,20 @@ template <typename I>
 void PrepareLocalImageRequest<I>::get_local_image_id() {
   dout(20) << dendl;
 
-  // attempt to cross-reference a local image by the global image id
-  librados::ObjectReadOperation op;
-  librbd::cls_client::mirror_image_get_image_id_start(&op, m_global_image_id);
-
-  m_out_bl.clear();
-  librados::AioCompletion *aio_comp = create_rados_callback<
+  Context *ctx = create_context_callback<
     PrepareLocalImageRequest<I>,
-    &PrepareLocalImageRequest<I>::handle_get_local_image_id>(
-      this);
-  int r = m_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op, &m_out_bl);
-  assert(r == 0);
-  aio_comp->release();
+    &PrepareLocalImageRequest<I>::handle_get_local_image_id>(this);
+  auto req = GetMirrorImageIdRequest<I>::create(m_io_ctx, m_global_image_id,
+                                                m_local_image_id, ctx);
+  req->send();
 }
 
 template <typename I>
 void PrepareLocalImageRequest<I>::handle_get_local_image_id(int r) {
-  if (r == 0) {
-    bufferlist::iterator iter = m_out_bl.begin();
-    r = librbd::cls_client::mirror_image_get_image_id_finish(
-      &iter, m_local_image_id);
-  }
-
   dout(20) << "r=" << r << ", "
            << "local_image_id=" << *m_local_image_id << dendl;
 
   if (r < 0) {
-    if (r == -ENOENT) {
-      dout(10) << "image not registered locally" << dendl;
-    } else {
-      derr << "failed to retrieve local image id: " << cpp_strerror(r)
-           << dendl;
-    }
     finish(r);
     return;
   }
diff --git a/ceph/src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.cc b/ceph/src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.cc
new file mode 100644 (file)
index 0000000..2e62093
--- /dev/null
@@ -0,0 +1,108 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.h"
+#include "include/rados/librados.hpp"
+#include "cls/rbd/cls_rbd_client.h"
+#include "common/errno.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Utils.h"
+#include "tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::" \
+                           << "PrepareRemoteImageRequest: " << this << " " \
+                           << __func__ << ": "
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+using librbd::util::create_context_callback;
+using librbd::util::create_rados_callback;
+// Entry point of the request: kicks off the first step (mirror uuid lookup).
+template <typename I>
+void PrepareRemoteImageRequest<I>::send() {
+  get_remote_mirror_uuid();
+}
+// Step 1: queue an async read of the mirror uuid on RBD_MIRRORING via m_io_ctx.
+template <typename I>
+void PrepareRemoteImageRequest<I>::get_remote_mirror_uuid() {
+  dout(20) << dendl;
+  // build the read op; its reply lands in m_out_bl and is decoded by the handler
+  librados::ObjectReadOperation op;
+  librbd::cls_client::mirror_uuid_get_start(&op);
+  // the completion is bound to handle_get_remote_mirror_uuid on this request
+  librados::AioCompletion *aio_comp = create_rados_callback<
+    PrepareRemoteImageRequest<I>,
+    &PrepareRemoteImageRequest<I>::handle_get_remote_mirror_uuid>(this);
+  int r = m_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op, &m_out_bl);
+  assert(r == 0);  // failing to queue the operation is treated as fatal
+  aio_comp->release();
+}
+// Completion of step 1: decodes the uuid; an empty uuid is mapped to -ENOENT.
+template <typename I>
+void PrepareRemoteImageRequest<I>::handle_get_remote_mirror_uuid(int r) {
+  if (r >= 0) {
+    bufferlist::iterator it = m_out_bl.begin();
+    r = librbd::cls_client::mirror_uuid_get_finish(&it, m_remote_mirror_uuid);
+    if (r >= 0 && m_remote_mirror_uuid->empty()) {
+      r = -ENOENT;
+    }
+  }
+  // any error ends the request; -ENOENT is only logged via dout(5), not derr
+  dout(20) << "r=" << r << dendl;
+  if (r < 0) {
+    if (r == -ENOENT) {
+      dout(5) << "remote mirror uuid missing" << dendl;
+    } else {
+      derr << "failed to retrieve remote mirror uuid: " << cpp_strerror(r)
+           << dendl;
+    }
+    finish(r);
+    return;
+  }
+  // uuid obtained: continue with step 2
+  get_remote_image_id();
+}
+// Step 2: run a GetMirrorImageIdRequest for m_global_image_id on m_io_ctx.
+template <typename I>
+void PrepareRemoteImageRequest<I>::get_remote_image_id() {
+  dout(20) << dendl;
+  // ctx is bound to handle_get_remote_image_id and handed to the sub-request
+  Context *ctx = create_context_callback<
+    PrepareRemoteImageRequest<I>,
+    &PrepareRemoteImageRequest<I>::handle_get_remote_image_id>(this);
+  auto req = GetMirrorImageIdRequest<I>::create(m_io_ctx, m_global_image_id,
+                                                m_remote_image_id, ctx);
+  req->send();
+}
+// Completion of step 2: forwards a negative r, otherwise finishes with 0.
+template <typename I>
+void PrepareRemoteImageRequest<I>::handle_get_remote_image_id(int r) {
+  dout(20) << "r=" << r << ", "
+           << "remote_image_id=" << *m_remote_image_id << dendl;
+  // NOTE(review): no error log here; presumably the sub-request reports it
+  if (r < 0) {
+    finish(r);
+    return;
+  }
+
+  finish(0);
+}
+// Final step: completes m_on_finish with r, then destroys this request.
+template <typename I>
+void PrepareRemoteImageRequest<I>::finish(int r) {
+  dout(20) << "r=" << r << dendl;
+  // the request owns itself: no member may be touched after 'delete this'
+  m_on_finish->complete(r);
+  delete this;
+}
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::PrepareRemoteImageRequest<librbd::ImageCtx>;
diff --git a/ceph/src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.h b/ceph/src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.h
new file mode 100644 (file)
index 0000000..9943fd7
--- /dev/null
@@ -0,0 +1,88 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_PREPARE_REMOTE_IMAGE_REQUEST_H
+#define RBD_MIRROR_IMAGE_REPLAYER_PREPARE_REMOTE_IMAGE_REQUEST_H
+
+#include "include/buffer.h"
+#include <string>
+
+namespace librados { struct IoCtx; }
+namespace librbd { struct ImageCtx; }
+
+struct Context;
+struct ContextWQ;
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+// Async request: looks up the remote mirror uuid, then the remote image id.
+template <typename ImageCtxT = librbd::ImageCtx>
+class PrepareRemoteImageRequest {
+public:
+  static PrepareRemoteImageRequest *create(librados::IoCtx &io_ctx,
+                                           const std::string &global_image_id,
+                                           std::string *remote_mirror_uuid,
+                                           std::string *remote_image_id,
+                                           Context *on_finish) {
+    return new PrepareRemoteImageRequest(io_ctx, global_image_id,
+                                         remote_mirror_uuid, remote_image_id,
+                                         on_finish);
+  }
+  // outputs go through the two std::string pointers; on_finish receives r
+  PrepareRemoteImageRequest(librados::IoCtx &io_ctx,
+                           const std::string &global_image_id,
+                           std::string *remote_mirror_uuid,
+                           std::string *remote_image_id,
+                           Context *on_finish)
+    : m_io_ctx(io_ctx), m_global_image_id(global_image_id),
+      m_remote_mirror_uuid(remote_mirror_uuid),
+      m_remote_image_id(remote_image_id),
+      m_on_finish(on_finish) {
+  }
+  // starts the sequence; the request deletes itself once it has finished
+  void send();
+
+private:
+  /**
+   * @verbatim
+   *
+   * <start>
+   *    |
+   *    v
+   * GET_REMOTE_MIRROR_UUID
+   *    |
+   *    v
+   * GET_REMOTE_IMAGE_ID
+   *    |
+   *    v
+   * <finish>
+   *
+   * @endverbatim
+   */
+
+  librados::IoCtx &m_io_ctx;  // reference: not copied by this class
+  std::string m_global_image_id;
+  std::string *m_remote_mirror_uuid;
+  std::string *m_remote_image_id;
+  Context *m_on_finish;
+  // raw reply of the mirror uuid read, decoded in its handler
+  bufferlist m_out_bl;
+
+  void get_remote_mirror_uuid();
+  void handle_get_remote_mirror_uuid(int r);
+
+  void get_remote_image_id();
+  void handle_get_remote_image_id(int r);
+
+  void finish(int r);
+
+};
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::PrepareRemoteImageRequest<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_PREPARE_REMOTE_IMAGE_REQUEST_H
index 83ab5d84f8f5761c29bd7dd57b8bea173af540b9..69b84e1580ddc6c4627bfdb3222fc7a74f5d6ab7 100644 (file)
@@ -73,37 +73,34 @@ void PayloadBase::dump(Formatter *f) const {
 void ImagePayloadBase::encode(bufferlist &bl) const {
   PayloadBase::encode(bl);
   ::encode(global_image_id, bl);
-  ::encode(peer_mirror_uuid, bl);
-  ::encode(peer_image_id, bl);
 }
 
 void ImagePayloadBase::decode(__u8 version, bufferlist::iterator &iter) {
   PayloadBase::decode(version, iter);
   ::decode(global_image_id, iter);
-  ::decode(peer_mirror_uuid, iter);
-  ::decode(peer_image_id, iter);
 }
 
 void ImagePayloadBase::dump(Formatter *f) const {
   PayloadBase::dump(f);
   f->dump_string("global_image_id", global_image_id);
-  f->dump_string("peer_mirror_uuid", peer_mirror_uuid);
-  f->dump_string("peer_image_id", peer_image_id);
 }
 
-void ImageReleasePayload::encode(bufferlist &bl) const {
-  ImagePayloadBase::encode(bl);
-  ::encode(schedule_delete, bl);
+void PeerImageRemovedPayload::encode(bufferlist &bl) const {
+  PayloadBase::encode(bl);
+  ::encode(global_image_id, bl);
+  ::encode(peer_mirror_uuid, bl);
 }
 
-void ImageReleasePayload::decode(__u8 version, bufferlist::iterator &iter) {
-  ImagePayloadBase::decode(version, iter);
-  ::decode(schedule_delete, iter);
+void PeerImageRemovedPayload::decode(__u8 version, bufferlist::iterator &iter) {
+  PayloadBase::decode(version, iter);
+  ::decode(global_image_id, iter);
+  ::decode(peer_mirror_uuid, iter);
 }
 
-void ImageReleasePayload::dump(Formatter *f) const {
-  ImagePayloadBase::dump(f);
-  f->dump_bool("schedule_delete", schedule_delete);
+void PeerImageRemovedPayload::dump(Formatter *f) const {
+  PayloadBase::dump(f);
+  f->dump_string("global_image_id", global_image_id);
+  f->dump_string("peer_mirror_uuid", peer_mirror_uuid);
 }
 
 void SyncPayloadBase::encode(bufferlist &bl) const {
@@ -132,13 +129,13 @@ void UnknownPayload::dump(Formatter *f) const {
 }
 
 void NotifyMessage::encode(bufferlist& bl) const {
-  ENCODE_START(1, 1, bl);
+  ENCODE_START(2, 2, bl);
   boost::apply_visitor(EncodePayloadVisitor(bl), payload);
   ENCODE_FINISH(bl);
 }
 
 void NotifyMessage::decode(bufferlist::iterator& iter) {
-  DECODE_START(1, iter);
+  DECODE_START(2, iter);
 
   uint32_t notify_op;
   ::decode(notify_op, iter);
@@ -151,6 +148,9 @@ void NotifyMessage::decode(bufferlist::iterator& iter) {
   case NOTIFY_OP_IMAGE_RELEASE:
     payload = ImageReleasePayload();
     break;
+  case NOTIFY_OP_PEER_IMAGE_REMOVED:
+    payload = PeerImageRemovedPayload();
+    break;
   case NOTIFY_OP_SYNC_REQUEST:
     payload = SyncRequestPayload();
     break;
@@ -172,11 +172,13 @@ void NotifyMessage::dump(Formatter *f) const {
 
 void NotifyMessage::generate_test_instances(std::list<NotifyMessage *> &o) {
   o.push_back(new NotifyMessage(ImageAcquirePayload()));
-  o.push_back(new NotifyMessage(ImageAcquirePayload(1, "gid", "uuid", "id")));
+  o.push_back(new NotifyMessage(ImageAcquirePayload(1, "gid")));
 
   o.push_back(new NotifyMessage(ImageReleasePayload()));
-  o.push_back(new NotifyMessage(ImageReleasePayload(1, "gid", "uuid", "id",
-                                                    true)));
+  o.push_back(new NotifyMessage(ImageReleasePayload(1, "gid")));
+
+  o.push_back(new NotifyMessage(PeerImageRemovedPayload()));
+  o.push_back(new NotifyMessage(PeerImageRemovedPayload(1, "gid", "uuid")));
 
   o.push_back(new NotifyMessage(SyncRequestPayload()));
   o.push_back(new NotifyMessage(SyncRequestPayload(1, "sync_id")));
@@ -193,6 +195,9 @@ std::ostream &operator<<(std::ostream &out, const NotifyOp &op) {
   case NOTIFY_OP_IMAGE_RELEASE:
     out << "ImageRelease";
     break;
+  case NOTIFY_OP_PEER_IMAGE_REMOVED:
+    out << "PeerImageRemoved";
+    break;
   case NOTIFY_OP_SYNC_REQUEST:
     out << "SyncRequest";
     break;
index 53b333c34afc98da330d99b9e74d50f04cb3d2d1..70af2f79112b4b5af15ff2d3666a3271732d3634 100644 (file)
@@ -19,10 +19,11 @@ namespace mirror {
 namespace instance_watcher {
 
 enum NotifyOp {
-  NOTIFY_OP_IMAGE_ACQUIRE  = 0,
-  NOTIFY_OP_IMAGE_RELEASE  = 1,
-  NOTIFY_OP_SYNC_REQUEST   = 2,
-  NOTIFY_OP_SYNC_START     = 3,
+  NOTIFY_OP_IMAGE_ACQUIRE      = 0,
+  NOTIFY_OP_IMAGE_RELEASE      = 1,
+  NOTIFY_OP_PEER_IMAGE_REMOVED = 2,
+  NOTIFY_OP_SYNC_REQUEST       = 3,
+  NOTIFY_OP_SYNC_START         = 4
 };
 
 struct PayloadBase {
@@ -41,17 +42,12 @@ struct PayloadBase {
 
 struct ImagePayloadBase : public PayloadBase {
   std::string global_image_id;
-  std::string peer_mirror_uuid;
-  std::string peer_image_id;
 
   ImagePayloadBase() : PayloadBase() {
   }
 
-  ImagePayloadBase(uint64_t request_id, const std::string &global_image_id,
-                   const std::string &peer_mirror_uuid,
-                   const std::string &peer_image_id)
-    : PayloadBase(request_id), global_image_id(global_image_id),
-      peer_mirror_uuid(peer_mirror_uuid), peer_image_id(peer_image_id) {
+  ImagePayloadBase(uint64_t request_id, const std::string &global_image_id)
+    : PayloadBase(request_id), global_image_id(global_image_id) {
   }
 
   void encode(bufferlist &bl) const;
@@ -62,31 +58,36 @@ struct ImagePayloadBase : public PayloadBase {
 struct ImageAcquirePayload : public ImagePayloadBase {
   static const NotifyOp NOTIFY_OP = NOTIFY_OP_IMAGE_ACQUIRE;
 
-  ImageAcquirePayload() : ImagePayloadBase() {
+  ImageAcquirePayload() {
   }
-
-  ImageAcquirePayload(uint64_t request_id, const std::string &global_image_id,
-                      const std::string &peer_mirror_uuid,
-                      const std::string &peer_image_id)
-    : ImagePayloadBase(request_id, global_image_id, peer_mirror_uuid,
-                       peer_image_id) {
+  ImageAcquirePayload(uint64_t request_id, const std::string &global_image_id)
+    : ImagePayloadBase(request_id, global_image_id) {
   }
 };
 
 struct ImageReleasePayload : public ImagePayloadBase {
   static const NotifyOp NOTIFY_OP = NOTIFY_OP_IMAGE_RELEASE;
 
-  bool schedule_delete;
-
-  ImageReleasePayload() : ImagePayloadBase(), schedule_delete(false) {
+  ImageReleasePayload() {
   }
+  ImageReleasePayload(uint64_t request_id, const std::string &global_image_id)
+    : ImagePayloadBase(request_id, global_image_id) {
+  }
+};
 
-  ImageReleasePayload(uint64_t request_id, const std::string &global_image_id,
-                      const std::string &peer_mirror_uuid,
-                      const std::string &peer_image_id, bool schedule_delete)
-    : ImagePayloadBase(request_id, global_image_id, peer_mirror_uuid,
-                       peer_image_id),
-      schedule_delete(schedule_delete) {
+struct PeerImageRemovedPayload : public PayloadBase {
+  static const NotifyOp NOTIFY_OP = NOTIFY_OP_PEER_IMAGE_REMOVED;
+
+  std::string global_image_id;
+  std::string peer_mirror_uuid;
+
+  PeerImageRemovedPayload() {
+  }
+  PeerImageRemovedPayload(uint64_t request_id,
+                          const std::string& global_image_id,
+                          const std::string& peer_mirror_uuid)
+    : PayloadBase(request_id),
+      global_image_id(global_image_id), peer_mirror_uuid(peer_mirror_uuid) {
   }
 
   void encode(bufferlist &bl) const;
@@ -144,6 +145,7 @@ struct UnknownPayload {
 
 typedef boost::variant<ImageAcquirePayload,
                        ImageReleasePayload,
+                       PeerImageRemovedPayload,
                        SyncRequestPayload,
                        SyncStartPayload,
                        UnknownPayload> Payload;
index cec8a4deb63470571196a7beaeb2912d52d55f4f..ba8aa7df1fa6142e02b47a0dadd4ea699cc1d7f1 100644 (file)
@@ -10,6 +10,7 @@
 #include <string>
 #include <vector>
 
+#include "include/rados/librados.hpp"
 #include "include/rbd/librbd.hpp"
 
 namespace rbd {
@@ -41,6 +42,25 @@ std::ostream &operator<<(std::ostream &, const ImageId &image_id);
 
 typedef std::set<ImageId> ImageIds;
 
+struct Peer {  // a peer uuid paired with a librados::IoCtx
+  std::string peer_uuid;
+  librados::IoCtx io_ctx;
+  // NOTE(review): the uuid-only ctor is not explicit; io_ctx stays default-built
+  Peer() {
+  }
+  Peer(const std::string &peer_uuid) : peer_uuid(peer_uuid) {
+  }
+  Peer(const std::string &peer_uuid, librados::IoCtx& io_ctx)
+    : peer_uuid(peer_uuid), io_ctx(io_ctx) {
+  }
+  // ordering looks at peer_uuid only, so io_ctx never affects the comparison
+  inline bool operator<(const Peer &rhs) const {
+    return peer_uuid < rhs.peer_uuid;
+  }
+};
+
+typedef std::set<Peer> Peers;
+
 struct peer_t {
   peer_t() = default;
   peer_t(const std::string &uuid, const std::string &cluster_name,
index 5e7f98dcaa4d0b1bc775a50d73853f6832af9e01..a3d3577310ccfe78e83157f0df03147936f14d0d 100644 (file)
@@ -1028,7 +1028,7 @@ static int rbd_nbd(int argc, const char *argv[])
   r = parse_args(args, &err_msg, &cfg);
   if (r == HELP_INFO) {
     usage();
-    return 0;
+    assert(false);
   } else if (r == VERSION_INFO) {
     std::cout << pretty_version_to_str() << std::endl;
     return 0;
@@ -1061,7 +1061,8 @@ static int rbd_nbd(int argc, const char *argv[])
       break;
     default:
       usage();
-      return -EINVAL;
+      assert(false);
+      break;
   }
 
   return 0;
index 5b8d3966131448848a13f1e7bd661ac46d2be18f..7a60e07035aa1eae81efcfc05adb0830f5003c42 100644 (file)
@@ -2283,72 +2283,6 @@ TRACEPOINT_EVENT(librbd, stat_exit,
     )
 )
 
-TRACEPOINT_EVENT(librbd, group_create_enter,
-    TP_ARGS(
-        const char*, pool_name,
-        int64_t, id,
-        const char*, groupname),
-    TP_FIELDS(
-        ctf_string(pool_name, pool_name)
-        ctf_integer(int64_t, id, id)
-        ctf_string(groupname, groupname)
-    )
-)
-
-TRACEPOINT_EVENT(librbd, group_create_exit,
-    TP_ARGS(
-        int, retval),
-    TP_FIELDS(
-        ctf_integer(int, retval, retval)
-    )
-)
-
-TRACEPOINT_EVENT(librbd, group_remove_enter,
-    TP_ARGS(
-        const char*, pool_name,
-        int64_t, id,
-        const char*, groupname),
-    TP_FIELDS(
-        ctf_string(pool_name, pool_name)
-        ctf_integer(int64_t, id, id)
-        ctf_string(groupname, groupname)
-    )
-)
-
-TRACEPOINT_EVENT(librbd, group_remove_exit,
-    TP_ARGS(
-        int, retval),
-    TP_FIELDS(
-        ctf_integer(int, retval, retval)
-    )
-)
-
-TRACEPOINT_EVENT(librbd, group_list_enter,
-    TP_ARGS(
-        const char*, pool_name,
-        int64_t, id),
-    TP_FIELDS(
-        ctf_string(pool_name, pool_name)
-        ctf_integer(int64_t, id, id)
-    )
-)
-
-TRACEPOINT_EVENT(librbd, group_list_entry,
-    TP_ARGS(
-        const char*, name),
-    TP_FIELDS(
-        ctf_string(name, name)
-    )
-)
-
-TRACEPOINT_EVENT(librbd, group_list_exit,
-    TP_ARGS(
-        int, retval),
-    TP_FIELDS(
-        ctf_integer(int, retval, retval)
-    )
-)
-
 TRACEPOINT_EVENT(librbd, update_watch_enter,
     TP_ARGS(
         void*, imagectx,
@@ -2386,117 +2320,3 @@ TRACEPOINT_EVENT(librbd, update_unwatch_exit,
         ctf_integer(int, retval, retval)
     )
 )
-
-TRACEPOINT_EVENT(librbd, group_image_add_enter,
-    TP_ARGS(
-        const char*, pool_name,
-        int64_t, id,
-        const char*, group_name,
-        const char*, image_pool_name,
-        int64_t, image_id,
-        const char*, image_name),
-    TP_FIELDS(
-        ctf_string(pool_name, pool_name)
-        ctf_integer(int64_t, id, id)
-        ctf_string(group_name, group_name)
-        ctf_string(image_pool_name, image_pool_name)
-        ctf_integer(int64_t, image_id, image_id)
-        ctf_string(image_name, image_name)
-    )
-)
-
-TRACEPOINT_EVENT(librbd, group_image_add_exit,
-    TP_ARGS(
-        int, retval),
-    TP_FIELDS(
-        ctf_integer(int, retval, retval)
-    )
-)
-
-TRACEPOINT_EVENT(librbd, group_image_remove_enter,
-    TP_ARGS(
-        const char*, pool_name,
-        int64_t, id,
-        const char*, group_name,
-        const char*, image_pool_name,
-        int64_t, image_id,
-        const char*, image_name),
-    TP_FIELDS(
-        ctf_string(pool_name, pool_name)
-        ctf_integer(int64_t, id, id)
-        ctf_string(group_name, group_name)
-        ctf_string(image_pool_name, image_pool_name)
-        ctf_integer(int64_t, image_id, image_id)
-        ctf_string(image_name, image_name)
-    )
-)
-
-TRACEPOINT_EVENT(librbd, group_image_remove_exit,
-    TP_ARGS(
-        int, retval),
-    TP_FIELDS(
-        ctf_integer(int, retval, retval)
-    )
-)
-
-TRACEPOINT_EVENT(librbd, group_image_remove_by_id_enter,
-    TP_ARGS(
-        const char*, pool_name,
-        int64_t, id,
-        const char*, group_name,
-        const char*, image_pool_name,
-        int64_t, image_ioctx_id,
-        const char*, image_id),
-    TP_FIELDS(
-        ctf_string(pool_name, pool_name)
-        ctf_integer(int64_t, id, id)
-        ctf_string(group_name, group_name)
-        ctf_string(image_pool_name, image_pool_name)
-        ctf_integer(int64_t, image_ioctx_id, image_ioctx_id)
-        ctf_string(image_id, image_id)
-    )
-)
-
-TRACEPOINT_EVENT(librbd, group_image_remove_by_id_exit,
-    TP_ARGS(
-        int, retval),
-    TP_FIELDS(
-        ctf_integer(int, retval, retval)
-    )
-)
-
-TRACEPOINT_EVENT(librbd, group_image_list_enter,
-    TP_ARGS(
-        const char*, pool_name,
-        int64_t, id,
-        const char*, group_name),
-    TP_FIELDS(
-        ctf_string(pool_name, pool_name)
-        ctf_integer(int64_t, id, id)
-        ctf_string(group_name, group_name)
-    )
-)
-
-TRACEPOINT_EVENT(librbd, group_image_list_exit,
-    TP_ARGS(
-           int, retval),
-    TP_FIELDS(
-        ctf_integer(int, retval, retval)
-    )
-)
-
-TRACEPOINT_EVENT(librbd, image_get_group_enter,
-    TP_ARGS(
-        const char*, image_name),
-    TP_FIELDS(
-        ctf_string(image_name, image_name)
-    )
-)
-
-TRACEPOINT_EVENT(librbd, image_get_group_exit,
-    TP_ARGS(
-           int, retval),
-    TP_FIELDS(
-        ctf_integer(int, retval, retval)
-    )
-)
index 240a565d09ecc7f76459873ef595f0cd35eb5d6c..3b03b6e613217f2af1d3377366b36ea7ce705a9a 100644 (file)
@@ -15,5 +15,6 @@ install(FILES
   ceph-radosgw@.service
   ceph-rbd-mirror@.service
   ceph-disk@.service
+  ceph-volume@.service
   rbdmap.service
   DESTINATION ${CMAKE_INSTALL_LIBEXECDIR}/systemd/system)
diff --git a/ceph/systemd/ceph-volume@.service b/ceph/systemd/ceph-volume@.service
new file mode 100644 (file)
index 0000000..c21002c
--- /dev/null
@@ -0,0 +1,14 @@
+[Unit]
+Description=Ceph Volume activation: %i
+After=local-fs.target
+Wants=local-fs.target
+
+[Service]
+Type=oneshot
+KillMode=none
+Environment=CEPH_VOLUME_TIMEOUT=10000
+ExecStart=/bin/sh -c 'timeout $CEPH_VOLUME_TIMEOUT /usr/sbin/ceph-volume-systemd %i'
+TimeoutSec=0
+
+[Install]
+WantedBy=multi-user.target