From f64942e41c1f59e95cdc1205bbe5d32ed6dfd429 Mon Sep 17 00:00:00 2001 From: Alwin Antreich Date: Wed, 6 Feb 2019 09:29:01 +0100 Subject: [PATCH] update source to 12.2.11 Signed-off-by: Alwin Antreich --- Makefile | 4 +- ceph/CMakeLists.txt | 19 +- ceph/PendingReleaseNotes | 33 + ceph/alpine/APKBUILD | 6 +- ceph/ceph.spec | 7 +- ceph/ceph.spec.in | 1 + ceph/debian/changelog | 6 + ceph/debian/control | 2 + .../upgrade/luminous-p2p/% => doc/README.md} | 0 ceph/doc/_ext/edit_on_github.py | 43 + ceph/doc/_static/js/ceph.js | 41 + ceph/doc/_templates/page.html | 21 + ceph/doc/ceph-volume/lvm/zap.rst | 49 +- ceph/doc/cephfs/dirfrags.rst | 7 +- ceph/doc/cephfs/eviction.rst | 6 +- ceph/doc/cephfs/fuse.rst | 4 +- ceph/doc/cephfs/health-messages.rst | 2 +- ceph/doc/cephfs/mds-config-ref.rst | 34 - ceph/doc/conf.py | 14 + ceph/doc/man/8/ceph-volume.rst | 11 + ceph/doc/man/8/crushtool.rst | 8 + ceph/doc/mgr/balancer.rst | 4 + .../configuration/bluestore-config-ref.rst | 160 +++- ceph/doc/rados/operations/add-or-rm-mons.rst | 5 + ceph/doc/rados/operations/crush-map-edits.rst | 293 ++++--- ceph/doc/rados/operations/crush-map.rst | 5 + ceph/doc/rados/operations/user-management.rst | 6 - .../troubleshooting/troubleshooting-mon.rst | 5 +- ceph/doc/radosgw/adminops.rst | 5 +- ceph/doc/radosgw/config-ref.rst | 11 + ceph/doc/radosgw/encryption.rst | 5 + ceph/doc/radosgw/frontends.rst | 29 +- ceph/doc/start/hardware-recommendations.rst | 28 +- ceph/doc/start/quick-ceph-deploy.rst | 2 +- ceph/examples/librados/Makefile | 10 +- ceph/examples/librados/hello_world.readme | 2 +- ceph/install-deps.sh | 1 + .../cephfs/clusters/1-mds-1-client-coloc.yaml | 12 + ceph/qa/cephfs/clusters/1-mds-1-client.yaml | 7 +- .../cephfs/clusters/1-mds-2-client-coloc.yaml | 12 + ceph/qa/cephfs/clusters/1-mds-2-client.yaml | 7 +- ceph/qa/cephfs/clusters/1-mds-3-client.yaml | 15 + .../cephfs/clusters/1-mds-4-client-coloc.yaml | 12 + ceph/qa/cephfs/clusters/1-mds-4-client.yaml | 7 +- ceph/qa/cephfs/clusters/3-mds.yaml | 7 +- ceph/qa/cephfs/clusters/9-mds.yaml | 7 +- ceph/qa/cephfs/clusters/fixed-2-ucephfs.yaml | 4 +- ceph/qa/run-standalone.sh | 3 +- ceph/qa/standalone/ceph-helpers.sh | 14 +- ceph/qa/standalone/scrub/osd-scrub-repair.sh | 61 ++ .../smoke/basic/2-ceph/ceph_ansible.yaml | 2 +- .../tasks/cfuse_workunit_suites_pjd.yaml | 1 + .../clusters/1-mds-4-client-coloc.yaml | 1 + .../clusters/4-remote-clients.yaml | 10 - .../fs/basic_functional/tasks/damage.yaml | 2 + .../tasks/cfuse_workunit_suites_pjd.yaml | 1 + .../clusters/small-cluster.yaml | 2 + .../multiclient/clusters/1-mds-2-client.yaml | 1 + .../multiclient/clusters/1-mds-3-client.yaml | 1 + .../multiclient/clusters/three_clients.yaml | 15 - .../fs/multiclient/clusters/two_clients.yaml | 14 - .../fs/multifs/clusters/2-remote-clients.yaml | 10 - .../tasks/cfuse_workunit_suites_pjd.yaml | 1 + .../thrash/clusters/1-mds-1-client-coloc.yaml | 1 + .../thrash/clusters/mds-1active-1standby.yaml | 10 - .../thrash/msgr-failures/osd-mds-delay.yaml | 2 +- .../tasks/cfuse_workunit_suites_pjd.yaml | 1 + .../tasks/kclient_workunit_suites_pjd.yaml | 1 + .../suites/kcephfs/recovery/tasks/damage.yaml | 2 + .../tasks/cfuse_workunit_suites_pjd.yaml | 1 + .../singleton/all/mon-config-key-caps.yaml | 17 + .../1.1-pg-log-overrides/normal_pg_log.yaml | 1 + .../1.1-pg-log-overrides/short_pg_log.yaml | 6 + .../1.1-pg-log-overrides/normal_pg_log.yaml | 1 + .../1.1-pg-log-overrides/short_pg_log.yaml | 6 + .../1.1-pg-log-overrides/normal_pg_log.yaml | 1 + .../1.1-pg-log-overrides/short_pg_log.yaml | 6 + 
.../2-partial-upgrade/firsthalf.yaml | 5 + .../stress-split/5-finish-upgrade.yaml | 10 + .../luminous-p2p/luminous-p2p-parallel/% | 0 .../luminous-p2p/luminous-p2p-parallel/.qa | 1 + .../point-to-point-upgrade.yaml | 33 + .../luminous-p2p-parallel/supported | 1 + .../luminous-p2p/luminous-p2p-stress-split/% | 0 .../luminous-p2p-stress-split/0-cluster/+ | 0 .../0-cluster}/.qa | 0 .../0-cluster/openstack.yaml | 6 + .../0-cluster/start.yaml | 20 + .../1-ceph-install/luminous.yaml | 19 + .../1.1-pg-log-overrides/normal_pg_log.yaml | 1 + .../1.1-pg-log-overrides/short_pg_log.yaml | 6 + .../2-partial-upgrade/.qa | 1 + .../2-partial-upgrade/firsthalf.yaml | 17 + .../luminous-p2p-stress-split/3-thrash/.qa | 1 + .../3-thrash/default.yaml | 25 + .../luminous-p2p-stress-split/4-workload/+ | 0 .../luminous-p2p-stress-split/4-workload/.qa | 1 + .../4-workload/radosbench.yaml | 40 + .../4-workload/rbd-cls.yaml | 10 + .../4-workload/rbd-import-export.yaml | 12 + .../4-workload/rbd_api.yaml | 10 + .../4-workload/readwrite.yaml | 16 + .../4-workload/snaps-few-objects.yaml | 18 + .../5-finish-upgrade.yaml | 14 + .../7-final-workload/+ | 0 .../7-final-workload/.qa | 1 + .../7-final-workload/rbd-python.yaml | 9 + .../7-final-workload/rgw-swift.yaml | 11 + .../7-final-workload/snaps-many-objects.yaml | 16 + .../luminous-p2p-stress-split/supported | 1 + .../thrashosds-health.yaml | 1 + ceph/qa/suites/upgrade/luminous-p2p/supported | 1 - ceph/qa/tasks/cephfs/filesystem.py | 52 +- ceph/qa/tasks/cephfs/fuse_mount.py | 39 +- ceph/qa/tasks/cephfs/kernel_mount.py | 14 +- ceph/qa/tasks/cephfs/test_client_limits.py | 6 +- ceph/qa/tasks/cephfs/test_client_recovery.py | 28 +- ceph/qa/tasks/cephfs/test_damage.py | 71 +- ceph/qa/tasks/cephfs/test_data_scan.py | 4 +- ceph/qa/tasks/cephfs/test_flush.py | 4 +- ceph/qa/tasks/cephfs/test_forward_scrub.py | 4 +- ceph/qa/tasks/cephfs/test_fragment.py | 1 - .../qa/tasks/cephfs/test_journal_migration.py | 5 +- ceph/qa/tasks/cephfs/test_journal_repair.py | 10 +- ceph/qa/tasks/cephfs/test_misc.py | 92 +- ceph/qa/tasks/cephfs/test_recovery_pool.py | 29 +- ceph/qa/tasks/qemu.py | 4 +- ceph/qa/tasks/thrashosds-health.yaml | 2 +- ceph/qa/tasks/workunit.py | 2 +- .../ceph-tests/ceph-admin-commands.sh | 7 +- ceph/qa/workunits/mon/test_config_key_caps.sh | 201 +++++ .../qa/workunits/rados/test_librados_build.sh | 4 +- ceph/qa/workunits/rbd/run_devstack_tempest.sh | 4 +- .../suites/cephfs_journal_tool_smoke.sh | 2 +- ceph/run-make-check.sh | 90 +- ceph/src/.git_version | 4 +- ceph/src/auth/AuthSessionHandler.cc | 4 + ceph/src/ceph-create-keys | 12 +- ceph/src/ceph-volume/ceph_volume/api/lvm.py | 3 + .../ceph_volume/devices/lvm/activate.py | 8 +- .../ceph_volume/devices/lvm/batch.py | 11 +- .../devices/lvm/strategies/bluestore.py | 59 +- .../devices/lvm/strategies/filestore.py | 64 +- .../devices/lvm/strategies/strategies.py | 50 ++ .../ceph_volume/devices/lvm/zap.py | 288 +++++-- .../ceph-volume/ceph_volume/inventory/main.py | 6 +- .../ceph-volume/ceph_volume/tests/conftest.py | 4 +- .../tests/devices/lvm/test_batch.py | 7 + .../ceph_volume/tests/devices/lvm/test_zap.py | 153 ++++ .../ceph_volume/tests/devices/test_zap.py | 4 +- .../bluestore/mixed-type-dmcrypt/test_zap.yml | 1 + .../centos7/bluestore/mixed-type/test_zap.yml | 1 + .../single-type-dmcrypt/test_zap.yml | 1 + .../bluestore/single-type/test_zap.yml | 1 + .../filestore/mixed-type-dmcrypt/test_zap.yml | 1 + .../centos7/filestore/mixed-type/test_zap.yml | 1 + .../single-type-dmcrypt/test_zap.yml | 1 + 
.../filestore/single-type/test_zap.yml | 1 + .../functional/batch/playbooks/test_zap.yml | 31 + .../tests/functional/batch/tox.ini | 3 + .../single-type-dmcrypt/test_zap.yml | 1 + .../xenial/bluestore/single-type/test_zap.yml | 1 + .../single-type-dmcrypt/test_zap.yml | 1 + .../xenial/filestore/single-type/test_zap.yml | 1 + .../lvm/centos7/bluestore/dmcrypt/test.yml | 11 + .../lvm/centos7/filestore/dmcrypt/test.yml | 31 + .../lvm/playbooks/test_bluestore.yml | 42 + .../lvm/playbooks/test_filestore.yml | 49 ++ .../lvm/xenial/bluestore/dmcrypt/test.yml | 11 + .../lvm/xenial/filestore/dmcrypt/test.yml | 31 + .../tests/functional/playbooks/deploy.yml | 8 +- .../ceph_volume/tests/util/test_device.py | 161 +++- .../ceph_volume/tests/util/test_disk.py | 50 ++ .../ceph_volume/tests/util/test_encryption.py | 18 + .../ceph_volume/tests/util/test_util.py | 36 +- .../ceph-volume/ceph_volume/util/__init__.py | 23 +- .../ceph_volume/util/arg_validators.py | 11 +- .../ceph-volume/ceph_volume/util/device.py | 87 +- ceph/src/ceph-volume/ceph_volume/util/disk.py | 67 +- .../ceph_volume/util/encryption.py | 4 +- ceph/src/client/Client.cc | 32 +- ceph/src/cls/lock/cls_lock.cc | 71 +- ceph/src/cls/lock/cls_lock_client.cc | 21 +- ceph/src/cls/lock/cls_lock_client.h | 43 +- ceph/src/cls/lock/cls_lock_ops.cc | 2 +- ceph/src/cls/lock/cls_lock_ops.h | 3 + ceph/src/cls/lock/cls_lock_types.h | 29 +- ceph/src/cls/rgw/cls_rgw.cc | 8 +- ceph/src/cls/rgw/cls_rgw_client.cc | 23 +- ceph/src/cls/rgw/cls_rgw_client.h | 36 +- ceph/src/cls/rgw/cls_rgw_types.h | 21 + ceph/src/common/Cond.h | 100 +-- ceph/src/common/CondVar.h | 109 +++ ceph/src/common/TrackedOp.cc | 4 +- ceph/src/common/WeightedPriorityQueue.h | 21 +- ceph/src/common/buffer.cc | 26 + ceph/src/common/ceph_context.cc | 16 +- ceph/src/common/cmdparse.h | 53 +- ceph/src/common/config.cc | 140 +-- ceph/src/common/config.h | 59 +- ceph/src/common/hobject.h | 31 +- ceph/src/common/legacy_config_opts.h | 16 +- ceph/src/common/options.cc | 82 +- ceph/src/crush/CrushCompiler.cc | 10 +- ceph/src/crush/CrushTester.cc | 77 ++ ceph/src/crush/CrushTester.h | 2 + ceph/src/crush/CrushWrapper.cc | 424 ++++++++- ceph/src/crush/CrushWrapper.h | 14 + ceph/src/include/buffer.h | 1 + ceph/src/include/ceph_features.h | 2 + ceph/src/include/ceph_fs.h | 4 +- ceph/src/include/cephfs/libcephfs.h | 2 + ceph/src/include/config-h.in.cmake | 3 + ceph/src/include/rados.h | 4 +- ceph/src/include/rados/librados.hpp | 17 +- ceph/src/librados/librados.cc | 6 +- ceph/src/librbd/librbd.cc | 1 + ceph/src/librbd/operation/ResizeRequest.cc | 1 + ceph/src/mds/CInode.cc | 13 +- ceph/src/mds/CInode.h | 1 + ceph/src/mds/FSMap.cc | 8 - ceph/src/mds/Locker.cc | 16 +- ceph/src/mds/MDBalancer.cc | 2 +- ceph/src/mds/MDCache.cc | 201 +++-- ceph/src/mds/MDCache.h | 11 +- ceph/src/mds/MDLog.cc | 8 +- ceph/src/mds/MDSDaemon.cc | 18 +- ceph/src/mds/MDSMap.h | 16 + ceph/src/mds/MDSRank.cc | 645 ++++++++++---- ceph/src/mds/MDSRank.h | 10 +- ceph/src/mds/PurgeQueue.cc | 83 +- ceph/src/mds/PurgeQueue.h | 5 +- ceph/src/mds/Server.cc | 218 +++-- ceph/src/mds/Server.h | 9 +- ceph/src/mds/SessionMap.h | 1 + ceph/src/mds/StrayManager.cc | 18 +- ceph/src/mgr/DaemonServer.cc | 7 +- ceph/src/mgr/DaemonState.cc | 13 +- ceph/src/mon/AuthMonitor.cc | 34 +- ceph/src/mon/ConfigKeyService.cc | 6 +- ceph/src/mon/FSCommands.cc | 72 +- ceph/src/mon/LogMonitor.cc | 32 +- ceph/src/mon/MDSMonitor.cc | 22 +- ceph/src/mon/MgrMonitor.cc | 43 +- ceph/src/mon/MonCap.cc | 8 +- ceph/src/mon/MonCommands.h | 6 +- ceph/src/mon/Monitor.cc | 20 +- 
ceph/src/mon/MonmapMonitor.cc | 39 +- ceph/src/mon/OSDMonitor.cc | 407 +++++---- ceph/src/os/bluestore/BlueFS.cc | 24 + ceph/src/os/bluestore/BlueFS.h | 7 + ceph/src/os/bluestore/BlueStore.cc | 91 +- ceph/src/os/bluestore/BlueStore.h | 10 +- ceph/src/os/bluestore/bluestore_tool.cc | 81 +- ceph/src/os/filestore/LFNIndex.h | 2 +- ceph/src/osd/OSD.cc | 66 +- ceph/src/osd/OSD.h | 3 +- ceph/src/osd/OSDMap.cc | 48 +- ceph/src/osd/OSDMap.h | 7 +- ceph/src/osd/PG.cc | 52 +- ceph/src/osd/PG.h | 7 +- ceph/src/osd/PGLog.cc | 41 +- ceph/src/osd/PGLog.h | 3 +- ceph/src/osd/PrimaryLogPG.cc | 95 ++- ceph/src/osd/PrimaryLogPG.h | 1 + ceph/src/osd/osd_types.h | 11 +- ceph/src/osdc/Journaler.cc | 10 +- ceph/src/osdc/ObjectCacher.cc | 29 +- ceph/src/osdc/ObjectCacher.h | 1 + ceph/src/osdc/Objecter.cc | 11 +- ceph/src/pybind/ceph_volume_client.py | 26 +- ceph/src/pybind/mgr/balancer/module.py | 24 +- ceph/src/pybind/mgr/influx/module.py | 2 + ceph/src/pybind/mgr/prometheus/module.py | 29 +- ceph/src/pybind/mgr/restful/common.py | 2 +- ceph/src/pybind/mgr/restful/module.py | 6 +- ceph/src/pybind/mgr/status/module.py | 4 +- ceph/src/pybind/rbd/rbd.pyx | 1 + ceph/src/rgw/CMakeLists.txt | 22 +- ceph/src/rgw/librgw.cc | 5 + ceph/src/rgw/rgw_admin.cc | 65 +- ceph/src/rgw/rgw_asio_client.cc | 58 +- ceph/src/rgw/rgw_asio_client.h | 22 +- ceph/src/rgw/rgw_asio_frontend.cc | 386 ++++++--- ceph/src/rgw/rgw_auth.cc | 5 + ceph/src/rgw/rgw_auth_s3.cc | 3 +- ceph/src/rgw/rgw_bucket.cc | 219 ++++- ceph/src/rgw/rgw_bucket.h | 11 +- ceph/src/rgw/rgw_common.cc | 32 +- ceph/src/rgw/rgw_common.h | 4 + ceph/src/rgw/rgw_cr_rados.cc | 2 +- ceph/src/rgw/rgw_crypt.cc | 8 +- ceph/src/rgw/rgw_data_sync.cc | 4 + ceph/src/rgw/rgw_file.h | 7 + ceph/src/rgw/rgw_iam_policy.cc | 2 +- ceph/src/rgw/rgw_metadata.cc | 4 +- ceph/src/rgw/rgw_metadata.h | 4 +- ceph/src/rgw/rgw_op.cc | 27 +- ceph/src/rgw/rgw_op.h | 30 + ceph/src/rgw/rgw_quota.cc | 23 + ceph/src/rgw/rgw_quota.h | 4 + ceph/src/rgw/rgw_rados.cc | 285 +++++-- ceph/src/rgw/rgw_rados.h | 24 +- ceph/src/rgw/rgw_reshard.cc | 513 +++++++---- ceph/src/rgw/rgw_reshard.h | 96 ++- ceph/src/rgw/rgw_rest.cc | 15 +- ceph/src/rgw/rgw_rest_s3.cc | 5 +- ceph/src/rgw/rgw_rest_swift.cc | 2 + ceph/src/rgw/rgw_rest_user.cc | 42 +- ceph/src/rgw/rgw_sync_log_trim.cc | 73 +- ceph/src/rgw/rgw_sync_module_es.cc | 58 +- ceph/src/rgw/rgw_user.cc | 19 +- ceph/src/test/cli/crushtool/crush-classes/a | Bin 0 -> 2358 bytes ceph/src/test/cli/crushtool/crush-classes/b | Bin 0 -> 20656 bytes .../test/cli/crushtool/crush-classes/beesly | Bin 0 -> 64806 bytes ceph/src/test/cli/crushtool/crush-classes/c | Bin 0 -> 8801 bytes ceph/src/test/cli/crushtool/crush-classes/d | Bin 0 -> 3657 bytes ceph/src/test/cli/crushtool/crush-classes/e | Bin 0 -> 7094 bytes ceph/src/test/cli/crushtool/crush-classes/f | Bin 0 -> 61002 bytes .../src/test/cli/crushtool/crush-classes/flax | Bin 0 -> 8184 bytes ceph/src/test/cli/crushtool/crush-classes/g | Bin 0 -> 43071 bytes .../src/test/cli/crushtool/crush-classes/gabe | Bin 0 -> 61114 bytes .../test/cli/crushtool/crush-classes/gabe2 | Bin 0 -> 61002 bytes ceph/src/test/cli/crushtool/help.t | 9 + ceph/src/test/cli/crushtool/reclassify.t | 588 +++++++++++++ ceph/src/test/cli/radosgw-admin/help.t | 2 + ceph/src/test/cls_lock/test_cls_lock.cc | 178 +++- ceph/src/test/cls_rgw/test_cls_rgw.cc | 12 +- ceph/src/test/compressor/CMakeLists.txt | 2 +- ceph/src/test/encoding/readable.sh | 6 + ceph/src/test/librados/aio.cc | 805 +++++++----------- ceph/src/test/librados/lock.cc | 16 +- 
.../librados_test_stub/LibradosTestStub.cc | 6 + ceph/src/test/objectstore/store_test.cc | 67 ++ ceph/src/test/osd/TestOSDMap.cc | 137 +++ ceph/src/test/rgw/rgw_multi/multisite.py | 5 +- ceph/src/test/rgw/rgw_multi/tests.py | 33 +- ceph/src/test/rgw/test_rgw_iam_policy.cc | 4 +- ceph/src/tools/cephfs/JournalTool.cc | 57 +- ceph/src/tools/cephfs/JournalTool.h | 11 + ceph/src/tools/cephfs/RoleSelector.cc | 5 +- ceph/src/tools/cephfs/RoleSelector.h | 3 +- ceph/src/tools/crushtool.cc | 106 ++- ceph/src/tools/rados/rados.cc | 8 +- ceph/src/tools/rbd_mirror/ImageReplayer.cc | 4 +- 345 files changed, 9014 insertions(+), 2848 deletions(-) rename ceph/{qa/suites/upgrade/luminous-p2p/% => doc/README.md} (100%) create mode 100644 ceph/doc/_ext/edit_on_github.py create mode 100644 ceph/doc/_static/js/ceph.js create mode 100644 ceph/doc/_templates/page.html create mode 100644 ceph/qa/cephfs/clusters/1-mds-1-client-coloc.yaml create mode 100644 ceph/qa/cephfs/clusters/1-mds-2-client-coloc.yaml create mode 100644 ceph/qa/cephfs/clusters/1-mds-3-client.yaml create mode 100644 ceph/qa/cephfs/clusters/1-mds-4-client-coloc.yaml create mode 120000 ceph/qa/suites/fs/basic_functional/clusters/1-mds-4-client-coloc.yaml delete mode 100644 ceph/qa/suites/fs/basic_functional/clusters/4-remote-clients.yaml create mode 120000 ceph/qa/suites/fs/multiclient/clusters/1-mds-2-client.yaml create mode 120000 ceph/qa/suites/fs/multiclient/clusters/1-mds-3-client.yaml delete mode 100644 ceph/qa/suites/fs/multiclient/clusters/three_clients.yaml delete mode 100644 ceph/qa/suites/fs/multiclient/clusters/two_clients.yaml delete mode 100644 ceph/qa/suites/fs/multifs/clusters/2-remote-clients.yaml create mode 120000 ceph/qa/suites/fs/thrash/clusters/1-mds-1-client-coloc.yaml delete mode 100644 ceph/qa/suites/fs/thrash/clusters/mds-1active-1standby.yaml create mode 100644 ceph/qa/suites/rados/singleton/all/mon-config-key-caps.yaml create mode 100644 ceph/qa/suites/upgrade/jewel-x/parallel/1.1-pg-log-overrides/normal_pg_log.yaml create mode 100644 ceph/qa/suites/upgrade/jewel-x/parallel/1.1-pg-log-overrides/short_pg_log.yaml create mode 100644 ceph/qa/suites/upgrade/jewel-x/stress-split-erasure-code/1.1-pg-log-overrides/normal_pg_log.yaml create mode 100644 ceph/qa/suites/upgrade/jewel-x/stress-split-erasure-code/1.1-pg-log-overrides/short_pg_log.yaml create mode 100644 ceph/qa/suites/upgrade/jewel-x/stress-split/1.1-pg-log-overrides/normal_pg_log.yaml create mode 100644 ceph/qa/suites/upgrade/jewel-x/stress-split/1.1-pg-log-overrides/short_pg_log.yaml create mode 100644 ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-parallel/% create mode 120000 ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-parallel/.qa rename ceph/qa/suites/upgrade/luminous-p2p/{ => luminous-p2p-parallel}/point-to-point-upgrade.yaml (85%) create mode 120000 ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-parallel/supported create mode 100644 ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/% create mode 100644 ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/0-cluster/+ rename ceph/qa/suites/upgrade/luminous-p2p/{ => luminous-p2p-stress-split/0-cluster}/.qa (100%) create mode 100644 ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/0-cluster/openstack.yaml create mode 100644 ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/0-cluster/start.yaml create mode 100644 ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/1-ceph-install/luminous.yaml create mode 100644 
ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/1.1-pg-log-overrides/normal_pg_log.yaml create mode 100644 ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/1.1-pg-log-overrides/short_pg_log.yaml create mode 120000 ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/2-partial-upgrade/.qa create mode 100644 ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/2-partial-upgrade/firsthalf.yaml create mode 120000 ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/3-thrash/.qa create mode 100644 ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/3-thrash/default.yaml create mode 100644 ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/4-workload/+ create mode 120000 ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/4-workload/.qa create mode 100644 ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/4-workload/radosbench.yaml create mode 100644 ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/4-workload/rbd-cls.yaml create mode 100644 ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/4-workload/rbd-import-export.yaml create mode 100644 ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/4-workload/rbd_api.yaml create mode 100644 ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/4-workload/readwrite.yaml create mode 100644 ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/4-workload/snaps-few-objects.yaml create mode 100644 ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/5-finish-upgrade.yaml create mode 100644 ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/7-final-workload/+ create mode 120000 ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/7-final-workload/.qa create mode 100644 ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/7-final-workload/rbd-python.yaml create mode 100644 ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/7-final-workload/rgw-swift.yaml create mode 100644 ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/7-final-workload/snaps-many-objects.yaml create mode 120000 ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/supported create mode 120000 ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/thrashosds-health.yaml delete mode 120000 ceph/qa/suites/upgrade/luminous-p2p/supported create mode 100755 ceph/qa/workunits/mon/test_config_key_caps.sh create mode 100644 ceph/src/ceph-volume/ceph_volume/devices/lvm/strategies/strategies.py create mode 100644 ceph/src/ceph-volume/ceph_volume/tests/devices/lvm/test_zap.py create mode 120000 ceph/src/ceph-volume/ceph_volume/tests/functional/batch/centos7/bluestore/mixed-type-dmcrypt/test_zap.yml create mode 120000 ceph/src/ceph-volume/ceph_volume/tests/functional/batch/centos7/bluestore/mixed-type/test_zap.yml create mode 120000 ceph/src/ceph-volume/ceph_volume/tests/functional/batch/centos7/bluestore/single-type-dmcrypt/test_zap.yml create mode 120000 ceph/src/ceph-volume/ceph_volume/tests/functional/batch/centos7/bluestore/single-type/test_zap.yml create mode 120000 ceph/src/ceph-volume/ceph_volume/tests/functional/batch/centos7/filestore/mixed-type-dmcrypt/test_zap.yml create mode 120000 ceph/src/ceph-volume/ceph_volume/tests/functional/batch/centos7/filestore/mixed-type/test_zap.yml create mode 120000 ceph/src/ceph-volume/ceph_volume/tests/functional/batch/centos7/filestore/single-type-dmcrypt/test_zap.yml create mode 120000 
ceph/src/ceph-volume/ceph_volume/tests/functional/batch/centos7/filestore/single-type/test_zap.yml create mode 100644 ceph/src/ceph-volume/ceph_volume/tests/functional/batch/playbooks/test_zap.yml create mode 120000 ceph/src/ceph-volume/ceph_volume/tests/functional/batch/xenial/bluestore/single-type-dmcrypt/test_zap.yml create mode 120000 ceph/src/ceph-volume/ceph_volume/tests/functional/batch/xenial/bluestore/single-type/test_zap.yml create mode 120000 ceph/src/ceph-volume/ceph_volume/tests/functional/batch/xenial/filestore/single-type-dmcrypt/test_zap.yml create mode 120000 ceph/src/ceph-volume/ceph_volume/tests/functional/batch/xenial/filestore/single-type/test_zap.yml create mode 100644 ceph/src/common/CondVar.h create mode 100644 ceph/src/test/cli/crushtool/crush-classes/a create mode 100644 ceph/src/test/cli/crushtool/crush-classes/b create mode 100644 ceph/src/test/cli/crushtool/crush-classes/beesly create mode 100644 ceph/src/test/cli/crushtool/crush-classes/c create mode 100644 ceph/src/test/cli/crushtool/crush-classes/d create mode 100644 ceph/src/test/cli/crushtool/crush-classes/e create mode 100644 ceph/src/test/cli/crushtool/crush-classes/f create mode 100644 ceph/src/test/cli/crushtool/crush-classes/flax create mode 100644 ceph/src/test/cli/crushtool/crush-classes/g create mode 100644 ceph/src/test/cli/crushtool/crush-classes/gabe create mode 100644 ceph/src/test/cli/crushtool/crush-classes/gabe2 mode change 100755 => 100644 ceph/src/test/cli/crushtool/help.t create mode 100644 ceph/src/test/cli/crushtool/reclassify.t diff --git a/Makefile b/Makefile index 8bb42e96f..505198c63 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ -RELEASE=5.2 +RELEASE=5.3 PACKAGE=ceph -VER=12.2.10 +VER=12.2.11 DEBREL=pve1 SRCDIR=ceph diff --git a/ceph/CMakeLists.txt b/ceph/CMakeLists.txt index 35c193936..5403de8f4 100644 --- a/ceph/CMakeLists.txt +++ b/ceph/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 2.8.11) project(ceph) -set(VERSION 12.2.10) +set(VERSION 12.2.11) if(POLICY CMP0046) # Tweak policies (this one disables "missing" dependency warning) @@ -367,6 +367,8 @@ endif() option(WITH_RADOSGW "Rados Gateway is enabled" ON) option(WITH_RADOSGW_FCGI_FRONTEND "Rados Gateway's FCGI frontend is enabled" OFF) option(WITH_RADOSGW_BEAST_FRONTEND "Rados Gateway's Beast frontend is enabled" ON) +option(WITH_RADOSGW_BEAST_OPENSSL "Rados Gateway's Beast frontend uses OpenSSL" ON) + if(WITH_RADOSGW) find_package(EXPAT REQUIRED) if(WITH_RADOSGW_FCGI_FRONTEND) @@ -376,14 +378,7 @@ if(WITH_RADOSGW) message(WARNING "disabling WITH_RADOSGW_BEAST_FRONTEND, which depends on WITH_BOOST_CONTEXT") set(WITH_RADOSGW_BEAST_FRONTEND OFF) endif() -endif(WITH_RADOSGW) - -if (WITH_RADOSGW) - if (NOT DEFINED OPENSSL_FOUND) - message(STATUS "Looking for openssl anyways, because radosgw selected") - find_package(OpenSSL) - endif() # https://curl.haxx.se/docs/install.html mentions the # configure flags for various ssl backends execute_process( @@ -396,7 +391,13 @@ if (WITH_RADOSGW) if (CURL_CONFIG_ERRORS) message(WARNING "unable to run curl-config; rgw cannot make ssl requests to external systems reliably") endif() - find_package(OpenSSL) + + if (WITH_RADOSGW_BEAST_FRONTEND AND WITH_RADOSGW_BEAST_OPENSSL) + find_package(OpenSSL REQUIRED) + else() + find_package(OpenSSL) + endif() + if (OPENSSL_FOUND) if (NOT NO_CURL_SSL_LINK) message(STATUS "libcurl is linked with openssl: explicitly setting locks") diff --git a/ceph/PendingReleaseNotes b/ceph/PendingReleaseNotes index 00ee957e0..b75c79fb1 100644 --- 
a/ceph/PendingReleaseNotes +++ b/ceph/PendingReleaseNotes @@ -1,3 +1,13 @@ +>= 12.2.11 +---------- +* `cephfs-journal-tool` makes rank argument (--rank) mandatory. Rank is + of format `filesystem:rank`, where `filesystem` is the cephfs filesystem + and `rank` is the MDS rank on which the operation is to be executed. To + operate on all ranks, use `all` or `*` as the rank specifier. Note that, + operations that dump journal information to file will now dump to per-rank + suffixed dump files. Importing journal information from dump files is + disallowed if operation is targeted for all ranks. + >= 12.1.2 --------- * When running 'df' on a CephFS filesystem comprising exactly one data pool, @@ -122,3 +132,26 @@ a clean upgrade path is added to the pg log hard limit patches. See also: http://tracker.ceph.com/issues/36686 + +12.2.11 ------- + +* The default memory utilization for the mons has been increased + somewhat. Rocksdb now uses 512 MB of RAM by default, which should + be sufficient for small to medium-sized clusters; large clusters + should tune this up. Also, the ``mon_osd_cache_size`` has been + increased from 10 OSDMaps to 500, which will translate to an + additional 500 MB to 1 GB of RAM for large clusters, and much less + for small clusters. + +* New CephFS file system attributes session_timeout and session_autoclose + are configurable via `ceph fs set`. The MDS config options + mds_session_timeout, mds_session_autoclose, and mds_max_file_size are now + obsolete. + +* This release fixes the pg log hard limit bug (https://tracker.ceph.com/issues/23979). + A flag called pglog_hardlimit has been introduced. It is off by default. + This flag enables the feature that limits the length of the pg log. Users should run + 'ceph osd set pglog_hardlimit' after completely upgrading to 12.2.11. Once all the OSDs + have this flag set, the length of the pg log will be capped by a hard limit. We do not + recommend unsetting this flag beyond this point. diff --git a/ceph/alpine/APKBUILD b/ceph/alpine/APKBUILD index 26f824c7a..220346e45 100644 --- a/ceph/alpine/APKBUILD +++ b/ceph/alpine/APKBUILD @@ -1,7 +1,7 @@ # Contributor: John Coyle # Maintainer: John Coyle pkgname=ceph -pkgver=12.2.10 +pkgver=12.2.11 pkgrel=0 pkgdesc="Ceph is a distributed object store and file system" pkgusers="ceph" @@ -63,7 +63,7 @@ makedepends=" xmlstarlet yasm " -source="ceph-12.2.10.tar.bz2" +source="ceph-12.2.11.tar.bz2" subpackages=" $pkgname-base $pkgname-common @@ -116,7 +116,7 @@ _sysconfdir=/etc _udevrulesdir=/etc/udev/rules.d _python_sitelib=/usr/lib/python2.7/site-packages -builddir=$srcdir/ceph-12.2.10 +builddir=$srcdir/ceph-12.2.11 build() { export CEPH_BUILD_VIRTUALENV=$builddir diff --git a/ceph/ceph.spec b/ceph/ceph.spec index 94d44b690..d10206738 100644 --- a/ceph/ceph.spec +++ b/ceph/ceph.spec @@ -61,7 +61,7 @@ # main package definition ################################################################################# Name: ceph -Version: 12.2.10 +Version: 12.2.11 Release: 0%{?dist} %if 0%{?fedora} || 0%{?rhel} Epoch: 2 @@ -77,7 +77,7 @@ License: LGPL-2.1 and CC-BY-SA-3.0 and GPL-2.0 and BSL-1.0 and BSD-3-Clause and Group: System/Filesystems %endif URL: http://ceph.com/ -Source0: http://ceph.com/download/ceph-12.2.10.tar.bz2 +Source0: http://ceph.com/download/ceph-12.2.11.tar.bz2 %if 0%{?suse_version} %if 0%{?is_opensuse} ExclusiveArch: x86_64 aarch64 ppc64 ppc64le @@ -788,7 +788,7 @@ python-rbd, python-rgw or python-cephfs instead. 
# common ################################################################################# %prep -%autosetup -p1 -n ceph-12.2.10 +%autosetup -p1 -n ceph-12.2.11 %build %if 0%{with cephfs_java} @@ -806,6 +806,7 @@ export RPM_OPT_FLAGS=`echo $RPM_OPT_FLAGS | sed -e 's/i386/i486/'` export CPPFLAGS="$java_inc" export CFLAGS="$RPM_OPT_FLAGS" export CXXFLAGS="$RPM_OPT_FLAGS" +export LDFLAGS="$RPM_LD_FLAGS" env | sort diff --git a/ceph/ceph.spec.in b/ceph/ceph.spec.in index d708aea33..fa34ade2d 100644 --- a/ceph/ceph.spec.in +++ b/ceph/ceph.spec.in @@ -806,6 +806,7 @@ export RPM_OPT_FLAGS=`echo $RPM_OPT_FLAGS | sed -e 's/i386/i486/'` export CPPFLAGS="$java_inc" export CFLAGS="$RPM_OPT_FLAGS" export CXXFLAGS="$RPM_OPT_FLAGS" +export LDFLAGS="$RPM_LD_FLAGS" env | sort diff --git a/ceph/debian/changelog b/ceph/debian/changelog index 2b61ec882..eaed6bfbb 100644 --- a/ceph/debian/changelog +++ b/ceph/debian/changelog @@ -1,3 +1,9 @@ +ceph (12.2.11-1) stable; urgency=medium + + * New upstream release + + -- Ceph Release Team Wed, 30 Jan 2019 15:51:24 +0000 + ceph (12.2.10-1) stable; urgency=medium * New upstream release diff --git a/ceph/debian/control b/ceph/debian/control index 6d01e3115..65c29ed0d 100644 --- a/ceph/debian/control +++ b/ceph/debian/control @@ -402,11 +402,13 @@ Replaces: ceph (<< 10), ceph-test (<< 9.0.3-1646), librbd1 (<< 0.92-1238), python-ceph (<< 0.92-1223), + radosgw (<< 12.0.3) Breaks: ceph (<< 10), ceph-fs-common (<< 11.0), ceph-test (<< 9.0.3-1646), librbd1 (<< 0.92-1238), python-ceph (<< 0.92-1223), + radosgw (<< 12.0.3) Suggests: ceph-base (= ${binary:Version}), ceph-mds (= ${binary:Version}), Description: common utilities to mount and interact with a ceph storage cluster diff --git a/ceph/qa/suites/upgrade/luminous-p2p/% b/ceph/doc/README.md similarity index 100% rename from ceph/qa/suites/upgrade/luminous-p2p/% rename to ceph/doc/README.md diff --git a/ceph/doc/_ext/edit_on_github.py b/ceph/doc/_ext/edit_on_github.py new file mode 100644 index 000000000..290f4b424 --- /dev/null +++ b/ceph/doc/_ext/edit_on_github.py @@ -0,0 +1,43 @@ +""" +Adapted from https://gist.github.com/mgedmin/6052926 + +Sphinx extension to add ReadTheDocs-style "Edit on GitHub" links to the +sidebar. 
+ +Loosely based on https://github.com/astropy/astropy/pull/347 +""" + +import os +import warnings + + +__licence__ = 'BSD (3 clause)' + + +def get_github_url(app, view, path): + return 'https://github.com/{project}/{view}/{branch}/doc/{path}'.format( + project=app.config.edit_on_github_project, + view=view, + branch=app.config.edit_on_github_branch, + path=path) + + +def html_page_context(app, pagename, templatename, context, doctree): + if templatename != 'page.html': + return + + if not app.config.edit_on_github_project: + warnings.warn("edit_on_github_project not specified") + return + + path = os.path.relpath(doctree.get('source'), app.builder.srcdir) + show_url = get_github_url(app, 'blob', path) + edit_url = get_github_url(app, 'edit', path) + + context['show_on_github_url'] = show_url + context['edit_on_github_url'] = edit_url + +def setup(app): + app.add_config_value('edit_on_github_project', '', True) + app.add_config_value('edit_on_github_branch', 'master', True) + app.connect('html-page-context', html_page_context) diff --git a/ceph/doc/_static/js/ceph.js b/ceph/doc/_static/js/ceph.js new file mode 100644 index 000000000..61f95fb6a --- /dev/null +++ b/ceph/doc/_static/js/ceph.js @@ -0,0 +1,41 @@ +$(function() { + var releases_url = "http://docs.ceph.com/docs/master/releases.json"; + + function show_edit(branch, data) { + if (branch) { + if (branch === "master") { + $("#dev-warning").show(); + return true; + } + if (data && data.releases && branch in data.releases) { + var eol = ("actual_eol" in data.releases[branch]); + if (eol) { + $("#eol-warning").show(); + } + return !eol; + } + } + $("#dev-warning").show(); + return false; + } + + function get_branch() { + var url = window.location.href; + var res = url.match(/docs.ceph.com\/docs\/([a-z]+)\/?/i) + if (res) { + return res[1] + } + return null; + } + + $.getJSON(releases_url, function(data) { + var branch = get_branch(); + if (show_edit(branch, data)) { + // patch the edit-on-github URL for correct branch + var url = $("#edit-on-github").attr("href"); + url = url.replace("master", branch); + $("#edit-on-github").attr("href", url); + $("#docubetter").show(); + } + }); +}); diff --git a/ceph/doc/_templates/page.html b/ceph/doc/_templates/page.html new file mode 100644 index 000000000..914a752fa --- /dev/null +++ b/ceph/doc/_templates/page.html @@ -0,0 +1,21 @@ +{% extends "!page.html" %} +{% block body %} + + + + + +{%- if edit_on_github_url %} + +{%- endif %} + + {{ super() }} +{% endblock %} diff --git a/ceph/doc/ceph-volume/lvm/zap.rst b/ceph/doc/ceph-volume/lvm/zap.rst index 2236ad4ef..367d74693 100644 --- a/ceph/doc/ceph-volume/lvm/zap.rst +++ b/ceph/doc/ceph-volume/lvm/zap.rst @@ -15,18 +15,51 @@ on the given lv or partition will be removed and all data will be purged. Zapping a logical volume:: - ceph-volume lvm zap {vg name/lv name} + ceph-volume lvm zap {vg name/lv name} Zapping a partition:: - ceph-volume lvm zap /dev/sdc1 + ceph-volume lvm zap /dev/sdc1 -If you are zapping a raw device or partition and would like any vgs or lvs created -from that device removed use the ``--destroy`` flag. A common use case is to simply -deploy OSDs using a whole raw device. If you do so and then wish to reuse that device for -another OSD you must use the ``--destroy`` flag when zapping so that the vgs and lvs that -ceph-volume created on the raw device will be removed. +Removing Devices +---------------- +When zapping, and looking for full removal of the device (lv, vg, or partition) +use the ``--destroy`` flag. 
A common use case is to simply deploy OSDs using +a whole raw device. If you do so and then wish to reuse that device for another +OSD you must use the ``--destroy`` flag when zapping so that the vgs and lvs +that ceph-volume created on the raw device will be removed. + +.. note:: Multiple devices can be accepted at once, to zap them all Zapping a raw device and destroying any vgs or lvs present:: - ceph-volume lvm zap /dev/sdc --destroy + ceph-volume lvm zap /dev/sdc --destroy + + +This action can be performed on partitions, and logical volumes as well:: + + ceph-volume lvm zap /dev/sdc1 --destroy + ceph-volume lvm zap osd-vg/data-lv --destroy + + +Finally, multiple devices can be detected if filtering by OSD ID and/or OSD +FSID. Either identifier can be used or both can be used at the same time. This +is useful in situations where multiple devices associated with a specific ID +need to be purged. When using the FSID, the filtering is stricter, and might +not match other (possibly invalid) devices associated to an ID. + +By ID only:: + + ceph-volume lvm zap --destroy --osd-id 1 + +By FSID:: + + ceph-volume lvm zap --destroy --osd-fsid 2E8FBE58-0328-4E3B-BFB7-3CACE4E9A6CE + +By both:: + + ceph-volume lvm zap --destroy --osd-fsid 2E8FBE58-0328-4E3B-BFB7-3CACE4E9A6CE --osd-id 1 + + +.. warning:: If the systemd unit associated with the OSD ID to be zapped is + detected as running, the tool will refuse to zap until the daemon is stopped. diff --git a/ceph/doc/cephfs/dirfrags.rst b/ceph/doc/cephfs/dirfrags.rst index 717553fea..24b05edfc 100644 --- a/ceph/doc/cephfs/dirfrags.rst +++ b/ceph/doc/cephfs/dirfrags.rst @@ -25,10 +25,9 @@ fragments may be *merged* to reduce the number of fragments in the directory. Splitting and merging ===================== -An MDS will only consider doing splits and merges if the ``mds_bal_frag`` -setting is true in the MDS's configuration file, and the allow_dirfrags -setting is true in the filesystem map (set on the mons). These settings -are both true by default since the *Luminous* (12.2.x) release of Ceph. +An MDS will only consider doing splits if the allow_dirfrags setting is true in +the file system map (set on the mons). This setting is true by default since +the *Luminous* release (12.2.X). When an MDS identifies a directory fragment to be split, it does not do the split immediately. Because splitting interrupts metadata IO, diff --git a/ceph/doc/cephfs/eviction.rst b/ceph/doc/cephfs/eviction.rst index 8f0f20b84..e803da179 100644 --- a/ceph/doc/cephfs/eviction.rst +++ b/ceph/doc/cephfs/eviction.rst @@ -23,9 +23,9 @@ Automatic client eviction There are three situations in which a client may be evicted automatically: -On an active MDS daemon, if a client has not communicated with the MDS for -over ``mds_session_autoclose`` seconds (300 seconds by default), then it -will be evicted automatically. +On an active MDS daemon, if a client has not communicated with the MDS for over +``session_autoclose`` (a file system variable) seconds (300 seconds by +default), then it will be evicted automatically. On an active MDS daemon, if a client has not responded to cap revoke messages for over ``mds_cap_revoke_eviction_timeout`` (configuration option) seconds. diff --git a/ceph/doc/cephfs/fuse.rst b/ceph/doc/cephfs/fuse.rst index 02a4d485c..251253703 100644 --- a/ceph/doc/cephfs/fuse.rst +++ b/ceph/doc/cephfs/fuse.rst @@ -26,7 +26,7 @@ For additional details on ``cephx`` configuration, see To mount the Ceph file system as a FUSE, you may use the ``ceph-fuse`` command. 
For example:: - sudo mkdir /home/usernname/cephfs + sudo mkdir /home/username/cephfs sudo ceph-fuse -m 192.168.0.1:6789 /home/username/cephfs If you have more than one filesystem, specify which one to mount using @@ -48,5 +48,5 @@ A persistent mount point can be setup via:: sudo systemctl enable ceph-fuse@/mnt.service .. _ceph-fuse: ../../man/8/ceph-fuse/ -.. _fstab: ./fstab +.. _fstab: ../fstab/#fuse .. _CEPHX Config Reference: ../../rados/configuration/auth-config-ref diff --git a/ceph/doc/cephfs/health-messages.rst b/ceph/doc/cephfs/health-messages.rst index 7b82c2f87..3a6217c7b 100644 --- a/ceph/doc/cephfs/health-messages.rst +++ b/ceph/doc/cephfs/health-messages.rst @@ -67,7 +67,7 @@ are like locks. Sometimes, for example when another client needs access, the MDS will request clients release their capabilities. If the client is unresponsive or buggy, it might fail to do so promptly or fail to do so at all. This message appears if a client has taken longer than -``mds_session_timeout`` (default 60s) to comply. +``session_timeout`` (default 60s) to comply. Message: "Client *name* failing to respond to cache pressure" Code: MDS_HEALTH_CLIENT_RECALL, MDS_HEALTH_CLIENT_RECALL_MANY diff --git a/ceph/doc/cephfs/mds-config-ref.rst b/ceph/doc/cephfs/mds-config-ref.rst index 2fd47ae33..70a97c90f 100644 --- a/ceph/doc/cephfs/mds-config-ref.rst +++ b/ceph/doc/cephfs/mds-config-ref.rst @@ -10,15 +10,6 @@ :Type: Boolean :Default: ``true`` - -``mds max file size`` - -:Description: The maximum allowed file size to set when creating a - new file system. - -:Type: 64-bit Integer Unsigned -:Default: ``1ULL << 40`` - ``mds cache memory limit`` :Description: The memory limit the MDS should enforce for its cache. @@ -102,24 +93,6 @@ :Default: ``24.0*60.0`` -``mds session timeout`` - -:Description: The interval (in seconds) of client inactivity before Ceph - times out capabilities and leases. - -:Type: Float -:Default: ``60`` - - -``mds session autoclose`` - -:Description: The interval (in seconds) before Ceph closes - a laggy client's session. - -:Type: Float -:Default: ``300`` - - ``mds reconnect timeout`` :Description: The interval (in seconds) to wait for clients to reconnect @@ -249,13 +222,6 @@ :Default: ``0`` -``mds bal frag`` - -:Description: Determines whether the MDS will fragment directories. -:Type: Boolean -:Default: ``false`` - - ``mds bal split size`` :Description: The maximum directory size before the MDS will split a directory diff --git a/ceph/doc/conf.py b/ceph/doc/conf.py index ce1e5af97..6bd56ba40 100644 --- a/ceph/doc/conf.py +++ b/ceph/doc/conf.py @@ -33,16 +33,20 @@ html_logo = 'logo.png' html_favicon = 'favicon.ico' html_use_smartypants = True html_show_sphinx = False +html_static_path = ["_static"] html_sidebars = { '**': ['smarttoc.html', 'searchbox.html'], } +sys.path.insert(0, os.path.abspath('_ext')) + extensions = [ 'sphinx.ext.autodoc', 'sphinx.ext.graphviz', 'sphinx.ext.todo', 'sphinxcontrib.ditaa', 'breathe', + 'edit_on_github', ] ditaa = 'ditaa' todo_include_todos = True @@ -66,3 +70,13 @@ breathe_domain_by_extension = {'py': 'py', 'c': 'c', 'h': 'c', 'cc': 'cxx', 'hpp pybind = os.path.join(top_level, 'src/pybind') if pybind not in sys.path: sys.path.insert(0, pybind) + +# the docs are rendered with github links pointing to master. the javascript +# snippet in _static/ceph.js rewrites the edit links when a page is loaded, to +# point to the correct branch. 
+edit_on_github_project = 'ceph/ceph' +edit_on_github_branch = 'master' + +# handles edit-on-github and old version warning display +def setup(app): + app.add_javascript('js/ceph.js') diff --git a/ceph/doc/man/8/ceph-volume.rst b/ceph/doc/man/8/ceph-volume.rst index af5775997..9ad5a5237 100644 --- a/ceph/doc/man/8/ceph-volume.rst +++ b/ceph/doc/man/8/ceph-volume.rst @@ -226,6 +226,17 @@ Usage, for logical partitions:: ceph-volume lvm zap /dev/sdc1 +For full removal of the device use the ``--destroy`` flag (allowed for all +device types):: + + ceph-volume lvm zap --destroy /dev/sdc1 + +Multiple devices can be removed by specifying the OSD ID and/or the OSD FSID:: + + ceph-volume lvm zap --destroy --osd-id 1 + ceph-volume lvm zap --destroy --osd-id 1 --osd-fsid C9605912-8395-4D76-AFC0-7DFDAC315D59 + + Positional arguments: * Either in the form of ``vg/lv`` for logical volumes, diff --git a/ceph/doc/man/8/crushtool.rst b/ceph/doc/man/8/crushtool.rst index 897f62ec4..c5fae504a 100644 --- a/ceph/doc/man/8/crushtool.rst +++ b/ceph/doc/man/8/crushtool.rst @@ -258,6 +258,14 @@ creating a new Ceph cluster. They can be further edited with:: # recompile crushtool -c map.txt -o crushmap +Reclassify +========== + +The *reclassify* function allows users to transition from older maps that +maintain parallel hierarchies for OSDs of different types to a modern CRUSH +map that makes use of the *device class* feature. For more information, +see http://docs.ceph.com/docs/master/rados/operations/crush-map-edits/#migrating-from-a-legacy-ssd-rule-to-device-classes. + Example output from --test ========================== diff --git a/ceph/doc/mgr/balancer.rst b/ceph/doc/mgr/balancer.rst index 191c45593..f3cb86f7d 100644 --- a/ceph/doc/mgr/balancer.rst +++ b/ceph/doc/mgr/balancer.rst @@ -129,6 +129,10 @@ The name is provided by the user and can be any useful identifying string. The ceph balancer show +All plans can be shown with:: + + ceph balancer ls + Old plans can be discarded with:: ceph balancer rm diff --git a/ceph/doc/rados/configuration/bluestore-config-ref.rst b/ceph/doc/rados/configuration/bluestore-config-ref.rst index 542ba151a..d7e70ee92 100644 --- a/ceph/doc/rados/configuration/bluestore-config-ref.rst +++ b/ceph/doc/rados/configuration/bluestore-config-ref.rst @@ -51,8 +51,164 @@ To specify a WAL device and/or DB device, :: ceph-disk prepare --bluestore --block.wal --block-db -Cache size -========== +Provisioning strategies +----------------------- +Although there are multiple ways to deploy a Bluestore OSD (unlike Filestore +which had 1) here are two common use cases that should help clarify the +initial deployment strategy: + +.. _bluestore-single-type-device-config: + +**block (data) only** +^^^^^^^^^^^^^^^^^^^^^ +If all the devices are the same type, for example all are spinning drives, and +there are no fast devices to combine these, it makes sense to just deploy with +block only and not try to separate ``block.db`` or ``block.wal``. The +:ref:`ceph-volume-lvm` call for a single ``/dev/sda`` device would look like:: + + ceph-volume lvm create --bluestore --data /dev/sda + +If logical volumes have already been created for each device (1 LV using 100% +of the device), then the :ref:`ceph-volume-lvm` call for an lv named +``ceph-vg/block-lv`` would look like:: + + ceph-volume lvm create --bluestore --data ceph-vg/block-lv + +.. 
_bluestore-mixed-device-config: + +**block and block.db** +^^^^^^^^^^^^^^^^^^^^^^ +If there is a mix of fast and slow devices (spinning and solid state), +it is recommended to place ``block.db`` on the faster device while ``block`` +(data) lives on the slower (spinning drive). Sizing for ``block.db`` should be +as large as possible to avoid performance penalties otherwise. The +``ceph-volume`` tool is currently not able to create these automatically, so +the volume groups and logical volumes need to be created manually. + +For the below example, let's assume 4 spinning drives (sda, sdb, sdc, and sdd) +and 1 solid state drive (sdx). First create the volume groups:: + + $ vgcreate ceph-block-0 /dev/sda + $ vgcreate ceph-block-1 /dev/sdb + $ vgcreate ceph-block-2 /dev/sdc + $ vgcreate ceph-block-3 /dev/sdd + +Now create the logical volumes for ``block``:: + + $ lvcreate -l 100%FREE -n block-0 ceph-block-0 + $ lvcreate -l 100%FREE -n block-1 ceph-block-1 + $ lvcreate -l 100%FREE -n block-2 ceph-block-2 + $ lvcreate -l 100%FREE -n block-3 ceph-block-3 + +We are creating 4 OSDs for the four slow spinning devices, so assuming a 200GB +SSD in ``/dev/sdx`` we will create 4 logical volumes, each of 50GB:: + + $ vgcreate ceph-db-0 /dev/sdx + $ lvcreate -L 50GB -n db-0 ceph-db-0 + $ lvcreate -L 50GB -n db-1 ceph-db-0 + $ lvcreate -L 50GB -n db-2 ceph-db-0 + $ lvcreate -L 50GB -n db-3 ceph-db-0 + +Finally, create the 4 OSDs with ``ceph-volume``:: + + $ ceph-volume lvm create --bluestore --data ceph-block-0/block-0 --block.db ceph-db-0/db-0 + $ ceph-volume lvm create --bluestore --data ceph-block-1/block-1 --block.db ceph-db-0/db-1 + $ ceph-volume lvm create --bluestore --data ceph-block-2/block-2 --block.db ceph-db-0/db-2 + $ ceph-volume lvm create --bluestore --data ceph-block-3/block-3 --block.db ceph-db-0/db-3 + +These operations should end up creating 4 OSDs, with ``block`` on the slower +spinning drives and a 50GB logical volume for each coming from the solid state +drive. + +Sizing +====== +When using a :ref:`mixed spinning and solid drive setup +<bluestore-mixed-device-config>` it is important to make a large-enough +``block.db`` logical volume for Bluestore. Generally, ``block.db`` should have +*as large as possible* logical volumes. + +It is recommended that the ``block.db`` size isn't smaller than 4% of +``block``. For example, if the ``block`` size is 1TB, then ``block.db`` +shouldn't be less than 40GB. + +If *not* using a mix of fast and slow devices, it isn't required to create +separate logical volumes for ``block.db`` (or ``block.wal``). Bluestore will +automatically manage these within the space of ``block``. + + +Automatic Cache Sizing +====================== + +Bluestore can be configured to automatically resize its caches when tc_malloc +is configured as the memory allocator and the ``bluestore_cache_autotune`` +setting is enabled. This option is currently enabled by default. Bluestore +will attempt to keep OSD heap memory usage under a designated target size via +the ``osd_memory_target`` configuration option. This is a best effort +algorithm and caches will not shrink smaller than the amount specified by +``osd_memory_cache_min``. Cache ratios will be chosen based on a hierarchy +of priorities. If priority information is not available, the +``bluestore_cache_meta_ratio`` and ``bluestore_cache_kv_ratio`` options are +used as fallbacks. + +``bluestore_cache_autotune`` + +:Description: Automatically tune the ratios assigned to different bluestore caches while respecting minimum values. 
+:Type: Boolean +:Required: Yes +:Default: ``True`` + +``osd_memory_target`` + +:Description: When tcmalloc is available and cache autotuning is enabled, try to keep this many bytes mapped in memory. Note: This may not exactly match the RSS memory usage of the process. While the total amount of heap memory mapped by the process should generally stay close to this target, there is no guarantee that the kernel will actually reclaim memory that has been unmapped. During initial development, it was found that some kernels result in the OSD's RSS Memory exceeding the mapped memory by up to 20%. It is hypothesised however, that the kernel generally may be more aggressive about reclaiming unmapped memory when there is a high amount of memory pressure. Your mileage may vary. +:Type: Unsigned Integer +:Required: Yes +:Default: ``4294967296`` + +``bluestore_cache_autotune_chunk_size`` + +:Description: The chunk size in bytes to allocate to caches when cache autotune is enabled. When the autotuner assigns memory to different caches, it will allocate memory in chunks. This is done to avoid evictions when there are minor fluctuations in the heap size or autotuned cache ratios. +:Type: Unsigned Integer +:Required: No +:Default: ``33554432`` + +``bluestore_cache_autotune_interval`` + +:Description: The number of seconds to wait between rebalances when cache autotune is enabled. This setting changes how quickly the ratios of the different caches are recomputed. Note: Setting the interval too small can result in high CPU usage and lower performance. +:Type: Float +:Required: No +:Default: ``5`` + +``osd_memory_base`` + +:Description: When tcmalloc and cache autotuning is enabled, estimate the minimum amount of memory in bytes the OSD will need. This is used to help the autotuner estimate the expected aggregate memory consumption of the caches. +:Type: Unsigned Integer +:Required: No +:Default: ``805306368`` + +``osd_memory_expected_fragmentation`` + +:Description: When tcmalloc and cache autotuning is enabled, estimate the percent of memory fragmentation. This is used to help the autotuner estimate the expected aggregate memory consumption of the caches. +:Type: Float +:Required: No +:Default: ``0.15`` + +``osd_memory_cache_min`` + +:Description: When tcmalloc and cache autotuning is enabled, set the minimum amount of memory used for caches. Note: Setting this value too low can result in significant cache thrashing. +:Type: Unsigned Integer +:Required: No +:Default: ``134217728`` + +``osd_memory_cache_resize_interval`` + +:Description: When tcmalloc and cache autotuning is enabled, wait this many seconds between resizing caches. This setting changes the total amount of memory available for bluestore to use for caching. Note: Setting the interval too small can result in memory allocator thrashing and lower performance. +:Type: Float +:Required: No +:Default: ``1`` + + +Manual Cache Sizing +=================== The amount of memory consumed by each OSD for BlueStore's cache is determined by the ``bluestore_cache_size`` configuration option. If diff --git a/ceph/doc/rados/operations/add-or-rm-mons.rst b/ceph/doc/rados/operations/add-or-rm-mons.rst index 0cdc4313c..20cba1bca 100644 --- a/ceph/doc/rados/operations/add-or-rm-mons.rst +++ b/ceph/doc/rados/operations/add-or-rm-mons.rst @@ -1,3 +1,5 @@ +.. 
_adding-and-removing-monitors: + ========================== Adding/Removing Monitors ========================== @@ -6,6 +8,8 @@ When you have a cluster up and running, you may add or remove monitors from the cluster at runtime. To bootstrap a monitor, see `Manual Deployment`_ or `Monitor Bootstrap`_. +.. _adding-monitors: + Adding Monitors =============== @@ -121,6 +125,7 @@ on ``mon.a``). ceph-mon -i {mon-id} --public-addr {ip:port} +.. _removing-monitors: Removing Monitors ================= diff --git a/ceph/doc/rados/operations/crush-map-edits.rst b/ceph/doc/rados/operations/crush-map-edits.rst index 36a902083..64d37c714 100644 --- a/ceph/doc/rados/operations/crush-map-edits.rst +++ b/ceph/doc/rados/operations/crush-map-edits.rst @@ -475,144 +475,161 @@ A rule takes the following form:: .. important:: A given CRUSH rule may be assigned to multiple pools, but it is not possible for a single pool to have multiple CRUSH rules. - -Placing Different Pools on Different OSDS: -========================================== - -Suppose you want to have most pools default to OSDs backed by large hard drives, -but have some pools mapped to OSDs backed by fast solid-state drives (SSDs). -It's possible to have multiple independent CRUSH hierarchies within the same -CRUSH map. Define two hierarchies with two different root nodes--one for hard -disks (e.g., "root platter") and one for SSDs (e.g., "root ssd") as shown -below:: - - device 0 osd.0 - device 1 osd.1 - device 2 osd.2 - device 3 osd.3 - device 4 osd.4 - device 5 osd.5 - device 6 osd.6 - device 7 osd.7 - - host ceph-osd-ssd-server-1 { - id -1 - alg straw - hash 0 - item osd.0 weight 1.00 - item osd.1 weight 1.00 - } - - host ceph-osd-ssd-server-2 { - id -2 - alg straw - hash 0 - item osd.2 weight 1.00 - item osd.3 weight 1.00 - } - - host ceph-osd-platter-server-1 { - id -3 - alg straw - hash 0 - item osd.4 weight 1.00 - item osd.5 weight 1.00 - } - - host ceph-osd-platter-server-2 { - id -4 - alg straw - hash 0 - item osd.6 weight 1.00 - item osd.7 weight 1.00 - } - - root platter { - id -5 - alg straw - hash 0 - item ceph-osd-platter-server-1 weight 2.00 - item ceph-osd-platter-server-2 weight 2.00 - } - - root ssd { - id -6 - alg straw - hash 0 - item ceph-osd-ssd-server-1 weight 2.00 - item ceph-osd-ssd-server-2 weight 2.00 - } - - rule data { - ruleset 0 - type replicated - min_size 2 - max_size 2 - step take platter - step chooseleaf firstn 0 type host - step emit - } - - rule metadata { - ruleset 1 - type replicated - min_size 0 - max_size 10 - step take platter - step chooseleaf firstn 0 type host - step emit - } - - rule rbd { - ruleset 2 - type replicated - min_size 0 - max_size 10 - step take platter - step chooseleaf firstn 0 type host - step emit - } - - rule platter { - ruleset 3 - type replicated - min_size 0 - max_size 10 - step take platter - step chooseleaf firstn 0 type host - step emit - } - - rule ssd { - ruleset 4 - type replicated - min_size 0 - max_size 4 - step take ssd - step chooseleaf firstn 0 type host - step emit - } - - rule ssd-primary { - ruleset 5 - type replicated - min_size 5 - max_size 10 - step take ssd - step chooseleaf firstn 1 type host - step emit - step take platter - step chooseleaf firstn -1 type host - step emit - } - -You can then set a pool to use the SSD rule by:: - - ceph osd pool set crush_ruleset 4 - -Similarly, using the ``ssd-primary`` rule will cause each placement group in the -pool to be placed with an SSD as the primary and platters as the replicas. - +.. 
_crush-reclassify: + +Migrating from a legacy SSD rule to device classes +-------------------------------------------------- + +It used to be necessary to manually edit your CRUSH map and maintain a +parallel hierarchy for each specialized device type (e.g., SSD) in order to +write rules that apply to those devices. Since the Luminous release, +the *device class* feature has enabled this transparently. + +However, migrating from an existing, manually customized per-device map to +the new device class rules in the trivial way will cause all data in the +system to be reshuffled. + +The ``crushtool`` has a few commands that can transform a legacy rule +and hierarchy so that you can start using the new class-based rules. +There are three types of transformations possible: + +#. ``--reclassify-root <root-name> <device-class>`` + + This will take everything in the hierarchy beneath root-name and + adjust any rules that reference that root via a ``take + <root-name>`` to instead ``take <root-name> class <device-class>``. + It renumbers the buckets in such a way that the old IDs are instead + used for the specified class's "shadow tree" so that no data + movement takes place. + + For example, imagine you have an existing rule like:: + + rule replicated_ruleset { + id 0 + type replicated + min_size 1 + max_size 10 + step take default + step chooseleaf firstn 0 type rack + step emit + } + + If you reclassify the root `default` as class `hdd`, the rule will + become:: + + rule replicated_ruleset { + id 0 + type replicated + min_size 1 + max_size 10 + step take default class hdd + step chooseleaf firstn 0 type rack + step emit + } + +#. ``--set-subtree-class <bucket-name> <device-class>`` + + This will mark every device in the subtree rooted at *bucket-name* + with the specified device class. + + This is normally used in conjunction with the ``--reclassify-root`` + option to ensure that all devices in that root are labeled with the + correct class. In some situations, however, some of those devices + (correctly) have a different class and we do not want to relabel + them. In such cases, one can exclude the ``--set-subtree-class`` + option. This means that the remapping process will not be perfect, + since the previous rule distributed across devices of multiple + classes but the adjusted rules will only map to devices of the + specified *device-class*, but that often is an accepted level of + data movement when the number of outlier devices is small. + +#. ``--reclassify-bucket <match-pattern> <device-class> <default-parent>`` + + This will allow you to merge a parallel type-specific hierarchy with the normal hierarchy. For example, many users have maps like:: + + host node1 { + id -2 # do not change unnecessarily + # weight 109.152 + alg straw + hash 0 # rjenkins1 + item osd.0 weight 9.096 + item osd.1 weight 9.096 + item osd.2 weight 9.096 + item osd.3 weight 9.096 + item osd.4 weight 9.096 + item osd.5 weight 9.096 + ... + } + + host node1-ssd { + id -10 # do not change unnecessarily + # weight 2.000 + alg straw + hash 0 # rjenkins1 + item osd.80 weight 2.000 + ... + } + + root default { + id -1 # do not change unnecessarily + alg straw + hash 0 # rjenkins1 + item node1 weight 110.967 + ... + } + + root ssd { + id -18 # do not change unnecessarily + # weight 16.000 + alg straw + hash 0 # rjenkins1 + item node1-ssd weight 2.000 + ... + } + + This function will reclassify each bucket that matches a + pattern. The pattern can look like ``%suffix`` or ``prefix%``. + For example, in the above example, we would use the pattern + ``%-ssd``. 
For each matched bucket, the remaining portion of the + name (that matches the ``%`` wildcard) specifies the *base bucket*. + All devices in the matched bucket are labeled with the specified + device class and then moved to the base bucket. If the base bucket + does not exist (e.g., ``node12-ssd`` exists but ``node12`` does + not), then it is created and linked underneath the specified + *default parent* bucket. In each case, we are careful to preserve + the old bucket IDs for the new shadow buckets to prevent data + movement. Any rules with ``take`` steps referencing the old + buckets are adjusted. + +#. ``--reclassify-bucket <bucket-name> <device-class> <default-parent>`` + + The same command can also be used without a wildcard to map a + single bucket. For example, in the previous map, we want the + ``ssd`` bucket to be mapped to the ``default`` bucket. + +The final command to convert the map comprising the above fragments would be something like:: + + $ ceph osd getcrushmap -o original + $ crushtool -i original --reclassify \ + --set-subtree-class default hdd \ + --reclassify-root default hdd \ + --reclassify-bucket %-ssd ssd default \ + --reclassify-bucket ssd ssd default \ + -o adjusted + +In order to ensure that the conversion is correct, there is a ``--compare`` command that will test a large sample of inputs against the CRUSH map and check that the same result comes back out. These inputs are controlled by the same options that apply to the ``--test`` command. For the above example:: + + $ crushtool -i original --compare adjusted + rule 0 had 0/10240 mismatched mappings (0) + rule 1 had 0/10240 mismatched mappings (0) + maps appear equivalent + +If there were differences, you would see what ratio of inputs was remapped +in the parentheses. + +If you are satisfied with the adjusted map, you can apply it to the cluster with something like:: + + ceph osd setcrushmap -i adjusted Tuning CRUSH, the hard way -------------------------- diff --git a/ceph/doc/rados/operations/crush-map.rst b/ceph/doc/rados/operations/crush-map.rst index 05fa4ff69..e9d667344 100644 --- a/ceph/doc/rados/operations/crush-map.rst +++ b/ceph/doc/rados/operations/crush-map.rst @@ -243,6 +243,11 @@ with:: ceph osd crush tree --show-shadow +For older clusters created before Luminous that relied on manually +crafted CRUSH maps to maintain per-device-type hierarchies, there is a +*reclassify* tool available to help transition to device classes +without triggering data movement (see :ref:`crush-reclassify`). + Weights sets ------------ diff --git a/ceph/doc/rados/operations/user-management.rst b/ceph/doc/rados/operations/user-management.rst index 8a35a501a..8c0874107 100644 --- a/ceph/doc/rados/operations/user-management.rst +++ b/ceph/doc/rados/operations/user-management.rst @@ -387,12 +387,6 @@ For example:: ceph auth caps client.paul mon 'allow rw' osd 'allow rwx pool=liverpool' ceph auth caps client.brian-manager mon 'allow *' osd 'allow *' -To remove a capability, you may reset the capability. If you want the user -to have no access to a particular daemon that was previously set, specify -an empty string. For example:: - - ceph auth caps client.ringo mon ' ' osd ' ' - See `Authorization (Capabilities)`_ for additional details on capabilities. 
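As a minimal sketch of the capability update shown in the hunk above (reusing the ``client.paul`` entity from the example; the exact caps strings are illustrative assumptions), the effect of ``ceph auth caps`` can be verified by dumping the entity before and after the change::

    # show the current key and caps for the user
    $ ceph auth get client.paul
    # set the caps; ceph auth caps replaces the previous set, so every
    # capability the user should keep must be restated here
    $ ceph auth caps client.paul mon 'allow r' osd 'allow rw pool=liverpool'
    # confirm the new caps took effect
    $ ceph auth get client.paul

Because ``ceph auth caps`` replaces rather than appends, re-running it with a narrower set of capabilities is the usual way to reduce a user's access.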
diff --git a/ceph/doc/rados/troubleshooting/troubleshooting-mon.rst b/ceph/doc/rados/troubleshooting/troubleshooting-mon.rst index 89fb94c32..642b2e07b 100644 --- a/ceph/doc/rados/troubleshooting/troubleshooting-mon.rst +++ b/ceph/doc/rados/troubleshooting/troubleshooting-mon.rst @@ -402,8 +402,8 @@ or:: Recovery using healthy monitor(s) --------------------------------- -If there is any survivers, we can always `replace`_ the corrupted one with a -new one. And after booting up, the new joiner will sync up with a healthy +If there are any survivors, we can always :ref:`replace ` the corrupted one with a +new one. After booting up, the new joiner will sync up with a healthy peer, and once it is fully sync'ed, it will be able to serve the clients. Recovery using OSDs @@ -563,5 +563,4 @@ Finally, you should reach out to us on the mailing lists, on IRC or file a new issue on the `tracker`_. .. _cluster map: ../../architecture#cluster-map -.. _replace: ../operation/add-or-rm-mons .. _tracker: http://tracker.ceph.com/projects/ceph/issues/new diff --git a/ceph/doc/radosgw/adminops.rst b/ceph/doc/radosgw/adminops.rst index 5da13a8b9..16efd5f84 100644 --- a/ceph/doc/radosgw/adminops.rst +++ b/ceph/doc/radosgw/adminops.rst @@ -1858,8 +1858,9 @@ Valid parameters for quotas include: the maximum number of objects. A negative value disables this setting. - **Maximum Size:** The ``max-size`` option allows you to specify a quota - for the maximum number of bytes. A negative value disables this setting. - + for the maximum number of bytes. The ``max-size-kb`` option allows you + to specify it in KiB. A negative value disables this setting. + - **Quota Type:** The ``quota-type`` option sets the scope for the quota. The options are ``bucket`` and ``user``. diff --git a/ceph/doc/radosgw/config-ref.rst b/ceph/doc/radosgw/config-ref.rst index 45054a9ec..d86baf126 100644 --- a/ceph/doc/radosgw/config-ref.rst +++ b/ceph/doc/radosgw/config-ref.rst @@ -576,6 +576,17 @@ Swift Settings :Default: ``false`` +``rgw trust forwarded https`` + +:Description: When a proxy in front of radosgw is used for ssl termination, radosgw + does not know whether incoming http connections are secure. Enable + this option to trust the ``Forwarded`` and ``X-Forwarded-Proto`` headers + sent by the proxy when determining whether the connection is secure. + This is required for some features, such as server side encryption. +:Type: Boolean +:Default: ``false`` + + Logging Settings ================ diff --git a/ceph/doc/radosgw/encryption.rst b/ceph/doc/radosgw/encryption.rst index a7bb7e2e9..ea89e502a 100644 --- a/ceph/doc/radosgw/encryption.rst +++ b/ceph/doc/radosgw/encryption.rst @@ -9,6 +9,11 @@ with 3 options for the management of encryption keys. Server-side encryption means that the data is sent over HTTP in its unencrypted form, and the Ceph Object Gateway stores that data in the Ceph Storage Cluster in encrypted form. +.. note:: Requests for server-side encryption must be sent over a secure HTTPS + connection to avoid sending secrets in plaintext. If a proxy is used + for SSL termination, ``rgw trust forwarded https`` must be enabled + before forwarded requests will be trusted as secure. + Customer-Provided Keys ====================== diff --git a/ceph/doc/radosgw/frontends.rst b/ceph/doc/radosgw/frontends.rst index ff6323ee4..7c0b2cced 100644 --- a/ceph/doc/radosgw/frontends.rst +++ b/ceph/doc/radosgw/frontends.rst @@ -18,7 +18,7 @@ and the Boost.Asio library for asynchronous network i/o. 
Options ------- -``port`` +``port`` and ``ssl_port`` :Description: Sets the listening port number. Can be specified multiple times as in ``port=80 port=8000``. @@ -27,18 +27,37 @@ Options :Default: ``80`` -``endpoint`` +``endpoint`` and ``ssl_endpoint`` :Description: Sets the listening address in the form ``address[:port]``, where the address is an IPv4 address string in dotted decimal - form, or an IPv6 address in hexadecimal notation. The - optional port defaults to 80. Can be specified multiple times - as in ``endpoint=::1 endpoint=192.168.0.100:8000``. + form, or an IPv6 address in hexadecimal notation surrounded + by square brackets. The optional port defaults to 80 for + ``endpoint`` and 443 for ``ssl_endpoint``. Can be specified + multiple times as in ``endpoint=[::1] endpoint=192.168.0.100:8000``. :Type: Integer :Default: None +``ssl_certificate`` + +:Description: Path to the SSL certificate file used for SSL-enabled endpoints. + +:Type: String +:Default: None + + +``ssl_private_key`` + +:Description: Optional path to the private key file used for SSL-enabled + endpoints. If one is not given, the ``ssl_certificate`` file + is used as the private key. + +:Type: String +:Default: None + + Civetweb ======== diff --git a/ceph/doc/start/hardware-recommendations.rst b/ceph/doc/start/hardware-recommendations.rst index eac5dc8c9..2ad982e39 100644 --- a/ceph/doc/start/hardware-recommendations.rst +++ b/ceph/doc/start/hardware-recommendations.rst @@ -39,11 +39,29 @@ separate hosts. RAM === -Metadata servers and monitors must be capable of serving their data quickly, so -they should have plenty of RAM (e.g., 1GB of RAM per daemon instance). OSDs do -not require as much RAM for regular operations (e.g., 500MB of RAM per daemon -instance); however, during recovery they need significantly more RAM (e.g., ~1GB -per 1TB of storage per daemon). Generally, more RAM is better. +Generally, more RAM is better. + +Monitors and managers (ceph-mon and ceph-mgr) +--------------------------------------------- + +Monitor and manager daemon memory usage generally scales with the size of the +cluster. For small clusters, 1-2 GB is generally sufficient. For +large clusters, you should provide more (5-10 GB). You may also want +to consider tuning settings like ``mon_osd_cache_size`` or +``rocksdb_cache_size``. + +Metadata servers (ceph-mds) +--------------------------- + +The metadata daemon memory utilization depends on how much memory its cache is +configured to consume. We recommend 1 GB as a minimum for most systems. See +``mds_cache_memory``. + +OSDs (ceph-osd) +--------------- + +By default, OSDs that use the BlueStore backend require 3-5 GB of RAM. You can +adjust the amount of memory the OSD consumes with the ``osd_memory_target`` configuration option when BlueStore is in use. When using the legacy FileStore backend, the operating system page cache is used for caching data, so no tuning is normally needed, and the OSD memory consumption is generally related to the number of PGs per daemon in the system. Data Storage diff --git a/ceph/doc/start/quick-ceph-deploy.rst b/ceph/doc/start/quick-ceph-deploy.rst index 50b7f307f..dcb01e7a0 100644 --- a/ceph/doc/start/quick-ceph-deploy.rst +++ b/ceph/doc/start/quick-ceph-deploy.rst @@ -124,7 +124,7 @@ configuration details, perform the following steps using ``ceph-deploy``. ceph-deploy mgr create node1 *Required only for luminous+ builds, i.e >= 12.x builds* #. Add three OSDs. 
For the purposes of these instructions, we assume you have an - unused disk in each node called ``/dev/vdb``. *Be sure that the device is not currently in use and does not contain any important data.* + unused disk in each node called ``/dev/vdb``. *Be sure that the device is not currently in use and does not contain any important data.* :: ceph-deploy osd create {ceph-node}:{device} diff --git a/ceph/examples/librados/Makefile b/ceph/examples/librados/Makefile index 2b6109c4c..e51c045a6 100644 --- a/ceph/examples/librados/Makefile +++ b/ceph/examples/librados/Makefile @@ -3,13 +3,13 @@ CXX?=g++ CXX_FLAGS?=-std=c++11 -Wall -Wextra -Werror -g CXX_LIBS?=-lrados -lradosstriper CXX_INC?=$(LOCAL_LIBRADOS_INC) -CXX_CC=$(CXX) $(CXX_FLAGS) $(CXX_INC) $(LOCAL_LIBRADOS) $(CXX_LIBS) +CXX_CC=$(CXX) $(CXX_FLAGS) $(CXX_INC) $(LOCAL_LIBRADOS) CC?=gcc CC_FLAGS=-Wall -Wextra -Werror -g CC_INC=$(LOCAL_LIBRADOS_INC) CC_LIBS?=-lrados -CC_CC=$(CC) $(CC_FLAGS) $(CC_INC) $(LOCAL_LIBRADOS) $(CC_LIBS) +CC_CC=$(CC) $(CC_FLAGS) $(CC_INC) $(LOCAL_LIBRADOS) # Relative path to the Ceph source: CEPH_SRC_HOME?=../../src @@ -26,13 +26,13 @@ all-system: LOCAL_LIBRADOS_INC= all-system: all hello_world_cpp: hello_world.cc - $(CXX_CC) -o hello_world_cpp hello_world.cc + $(CXX_CC) -o hello_world_cpp hello_world.cc $(CXX_LIBS) hello_radosstriper_cpp: hello_radosstriper.cc - $(CXX_CC) -o hello_radosstriper_cpp hello_radosstriper.cc + $(CXX_CC) -o hello_radosstriper_cpp hello_radosstriper.cc $(CXX_LIBS) hello_world_c: hello_world_c.c - $(CC_CC) -o hello_world_c hello_world_c.c + $(CC_CC) -o hello_world_c hello_world_c.c $(CC_LIBS) clean: rm -f hello_world_cpp hello_radosstriper_cpp hello_world_c diff --git a/ceph/examples/librados/hello_world.readme b/ceph/examples/librados/hello_world.readme index d438f932e..afa1cb32e 100644 --- a/ceph/examples/librados/hello_world.readme +++ b/ceph/examples/librados/hello_world.readme @@ -6,7 +6,7 @@ build tree (ie. using relative paths). If you would like to build the examples a your system librados and headers, use "make all-system". And executed using -./librados_hello_world -c ../../src/ceph.conf +./hello_world_cpp -c ../../src/ceph.conf (or whatever path to a ceph.conf is appropriate to you, or by explicitly specifying monitors, user id, and keys). 
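The Makefile hunk above moves ``$(CXX_LIBS)`` and ``$(CC_LIBS)`` to the end of each link command because linkers that resolve symbols left to right (the common case with GNU ld) expect libraries to come after the objects that reference them. A minimal sketch of the equivalent manual build against a system-wide librados (the paths and ceph.conf location are assumptions; adjust to your environment)::

    # compile and link the C++ example; note that the libraries come last
    g++ -std=c++11 -Wall -Wextra -Werror -g hello_world.cc -o hello_world_cpp -lrados -lradosstriper

    # run it against a local cluster configuration
    ./hello_world_cpp -c /etc/ceph/ceph.conf

This mirrors what ``make all-system`` does for the C++ example, using the corrected binary name (``hello_world_cpp``) from the readme fix above.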
diff --git a/ceph/install-deps.sh b/ceph/install-deps.sh index 9ead1056d..e73e05f6b 100755 --- a/ceph/install-deps.sh +++ b/ceph/install-deps.sh @@ -90,6 +90,7 @@ if [ x`uname`x = xFreeBSDx ]; then net/socat \ textproc/expat2 \ textproc/gsed \ + lang/gawk \ textproc/libxml2 \ textproc/xmlstarlet \ textproc/jq \ diff --git a/ceph/qa/cephfs/clusters/1-mds-1-client-coloc.yaml b/ceph/qa/cephfs/clusters/1-mds-1-client-coloc.yaml new file mode 100644 index 000000000..abcfffec6 --- /dev/null +++ b/ceph/qa/cephfs/clusters/1-mds-1-client-coloc.yaml @@ -0,0 +1,12 @@ +roles: +- [mon.a, mgr.y, mds.a, osd.0, osd.1, osd.2, osd.3, client.0] +- [mon.b, mon.c, mgr.x, mds.a-s, osd.4, osd.5, osd.6, osd.7] +openstack: +- volumes: # attached to each instance + count: 4 + size: 20 # GB +- machine: + disk: 200 # GB +log-rotate: + ceph-mds: 10G + ceph-osd: 10G diff --git a/ceph/qa/cephfs/clusters/1-mds-1-client.yaml b/ceph/qa/cephfs/clusters/1-mds-1-client.yaml index e64b0b88d..966f0dcc8 100644 --- a/ceph/qa/cephfs/clusters/1-mds-1-client.yaml +++ b/ceph/qa/cephfs/clusters/1-mds-1-client.yaml @@ -5,4 +5,9 @@ roles: openstack: - volumes: # attached to each instance count: 4 - size: 10 # GB + size: 20 # GB +- machine: + disk: 200 # GB +log-rotate: + ceph-mds: 10G + ceph-osd: 10G diff --git a/ceph/qa/cephfs/clusters/1-mds-2-client-coloc.yaml b/ceph/qa/cephfs/clusters/1-mds-2-client-coloc.yaml new file mode 100644 index 000000000..9f0f0dc39 --- /dev/null +++ b/ceph/qa/cephfs/clusters/1-mds-2-client-coloc.yaml @@ -0,0 +1,12 @@ +roles: +- [mon.a, mgr.y, mds.a, osd.0, osd.1, osd.2, osd.3, client.0] +- [mon.b, mon.c, mgr.x, mds.a-s, osd.4, osd.5, osd.6, osd.7, client.1] +openstack: +- volumes: # attached to each instance + count: 4 + size: 20 # GB +- machine: + disk: 200 # GB +log-rotate: + ceph-mds: 10G + ceph-osd: 10G diff --git a/ceph/qa/cephfs/clusters/1-mds-2-client.yaml b/ceph/qa/cephfs/clusters/1-mds-2-client.yaml index 006e15a7b..656178c0f 100644 --- a/ceph/qa/cephfs/clusters/1-mds-2-client.yaml +++ b/ceph/qa/cephfs/clusters/1-mds-2-client.yaml @@ -6,4 +6,9 @@ roles: openstack: - volumes: # attached to each instance count: 4 - size: 10 # GB + size: 30 # GB +- machine: + disk: 200 # GB +log-rotate: + ceph-mds: 10G + ceph-osd: 10G diff --git a/ceph/qa/cephfs/clusters/1-mds-3-client.yaml b/ceph/qa/cephfs/clusters/1-mds-3-client.yaml new file mode 100644 index 000000000..02e6d6dc6 --- /dev/null +++ b/ceph/qa/cephfs/clusters/1-mds-3-client.yaml @@ -0,0 +1,15 @@ +roles: +- [mon.a, mgr.y, mds.a, osd.0, osd.1, osd.2, osd.3] +- [mon.b, mon.c, mgr.x, mds.a-s, osd.4, osd.5, osd.6, osd.7] +- [client.0] +- [client.1] +- [client.2] +openstack: +- volumes: # attached to each instance + count: 4 + size: 30 # GB +- machine: + disk: 200 # GB +log-rotate: + ceph-mds: 10G + ceph-osd: 10G diff --git a/ceph/qa/cephfs/clusters/1-mds-4-client-coloc.yaml b/ceph/qa/cephfs/clusters/1-mds-4-client-coloc.yaml new file mode 100644 index 000000000..6ff916c4e --- /dev/null +++ b/ceph/qa/cephfs/clusters/1-mds-4-client-coloc.yaml @@ -0,0 +1,12 @@ +roles: +- [mon.a, mgr.y, mds.a, osd.0, osd.1, osd.2, osd.3, client.0, client.1] +- [mon.b, mon.c, mgr.x, mds.a-s, osd.4, osd.5, osd.6, osd.7, client.2, client.3] +openstack: +- volumes: # attached to each instance + count: 4 + size: 30 # GB +- machine: + disk: 200 # GB +log-rotate: + ceph-mds: 10G + ceph-osd: 10G diff --git a/ceph/qa/cephfs/clusters/1-mds-4-client.yaml b/ceph/qa/cephfs/clusters/1-mds-4-client.yaml index a6be36dea..f17c83b82 100644 --- a/ceph/qa/cephfs/clusters/1-mds-4-client.yaml +++ 
b/ceph/qa/cephfs/clusters/1-mds-4-client.yaml @@ -8,4 +8,9 @@ roles: openstack: - volumes: # attached to each instance count: 4 - size: 10 # GB + size: 30 # GB +- machine: + disk: 200 # GB +log-rotate: + ceph-mds: 10G + ceph-osd: 10G diff --git a/ceph/qa/cephfs/clusters/3-mds.yaml b/ceph/qa/cephfs/clusters/3-mds.yaml index c0d463a90..f9fc10808 100644 --- a/ceph/qa/cephfs/clusters/3-mds.yaml +++ b/ceph/qa/cephfs/clusters/3-mds.yaml @@ -5,4 +5,9 @@ roles: openstack: - volumes: # attached to each instance count: 4 - size: 10 # GB + size: 30 # GB +- machine: + disk: 200 # GB +log-rotate: + ceph-mds: 10G + ceph-osd: 10G diff --git a/ceph/qa/cephfs/clusters/9-mds.yaml b/ceph/qa/cephfs/clusters/9-mds.yaml index 0bf240272..414fb2ba4 100644 --- a/ceph/qa/cephfs/clusters/9-mds.yaml +++ b/ceph/qa/cephfs/clusters/9-mds.yaml @@ -5,4 +5,9 @@ roles: openstack: - volumes: # attached to each instance count: 4 - size: 10 # GB + size: 30 # GB +- machine: + disk: 200 # GB +log-rotate: + ceph-mds: 10G + ceph-osd: 10G diff --git a/ceph/qa/cephfs/clusters/fixed-2-ucephfs.yaml b/ceph/qa/cephfs/clusters/fixed-2-ucephfs.yaml index 94948f4c3..129aac6ce 100644 --- a/ceph/qa/cephfs/clusters/fixed-2-ucephfs.yaml +++ b/ceph/qa/cephfs/clusters/fixed-2-ucephfs.yaml @@ -4,7 +4,9 @@ roles: openstack: - volumes: # attached to each instance count: 4 - size: 10 # GB + size: 30 # GB +- machine: + disk: 200 # GB log-rotate: ceph-mds: 10G ceph-osd: 10G diff --git a/ceph/qa/run-standalone.sh b/ceph/qa/run-standalone.sh index 9321cba65..2c7ceaa34 100755 --- a/ceph/qa/run-standalone.sh +++ b/ceph/qa/run-standalone.sh @@ -6,7 +6,8 @@ if [ ! -e Makefile -o ! -d bin ]; then exit 1 fi -if [ ! -d /tmp/ceph-disk-virtualenv -o ! -d /tmp/ceph-detect-init-virtualenv ]; then +TEMP_DIR=${TMPDIR:-/tmp} +if [ ! -d $TEMP_DIR/ceph-disk-virtualenv -o ! -d $TEMP_DIR/ceph-detect-init-virtualenv ]; then echo '/tmp/*-virtualenv directories not built. Please run "make check" first.' exit 1 fi diff --git a/ceph/qa/standalone/ceph-helpers.sh b/ceph/qa/standalone/ceph-helpers.sh index f12f0698a..3883a6f58 100755 --- a/ceph/qa/standalone/ceph-helpers.sh +++ b/ceph/qa/standalone/ceph-helpers.sh @@ -19,7 +19,9 @@ # TIMEOUT=300 PG_NUM=4 -: ${CEPH_BUILD_VIRTUALENV:=/tmp} +TMPDIR=${TMPDIR:-/tmp} +CEPH_BUILD_VIRTUALENV=${TMPDIR} +TESTDIR=${TESTDIR:-${TMPDIR}} if type xmlstarlet > /dev/null 2>&1; then XMLSTARLET=xmlstarlet @@ -32,10 +34,12 @@ fi if [ `uname` = FreeBSD ]; then SED=gsed + AWK=gawk DIFFCOLOPTS="" KERNCORE="kern.corefile" else SED=sed + AWK=awk termwidth=$(stty -a | head -1 | sed -e 's/.*columns \([0-9]*\).*/\1/') if [ -n "$termwidth" -a "$termwidth" != "0" ]; then termwidth="-W ${termwidth}" @@ -202,8 +206,8 @@ function teardown() { function __teardown_btrfs() { local btrfs_base_dir=$1 - local btrfs_root=$(df -P . | tail -1 | awk '{print $NF}') - local btrfs_dirs=$(cd $btrfs_base_dir; sudo btrfs subvolume list . -t | awk '/^[0-9]/ {print $4}' | grep "$btrfs_base_dir/$btrfs_dir") + local btrfs_root=$(df -P . | tail -1 | $AWK '{print $NF}') + local btrfs_dirs=$(cd $btrfs_base_dir; sudo btrfs subvolume list -t . 
| $AWK '/^[0-9]/ {print $4}' | grep "$btrfs_base_dir/$btrfs_dir") for subvolume in $btrfs_dirs; do sudo btrfs subvolume delete $btrfs_root/$subvolume done @@ -1350,7 +1354,7 @@ function test_is_clean() { ####################################################################### -calc() { awk "BEGIN{print $*}"; } +calc() { $AWK "BEGIN{print $*}"; } ## # Return a list of numbers that are increasingly larger and whose @@ -1757,7 +1761,7 @@ function run_in_background() { local pid_variable=$1 shift # Execute the command and prepend the output with its pid - # We enforce to return the exit status of the command and not the awk one. + # We enforce to return the exit status of the command and not the sed one. ("$@" |& sed 's/^/'$$': /'; return "${PIPESTATUS[0]}") >&2 & eval "$pid_variable+=\" $!\"" } diff --git a/ceph/qa/standalone/scrub/osd-scrub-repair.sh b/ceph/qa/standalone/scrub/osd-scrub-repair.sh index a266aed90..b6d541bb3 100755 --- a/ceph/qa/standalone/scrub/osd-scrub-repair.sh +++ b/ceph/qa/standalone/scrub/osd-scrub-repair.sh @@ -5565,6 +5565,67 @@ EOF teardown $dir || return 1 } +function TEST_request_scrub_priority() { + local dir=$1 + local poolname=psr_pool + local objname=POBJ + local OBJECTS=64 + local PGS=8 + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=1 || return 1 + run_mgr $dir x || return 1 + local ceph_osd_args="--osd-scrub-interval-randomize-ratio=0 --osd-deep-scrub-randomize-ratio=0 " + ceph_osd_args+="--osd_scrub_backoff_ratio=0" + run_osd $dir 0 $ceph_osd_args || return 1 + + create_pool $poolname $PGS $PGS || return 1 + wait_for_clean || return 1 + + local osd=0 + add_something $dir $poolname $objname noscrub || return 1 + local primary=$(get_primary $poolname $objname) + local pg=$(get_pg $poolname $objname) + poolid=$(ceph osd dump | grep "^pool.*[']${poolname}[']" | awk '{ print $2 }') + + local otherpgs + for i in $(seq 0 $(expr $PGS - 1)) + do + opg="${poolid}.${i}" + if [ "$opg" = "$pg" ]; then + continue + fi + otherpgs="${otherpgs}${opg} " + local other_last_scrub=$(get_last_scrub_stamp $pg) + # Fake a schedule scrub + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${primary}) \ + trigger_scrub $opg || return 1 + done + + sleep 15 + flush_pg_stats + + # Request a regular scrub and it will be done + local last_scrub=$(get_last_scrub_stamp $pg) + ceph pg scrub $pg + + ceph osd unset noscrub || return 1 + ceph osd unset nodeep-scrub || return 1 + + wait_for_scrub $pg "$last_scrub" + + for opg in $otherpgs $pg + do + wait_for_scrub $opg "$other_last_scrub" + done + + # Verify that the requested scrub ran first + grep "log_channel.*scrub ok" $dir/osd.${primary}.log | head -1 | sed 's/.*[[]DBG[]]//' | grep -q $pg || return 1 + + return 0 +} + + main osd-scrub-repair "$@" # Local Variables: diff --git a/ceph/qa/suites/ceph-ansible/smoke/basic/2-ceph/ceph_ansible.yaml b/ceph/qa/suites/ceph-ansible/smoke/basic/2-ceph/ceph_ansible.yaml index 5ca4bd609..750fa6dd7 100644 --- a/ceph/qa/suites/ceph-ansible/smoke/basic/2-ceph/ceph_ansible.yaml +++ b/ceph/qa/suites/ceph-ansible/smoke/basic/2-ceph/ceph_ansible.yaml @@ -3,8 +3,8 @@ meta: overrides: ceph_ansible: + branch: stable-3.2 vars: - branch: stable-3.2 ceph_conf_overrides: global: osd default pool size: 2 diff --git a/ceph/qa/suites/fs/32bits/tasks/cfuse_workunit_suites_pjd.yaml b/ceph/qa/suites/fs/32bits/tasks/cfuse_workunit_suites_pjd.yaml index a1e2ada19..37e315f7e 100644 --- a/ceph/qa/suites/fs/32bits/tasks/cfuse_workunit_suites_pjd.yaml +++ 
b/ceph/qa/suites/fs/32bits/tasks/cfuse_workunit_suites_pjd.yaml @@ -6,6 +6,7 @@ overrides: fuse default permissions: false tasks: - workunit: + timeout: 6h clients: all: - suites/pjd.sh diff --git a/ceph/qa/suites/fs/basic_functional/clusters/1-mds-4-client-coloc.yaml b/ceph/qa/suites/fs/basic_functional/clusters/1-mds-4-client-coloc.yaml new file mode 120000 index 000000000..e5444ae22 --- /dev/null +++ b/ceph/qa/suites/fs/basic_functional/clusters/1-mds-4-client-coloc.yaml @@ -0,0 +1 @@ +.qa/cephfs/clusters/1-mds-4-client-coloc.yaml \ No newline at end of file diff --git a/ceph/qa/suites/fs/basic_functional/clusters/4-remote-clients.yaml b/ceph/qa/suites/fs/basic_functional/clusters/4-remote-clients.yaml deleted file mode 100644 index 1c540a4ef..000000000 --- a/ceph/qa/suites/fs/basic_functional/clusters/4-remote-clients.yaml +++ /dev/null @@ -1,10 +0,0 @@ -roles: -- [mon.a, mgr.x, osd.0, osd.1, osd.2, osd.3, mds.a, mds.b, client.1, client.2, client.3] -- [client.0, osd.4, osd.5, osd.6, osd.7] -openstack: -- volumes: # attached to each instance - count: 2 - size: 10 # GB -log-rotate: - ceph-mds: 10G - ceph-osd: 10G diff --git a/ceph/qa/suites/fs/basic_functional/tasks/damage.yaml b/ceph/qa/suites/fs/basic_functional/tasks/damage.yaml index 3f4aac9e5..9ae738f01 100644 --- a/ceph/qa/suites/fs/basic_functional/tasks/damage.yaml +++ b/ceph/qa/suites/fs/basic_functional/tasks/damage.yaml @@ -17,6 +17,8 @@ overrides: - Corrupt dentry - Scrub error on inode - Metadata damage detected + - MDS_READ_ONLY + - force file system read-only tasks: - cephfs_test_runner: diff --git a/ceph/qa/suites/fs/basic_workload/tasks/cfuse_workunit_suites_pjd.yaml b/ceph/qa/suites/fs/basic_workload/tasks/cfuse_workunit_suites_pjd.yaml index a1e2ada19..37e315f7e 100644 --- a/ceph/qa/suites/fs/basic_workload/tasks/cfuse_workunit_suites_pjd.yaml +++ b/ceph/qa/suites/fs/basic_workload/tasks/cfuse_workunit_suites_pjd.yaml @@ -6,6 +6,7 @@ overrides: fuse default permissions: false tasks: - workunit: + timeout: 6h clients: all: - suites/pjd.sh diff --git a/ceph/qa/suites/fs/bugs/client_trim_caps/clusters/small-cluster.yaml b/ceph/qa/suites/fs/bugs/client_trim_caps/clusters/small-cluster.yaml index 12047bd7a..5cd97a3ae 100644 --- a/ceph/qa/suites/fs/bugs/client_trim_caps/clusters/small-cluster.yaml +++ b/ceph/qa/suites/fs/bugs/client_trim_caps/clusters/small-cluster.yaml @@ -4,6 +4,8 @@ openstack: - volumes: # attached to each instance count: 2 size: 10 # GB +- machine: + disk: 100 # GB log-rotate: ceph-mds: 10G ceph-osd: 10G diff --git a/ceph/qa/suites/fs/multiclient/clusters/1-mds-2-client.yaml b/ceph/qa/suites/fs/multiclient/clusters/1-mds-2-client.yaml new file mode 120000 index 000000000..9f4f161a3 --- /dev/null +++ b/ceph/qa/suites/fs/multiclient/clusters/1-mds-2-client.yaml @@ -0,0 +1 @@ +.qa/cephfs/clusters/1-mds-2-client.yaml \ No newline at end of file diff --git a/ceph/qa/suites/fs/multiclient/clusters/1-mds-3-client.yaml b/ceph/qa/suites/fs/multiclient/clusters/1-mds-3-client.yaml new file mode 120000 index 000000000..6b25e07c4 --- /dev/null +++ b/ceph/qa/suites/fs/multiclient/clusters/1-mds-3-client.yaml @@ -0,0 +1 @@ +.qa/cephfs/clusters/1-mds-3-client.yaml \ No newline at end of file diff --git a/ceph/qa/suites/fs/multiclient/clusters/three_clients.yaml b/ceph/qa/suites/fs/multiclient/clusters/three_clients.yaml deleted file mode 100644 index a533af5c6..000000000 --- a/ceph/qa/suites/fs/multiclient/clusters/three_clients.yaml +++ /dev/null @@ -1,15 +0,0 @@ -roles: -- [mon.a, mon.b, mon.c, mgr.x, mds.a, osd.0, 
osd.1, osd.2, osd.3] -- [client.2] -- [client.1] -- [client.0] - -openstack: -- volumes: # attached to each instance - count: 1 - size: 10 # GB - -log-rotate: - ceph-mds: 10G - ceph-osd: 10G - diff --git a/ceph/qa/suites/fs/multiclient/clusters/two_clients.yaml b/ceph/qa/suites/fs/multiclient/clusters/two_clients.yaml deleted file mode 100644 index 00f3815cb..000000000 --- a/ceph/qa/suites/fs/multiclient/clusters/two_clients.yaml +++ /dev/null @@ -1,14 +0,0 @@ -roles: -- [mon.a, mon.b, mon.c, mgr.x, mds.a, osd.0, osd.1, osd.2, osd.3] -- [client.1] -- [client.0] - -openstack: -- volumes: # attached to each instance - count: 3 - size: 10 # GB - -log-rotate: - ceph-mds: 10G - ceph-osd: 10G - diff --git a/ceph/qa/suites/fs/multifs/clusters/2-remote-clients.yaml b/ceph/qa/suites/fs/multifs/clusters/2-remote-clients.yaml deleted file mode 100644 index 2ae772c3f..000000000 --- a/ceph/qa/suites/fs/multifs/clusters/2-remote-clients.yaml +++ /dev/null @@ -1,10 +0,0 @@ -roles: -- [mon.a, mgr.x, osd.0, osd.1, osd.2, osd.3, mon.b, mds.a, mds.b, client.1] -- [mds.c, mds.d, mon.c, client.0, osd.4, osd.5, osd.6, osd.7] -openstack: -- volumes: # attached to each instance - count: 2 - size: 10 # GB -log-rotate: - ceph-mds: 10G - ceph-osd: 10G diff --git a/ceph/qa/suites/fs/permission/tasks/cfuse_workunit_suites_pjd.yaml b/ceph/qa/suites/fs/permission/tasks/cfuse_workunit_suites_pjd.yaml index 2dd8ac779..09be26675 100644 --- a/ceph/qa/suites/fs/permission/tasks/cfuse_workunit_suites_pjd.yaml +++ b/ceph/qa/suites/fs/permission/tasks/cfuse_workunit_suites_pjd.yaml @@ -7,6 +7,7 @@ overrides: client acl type: posix_acl tasks: - workunit: + timeout: 6h clients: all: - suites/pjd.sh diff --git a/ceph/qa/suites/fs/thrash/clusters/1-mds-1-client-coloc.yaml b/ceph/qa/suites/fs/thrash/clusters/1-mds-1-client-coloc.yaml new file mode 120000 index 000000000..d15ecfda0 --- /dev/null +++ b/ceph/qa/suites/fs/thrash/clusters/1-mds-1-client-coloc.yaml @@ -0,0 +1 @@ +.qa/cephfs/clusters/1-mds-1-client-coloc.yaml \ No newline at end of file diff --git a/ceph/qa/suites/fs/thrash/clusters/mds-1active-1standby.yaml b/ceph/qa/suites/fs/thrash/clusters/mds-1active-1standby.yaml deleted file mode 100644 index d02524866..000000000 --- a/ceph/qa/suites/fs/thrash/clusters/mds-1active-1standby.yaml +++ /dev/null @@ -1,10 +0,0 @@ -roles: -- [mon.a, mon.c, osd.0, osd.1, osd.2, mds.b-s-a] -- [mon.b, mgr.x, mds.a, osd.3, osd.4, osd.5, client.0] -openstack: -- volumes: # attached to each instance - count: 3 - size: 10 # GB -log-rotate: - ceph-mds: 10G - ceph-osd: 10G diff --git a/ceph/qa/suites/fs/thrash/msgr-failures/osd-mds-delay.yaml b/ceph/qa/suites/fs/thrash/msgr-failures/osd-mds-delay.yaml index adcebc0ba..4dc0086e6 100644 --- a/ceph/qa/suites/fs/thrash/msgr-failures/osd-mds-delay.yaml +++ b/ceph/qa/suites/fs/thrash/msgr-failures/osd-mds-delay.yaml @@ -3,6 +3,6 @@ overrides: conf: global: ms inject socket failures: 2500 - mds inject delay type: osd mds + ms inject delay type: osd mds ms inject delay probability: .005 ms inject delay max: 1 diff --git a/ceph/qa/suites/fs/thrash/tasks/cfuse_workunit_suites_pjd.yaml b/ceph/qa/suites/fs/thrash/tasks/cfuse_workunit_suites_pjd.yaml index a1e2ada19..37e315f7e 100644 --- a/ceph/qa/suites/fs/thrash/tasks/cfuse_workunit_suites_pjd.yaml +++ b/ceph/qa/suites/fs/thrash/tasks/cfuse_workunit_suites_pjd.yaml @@ -6,6 +6,7 @@ overrides: fuse default permissions: false tasks: - workunit: + timeout: 6h clients: all: - suites/pjd.sh diff --git 
a/ceph/qa/suites/kcephfs/cephfs/tasks/kclient_workunit_suites_pjd.yaml b/ceph/qa/suites/kcephfs/cephfs/tasks/kclient_workunit_suites_pjd.yaml index 09abaeb6e..1f24a5506 100644 --- a/ceph/qa/suites/kcephfs/cephfs/tasks/kclient_workunit_suites_pjd.yaml +++ b/ceph/qa/suites/kcephfs/cephfs/tasks/kclient_workunit_suites_pjd.yaml @@ -1,6 +1,7 @@ tasks: - kclient: - workunit: + timeout: 6h clients: all: - suites/pjd.sh diff --git a/ceph/qa/suites/kcephfs/recovery/tasks/damage.yaml b/ceph/qa/suites/kcephfs/recovery/tasks/damage.yaml index 3f4aac9e5..9ae738f01 100644 --- a/ceph/qa/suites/kcephfs/recovery/tasks/damage.yaml +++ b/ceph/qa/suites/kcephfs/recovery/tasks/damage.yaml @@ -17,6 +17,8 @@ overrides: - Corrupt dentry - Scrub error on inode - Metadata damage detected + - MDS_READ_ONLY + - force file system read-only tasks: - cephfs_test_runner: diff --git a/ceph/qa/suites/multimds/basic/tasks/cfuse_workunit_suites_pjd.yaml b/ceph/qa/suites/multimds/basic/tasks/cfuse_workunit_suites_pjd.yaml index a1e2ada19..37e315f7e 100644 --- a/ceph/qa/suites/multimds/basic/tasks/cfuse_workunit_suites_pjd.yaml +++ b/ceph/qa/suites/multimds/basic/tasks/cfuse_workunit_suites_pjd.yaml @@ -6,6 +6,7 @@ overrides: fuse default permissions: false tasks: - workunit: + timeout: 6h clients: all: - suites/pjd.sh diff --git a/ceph/qa/suites/rados/singleton/all/mon-config-key-caps.yaml b/ceph/qa/suites/rados/singleton/all/mon-config-key-caps.yaml new file mode 100644 index 000000000..0b0b95c52 --- /dev/null +++ b/ceph/qa/suites/rados/singleton/all/mon-config-key-caps.yaml @@ -0,0 +1,17 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 +tasks: +- install: +- ceph: + log-whitelist: + - overall HEALTH_ + - \(AUTH_BAD_CAPS\) +- workunit: + clients: + all: + - mon/test_config_key_caps.sh diff --git a/ceph/qa/suites/upgrade/jewel-x/parallel/1.1-pg-log-overrides/normal_pg_log.yaml b/ceph/qa/suites/upgrade/jewel-x/parallel/1.1-pg-log-overrides/normal_pg_log.yaml new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/ceph/qa/suites/upgrade/jewel-x/parallel/1.1-pg-log-overrides/normal_pg_log.yaml @@ -0,0 +1 @@ + diff --git a/ceph/qa/suites/upgrade/jewel-x/parallel/1.1-pg-log-overrides/short_pg_log.yaml b/ceph/qa/suites/upgrade/jewel-x/parallel/1.1-pg-log-overrides/short_pg_log.yaml new file mode 100644 index 000000000..20cc101de --- /dev/null +++ b/ceph/qa/suites/upgrade/jewel-x/parallel/1.1-pg-log-overrides/short_pg_log.yaml @@ -0,0 +1,6 @@ +overrides: + ceph: + conf: + global: + osd_min_pg_log_entries: 1 + osd_max_pg_log_entries: 2 diff --git a/ceph/qa/suites/upgrade/jewel-x/stress-split-erasure-code/1.1-pg-log-overrides/normal_pg_log.yaml b/ceph/qa/suites/upgrade/jewel-x/stress-split-erasure-code/1.1-pg-log-overrides/normal_pg_log.yaml new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/ceph/qa/suites/upgrade/jewel-x/stress-split-erasure-code/1.1-pg-log-overrides/normal_pg_log.yaml @@ -0,0 +1 @@ + diff --git a/ceph/qa/suites/upgrade/jewel-x/stress-split-erasure-code/1.1-pg-log-overrides/short_pg_log.yaml b/ceph/qa/suites/upgrade/jewel-x/stress-split-erasure-code/1.1-pg-log-overrides/short_pg_log.yaml new file mode 100644 index 000000000..20cc101de --- /dev/null +++ b/ceph/qa/suites/upgrade/jewel-x/stress-split-erasure-code/1.1-pg-log-overrides/short_pg_log.yaml @@ -0,0 +1,6 @@ +overrides: + ceph: + conf: + global: + osd_min_pg_log_entries: 1 + osd_max_pg_log_entries: 2 diff --git a/ceph/qa/suites/upgrade/jewel-x/stress-split/1.1-pg-log-overrides/normal_pg_log.yaml 
b/ceph/qa/suites/upgrade/jewel-x/stress-split/1.1-pg-log-overrides/normal_pg_log.yaml new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/ceph/qa/suites/upgrade/jewel-x/stress-split/1.1-pg-log-overrides/normal_pg_log.yaml @@ -0,0 +1 @@ + diff --git a/ceph/qa/suites/upgrade/jewel-x/stress-split/1.1-pg-log-overrides/short_pg_log.yaml b/ceph/qa/suites/upgrade/jewel-x/stress-split/1.1-pg-log-overrides/short_pg_log.yaml new file mode 100644 index 000000000..20cc101de --- /dev/null +++ b/ceph/qa/suites/upgrade/jewel-x/stress-split/1.1-pg-log-overrides/short_pg_log.yaml @@ -0,0 +1,6 @@ +overrides: + ceph: + conf: + global: + osd_min_pg_log_entries: 1 + osd_max_pg_log_entries: 2 diff --git a/ceph/qa/suites/upgrade/jewel-x/stress-split/2-partial-upgrade/firsthalf.yaml b/ceph/qa/suites/upgrade/jewel-x/stress-split/2-partial-upgrade/firsthalf.yaml index 442dcf105..a73b87beb 100644 --- a/ceph/qa/suites/upgrade/jewel-x/stress-split/2-partial-upgrade/firsthalf.yaml +++ b/ceph/qa/suites/upgrade/jewel-x/stress-split/2-partial-upgrade/firsthalf.yaml @@ -10,3 +10,8 @@ tasks: - ceph.restart: daemons: [mon.a,mon.b,mon.c,osd.0, osd.1, osd.2] - print: "**** done ceph.restart 1st half" +- exec: + osd.0: + - ceph osd set pglog_hardlimit && exit 1 || true + - ceph osd dump --format=json-pretty | grep "flags" +- print: "**** try to set pglog_hardlimit, should not succeed" diff --git a/ceph/qa/suites/upgrade/jewel-x/stress-split/5-finish-upgrade.yaml b/ceph/qa/suites/upgrade/jewel-x/stress-split/5-finish-upgrade.yaml index 1d528cd5d..faea6fdbf 100644 --- a/ceph/qa/suites/upgrade/jewel-x/stress-split/5-finish-upgrade.yaml +++ b/ceph/qa/suites/upgrade/jewel-x/stress-split/5-finish-upgrade.yaml @@ -6,4 +6,14 @@ tasks: daemons: [osd.3, osd.4, osd.5] wait-for-healthy: false wait-for-osds-up: true +- exec: + osd.0: + - ceph osd require-osd-release luminous +- print: "**** done `ceph osd require-osd-release luminous`" +- exec: + osd.0: + - ceph osd dump --format=json-pretty | grep "flags" + - ceph osd set pglog_hardlimit + - ceph osd dump --format=json-pretty | grep "flags" +- print: "**** try to set pglog_hardlimit again, should succeed" diff --git a/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-parallel/% b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-parallel/% new file mode 100644 index 000000000..e69de29bb diff --git a/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-parallel/.qa b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-parallel/.qa new file mode 120000 index 000000000..a23f7e045 --- /dev/null +++ b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-parallel/.qa @@ -0,0 +1 @@ +../../.qa \ No newline at end of file diff --git a/ceph/qa/suites/upgrade/luminous-p2p/point-to-point-upgrade.yaml b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-parallel/point-to-point-upgrade.yaml similarity index 85% rename from ceph/qa/suites/upgrade/luminous-p2p/point-to-point-upgrade.yaml rename to ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-parallel/point-to-point-upgrade.yaml index 9deeb4c49..c0b6ebbd5 100644 --- a/ceph/qa/suites/upgrade/luminous-p2p/point-to-point-upgrade.yaml +++ b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-parallel/point-to-point-upgrade.yaml @@ -11,6 +11,11 @@ meta: run workload and upgrade-sequence in parallel install ceph/luminous v12.2.8 point version run workload and upgrade-sequence in parallel + install ceph/luminous v12.2.9 point version + run workload and upgrade-sequence in parallel + install ceph/luminous v12.2.10 point version + run workload and 
upgrade-sequence in parallel + install ceph/luminous latest version run workload and upgrade-sequence in parallel overrides: @@ -119,6 +124,34 @@ tasks: - upgrade-sequence_luminous - print: "**** done parallel luminous v12.2.8" + +#### upgrade to v12.2.9 +- install.upgrade: + #exclude_packages: ['ceph-mgr','libcephfs2','libcephfs-devel','libcephfs-dev'] + mon.a: + tag: v12.2.9 + mon.b: + tag: v12.2.9 + # Note that client.a IS NOT upgraded at this point +- parallel: + - workload_luminous + - upgrade-sequence_luminous +- print: "**** done parallel luminous v12.2.9" + +#### upgrade to v12.2.10 +- install.upgrade: + #exclude_packages: ['ceph-mgr','libcephfs2','libcephfs-devel','libcephfs-dev'] + mon.a: + tag: v12.2.10 + mon.b: + tag: v12.2.10 + # Note that client.a IS NOT upgraded at this point +- parallel: + - workload_luminous + - upgrade-sequence_luminous +- print: "**** done parallel luminous v12.2.10" + + #### upgrade to latest luminous - install.upgrade: #exclude_packages: ['ceph-mgr','libcephfs2','libcephfs-devel','libcephfs-dev'] diff --git a/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-parallel/supported b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-parallel/supported new file mode 120000 index 000000000..79010c36a --- /dev/null +++ b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-parallel/supported @@ -0,0 +1 @@ +../../../../distros/supported \ No newline at end of file diff --git a/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/% b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/% new file mode 100644 index 000000000..e69de29bb diff --git a/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/0-cluster/+ b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/0-cluster/+ new file mode 100644 index 000000000..e69de29bb diff --git a/ceph/qa/suites/upgrade/luminous-p2p/.qa b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/0-cluster/.qa similarity index 100% rename from ceph/qa/suites/upgrade/luminous-p2p/.qa rename to ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/0-cluster/.qa diff --git a/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/0-cluster/openstack.yaml b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/0-cluster/openstack.yaml new file mode 100644 index 000000000..a0d5c2019 --- /dev/null +++ b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/0-cluster/openstack.yaml @@ -0,0 +1,6 @@ +openstack: + - machine: + disk: 100 # GB + - volumes: # attached to each instance + count: 3 + size: 30 # GB diff --git a/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/0-cluster/start.yaml b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/0-cluster/start.yaml new file mode 100644 index 000000000..4f40219b5 --- /dev/null +++ b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/0-cluster/start.yaml @@ -0,0 +1,20 @@ +meta: +- desc: | + Run ceph on two nodes, + with a separate client-only node. + Use xfs beneath the osds. 
+overrides: + ceph: + fs: xfs +roles: +- - mon.a + - mon.b + - mon.c + - mgr.x + - osd.0 + - osd.1 + - osd.2 +- - osd.3 + - osd.4 + - osd.5 +- - client.0 diff --git a/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/1-ceph-install/luminous.yaml b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/1-ceph-install/luminous.yaml new file mode 100644 index 000000000..b66e0ca99 --- /dev/null +++ b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/1-ceph-install/luminous.yaml @@ -0,0 +1,19 @@ +meta: +- desc: install ceph/luminous latest +tasks: +- install: + tag: v12.2.10 + exclude_packages: ['librados3'] + extra_packages: ['librados2'] +- print: "**** done install luminous v12.2.10" +- ceph: +- exec: + osd.0: + - ceph osd require-osd-release luminous + - ceph osd set-require-min-compat-client luminous +- print: "**** done ceph" +overrides: + ceph: + conf: + mon: + mon warn on osd down out interval zero: false diff --git a/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/1.1-pg-log-overrides/normal_pg_log.yaml b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/1.1-pg-log-overrides/normal_pg_log.yaml new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/1.1-pg-log-overrides/normal_pg_log.yaml @@ -0,0 +1 @@ + diff --git a/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/1.1-pg-log-overrides/short_pg_log.yaml b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/1.1-pg-log-overrides/short_pg_log.yaml new file mode 100644 index 000000000..20cc101de --- /dev/null +++ b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/1.1-pg-log-overrides/short_pg_log.yaml @@ -0,0 +1,6 @@ +overrides: + ceph: + conf: + global: + osd_min_pg_log_entries: 1 + osd_max_pg_log_entries: 2 diff --git a/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/2-partial-upgrade/.qa b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/2-partial-upgrade/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/2-partial-upgrade/.qa @@ -0,0 +1 @@ +../.qa/ \ No newline at end of file diff --git a/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/2-partial-upgrade/firsthalf.yaml b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/2-partial-upgrade/firsthalf.yaml new file mode 100644 index 000000000..a73b87beb --- /dev/null +++ b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/2-partial-upgrade/firsthalf.yaml @@ -0,0 +1,17 @@ +meta: +- desc: | + install upgrade ceph/-x on one node only + 1st half + restart : osd.0,1,2 +tasks: +- install.upgrade: + osd.0: +- print: "**** done install.upgrade osd.0" +- ceph.restart: + daemons: [mon.a,mon.b,mon.c,osd.0, osd.1, osd.2] +- print: "**** done ceph.restart 1st half" +- exec: + osd.0: + - ceph osd set pglog_hardlimit && exit 1 || true + - ceph osd dump --format=json-pretty | grep "flags" +- print: "**** try to set pglog_hardlimit, should not succeed" diff --git a/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/3-thrash/.qa b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/3-thrash/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/3-thrash/.qa @@ -0,0 +1 @@ +../.qa/ \ No newline at end of file diff --git a/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/3-thrash/default.yaml 
b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/3-thrash/default.yaml new file mode 100644 index 000000000..b3fddefc7 --- /dev/null +++ b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/3-thrash/default.yaml @@ -0,0 +1,25 @@ +meta: +- desc: | + randomly kill and revive osd + small chance to increase the number of pgs +overrides: + ceph: + log-whitelist: + - but it is still running + - wrongly marked me down + - objects unfound and apparently lost + - log bound mismatch +tasks: +- parallel: + - stress-tasks +stress-tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgpnum_fix: 1 + chance_thrash_cluster_full: 0 + chance_thrash_pg_upmap: 0 + chance_thrash_pg_upmap_items: 0 + disable_objectstore_tool_tests: true + chance_force_recovery: 0 +- print: "**** done thrashosds 3-thrash" diff --git a/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/4-workload/+ b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/4-workload/+ new file mode 100644 index 000000000..e69de29bb diff --git a/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/4-workload/.qa b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/4-workload/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/4-workload/.qa @@ -0,0 +1 @@ +../.qa/ \ No newline at end of file diff --git a/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/4-workload/radosbench.yaml b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/4-workload/radosbench.yaml new file mode 100644 index 000000000..626ae8ea6 --- /dev/null +++ b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/4-workload/radosbench.yaml @@ -0,0 +1,40 @@ +meta: +- desc: | + run randomized correctness test for rados operations + generate write load with rados bench +stress-tasks: +- full_sequential: + - radosbench: + clients: [client.0] + time: 150 + - radosbench: + clients: [client.0] + time: 150 + - radosbench: + clients: [client.0] + time: 150 + - radosbench: + clients: [client.0] + time: 150 + - radosbench: + clients: [client.0] + time: 150 + - radosbench: + clients: [client.0] + time: 150 + - radosbench: + clients: [client.0] + time: 150 + - radosbench: + clients: [client.0] + time: 150 + - radosbench: + clients: [client.0] + time: 150 + - radosbench: + clients: [client.0] + time: 150 + - radosbench: + clients: [client.0] + time: 150 +- print: "**** done radosbench 7-workload" diff --git a/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/4-workload/rbd-cls.yaml b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/4-workload/rbd-cls.yaml new file mode 100644 index 000000000..f8cc4d8ac --- /dev/null +++ b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/4-workload/rbd-cls.yaml @@ -0,0 +1,10 @@ +meta: +- desc: | + run basic cls tests for rbd +stress-tasks: +- workunit: + branch: luminous + clients: + client.0: + - cls/test_cls_rbd.sh +- print: "**** done cls/test_cls_rbd.sh 5-workload" diff --git a/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/4-workload/rbd-import-export.yaml b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/4-workload/rbd-import-export.yaml new file mode 100644 index 000000000..30a677af6 --- /dev/null +++ b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/4-workload/rbd-import-export.yaml @@ -0,0 +1,12 @@ +meta: +- desc: | + run basic import/export cli tests for rbd +stress-tasks: +- workunit: + 
branch: luminous + clients: + client.0: + - rbd/import_export.sh + env: + RBD_CREATE_ARGS: --new-format +- print: "**** done rbd/import_export.sh 5-workload" diff --git a/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/4-workload/rbd_api.yaml b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/4-workload/rbd_api.yaml new file mode 100644 index 000000000..9079aa33b --- /dev/null +++ b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/4-workload/rbd_api.yaml @@ -0,0 +1,10 @@ +meta: +- desc: | + librbd C and C++ api tests +stress-tasks: +- workunit: + branch: luminous + clients: + client.0: + - rbd/test_librbd.sh +- print: "**** done rbd/test_librbd.sh 7-workload" diff --git a/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/4-workload/readwrite.yaml b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/4-workload/readwrite.yaml new file mode 100644 index 000000000..41e34d6d7 --- /dev/null +++ b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/4-workload/readwrite.yaml @@ -0,0 +1,16 @@ +meta: +- desc: | + randomized correctness test for rados operations on a replicated pool, + using only reads, writes, and deletes +stress-tasks: +- full_sequential: + - rados: + clients: [client.0] + ops: 4000 + objects: 500 + write_append_excl: false + op_weights: + read: 45 + write: 45 + delete: 10 +- print: "**** done rados/readwrite 5-workload" diff --git a/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/4-workload/snaps-few-objects.yaml b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/4-workload/snaps-few-objects.yaml new file mode 100644 index 000000000..f56d0de0f --- /dev/null +++ b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/4-workload/snaps-few-objects.yaml @@ -0,0 +1,18 @@ +meta: +- desc: | + randomized correctness test for rados operations on a replicated pool with snapshot operations +stress-tasks: +- full_sequential: + - rados: + clients: [client.0] + ops: 4000 + objects: 50 + write_append_excl: false + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 +- print: "**** done rados/snaps-few-objects 5-workload" diff --git a/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/5-finish-upgrade.yaml b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/5-finish-upgrade.yaml new file mode 100644 index 000000000..9d5a96c6c --- /dev/null +++ b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/5-finish-upgrade.yaml @@ -0,0 +1,14 @@ +tasks: +- install.upgrade: + osd.3: + client.0: +- ceph.restart: + daemons: [osd.3, osd.4, osd.5] + wait-for-healthy: false + wait-for-osds-up: true +- exec: + osd.0: + - ceph osd set pglog_hardlimit + - ceph osd dump --format=json-pretty | grep "flags" +- print: "**** try to set pglog_hardlimit again, should succeed" + diff --git a/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/7-final-workload/+ b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/7-final-workload/+ new file mode 100644 index 000000000..e69de29bb diff --git a/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/7-final-workload/.qa b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/7-final-workload/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/7-final-workload/.qa @@ -0,0 +1 @@ +../.qa/ \ No newline at end of file diff --git 
a/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/7-final-workload/rbd-python.yaml b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/7-final-workload/rbd-python.yaml new file mode 100644 index 000000000..56ba21d7a --- /dev/null +++ b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/7-final-workload/rbd-python.yaml @@ -0,0 +1,9 @@ +meta: +- desc: | + librbd python api tests +tasks: +- workunit: + clients: + client.0: + - rbd/test_librbd_python.sh +- print: "**** done rbd/test_librbd_python.sh 9-workload" diff --git a/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/7-final-workload/rgw-swift.yaml b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/7-final-workload/rgw-swift.yaml new file mode 100644 index 000000000..76e5d6fc2 --- /dev/null +++ b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/7-final-workload/rgw-swift.yaml @@ -0,0 +1,11 @@ +meta: +- desc: | + swift api tests for rgw +tasks: +- rgw: + client.0: +- print: "**** done rgw 9-workload" +- swift: + client.0: + rgw_server: client.0 +- print: "**** done swift 9-workload" diff --git a/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/7-final-workload/snaps-many-objects.yaml b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/7-final-workload/snaps-many-objects.yaml new file mode 100644 index 000000000..805bf97c3 --- /dev/null +++ b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/7-final-workload/snaps-many-objects.yaml @@ -0,0 +1,16 @@ +meta: +- desc: | + randomized correctness test for rados operations on a replicated pool with snapshot operations +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 500 + write_append_excl: false + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 diff --git a/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/supported b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/supported new file mode 120000 index 000000000..79010c36a --- /dev/null +++ b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/supported @@ -0,0 +1 @@ +../../../../distros/supported \ No newline at end of file diff --git a/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/thrashosds-health.yaml b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/thrashosds-health.yaml new file mode 120000 index 000000000..e0426dbe4 --- /dev/null +++ b/ceph/qa/suites/upgrade/luminous-p2p/luminous-p2p-stress-split/thrashosds-health.yaml @@ -0,0 +1 @@ +../../../../tasks/thrashosds-health.yaml \ No newline at end of file diff --git a/ceph/qa/suites/upgrade/luminous-p2p/supported b/ceph/qa/suites/upgrade/luminous-p2p/supported deleted file mode 120000 index dd0d7f1d5..000000000 --- a/ceph/qa/suites/upgrade/luminous-p2p/supported +++ /dev/null @@ -1 +0,0 @@ -../../../distros/supported/ \ No newline at end of file diff --git a/ceph/qa/tasks/cephfs/filesystem.py b/ceph/qa/tasks/cephfs/filesystem.py index 7f9253aab..e03001225 100644 --- a/ceph/qa/tasks/cephfs/filesystem.py +++ b/ceph/qa/tasks/cephfs/filesystem.py @@ -171,8 +171,10 @@ class CephCluster(object): del self._ctx.ceph['ceph'].conf[subsys][key] write_conf(self._ctx) - def json_asok(self, command, service_type, service_id): - proc = self.mon_manager.admin_socket(service_type, service_id, command) + def json_asok(self, command, service_type, service_id, timeout=None): + if timeout is None: + timeout = 15*60 + proc = self.mon_manager.admin_socket(service_type, 
service_id, command, timeout=timeout) response_data = proc.stdout.getvalue() log.info("_json_asok output: {0}".format(response_data)) if response_data.strip(): @@ -444,10 +446,10 @@ class Filesystem(MDSCluster): self.mon_manager.raw_cluster_cmd("fs", "set", self.name, var, *a) def set_max_mds(self, max_mds): - self.mon_manager.raw_cluster_cmd("fs", "set", self.name, "max_mds", "%d" % max_mds) + self.set_var("max_mds", "%d" % max_mds) def set_allow_dirfrags(self, yes): - self.mon_manager.raw_cluster_cmd("fs", "set", self.name, "allow_dirfrags", str(yes).lower(), '--yes-i-really-mean-it') + self.set_var("allow_dirfrags", str(yes).lower(), '--yes-i-really-mean-it') def get_pgs_per_fs_pool(self): """ @@ -559,8 +561,10 @@ class Filesystem(MDSCluster): def _df(self): return json.loads(self.mon_manager.raw_cluster_cmd("df", "--format=json-pretty")) - def get_mds_map(self): - return self.status().get_fsmap(self.id)['mdsmap'] + def get_mds_map(self, status=None): + if status is None: + status = self.status() + return status.get_fsmap(self.id)['mdsmap'] def get_var(self, var): return self.status().get_fsmap(self.id)['mdsmap'][var] @@ -855,15 +859,15 @@ class Filesystem(MDSCluster): return version - def mds_asok(self, command, mds_id=None): + def mds_asok(self, command, mds_id=None, timeout=None): if mds_id is None: mds_id = self.get_lone_mds_id() - return self.json_asok(command, 'mds', mds_id) + return self.json_asok(command, 'mds', mds_id, timeout=timeout) - def rank_asok(self, command, rank=0): - info = self.get_rank(rank=rank) - return self.json_asok(command, 'mds', info['name']) + def rank_asok(self, command, rank=0, status=None, timeout=None): + info = self.get_rank(rank=rank, status=status) + return self.json_asok(command, 'mds', info['name'], timeout=timeout) def read_cache(self, path, depth=None): cmd = ["dump", "tree", path] @@ -893,9 +897,17 @@ class Filesystem(MDSCluster): while True: status = self.status() if rank is not None: - mds_info = status.get_rank(self.id, rank) - current_state = mds_info['state'] if mds_info else None - log.info("Looked up MDS state for mds.{0}: {1}".format(rank, current_state)) + try: + mds_info = status.get_rank(self.id, rank) + current_state = mds_info['state'] if mds_info else None + log.info("Looked up MDS state for mds.{0}: {1}".format(rank, current_state)) + except: + mdsmap = self.get_mds_map(status=status) + if rank in mdsmap['failed']: + log.info("Waiting for rank {0} to come back.".format(rank)) + current_state = None + else: + raise elif mds_id is not None: # mds_info is None if no daemon with this ID exists in the map mds_info = status.get_mds(mds_id) @@ -1166,6 +1178,9 @@ class Filesystem(MDSCluster): """ return "" + def _make_rank(self, rank): + return "{}:{}".format(self.name, rank) + def _run_tool(self, tool, args, rank=None, quiet=False): # Tests frequently have [client] configuration that jacks up # the objecter log level (unlikely to be interesting here) @@ -1176,7 +1191,7 @@ class Filesystem(MDSCluster): base_args = [os.path.join(self._prefix, tool), '--debug-mds=4', '--debug-objecter=1'] if rank is not None: - base_args.extend(["--rank", "%d" % rank]) + base_args.extend(["--rank", "%s" % str(rank)]) t1 = datetime.datetime.now() r = self.tool_remote.run( @@ -1198,11 +1213,12 @@ class Filesystem(MDSCluster): mds_id = self.mds_ids[0] return self.mds_daemons[mds_id].remote - def journal_tool(self, args, rank=None, quiet=False): + def journal_tool(self, args, rank, quiet=False): """ - Invoke cephfs-journal-tool with the passed arguments, and 
return its stdout + Invoke cephfs-journal-tool with the passed arguments for a rank, and return its stdout """ - return self._run_tool("cephfs-journal-tool", args, rank, quiet) + fs_rank = self._make_rank(rank) + return self._run_tool("cephfs-journal-tool", args, fs_rank, quiet) def table_tool(self, args, quiet=False): """ diff --git a/ceph/qa/tasks/cephfs/fuse_mount.py b/ceph/qa/tasks/cephfs/fuse_mount.py index b121680b0..33bcf8c60 100644 --- a/ceph/qa/tasks/cephfs/fuse_mount.py +++ b/ceph/qa/tasks/cephfs/fuse_mount.py @@ -50,6 +50,7 @@ class FuseMount(CephFSMount): '--', self.mountpoint, ], + timeout=(15*60) ) run_cmd = [ @@ -88,12 +89,14 @@ class FuseMount(CephFSMount): def list_connections(): self.client_remote.run( args=["sudo", "mount", "-t", "fusectl", "/sys/fs/fuse/connections", "/sys/fs/fuse/connections"], - check_status=False + check_status=False, + timeout=(15*60) ) p = self.client_remote.run( args=["ls", "/sys/fs/fuse/connections"], stdout=StringIO(), - check_status=False + check_status=False, + timeout=(15*60) ) if p.exitstatus != 0: return [] @@ -163,7 +166,8 @@ class FuseMount(CephFSMount): ], stdout=StringIO(), stderr=StringIO(), - wait=False + wait=False, + timeout=(15*60) ) try: proc.wait() @@ -202,11 +206,18 @@ class FuseMount(CephFSMount): # Now that we're mounted, set permissions so that the rest of the test will have # unrestricted access to the filesystem mount. - self.client_remote.run( - args=['sudo', 'chmod', '1777', self.mountpoint]) + try: + stderr = StringIO() + self.client_remote.run(args=['sudo', 'chmod', '1777', self.mountpoint], timeout=(15*60), stderr=stderr) + except run.CommandFailedError: + stderr = stderr.getvalue() + if "Read-only file system".lower() in stderr.lower(): + pass + else: + raise def _mountpoint_exists(self): - return self.client_remote.run(args=["ls", "-d", self.mountpoint], check_status=False).exitstatus == 0 + return self.client_remote.run(args=["ls", "-d", self.mountpoint], check_status=False, timeout=(15*60)).exitstatus == 0 def umount(self): try: @@ -218,6 +229,7 @@ class FuseMount(CephFSMount): '-u', self.mountpoint, ], + timeout=(30*60), ) except run.CommandFailedError: log.info('Failed to unmount ceph-fuse on {name}, aborting...'.format(name=self.client_remote.name)) @@ -229,7 +241,7 @@ class FuseMount(CephFSMount): run.Raw(';'), 'ps', 'auxf', - ]) + ], timeout=(60*15)) # abort the fuse mount, killing all hung processes if self._fuse_conn: @@ -252,7 +264,8 @@ class FuseMount(CephFSMount): '-f', self.mountpoint, ], - stderr=stderr + stderr=stderr, + timeout=(60*15) ) except CommandFailedError: if self.is_mounted(): @@ -307,7 +320,8 @@ class FuseMount(CephFSMount): '--', self.mountpoint, ], - stderr=stderr + stderr=stderr, + timeout=(60*5) ) except CommandFailedError: if "No such file or directory" in stderr.getvalue(): @@ -354,6 +368,7 @@ class FuseMount(CephFSMount): '-rf', self.mountpoint, ], + timeout=(60*5) ) def _asok_path(self): @@ -392,15 +407,15 @@ print find_socket("{client_name}") # Find the admin socket p = self.client_remote.run(args=[ - 'python', '-c', pyscript - ], stdout=StringIO()) + 'sudo', 'python2', '-c', pyscript + ], stdout=StringIO(), timeout=(15*60)) asok_path = p.stdout.getvalue().strip() log.info("Found client admin socket at {0}".format(asok_path)) # Query client ID from admin socket p = self.client_remote.run( args=['sudo', self._prefix + 'ceph', '--admin-daemon', asok_path] + args, - stdout=StringIO()) + stdout=StringIO(), timeout=(15*60)) return json.loads(p.stdout.getvalue()) def get_global_id(self): 
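
Note on the FuseMount hunks above: they all follow one pattern — every client_remote.run() call gains an explicit timeout, and the post-mount chmod is allowed to fail only when the client mount has gone read-only. The standalone sketch below illustrates that pattern with the standard subprocess module rather than teuthology's remote API; the helper name, the subprocess substitution, and the temporary-directory invocation are illustrative assumptions, not part of the patch.

    import subprocess
    import tempfile

    READONLY_MARKER = "read-only file system"  # matched case-insensitively, as in the patch

    def chmod_mountpoint(mountpoint, timeout=15 * 60):
        """Best-effort chmod: bounded by a timeout, and a failure is
        swallowed only when stderr reports a read-only filesystem."""
        proc = subprocess.run(
            ["chmod", "1777", mountpoint],
            capture_output=True, text=True, timeout=timeout,
        )
        if proc.returncode != 0:
            if READONLY_MARKER in proc.stderr.lower():
                return  # expected when the mount flipped read-only mid-test
            raise RuntimeError(proc.stderr.strip())

    if __name__ == "__main__":
        with tempfile.TemporaryDirectory() as scratch:
            chmod_mountpoint(scratch)

The kernel_mount.py hunks that follow apply the same timeout discipline to the kernel client helpers.
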
diff --git a/ceph/qa/tasks/cephfs/kernel_mount.py b/ceph/qa/tasks/cephfs/kernel_mount.py index 80271a6eb..4fdbd1b0c 100644 --- a/ceph/qa/tasks/cephfs/kernel_mount.py +++ b/ceph/qa/tasks/cephfs/kernel_mount.py @@ -43,6 +43,7 @@ class KernelMount(CephFSMount): run.Raw('>'), filename, ], + timeout=(5*60), ) def mount(self, mount_path=None, mount_fs_name=None): @@ -60,6 +61,7 @@ class KernelMount(CephFSMount): '--', self.mountpoint, ], + timeout=(5*60), ) if mount_path is None: @@ -84,10 +86,11 @@ class KernelMount(CephFSMount): '-o', opts ], + timeout=(30*60), ) self.client_remote.run( - args=['sudo', 'chmod', '1777', self.mountpoint]) + args=['sudo', 'chmod', '1777', self.mountpoint], timeout=(5*60)) self.mounted = True @@ -99,7 +102,7 @@ class KernelMount(CephFSMount): cmd.append('-f') try: - self.client_remote.run(args=cmd, timeout=(5*60)) + self.client_remote.run(args=cmd, timeout=(15*60)) except Exception as e: self.client_remote.run(args=[ 'sudo', @@ -107,7 +110,7 @@ class KernelMount(CephFSMount): 'lsof', run.Raw(';'), 'ps', 'auxf', - ]) + ], timeout=(15*60)) raise e rproc = self.client_remote.run( @@ -194,6 +197,7 @@ class KernelMount(CephFSMount): '--', self.mountpoint, ], + timeout=(5*60), ) def _find_debug_dir(self): @@ -219,7 +223,7 @@ class KernelMount(CephFSMount): p = self.client_remote.run(args=[ 'sudo', 'python', '-c', pyscript - ], stdout=StringIO()) + ], stdout=StringIO(), timeout=(5*60)) client_id_to_dir = json.loads(p.stdout.getvalue()) try: @@ -241,7 +245,7 @@ class KernelMount(CephFSMount): p = self.client_remote.run(args=[ 'sudo', 'python', '-c', pyscript - ], stdout=StringIO()) + ], stdout=StringIO(), timeout=(5*60)) return p.stdout.getvalue() def get_global_id(self): diff --git a/ceph/qa/tasks/cephfs/test_client_limits.py b/ceph/qa/tasks/cephfs/test_client_limits.py index b06d5123d..1f1d54670 100644 --- a/ceph/qa/tasks/cephfs/test_client_limits.py +++ b/ceph/qa/tasks/cephfs/test_client_limits.py @@ -134,10 +134,10 @@ class TestClientLimits(CephFSTestCase): # Client B tries to stat the file that client A created rproc = self.mount_b.write_background("file1") - # After mds_session_timeout, we should see a health warning (extra lag from + # After session_timeout, we should see a health warning (extra lag from # MDS beacon period) - mds_session_timeout = float(self.fs.get_config("mds_session_timeout")) - self.wait_for_health("MDS_CLIENT_LATE_RELEASE", mds_session_timeout + 10) + session_timeout = self.fs.get_var("session_timeout") + self.wait_for_health("MDS_CLIENT_LATE_RELEASE", session_timeout + 10) # Client B should still be stuck self.assertFalse(rproc.finished) diff --git a/ceph/qa/tasks/cephfs/test_client_recovery.py b/ceph/qa/tasks/cephfs/test_client_recovery.py index 829ca3d5c..2b91cbfe6 100644 --- a/ceph/qa/tasks/cephfs/test_client_recovery.py +++ b/ceph/qa/tasks/cephfs/test_client_recovery.py @@ -30,10 +30,9 @@ class TestClientNetworkRecovery(CephFSTestCase): REQUIRE_ONE_CLIENT_REMOTE = True CLIENTS_REQUIRED = 2 - LOAD_SETTINGS = ["mds_session_timeout", "mds_reconnect_timeout", "ms_max_backoff"] + LOAD_SETTINGS = ["mds_reconnect_timeout", "ms_max_backoff"] # Environment references - mds_session_timeout = None mds_reconnect_timeout = None ms_max_backoff = None @@ -45,6 +44,8 @@ class TestClientNetworkRecovery(CephFSTestCase): I/O after failure. 
""" + session_timeout = self.fs.get_var("session_timeout") + # We only need one client self.mount_b.umount_wait() @@ -67,7 +68,7 @@ class TestClientNetworkRecovery(CephFSTestCase): # ...then it should block self.assertFalse(write_blocked.finished) self.assert_session_state(client_id, "open") - time.sleep(self.mds_session_timeout * 1.5) # Long enough for MDS to consider session stale + time.sleep(session_timeout * 1.5) # Long enough for MDS to consider session stale self.assertFalse(write_blocked.finished) self.assert_session_state(client_id, "stale") @@ -87,10 +88,9 @@ class TestClientRecovery(CephFSTestCase): REQUIRE_KCLIENT_REMOTE = True CLIENTS_REQUIRED = 2 - LOAD_SETTINGS = ["mds_session_timeout", "mds_reconnect_timeout", "ms_max_backoff"] + LOAD_SETTINGS = ["mds_reconnect_timeout", "ms_max_backoff"] # Environment references - mds_session_timeout = None mds_reconnect_timeout = None ms_max_backoff = None @@ -214,6 +214,8 @@ class TestClientRecovery(CephFSTestCase): self.mount_a.create_destroy() def test_stale_caps(self): + session_timeout = self.fs.get_var("session_timeout") + # Capability release from stale session # ===================================== cap_holder = self.mount_a.open_background() @@ -226,7 +228,7 @@ class TestClientRecovery(CephFSTestCase): self.mount_a.kill() try: - # Now, after mds_session_timeout seconds, the waiter should + # Now, after session_timeout seconds, the waiter should # complete their operation when the MDS marks the holder's # session stale. cap_waiter = self.mount_b.write_background() @@ -239,9 +241,9 @@ class TestClientRecovery(CephFSTestCase): cap_waited = b - a log.info("cap_waiter waited {0}s".format(cap_waited)) - self.assertTrue(self.mds_session_timeout / 2.0 <= cap_waited <= self.mds_session_timeout * 2.0, + self.assertTrue(session_timeout / 2.0 <= cap_waited <= session_timeout * 2.0, "Capability handover took {0}, expected approx {1}".format( - cap_waited, self.mds_session_timeout + cap_waited, session_timeout )) cap_holder.stdin.close() @@ -261,6 +263,8 @@ class TestClientRecovery(CephFSTestCase): # Eviction while holding a capability # =================================== + session_timeout = self.fs.get_var("session_timeout") + # Take out a write capability on a file on client A, # and then immediately kill it. 
cap_holder = self.mount_a.open_background() @@ -290,9 +294,9 @@ class TestClientRecovery(CephFSTestCase): log.info("cap_waiter waited {0}s".format(cap_waited)) # This is the check that it happened 'now' rather than waiting # for the session timeout - self.assertLess(cap_waited, self.mds_session_timeout / 2.0, + self.assertLess(cap_waited, session_timeout / 2.0, "Capability handover took {0}, expected less than {1}".format( - cap_waited, self.mds_session_timeout / 2.0 + cap_waited, session_timeout / 2.0 )) cap_holder.stdin.close() @@ -479,6 +483,8 @@ class TestClientRecovery(CephFSTestCase): if not isinstance(self.mount_a, FuseMount): raise SkipTest("Require FUSE client to handle signal STOP/CONT") + session_timeout = self.fs.get_var("session_timeout") + self.mount_a.run_shell(["mkdir", "testdir"]) self.mount_a.run_shell(["touch", "testdir/file1"]) # populate readdir cache @@ -497,7 +503,7 @@ class TestClientRecovery(CephFSTestCase): self.mount_b.client_remote.run(args=["sudo", "kill", "-STOP", mount_b_pid]) self.assert_session_state(mount_b_gid, "open") - time.sleep(self.mds_session_timeout * 1.5) # Long enough for MDS to consider session stale + time.sleep(session_timeout * 1.5) # Long enough for MDS to consider session stale self.assert_session_state(mount_b_gid, "stale") self.mount_a.run_shell(["touch", "testdir/file2"]) diff --git a/ceph/qa/tasks/cephfs/test_damage.py b/ceph/qa/tasks/cephfs/test_damage.py index 380b49c4b..01e9d5803 100644 --- a/ceph/qa/tasks/cephfs/test_damage.py +++ b/ceph/qa/tasks/cephfs/test_damage.py @@ -12,6 +12,7 @@ DAMAGED_ON_START = "damaged_on_start" DAMAGED_ON_LS = "damaged_on_ls" CRASHED = "server crashed" NO_DAMAGE = "no damage" +READONLY = "readonly" FAILED_CLIENT = "client failed" FAILED_SERVER = "server failed" @@ -134,8 +135,8 @@ class TestDamage(CephFSTestCase): mutations = [] # Removals - for obj_id in objects: - if obj_id in [ + for o in objects: + if o in [ # JournalPointers are auto-replaced if missing (same path as upgrade) "400.00000000", # Missing dirfrags for non-system dirs result in empty directory @@ -148,29 +149,37 @@ class TestDamage(CephFSTestCase): expectation = DAMAGED_ON_START log.info("Expectation on rm '{0}' will be '{1}'".format( - obj_id, expectation + o, expectation )) mutations.append(MetadataMutation( - obj_id, - "Delete {0}".format(obj_id), - lambda o=obj_id: self.fs.rados(["rm", o]), + o, + "Delete {0}".format(o), + lambda o=o: self.fs.rados(["rm", o]), expectation )) # Blatant corruptions - mutations.extend([ - MetadataMutation( - o, - "Corrupt {0}".format(o), - lambda o=o: self.fs.rados(["put", o, "-"], stdin_data=junk), - DAMAGED_ON_START - ) for o in data_objects - ]) - - # Truncations for obj_id in data_objects: if obj_id == "500.00000000": + # purge queue corruption results in read-only FS + mutations.append(MetadataMutation( + obj_id, + "Corrupt {0}".format(obj_id), + lambda o=obj_id: self.fs.rados(["put", o, "-"], stdin_data=junk), + READONLY + )) + else: + mutations.append(MetadataMutation( + obj_id, + "Corrupt {0}".format(obj_id), + lambda o=obj_id: self.fs.rados(["put", o, "-"], stdin_data=junk), + DAMAGED_ON_START + )) + + # Truncations + for o in data_objects: + if o == "500.00000000": # The PurgeQueue is allowed to be empty: Journaler interprets # an empty header object as an empty journal. 
expectation = NO_DAMAGE @@ -182,7 +191,7 @@ class TestDamage(CephFSTestCase): o, "Truncate {0}".format(o), lambda o=o: self.fs.rados(["truncate", o, "0"]), - DAMAGED_ON_START + expectation )) # OMAP value corruptions @@ -204,22 +213,22 @@ class TestDamage(CephFSTestCase): ) # OMAP header corruptions - for obj_id in omap_header_objs: - if re.match("60.\.00000000", obj_id) \ - or obj_id in ["1.00000000", "100.00000000", "mds0_sessionmap"]: + for o in omap_header_objs: + if re.match("60.\.00000000", o) \ + or o in ["1.00000000", "100.00000000", "mds0_sessionmap"]: expectation = DAMAGED_ON_START else: expectation = NO_DAMAGE log.info("Expectation on corrupt header '{0}' will be '{1}'".format( - obj_id, expectation + o, expectation )) mutations.append( MetadataMutation( - obj_id, - "Corrupt omap header on {0}".format(obj_id), - lambda o=obj_id: self.fs.rados(["setomapheader", o, junk]), + o, + "Corrupt omap header on {0}".format(o), + lambda o=o: self.fs.rados(["setomapheader", o, junk]), expectation ) ) @@ -314,7 +323,17 @@ class TestDamage(CephFSTestCase): else: log.error("Result: Failed to go damaged on mutation '{0}'".format(mutation.desc)) results[mutation] = FAILED_SERVER - + elif mutation.expectation == READONLY: + proc = self.mount_a.run_shell(["mkdir", "foo"], wait=False) + try: + proc.wait() + except CommandFailedError: + stderr = proc.stderr.getvalue() + log.info(stderr) + if "Read-only file system".lower() in stderr.lower(): + pass + else: + raise else: try: wait([proc], 20) @@ -480,7 +499,7 @@ class TestDamage(CephFSTestCase): # Drop everything from the MDS cache self.mds_cluster.mds_stop() - self.fs.journal_tool(['journal', 'reset']) + self.fs.journal_tool(['journal', 'reset'], 0) self.mds_cluster.mds_fail_restart() self.fs.wait_for_daemons() diff --git a/ceph/qa/tasks/cephfs/test_data_scan.py b/ceph/qa/tasks/cephfs/test_data_scan.py index a2d315768..1e7745541 100644 --- a/ceph/qa/tasks/cephfs/test_data_scan.py +++ b/ceph/qa/tasks/cephfs/test_data_scan.py @@ -362,9 +362,9 @@ class TestDataScan(CephFSTestCase): if False: with self.assertRaises(CommandFailedError): # Normal reset should fail when no objects are present, we'll use --force instead - self.fs.journal_tool(["journal", "reset"]) + self.fs.journal_tool(["journal", "reset"], 0) - self.fs.journal_tool(["journal", "reset", "--force"]) + self.fs.journal_tool(["journal", "reset", "--force"], 0) self.fs.data_scan(["init"]) self.fs.data_scan(["scan_extents", self.fs.get_data_pool_name()], worker_count=workers) self.fs.data_scan(["scan_inodes", self.fs.get_data_pool_name()], worker_count=workers) diff --git a/ceph/qa/tasks/cephfs/test_flush.py b/ceph/qa/tasks/cephfs/test_flush.py index 1f84e4200..ee0b1c92b 100644 --- a/ceph/qa/tasks/cephfs/test_flush.py +++ b/ceph/qa/tasks/cephfs/test_flush.py @@ -44,7 +44,7 @@ class TestFlush(CephFSTestCase): # ...and the journal is truncated to just a single subtreemap from the # newly created segment - summary_output = self.fs.journal_tool(["event", "get", "summary"]) + summary_output = self.fs.journal_tool(["event", "get", "summary"], 0) try: self.assertEqual(summary_output, dedent( @@ -72,7 +72,7 @@ class TestFlush(CephFSTestCase): ).strip()) flush_data = self.fs.mds_asok(["flush", "journal"]) self.assertEqual(flush_data['return_code'], 0) - self.assertEqual(self.fs.journal_tool(["event", "get", "summary"]), + self.assertEqual(self.fs.journal_tool(["event", "get", "summary"], 0), dedent( """ Events by type: diff --git a/ceph/qa/tasks/cephfs/test_forward_scrub.py 
b/ceph/qa/tasks/cephfs/test_forward_scrub.py index 1f80366af..e165780f3 100644 --- a/ceph/qa/tasks/cephfs/test_forward_scrub.py +++ b/ceph/qa/tasks/cephfs/test_forward_scrub.py @@ -242,10 +242,10 @@ class TestForwardScrub(CephFSTestCase): # is all that will be in the InoTable in memory) self.fs.journal_tool(["event", "splice", - "--inode={0}".format(inos["./file2_sixmegs"]), "summary"]) + "--inode={0}".format(inos["./file2_sixmegs"]), "summary"], 0) self.fs.journal_tool(["event", "splice", - "--inode={0}".format(inos["./file3_sixmegs"]), "summary"]) + "--inode={0}".format(inos["./file3_sixmegs"]), "summary"], 0) # Revert to old inotable. for key, value in inotable_copy.iteritems(): diff --git a/ceph/qa/tasks/cephfs/test_fragment.py b/ceph/qa/tasks/cephfs/test_fragment.py index a62ef7432..54a49cea2 100644 --- a/ceph/qa/tasks/cephfs/test_fragment.py +++ b/ceph/qa/tasks/cephfs/test_fragment.py @@ -33,7 +33,6 @@ class TestFragmentation(CephFSTestCase): Apply kwargs as MDS configuration settings, enable dirfrags and restart the MDSs. """ - kwargs['mds_bal_frag'] = "true" for k, v in kwargs.items(): self.ceph_cluster.set_ceph_conf("mds", k, v.__str__()) diff --git a/ceph/qa/tasks/cephfs/test_journal_migration.py b/ceph/qa/tasks/cephfs/test_journal_migration.py index 64fe93980..5f956be93 100644 --- a/ceph/qa/tasks/cephfs/test_journal_migration.py +++ b/ceph/qa/tasks/cephfs/test_journal_migration.py @@ -82,13 +82,14 @@ class TestJournalMigration(CephFSTestCase): )) # Verify that cephfs-journal-tool can now read the rewritten journal - inspect_out = self.fs.journal_tool(["journal", "inspect"]) + inspect_out = self.fs.journal_tool(["journal", "inspect"], 0) if not inspect_out.endswith(": OK"): raise RuntimeError("Unexpected journal-tool result: '{0}'".format( inspect_out )) - self.fs.journal_tool(["event", "get", "json", "--path", "/tmp/journal.json"]) + self.fs.journal_tool(["event", "get", "json", + "--path", "/tmp/journal.json"], 0) p = self.fs.tool_remote.run( args=[ "python", diff --git a/ceph/qa/tasks/cephfs/test_journal_repair.py b/ceph/qa/tasks/cephfs/test_journal_repair.py index 62cbbb068..9832f91a1 100644 --- a/ceph/qa/tasks/cephfs/test_journal_repair.py +++ b/ceph/qa/tasks/cephfs/test_journal_repair.py @@ -77,7 +77,7 @@ class TestJournalRepair(CephFSTestCase): self.assertEqual(self.fs.list_dirfrag(ROOT_INO), []) # Execute the dentry recovery, this should populate the backing store - self.fs.journal_tool(['event', 'recover_dentries', 'list']) + self.fs.journal_tool(['event', 'recover_dentries', 'list'], 0) # Dentries in ROOT_INO are present self.assertEqual(sorted(self.fs.list_dirfrag(ROOT_INO)), sorted(['rootfile_head', 'subdir_head', 'linkdir_head'])) @@ -87,7 +87,7 @@ class TestJournalRepair(CephFSTestCase): # Now check the MDS can read what we wrote: truncate the journal # and start the mds. 
- self.fs.journal_tool(['journal', 'reset']) + self.fs.journal_tool(['journal', 'reset'], 0) self.fs.mds_fail_restart() self.fs.wait_for_daemons() @@ -265,10 +265,10 @@ class TestJournalRepair(CephFSTestCase): self.fs.mds_stop(active_mds_names[0]) self.fs.mds_fail(active_mds_names[0]) # Invoke recover_dentries quietly, because otherwise log spews millions of lines - self.fs.journal_tool(["event", "recover_dentries", "summary"], rank=0, quiet=True) - self.fs.journal_tool(["event", "recover_dentries", "summary"], rank=1, quiet=True) + self.fs.journal_tool(["event", "recover_dentries", "summary"], 0, quiet=True) + self.fs.journal_tool(["event", "recover_dentries", "summary"], 1, quiet=True) self.fs.table_tool(["0", "reset", "session"]) - self.fs.journal_tool(["journal", "reset"], rank=0) + self.fs.journal_tool(["journal", "reset"], 0) self.fs.erase_mds_objects(1) self.fs.mon_manager.raw_cluster_cmd('fs', 'reset', self.fs.name, '--yes-i-really-mean-it') diff --git a/ceph/qa/tasks/cephfs/test_misc.py b/ceph/qa/tasks/cephfs/test_misc.py index 4158538fd..c27278008 100644 --- a/ceph/qa/tasks/cephfs/test_misc.py +++ b/ceph/qa/tasks/cephfs/test_misc.py @@ -7,15 +7,13 @@ import errno import time import json import logging +import time log = logging.getLogger(__name__) class TestMisc(CephFSTestCase): CLIENTS_REQUIRED = 2 - LOAD_SETTINGS = ["mds_session_autoclose"] - mds_session_autoclose = None - def test_getattr_caps(self): """ Check if MDS recognizes the 'mask' parameter of open request. @@ -43,6 +41,16 @@ class TestMisc(CephFSTestCase): self.mount_a.kill_background(p) + def test_root_rctime(self): + """ + Check that the root inode has a non-default rctime on startup. + """ + + t = time.time() + rctime = self.mount_a.getfattr(".", "ceph.dir.rctime") + log.info("rctime = {}".format(rctime)) + self.assertGreaterEqual(rctime, t-10) + def test_fs_new(self): data_pool_name = self.fs.get_data_pool_name() @@ -106,6 +114,8 @@ class TestMisc(CephFSTestCase): only session """ + session_autoclose = self.fs.get_var("session_autoclose") + self.mount_b.umount_wait() ls_data = self.fs.mds_asok(['session', 'ls']) self.assert_session_count(1, ls_data) @@ -113,7 +123,7 @@ class TestMisc(CephFSTestCase): self.mount_a.kill() self.mount_a.kill_cleanup() - time.sleep(self.mds_session_autoclose * 1.5) + time.sleep(session_autoclose * 1.5) ls_data = self.fs.mds_asok(['session', 'ls']) self.assert_session_count(1, ls_data) @@ -128,7 +138,7 @@ class TestMisc(CephFSTestCase): self.mount_a.kill() self.mount_a.kill_cleanup() - time.sleep(self.mds_session_autoclose * 1.5) + time.sleep(session_autoclose * 1.5) ls_data = self.fs.mds_asok(['session', 'ls']) self.assert_session_count(1, ls_data) @@ -202,3 +212,75 @@ class TestMisc(CephFSTestCase): ratio = raw_avail / fs_avail assert 0.9 < ratio < 1.1 + + def _run_drop_cache_cmd(self, timeout, use_tell): + drop_res = None + if use_tell: + mds_id = self.fs.get_lone_mds_id() + drop_res = json.loads( + self.fs.mon_manager.raw_cluster_cmd("tell", "mds.{0}".format(mds_id), + "cache", "drop", str(timeout))) + else: + drop_res = self.fs.mds_asok(["cache", "drop", str(timeout)]) + return drop_res + + def _drop_cache_command(self, timeout, use_tell=True): + self.mount_b.umount_wait() + ls_data = self.fs.mds_asok(['session', 'ls']) + self.assert_session_count(1, ls_data) + + # create some files + self.mount_a.create_n_files("dc-dir/dc-file", 1000) + # drop cache + drop_res = self._run_drop_cache_cmd(timeout, use_tell) + + self.assertTrue(drop_res['client_recall']['return_code'] == 0) + 
self.assertTrue(drop_res['flush_journal']['return_code'] == 0) + + def _drop_cache_command_timeout(self, timeout, use_tell=True): + self.mount_b.umount_wait() + ls_data = self.fs.mds_asok(['session', 'ls']) + self.assert_session_count(1, ls_data) + + # create some files + self.mount_a.create_n_files("dc-dir/dc-file-t", 1000) + + # simulate client death and try drop cache + self.mount_a.kill() + drop_res = self._run_drop_cache_cmd(timeout, use_tell) + + self.assertTrue(drop_res['client_recall']['return_code'] == -errno.ETIMEDOUT) + self.assertTrue(drop_res['flush_journal']['return_code'] == 0) + + self.mount_a.kill_cleanup() + self.mount_a.mount() + self.mount_a.wait_until_mounted() + + def test_drop_cache_command_asok(self): + """ + Basic test for checking drop cache command using admin socket. + Note that the cache size post trimming is not checked here. + """ + self._drop_cache_command(10, use_tell=False) + + def test_drop_cache_command_tell(self): + """ + Basic test for checking drop cache command using tell interface. + Note that the cache size post trimming is not checked here. + """ + self._drop_cache_command(10) + + def test_drop_cache_command_timeout_asok(self): + """ + Check drop cache command with non-responding client using admin + socket. Note that the cache size post trimming is not checked here. + """ + self._drop_cache_command_timeout(5, use_tell=False) + + def test_drop_cache_command_timeout_tell(self): + """ + Check drop cache command with non-responding client using tell + interface. Note that the cache size post trimming is not checked + here. + """ + self._drop_cache_command_timeout(5) diff --git a/ceph/qa/tasks/cephfs/test_recovery_pool.py b/ceph/qa/tasks/cephfs/test_recovery_pool.py index 097342a9d..97049b9c0 100644 --- a/ceph/qa/tasks/cephfs/test_recovery_pool.py +++ b/ceph/qa/tasks/cephfs/test_recovery_pool.py @@ -141,10 +141,6 @@ class TestRecoveryPool(CephFSTestCase): self.fs.mon_manager.raw_cluster_cmd('fs', 'reset', self.fs.name, '--yes-i-really-mean-it') - def get_state(mds_id): - info = self.mds_cluster.get_mds_info(mds_id) - return info['state'] if info is not None else None - self.fs.table_tool([self.fs.name + ":0", "reset", "session"]) self.fs.table_tool([self.fs.name + ":0", "reset", "snap"]) self.fs.table_tool([self.fs.name + ":0", "reset", "inode"]) @@ -153,7 +149,7 @@ class TestRecoveryPool(CephFSTestCase): if False: with self.assertRaises(CommandFailedError): # Normal reset should fail when no objects are present, we'll use --force instead - self.fs.journal_tool(["journal", "reset"]) + self.fs.journal_tool(["journal", "reset"], 0) self.fs.mds_stop() self.fs.data_scan(['scan_extents', '--alternate-pool', @@ -163,22 +159,18 @@ class TestRecoveryPool(CephFSTestCase): recovery_pool, '--filesystem', self.fs.name, '--force-corrupt', '--force-init', self.fs.get_data_pool_name()]) - self.fs.journal_tool(['--rank=' + self.fs.name + ":0", 'event', - 'recover_dentries', 'list', - '--alternate-pool', recovery_pool]) + self.fs.journal_tool(['event', 'recover_dentries', 'list', + '--alternate-pool', recovery_pool], 0) self.fs.data_scan(['init', '--force-init', '--filesystem', self.fs.name]) self.fs.data_scan(['scan_inodes', '--filesystem', self.fs.name, '--force-corrupt', '--force-init', self.fs.get_data_pool_name()]) - self.fs.journal_tool(['--rank=' + self.fs.name + ":0", 'event', - 'recover_dentries', 'list']) + self.fs.journal_tool(['event', 'recover_dentries', 'list'], 0) - self.fs.journal_tool(['--rank=' + recovery_fs + ":0", 'journal', - 'reset', '--force']) 
- self.fs.journal_tool(['--rank=' + self.fs.name + ":0", 'journal', - 'reset', '--force']) + self.recovery_fs.journal_tool(['journal', 'reset', '--force'], 0) + self.fs.journal_tool(['journal', 'reset', '--force'], 0) self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired', recovery_fs + ":0") @@ -190,12 +182,11 @@ class TestRecoveryPool(CephFSTestCase): self.recovery_fs.mds_restart() self.fs.wait_for_daemons() self.recovery_fs.wait_for_daemons() - for mds_id in self.recovery_fs.mds_ids: - self.fs.mon_manager.raw_cluster_cmd('tell', "mds." + mds_id, + status = self.recovery_fs.status() + for rank in self.recovery_fs.get_ranks(status=status): + self.fs.mon_manager.raw_cluster_cmd('tell', "mds." + rank['name'], 'injectargs', '--debug-mds=20') - self.fs.mon_manager.raw_cluster_cmd('daemon', "mds." + mds_id, - 'scrub_path', '/', - 'recursive', 'repair') + self.fs.rank_asok(['scrub_path', '/', 'recursive', 'repair'], rank=rank['rank'], status=status) log.info(str(self.mds_cluster.status())) # Mount a client diff --git a/ceph/qa/tasks/qemu.py b/ceph/qa/tasks/qemu.py index f597c08d6..b2bca00d8 100644 --- a/ceph/qa/tasks/qemu.py +++ b/ceph/qa/tasks/qemu.py @@ -115,7 +115,7 @@ def generate_iso(ctx, config): (remote,) = ctx.cluster.only(client).remotes.keys() - clone_dir = '{tdir}/clone.{role}'.format(tdir=testdir, role=client) + clone_dir = '{tdir}/qemu_clone.{role}'.format(tdir=testdir, role=client) remote.run(args=refspec.clone(git_url, clone_dir)) src_dir = os.path.dirname(__file__) @@ -212,7 +212,7 @@ def generate_iso(ctx, config): os.path.join(testdir, 'qemu', 'userdata.' + client), os.path.join(testdir, 'qemu', 'metadata.' + client), '{tdir}/qemu/{client}.test.sh'.format(tdir=testdir, client=client), - '{tdir}/clone.{client}'.format(tdir=testdir, client=client), + '{tdir}/qemu_clone.{client}'.format(tdir=testdir, client=client), ], ) diff --git a/ceph/qa/tasks/thrashosds-health.yaml b/ceph/qa/tasks/thrashosds-health.yaml index 111e2d8c4..9a4d35abf 100644 --- a/ceph/qa/tasks/thrashosds-health.yaml +++ b/ceph/qa/tasks/thrashosds-health.yaml @@ -12,4 +12,4 @@ overrides: - \(REQUEST_SLOW\) - \(TOO_FEW_PGS\) - \(MON_DOWN\) - - slow requests + - slow request diff --git a/ceph/qa/tasks/workunit.py b/ceph/qa/tasks/workunit.py index 0a46ade76..17cfaf7f1 100644 --- a/ceph/qa/tasks/workunit.py +++ b/ceph/qa/tasks/workunit.py @@ -410,7 +410,7 @@ def _run_tests(ctx, refspec, role, tests, env, basedir, ) if cleanup: args=['sudo', 'rm', '-rf', '--', scratch_tmp] - remote.run(logger=log.getChild(role), args=args, timeout=(15*60)) + remote.run(logger=log.getChild(role), args=args, timeout=(60*60)) finally: log.info('Stopping %s on %s...', tests, role) args=['sudo', 'rm', '-rf', '--', workunits_file, clonedir] diff --git a/ceph/qa/workunits/ceph-tests/ceph-admin-commands.sh b/ceph/qa/workunits/ceph-tests/ceph-admin-commands.sh index 30e74cce5..4a9f0a66f 100755 --- a/ceph/qa/workunits/ceph-tests/ceph-admin-commands.sh +++ b/ceph/qa/workunits/ceph-tests/ceph-admin-commands.sh @@ -1,12 +1,9 @@ -#!/bin/sh -e +#!/bin/sh -ex -#check ceph health ceph -s -#list pools rados lspools -#lisr rbd images rbd ls -#check that the monitors work +# check that the monitors work ceph osd set nodown ceph osd unset nodown diff --git a/ceph/qa/workunits/mon/test_config_key_caps.sh b/ceph/qa/workunits/mon/test_config_key_caps.sh new file mode 100755 index 000000000..77b4b53b7 --- /dev/null +++ b/ceph/qa/workunits/mon/test_config_key_caps.sh @@ -0,0 +1,201 @@ +#!/usr/bin/env bash + +set -x +set -e + +tmp=$(mktemp -d -p /tmp 
test_mon_config_key_caps.XXXXX) +entities=() + +function cleanup() +{ + set +e + set +x + if [[ -e $tmp/keyring ]] && [[ -e $tmp/keyring.orig ]]; then + grep '\[.*\..*\]' $tmp/keyring.orig > $tmp/entities.orig + for e in $(grep '\[.*\..*\]' $tmp/keyring | \ + diff $tmp/entities.orig - | \ + sed -n 's/^.*\[\(.*\..*\)\]/\1/p'); + do + ceph auth rm $e 2>&1 >& /dev/null + done + fi + #rm -fr $tmp +} + +trap cleanup 0 # cleanup on exit + +function expect_false() +{ + set -x + if "$@"; then return 1; else return 0; fi +} + +# for cleanup purposes +ceph auth export -o $tmp/keyring.orig + +k=$tmp/keyring + +# setup a few keys +ceph config-key ls +ceph config-key set daemon-private/osd.123/test-foo +ceph config-key set mgr/test-foo +ceph config-key set device/test-foo +ceph config-key set test/foo + +allow_aa=client.allow_aa +allow_bb=client.allow_bb +allow_cc=client.allow_cc + +mgr_a=mgr.a +mgr_b=mgr.b +osd_a=osd.100 +osd_b=osd.200 + +prefix_aa=client.prefix_aa +prefix_bb=client.prefix_bb +prefix_cc=client.prefix_cc +match_aa=client.match_aa +match_bb=client.match_bb + +fail_aa=client.fail_aa +fail_bb=client.fail_bb +fail_cc=client.fail_cc +fail_dd=client.fail_dd +fail_ee=client.fail_ee +fail_ff=client.fail_ff +fail_gg=client.fail_gg +fail_writes=client.fail_writes + +ceph auth get-or-create $allow_aa mon 'allow *' +ceph auth get-or-create $allow_bb mon 'allow service config-key rwx' +ceph auth get-or-create $allow_cc mon 'allow command "config-key get"' + +ceph auth get-or-create $mgr_a mon 'allow profile mgr' +ceph auth get-or-create $mgr_b mon 'allow profile mgr' +ceph auth get-or-create $osd_a mon 'allow profile osd' +ceph auth get-or-create $osd_b mon 'allow profile osd' + +ceph auth get-or-create $prefix_aa mon \ + "allow command \"config-key get\" with key prefix client/$prefix_aa" + +cap="allow command \"config-key set\" with key prefix client/" +cap="$cap,allow command \"config-key get\" with key prefix client/$prefix_bb" +ceph auth get-or-create $prefix_bb mon "$cap" + +cap="allow command \"config-key get\" with key prefix client/" +cap="$cap, allow command \"config-key set\" with key prefix client/" +cap="$cap, allow command \"config-key ls\"" +ceph auth get-or-create $prefix_cc mon "$cap" + +cap="allow command \"config-key get\" with key=client/$match_aa/foo" +ceph auth get-or-create $match_aa mon "$cap" +cap="allow command \"config-key get\" with key=client/$match_bb/foo" +cap="$cap,allow command \"config-key set\" with key=client/$match_bb/foo" +ceph auth get-or-create $match_bb mon "$cap" + +ceph auth get-or-create $fail_aa mon 'allow rx' +ceph auth get-or-create $fail_bb mon 'allow r,allow w' +ceph auth get-or-create $fail_cc mon 'allow rw' +ceph auth get-or-create $fail_dd mon 'allow rwx' +ceph auth get-or-create $fail_ee mon 'allow profile bootstrap-rgw' +ceph auth get-or-create $fail_ff mon 'allow profile bootstrap-rbd' +# write commands will require rw; wx is not enough +ceph auth get-or-create $fail_gg mon 'allow service config-key wx' +# read commands will only require 'r'; 'rx' should be enough. 
+ceph auth get-or-create $fail_writes mon 'allow service config-key rx' + +# grab keyring +ceph auth export -o $k + +# keys will all the caps can do whatever +for c in $allow_aa $allow_bb $allow_cc $mgr_a $mgr_b; do + ceph -k $k --name $c config-key get daemon-private/osd.123/test-foo + ceph -k $k --name $c config-key get mgr/test-foo + ceph -k $k --name $c config-key get device/test-foo + ceph -k $k --name $c config-key get test/foo +done + +for c in $osd_a $osd_b; do + ceph -k $k --name $c config-key put daemon-private/$c/test-foo + ceph -k $k --name $c config-key get daemon-private/$c/test-foo + expect_false ceph -k $k --name $c config-key ls + expect_false ceph -k $k --name $c config-key get mgr/test-foo + expect_false ceph -k $k --name $c config-key get device/test-foo + expect_false ceph -k $k --name $c config-key get test/foo +done + +expect_false ceph -k $k --name $osd_a get daemon-private/$osd_b/test-foo +expect_false ceph -k $k --name $osd_b get daemon-private/$osd_a/test-foo + +expect_false ceph -k $k --name $prefix_aa \ + config-key ls +expect_false ceph -k $k --name $prefix_aa \ + config-key get daemon-private/osd.123/test-foo +expect_false ceph -k $k --name $prefix_aa \ + config-key set test/bar +expect_false ceph -k $k --name $prefix_aa \ + config-key set client/$prefix_aa/foo + +# write something so we can read, use a custom entity +ceph -k $k --name $allow_bb config-key set client/$prefix_aa/foo +ceph -k $k --name $prefix_aa config-key get client/$prefix_aa/foo +# check one writes to the other's prefix, the other is able to read +ceph -k $k --name $prefix_bb config-key set client/$prefix_aa/bar +ceph -k $k --name $prefix_aa config-key get client/$prefix_aa/bar + +ceph -k $k --name $prefix_bb config-key set client/$prefix_bb/foo +ceph -k $k --name $prefix_bb config-key get client/$prefix_bb/foo + +expect_false ceph -k $k --name $prefix_bb config-key get client/$prefix_aa/bar +expect_false ceph -k $k --name $prefix_bb config-key ls +expect_false ceph -k $k --name $prefix_bb \ + config-key get daemon-private/osd.123/test-foo +expect_false ceph -k $k --name $prefix_bb config-key get mgr/test-foo +expect_false ceph -k $k --name $prefix_bb config-key get device/test-foo +expect_false ceph -k $k --name $prefix_bb config-key get test/bar +expect_false ceph -k $k --name $prefix_bb config-key set test/bar + +ceph -k $k --name $prefix_cc config-key set client/$match_aa/foo +ceph -k $k --name $prefix_cc config-key set client/$match_bb/foo +ceph -k $k --name $prefix_cc config-key get client/$match_aa/foo +ceph -k $k --name $prefix_cc config-key get client/$match_bb/foo +expect_false ceph -k $k --name $prefix_cc config-key set other/prefix +expect_false ceph -k $k --name $prefix_cc config-key get mgr/test-foo +ceph -k $k --name $prefix_cc config-key ls >& /dev/null + +ceph -k $k --name $match_aa config-key get client/$match_aa/foo +expect_false ceph -k $k --name $match_aa config-key get client/$match_bb/foo +expect_false ceph -k $k --name $match_aa config-key set client/$match_aa/foo +ceph -k $k --name $match_bb config-key get client/$match_bb/foo +ceph -k $k --name $match_bb config-key set client/$match_bb/foo +expect_false ceph -k $k --name $match_bb config-key get client/$match_aa/foo +expect_false ceph -k $k --name $match_bb config-key set client/$match_aa/foo + +keys=(daemon-private/osd.123/test-foo + mgr/test-foo + device/test-foo + test/foo + client/$prefix_aa/foo + client/$prefix_bb/foo + client/$match_aa/foo + client/$match_bb/foo +) +# expect these all to fail accessing 
config-key +for c in $fail_aa $fail_bb $fail_cc \ + $fail_dd $fail_ee $fail_ff \ + $fail_gg; do + for m in get set; do + for key in ${keys[*]} client/$prefix_aa/foo client/$prefix_bb/foo; do + expect_false ceph -k $k --name $c config-key $m $key + done + done +done + +# fail writes but succeed on reads +expect_false ceph -k $k --name $fail_writes config-key set client/$match_aa/foo +expect_false ceph -k $k --name $fail_writes config-key set test/foo +ceph -k $k --name $fail_writes config-key ls +ceph -k $k --name $fail_writes config-key get client/$match_aa/foo +ceph -k $k --name $fail_writes config-key get daemon-private/osd.123/test-foo + +echo "OK" diff --git a/ceph/qa/workunits/rados/test_librados_build.sh b/ceph/qa/workunits/rados/test_librados_build.sh index 43ded25b2..3aaaec7eb 100755 --- a/ceph/qa/workunits/rados/test_librados_build.sh +++ b/ceph/qa/workunits/rados/test_librados_build.sh @@ -20,8 +20,8 @@ hello_world_cpp " BINARIES="${BINARIES_TO_RUN}hello_radosstriper_cpp " -DL_PREFIX="http://git.ceph.com/?p=ceph.git;a=blob_plain;f=examples/librados/" -#DL_PREFIX="https://raw.githubusercontent.com/ceph/ceph/master/examples/librados/" +DL_PREFIX="http://git.ceph.com/?p=ceph.git;a=blob_plain;hb=luminous;f=examples/librados/" +#DL_PREFIX="https://raw.githubusercontent.com/ceph/ceph/luminous/examples/librados/" DESTDIR=$(pwd) function cleanup () { diff --git a/ceph/qa/workunits/rbd/run_devstack_tempest.sh b/ceph/qa/workunits/rbd/run_devstack_tempest.sh index 7ee21f09f..65a45d8b7 100755 --- a/ceph/qa/workunits/rbd/run_devstack_tempest.sh +++ b/ceph/qa/workunits/rbd/run_devstack_tempest.sh @@ -1,7 +1,7 @@ #!/bin/bash -ex -STACK_BRANCH=stable/pike -TEMPEST_BRANCH=17.2.0 +STACK_BRANCH=stable/rocky +TEMPEST_BRANCH=19.0.0 STACK_USER=${STACK_USER:-stack} STACK_GROUP=${STACK_GROUP:-stack} diff --git a/ceph/qa/workunits/suites/cephfs_journal_tool_smoke.sh b/ceph/qa/workunits/suites/cephfs_journal_tool_smoke.sh index 60e914965..7e4ad3bd3 100755 --- a/ceph/qa/workunits/suites/cephfs_journal_tool_smoke.sh +++ b/ceph/qa/workunits/suites/cephfs_journal_tool_smoke.sh @@ -3,7 +3,7 @@ set -e set -x -export BIN="${BIN:-cephfs-journal-tool}" +export BIN="${BIN:-cephfs-journal-tool --rank=cephfs:0}" export JOURNAL_FILE=/tmp/journal.bin export JSON_OUTPUT=/tmp/json.tmp export BINARY_OUTPUT=/tmp/binary.tmp diff --git a/ceph/run-make-check.sh b/ceph/run-make-check.sh index 078345422..2244e5ea5 100755 --- a/ceph/run-make-check.sh +++ b/ceph/run-make-check.sh @@ -13,8 +13,31 @@ # # -# Return MAX(1, (number of processors / 2)) by default or NPROC +# To just look at what this script will do, run it like this: # +# $ DRY_RUN=echo ./run-make-check.sh +# + +set -e + +trap clean_up_after_myself EXIT + +ORIGINAL_CCACHE_CONF="$HOME/.ccache/ccache.conf" +SAVED_CCACHE_CONF="$HOME/.run-make-check-saved-ccache-conf" + +function save_ccache_conf() { + test -f $ORIGINAL_CCACHE_CONF && cp $ORIGINAL_CCACHE_CONF $SAVED_CCACHE_CONF || true +} + +function restore_ccache_conf() { + test -f $SAVED_CCACHE_CONF && mv $SAVED_CCACHE_CONF $ORIGINAL_CCACHE_CONF || true +} + +function clean_up_after_myself() { + rm -fr ${CEPH_BUILD_VIRTUALENV:-/tmp}/*virtualenv* + restore_ccache_conf +} + function get_processors() { if test -n "$NPROC" ; then echo $NPROC @@ -54,26 +77,72 @@ function run() { exit 1 fi if [ -n "$install_cmd" ]; then - $DRY_RUN sudo $install_cmd ccache jq $which_pkg + $DRY_RUN sudo $install_cmd ccache $which_pkg else echo "WARNING: Don't know how to install packages" >&2 echo "This probably means distribution $ID is not 
supported by run-make-check.sh" >&2 fi + if ! type ccache > /dev/null 2>&1 ; then + echo "ERROR: ccache could not be installed" + exit 1 + fi + if test -f ./install-deps.sh ; then $DRY_RUN ./install-deps.sh || return 1 + trap clean_up_after_myself EXIT fi # Init defaults after deps are installed. get_processors() depends on coreutils nproc. DEFAULT_MAKEOPTS=${DEFAULT_MAKEOPTS:--j$(get_processors)} BUILD_MAKEOPTS=${BUILD_MAKEOPTS:-$DEFAULT_MAKEOPTS} + test "$BUILD_MAKEOPTS" && echo "make will run with option(s) $BUILD_MAKEOPTS" CHECK_MAKEOPTS=${CHECK_MAKEOPTS:-$DEFAULT_MAKEOPTS} - $DRY_RUN ./do_cmake.sh $@ || return 1 + if type python2 > /dev/null 2>&1 ; then + # gtest-parallel requires Python 2 + CMAKE_PYTHON_OPTS="-DWITH_GTEST_PARALLEL=ON" + else + CMAKE_PYTHON_OPTS="-DWITH_PYTHON2=OFF -DWITH_PYTHON3=ON -DMGR_PYTHON_VERSION=3 -DWITH_GTEST_PARALLEL=OFF" + fi + + CMAKE_BUILD_OPTS="" + + cat <= 1024 + $DRY_RUN ulimit -n $(ulimit -Hn) + if [ $(ulimit -n) -lt 1024 ];then + echo "***ulimit -n too small, better bigger than 1024 for test***" + return 1 + fi + if ! $DRY_RUN ctest $CHECK_MAKEOPTS --output-on-failure; then rm -f ${TMPDIR:-/tmp}/ceph-asok.* return 1 @@ -86,21 +155,14 @@ function main() { echo "with the ability to run commands as root via sudo." fi echo -n "Checking hostname sanity... " - if hostname --fqdn >/dev/null 2>&1 ; then + if $DRY_RUN hostname --fqdn >/dev/null 2>&1 ; then echo "OK" else echo "NOT OK" echo "Please fix 'hostname --fqdn', otherwise 'make check' will fail" return 1 fi - if run "$@" ; then - rm -fr ${CEPH_BUILD_VIRTUALENV:-/tmp}/*virtualenv* - echo "cmake check: successful run on $(git rev-parse HEAD)" - return 0 - else - rm -fr ${CEPH_BUILD_VIRTUALENV:-/tmp}/*virtualenv* - return 1 - fi + run "$@" && echo "make check: successful run on $(git rev-parse HEAD)" } main "$@" diff --git a/ceph/src/.git_version b/ceph/src/.git_version index fc407817f..268e02b63 100644 --- a/ceph/src/.git_version +++ b/ceph/src/.git_version @@ -1,2 +1,2 @@ -177915764b752804194937482a39e95e0ca3de94 -v12.2.10 +26dc3775efc7bb286a1d6d66faee0ba30ea23eee +v12.2.11 diff --git a/ceph/src/auth/AuthSessionHandler.cc b/ceph/src/auth/AuthSessionHandler.cc index ab46b60c5..286e383f6 100644 --- a/ceph/src/auth/AuthSessionHandler.cc +++ b/ceph/src/auth/AuthSessionHandler.cc @@ -30,6 +30,10 @@ AuthSessionHandler *get_auth_session_handler(CephContext *cct, int protocol, Cry switch (protocol) { case CEPH_AUTH_CEPHX: + // if there is no session key, there is no session handler. 
+ if (key.get_type() == CEPH_CRYPTO_NONE) { + return nullptr; + } return new CephxSessionHandler(cct, key, features); case CEPH_AUTH_NONE: return new AuthNoneSessionHandler(cct, key); diff --git a/ceph/src/ceph-create-keys b/ceph/src/ceph-create-keys index c14c02f28..41d76e157 100755 --- a/ceph/src/ceph-create-keys +++ b/ceph/src/ceph-create-keys @@ -91,12 +91,12 @@ def get_key(cluster, mon_id, wait_count=600): pathdir = os.path.dirname(path) if not os.path.exists(pathdir): os.makedirs(pathdir) - os.chmod(pathdir, 0770) + os.chmod(pathdir, 0o770) os.chown(pathdir, get_ceph_uid(), get_ceph_gid()) while wait_count > 0: try: - with file(tmp, 'w') as f: - os.fchmod(f.fileno(), 0600) + with open(tmp, 'w') as f: + os.fchmod(f.fileno(), 0o600) os.fchown(f.fileno(), get_ceph_uid(), get_ceph_gid()) LOG.info('Talking to monitor...') @@ -201,13 +201,13 @@ def bootstrap_key(cluster, type_, wait_count=600): pathdir = os.path.dirname(path) if not os.path.exists(pathdir): os.makedirs(pathdir) - os.chmod(pathdir, 0770) + os.chmod(pathdir, 0o770) os.chown(pathdir, get_ceph_uid(), get_ceph_gid()) while wait_count > 0: try: - with file(tmp, 'w') as f: - os.fchmod(f.fileno(), 0600) + with open(tmp, 'w') as f: + os.fchmod(f.fileno(), 0o600) os.fchown(f.fileno(), get_ceph_uid(), get_ceph_gid()) LOG.info('Talking to monitor...') returncode = subprocess.call( diff --git a/ceph/src/ceph-volume/ceph_volume/api/lvm.py b/ceph/src/ceph-volume/ceph_volume/api/lvm.py index aed4a8f64..bcb54d65b 100644 --- a/ceph/src/ceph-volume/ceph_volume/api/lvm.py +++ b/ceph/src/ceph-volume/ceph_volume/api/lvm.py @@ -466,6 +466,9 @@ def remove_vg(vg_name): """ Removes a volume group. """ + if not vg_name: + logger.warning('Skipping removal of invalid VG name: "%s"', vg_name) + return fail_msg = "Unable to remove vg %s" % vg_name process.run( [ diff --git a/ceph/src/ceph-volume/ceph_volume/devices/lvm/activate.py b/ceph/src/ceph-volume/ceph_volume/devices/lvm/activate.py index 852c314c2..1ad15bc80 100644 --- a/ceph/src/ceph-volume/ceph_volume/devices/lvm/activate.py +++ b/ceph/src/ceph-volume/ceph_volume/devices/lvm/activate.py @@ -63,6 +63,9 @@ def activate_filestore(lvs, no_systemd=False): if not system.device_is_mounted(source, destination=destination): prepare_utils.mount_osd(source, osd_id, is_vdo=is_vdo) + # ensure that the OSD destination is always chowned properly + system.chown(destination) + # always re-do the symlink regardless if it exists, so that the journal # device path that may have changed can be mapped correctly every time destination = '/var/lib/ceph/osd/%s-%s/journal' % (conf.cluster, osd_id) @@ -151,7 +154,10 @@ def activate_bluestore(lvs, no_systemd=False): db_device_path = get_osd_device_path(osd_lv, lvs, 'db', dmcrypt_secret=dmcrypt_secret) wal_device_path = get_osd_device_path(osd_lv, lvs, 'wal', dmcrypt_secret=dmcrypt_secret) - # Once symlinks are removed, the osd dir can be 'primed again. + # Once symlinks are removed, the osd dir can be 'primed again. 
chown first, + # regardless of what currently exists so that ``prime-osd-dir`` can succeed + # even if permissions are somehow messed up + system.chown(osd_path) prime_command = [ 'ceph-bluestore-tool', '--cluster=%s' % conf.cluster, 'prime-osd-dir', '--dev', osd_lv_path, diff --git a/ceph/src/ceph-volume/ceph_volume/devices/lvm/batch.py b/ceph/src/ceph-volume/ceph_volume/devices/lvm/batch.py index cce58b166..76a52f37d 100644 --- a/ceph/src/ceph-volume/ceph_volume/devices/lvm/batch.py +++ b/ceph/src/ceph-volume/ceph_volume/devices/lvm/batch.py @@ -139,14 +139,11 @@ class Batch(object): self.argv = argv def get_devices(self): - all_devices = disk.get_devices() # remove devices with partitions - # XXX Should be optional when getting device info - for device, detail in all_devices.items(): - if detail.get('partitions') != {}: - del all_devices[device] - devices = sorted(all_devices.items(), key=lambda x: (x[0], x[1]['size'])) - return device_formatter(devices) + devices = [(device, details) for device, details in + disk.get_devices().items() if details.get('partitions') == {}] + size_sort = lambda x: (x[0], x[1]['size']) + return device_formatter(sorted(devices, key=size_sort)) def print_help(self): return self._help.format( diff --git a/ceph/src/ceph-volume/ceph_volume/devices/lvm/strategies/bluestore.py b/ceph/src/ceph-volume/ceph_volume/devices/lvm/strategies/bluestore.py index 92dc3a2e9..ee269a394 100644 --- a/ceph/src/ceph-volume/ceph_volume/devices/lvm/strategies/bluestore.py +++ b/ceph/src/ceph-volume/ceph_volume/devices/lvm/strategies/bluestore.py @@ -1,47 +1,28 @@ from __future__ import print_function -import json from ceph_volume.util import disk, prepare from ceph_volume.api import lvm from . import validators +from .strategies import Strategy +from .strategies import MixedStrategy from ceph_volume.devices.lvm.create import Create from ceph_volume.devices.lvm.prepare import Prepare from ceph_volume.util import templates from ceph_volume.exceptions import SizeAllocationError -class SingleType(object): +class SingleType(Strategy): """ Support for all SSDs, or all HDDS """ def __init__(self, devices, args): - self.args = args - self.osds_per_device = args.osds_per_device - self.devices = devices - # TODO: add --fast-devices and --slow-devices so these can be customized - self.hdds = [device for device in devices if device.sys_api['rotational'] == '1'] - self.ssds = [device for device in devices if device.sys_api['rotational'] == '0'] - self.computed = {'osds': [], 'vgs': [], 'filtered_devices': args.filtered_devices} - if self.devices: - self.validate() - self.compute() - else: - self.computed["changed"] = False + super(SingleType, self).__init__(devices, args) + self.validate_compute() @staticmethod def type(): return "bluestore.SingleType" - @property - def total_osds(self): - if self.hdds: - return len(self.hdds) * self.osds_per_device - else: - return len(self.ssds) * self.osds_per_device - - def report_json(self): - print(json.dumps(self.computed, indent=4, sort_keys=True)) - def report_pretty(self): string = "" if self.args.filtered_devices: @@ -141,32 +122,19 @@ class SingleType(object): Create(command).main() -class MixedType(object): +class MixedType(MixedStrategy): def __init__(self, devices, args): - self.args = args - self.devices = devices - self.osds_per_device = args.osds_per_device - # TODO: add --fast-devices and --slow-devices so these can be customized - self.hdds = [device for device in devices if device.sys_api['rotational'] == '1'] - self.ssds = [device for 
device in devices if device.sys_api['rotational'] == '0'] - self.computed = {'osds': [], 'filtered_devices': args.filtered_devices} + super(MixedType, self).__init__(devices, args) self.block_db_size = self.get_block_size() self.system_vgs = lvm.VolumeGroups() self.dbs_needed = len(self.hdds) * self.osds_per_device - if self.devices: - self.validate() - self.compute() - else: - self.computed["changed"] = False + self.validate_compute() @staticmethod def type(): return "bluestore.MixedType" - def report_json(self): - print(json.dumps(self.computed, indent=4, sort_keys=True)) - def get_block_size(self): if self.args.block_db_size: return disk.Size(b=self.args.block_db_size) @@ -319,17 +287,6 @@ class MixedType(object): else: Create(command).main() - def get_common_vg(self): - # find all the vgs associated with the current device - for ssd in self.ssds: - for pv in ssd.pvs_api: - vg = self.system_vgs.get(vg_name=pv.vg_name) - if not vg: - continue - # this should give us just one VG, it would've been caught by - # the validator otherwise - return vg - def validate(self): """ HDDs represent data devices, and solid state devices are for block.db, diff --git a/ceph/src/ceph-volume/ceph_volume/devices/lvm/strategies/filestore.py b/ceph/src/ceph-volume/ceph_volume/devices/lvm/strategies/filestore.py index b94cc6ea3..c01e83721 100644 --- a/ceph/src/ceph-volume/ceph_volume/devices/lvm/strategies/filestore.py +++ b/ceph/src/ceph-volume/ceph_volume/devices/lvm/strategies/filestore.py @@ -1,8 +1,9 @@ from __future__ import print_function -import json from ceph_volume.util import disk, prepare from ceph_volume.api import lvm from . import validators +from .strategies import Strategy +from .strategies import MixedStrategy from ceph_volume.devices.lvm.create import Create from ceph_volume.devices.lvm.prepare import Prepare from ceph_volume.util import templates @@ -20,40 +21,21 @@ def get_journal_size(args): return prepare.get_journal_size(lv_format=False) -class SingleType(object): +class SingleType(Strategy): """ Support for all SSDs, or all HDDs, data and journal LVs will be colocated in the same device """ def __init__(self, devices, args): - self.args = args - self.osds_per_device = args.osds_per_device - self.devices = devices - self.hdds = [device for device in devices if device.sys_api['rotational'] == '1'] - self.ssds = [device for device in devices if device.sys_api['rotational'] == '0'] - self.computed = {'osds': [], 'vgs': [], 'filtered_devices': args.filtered_devices} + super(SingleType, self).__init__(devices, args) self.journal_size = get_journal_size(args) - if self.devices: - self.validate() - self.compute() - else: - self.computed["changed"] = False + self.validate_compute() @staticmethod def type(): return "filestore.SingleType" - @property - def total_osds(self): - if self.hdds: - return len(self.hdds) * self.osds_per_device - else: - return len(self.ssds) * self.osds_per_device - - def report_json(self): - print(json.dumps(self.computed, indent=4, sort_keys=True)) - def report_pretty(self): string = "" if self.args.filtered_devices: @@ -176,7 +158,7 @@ class SingleType(object): Create(command).main() -class MixedType(object): +class MixedType(MixedStrategy): """ Supports HDDs with SSDs, journals will be placed on SSDs, while HDDs will be used fully for data. 
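
Aside on the strategy refactor: the filestore.py and bluestore.py hunks in this patch all make the same move — the duplicated __init__/total_osds/report_json plumbing is dropped from SingleType and MixedType and inherited from the Strategy and MixedStrategy base classes added in strategies.py further down, with validate_compute() replacing the repeated "if self.devices: validate(); compute()" branch. A toy subclass, sketched below under the assumption of plain dicts instead of the real Device objects and argparse namespace, shows how a concrete strategy now only supplies validate() and compute(); the AllFlash class and dict layout are hypothetical.

    import json

    class Strategy(object):
        """Trimmed stand-in for the shared base class in strategies.py."""

        def __init__(self, devices, osds_per_device=1):
            self.devices = devices
            self.osds_per_device = osds_per_device
            self.hdds = [d for d in devices if d['rotational'] == '1']
            self.ssds = [d for d in devices if d['rotational'] == '0']
            self.computed = {'osds': [], 'vgs': []}

        def validate_compute(self):
            # shared replacement for the per-class "if self.devices" branch
            if self.devices:
                self.validate()
                self.compute()
            else:
                self.computed['changed'] = False

        @property
        def total_osds(self):
            return len(self.hdds or self.ssds) * self.osds_per_device

        def report_json(self):
            print(json.dumps(self.computed, indent=4, sort_keys=True))

        def validate(self):
            raise NotImplementedError('validate() must be implemented in a child class')

        def compute(self):
            raise NotImplementedError('compute() must be implemented in a child class')

    class AllFlash(Strategy):
        """Hypothetical strategy: osds_per_device OSD slots per SSD, no journals."""

        def validate(self):
            assert self.ssds and not self.hdds, 'expects solid state devices only'

        def compute(self):
            for ssd in self.ssds:
                self.computed['osds'].extend([ssd['path']] * self.osds_per_device)

    if __name__ == '__main__':
        strategy = AllFlash([{'path': '/dev/sdb', 'rotational': '0'},
                             {'path': '/dev/sdc', 'rotational': '0'}],
                            osds_per_device=2)
        strategy.validate_compute()
        print('total osds:', strategy.total_osds)
        strategy.report_json()
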
@@ -186,36 +168,17 @@ class MixedType(object): """ def __init__(self, devices, args): - self.args = args - self.osds_per_device = args.osds_per_device - self.devices = devices - self.hdds = [device for device in devices if device.sys_api['rotational'] == '1'] - self.ssds = [device for device in devices if device.sys_api['rotational'] == '0'] - self.computed = {'osds': [], 'vg': None, 'filtered_devices': args.filtered_devices} + super(MixedType, self).__init__(devices, args) self.blank_ssds = [] self.journals_needed = len(self.hdds) * self.osds_per_device self.journal_size = get_journal_size(args) self.system_vgs = lvm.VolumeGroups() - if self.devices: - self.validate() - self.compute() - else: - self.computed["changed"] = False + self.validate_compute() @staticmethod def type(): return "filestore.MixedType" - def report_json(self): - print(json.dumps(self.computed, indent=4, sort_keys=True)) - - @property - def total_osds(self): - if self.hdds: - return len(self.hdds) * self.osds_per_device - else: - return len(self.ssds) * self.osds_per_device - def report_pretty(self): string = "" if self.args.filtered_devices: @@ -252,17 +215,6 @@ class MixedType(object): print(string) - def get_common_vg(self): - # find all the vgs associated with the current device - for ssd in self.ssds: - for pv in ssd.pvs_api: - vg = self.system_vgs.get(vg_name=pv.vg_name) - if not vg: - continue - # this should give us just one VG, it would've been caught by - # the validator otherwise - return vg - def validate(self): """ Ensure that the minimum requirements for this type of scenario is diff --git a/ceph/src/ceph-volume/ceph_volume/devices/lvm/strategies/strategies.py b/ceph/src/ceph-volume/ceph_volume/devices/lvm/strategies/strategies.py new file mode 100644 index 000000000..d4ec5a730 --- /dev/null +++ b/ceph/src/ceph-volume/ceph_volume/devices/lvm/strategies/strategies.py @@ -0,0 +1,50 @@ +import json + +class Strategy(object): + + def __init__(self, devices, args): + self.args = args + self.osds_per_device = args.osds_per_device + self.devices = devices + self.hdds = [device for device in devices if device.sys_api['rotational'] == '1'] + self.ssds = [device for device in devices if device.sys_api['rotational'] == '0'] + self.computed = {'osds': [], 'vgs': [], 'filtered_devices': args.filtered_devices} + + def validate_compute(self): + if self.devices: + self.validate() + self.compute() + else: + self.computed["changed"] = False + + def report_json(self): + print(json.dumps(self.computed, indent=4, sort_keys=True)) + + @property + def total_osds(self): + if self.hdds: + return len(self.hdds) * self.osds_per_device + else: + return len(self.ssds) * self.osds_per_device + + # protect against base class instantiation and incomplete implementations. 
+ # We could also use the abc module and implement this as an + # AbstractBaseClass + def compute(self): + raise NotImplementedError('compute() must be implemented in a child class') + + def execute(self): + raise NotImplementedError('execute() must be implemented in a child class') + +class MixedStrategy(Strategy): + + def get_common_vg(self): + # find all the vgs associated with the current device + for ssd in self.ssds: + for pv in ssd.pvs_api: + vg = self.system_vgs.get(vg_name=pv.vg_name) + if not vg: + continue + # this should give us just one VG, it would've been caught by + # the validator otherwise + return vg diff --git a/ceph/src/ceph-volume/ceph_volume/devices/lvm/zap.py b/ceph/src/ceph-volume/ceph_volume/devices/lvm/zap.py index 8e0e3a3c5..328a03615 100644 --- a/ceph/src/ceph-volume/ceph_volume/devices/lvm/zap.py +++ b/ceph/src/ceph-volume/ceph_volume/devices/lvm/zap.py @@ -1,11 +1,14 @@ import argparse +import os import logging from textwrap import dedent from ceph_volume import decorators, terminal, process from ceph_volume.api import lvm as api -from ceph_volume.util import system, encryption, disk +from ceph_volume.util import system, encryption, disk, arg_validators +from ceph_volume.util.device import Device +from ceph_volume.systemd import systemctl logger = logging.getLogger(__name__) mlogger = terminal.MultiLogger(__name__) @@ -39,6 +42,79 @@ def zap_data(path): ]) +def find_associated_devices(osd_id=None, osd_fsid=None): + """ + From an ``osd_id`` and/or an ``osd_fsid``, filter out all the LVs in the + system that match those tag values, further detect if any partitions are + part of the OSD, and then return the set of LVs and partitions (if any). + """ + lv_tags = {} + if osd_id: + lv_tags['ceph.osd_id'] = osd_id + if osd_fsid: + lv_tags['ceph.osd_fsid'] = osd_fsid + lvs = api.Volumes() + lvs.filter(lv_tags=lv_tags) + if not lvs: + raise RuntimeError('Unable to find any LV for zapping OSD: %s' % osd_id or osd_fsid) + + devices_to_zap = ensure_associated_lvs(lvs) + + return [Device(path) for path in set(devices_to_zap) if path] + + +def ensure_associated_lvs(lvs): + """ + Go through each LV and ensure if backing devices (journal, wal, block) + are LVs or partitions, so that they can be accurately reported. + """ + # look for many LVs for each backing type, because it is possible to + # receive a filtering for osd.1, and have multiple failed deployments + # leaving many journals with osd.1 - usually, only a single LV will be + # returned + journal_lvs = lvs._filter(lv_tags={'ceph.type': 'journal'}) + db_lvs = lvs._filter(lv_tags={'ceph.type': 'db'}) + wal_lvs = lvs._filter(lv_tags={'ceph.type': 'wal'}) + backing_devices = [ + (journal_lvs, 'journal'), + (db_lvs, 'block'), + (wal_lvs, 'wal') + ] + + verified_devices = [] + + for lv in lvs: + # go through each lv and append it, otherwise query `blkid` to find + # a physical device. 
Do this for each type (journal,db,wal) regardless + # if they have been processed in the previous LV, so that bad devices + # with the same ID can be caught + for ceph_lvs, _type in backing_devices: + if ceph_lvs: + verified_devices.extend([l.lv_path for l in ceph_lvs]) + continue + + # must be a disk partition, by querying blkid by the uuid we are + # ensuring that the device path is always correct + try: + device_uuid = lv.tags['ceph.%s_uuid' % _type] + except KeyError: + # Bluestore will not have ceph.journal_uuid, and Filestore + # will not not have ceph.db_uuid + continue + + osd_device = disk.get_device_from_partuuid(device_uuid) + if not osd_device: + # if the osd_device is not found by the partuuid, then it is + # not possible to ensure this device exists anymore, so skip it + continue + verified_devices.append(osd_device) + + verified_devices.append(lv.lv_path) + + # reduce the list from all the duplicates that were added + return list(set(verified_devices)) + + class Zap(object): help = 'Removes all data and filesystems from a logical volume or partition.' @@ -59,70 +135,128 @@ class Zap(object): if dmcrypt and dmcrypt_uuid: self.dmcrypt_close(dmcrypt_uuid) + def zap_lv(self, device): + """ + Device examples: vg-name/lv-name, /dev/vg-name/lv-name + Requirements: Must be a logical volume (LV) + """ + lv = api.get_lv(lv_name=device.lv_name, vg_name=device.vg_name) + self.unmount_lv(lv) + + wipefs(device.abspath) + zap_data(device.abspath) + + if self.args.destroy: + lvs = api.Volumes() + lvs.filter(vg_name=device.vg_name) + if len(lvs) <= 1: + mlogger.info('Only 1 LV left in VG, will proceed to destroy volume group %s', device.vg_name) + api.remove_vg(device.vg_name) + else: + mlogger.info('More than 1 LV left in VG, will proceed to destroy LV only') + mlogger.info('Removing LV because --destroy was given: %s', device.abspath) + api.remove_lv(device.abspath) + elif lv: + # just remove all lvm metadata, leaving the LV around + lv.clear_tags() + + def zap_partition(self, device): + """ + Device example: /dev/sda1 + Requirements: Must be a partition + """ + if device.is_encrypted: + # find the holder + holders = [ + '/dev/%s' % holder for holder in device.sys_api.get('holders', []) + ] + for mapper_uuid in os.listdir('/dev/mapper'): + mapper_path = os.path.join('/dev/mapper', mapper_uuid) + if os.path.realpath(mapper_path) in holders: + self.dmcrypt_close(mapper_uuid) + + if system.device_is_mounted(device.abspath): + mlogger.info("Unmounting %s", device.abspath) + system.unmount(device.abspath) + + wipefs(device.abspath) + zap_data(device.abspath) + + if self.args.destroy: + mlogger.info("Destroying partition since --destroy was used: %s" % device.abspath) + disk.remove_partition(device) + + def zap_lvm_member(self, device): + """ + An LVM member may have more than one LV and or VG, for example if it is + a raw device with multiple partitions each belonging to a different LV + + Device example: /dev/sda + Requirements: An LV or VG present in the device, making it an LVM member + """ + for lv in device.lvs: + self.zap_lv(Device(lv.lv_path)) + + + def zap_raw_device(self, device): + """ + Any whole (raw) device passed in as input will be processed here, + checking for LVM membership and partitions (if any). 
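The encrypted-partition handling in zap_partition() above hinges on matching entries under /dev/mapper against the partition's kernel holders by real path. A rough standalone sketch of that lookup; the function name and defaults are illustrative, not the ceph-volume API:

    import os

    def find_dm_holder(partition_holders, mapper_dir='/dev/mapper'):
        """Return the device-mapper name (e.g. a dmcrypt uuid) backing the
        partition, or None if it has no device-mapper holder."""
        holder_paths = ['/dev/%s' % holder for holder in partition_holders]  # e.g. ['/dev/dm-0']
        for name in os.listdir(mapper_dir):
            if os.path.realpath(os.path.join(mapper_dir, name)) in holder_paths:
                return name
        return None

    # Usage sketch: if find_dm_holder(['dm-0']) returns a name, that mapping is
    # closed (dmcrypt_close) before the partition is unmounted and wiped.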
+ + Device example: /dev/sda + Requirements: None + """ + if not self.args.destroy: + # the use of dd on a raw device causes the partition table to be + # destroyed + mlogger.warning( + '--destroy was not specified, but zapping a whole device will remove the partition table' + ) + + # look for partitions and zap those + for part_name in device.sys_api.get('partitions', {}).keys(): + self.zap_partition(Device('/dev/%s' % part_name)) + + wipefs(device.abspath) + zap_data(device.abspath) + @decorators.needs_root - def zap(self, args): - for device in args.devices: - if disk.is_mapper_device(device): + def zap(self, devices=None): + devices = devices or self.args.devices + + for device in devices: + mlogger.info("Zapping: %s", device.abspath) + if device.is_mapper: terminal.error("Refusing to zap the mapper device: {}".format(device)) raise SystemExit(1) - lv = api.get_lv_from_argument(device) - if lv: - # we are zapping a logical volume - path = lv.lv_path - self.unmount_lv(lv) - else: - # we are zapping a partition - #TODO: ensure device is a partition - path = device - # check to if it is encrypted to close - partuuid = disk.get_partuuid(device) - if encryption.status("/dev/mapper/{}".format(partuuid)): - dmcrypt_uuid = partuuid - self.dmcrypt_close(dmcrypt_uuid) - - mlogger.info("Zapping: %s", path) - - # check if there was a pv created with the - # name of device - pvs = api.PVolumes() - pvs.filter(pv_name=device) - vgs = set([pv.vg_name for pv in pvs]) - for pv in pvs: - vg_name = pv.vg_name - lv = None - if pv.lv_uuid: - lv = api.get_lv(vg_name=vg_name, lv_uuid=pv.lv_uuid) - - if lv: - self.unmount_lv(lv) - - if args.destroy: - for vg_name in vgs: - mlogger.info("Destroying volume group %s because --destroy was given", vg_name) - api.remove_vg(vg_name) - if not lv: - mlogger.info("Destroying physical volume %s because --destroy was given", device) - api.remove_pv(device) - - wipefs(path) - zap_data(path) - - if lv and not pvs: - if args.destroy: - lvs = api.Volumes() - lvs.filter(vg_name=lv.vg_name) - if len(lvs) <= 1: - mlogger.info('Only 1 LV left in VG, will proceed to destroy volume group %s', lv.vg_name) - api.remove_vg(lv.vg_name) - else: - mlogger.info('More than 1 LV left in VG, will proceed to destroy LV only') - mlogger.info('Removing LV because --destroy was given: %s', lv) - api.remove_lv(lv) - else: - # just remove all lvm metadata, leaving the LV around - lv.clear_tags() - - terminal.success("Zapping successful for: %s" % ", ".join(args.devices)) + if device.is_lvm_member: + self.zap_lvm_member(device) + if device.is_lv: + self.zap_lv(device) + if device.is_partition: + self.zap_partition(device) + if device.is_device: + self.zap_raw_device(device) + + if self.args.devices: + terminal.success( + "Zapping successful for: %s" % ", ".join([str(d) for d in self.args.devices]) + ) + else: + terminal.success( + "Zapping successful for OSD: %s" % self.args.osd_id or self.args.osd_fsid + ) + + @decorators.needs_root + def zap_osd(self): + if self.args.osd_id: + osd_is_running = systemctl.osd_is_active(self.args.osd_id) + if osd_is_running: + mlogger.error("OSD ID %s is running, stop it with:" % self.args.osd_id) + mlogger.error("systemctl stop ceph-osd@%s" % self.args.osd_id) + raise SystemExit("Unable to zap devices associated with OSD ID: %s" % self.args.osd_id) + devices = find_associated_devices(self.args.osd_id, self.args.osd_fsid) + self.zap(devices) def dmcrypt_close(self, dmcrypt_uuid): dmcrypt_path = "/dev/mapper/{}".format(dmcrypt_uuid) @@ -155,6 +289,14 @@ class 
Zap(object): ceph-volume lvm zap /dev/sda /dev/sdb /db/sdc + Zapping devices associated with an OSD ID: + + ceph-volume lvm zap --osd-id 1 + + Optionally include the OSD FSID + + ceph-volume lvm zap --osd-id 1 --osd-fsid 55BD4219-16A7-4037-BC20-0F158EFCC83D + If the --destroy flag is given and you are zapping a raw device or partition then all vgs and lvs that exist on that raw device or partition will be destroyed. @@ -179,17 +321,35 @@ class Zap(object): 'devices', metavar='DEVICES', nargs='*', + type=arg_validators.ValidDevice(gpt_ok=True), default=[], help='Path to one or many lv (as vg/lv), partition (as /dev/sda1) or device (as /dev/sda)' ) + parser.add_argument( '--destroy', action='store_true', default=False, help='Destroy all volume groups and logical volumes if you are zapping a raw device or partition', ) + + parser.add_argument( + '--osd-id', + help='Specify an OSD ID to detect associated devices for zapping', + ) + + parser.add_argument( + '--osd-fsid', + help='Specify an OSD FSID to detect associated devices for zapping', + ) + if len(self.argv) == 0: print(sub_command_help) return - args = parser.parse_args(self.argv) - self.zap(args) + + self.args = parser.parse_args(self.argv) + + if self.args.osd_id or self.args.osd_fsid: + self.zap_osd() + else: + self.zap() diff --git a/ceph/src/ceph-volume/ceph_volume/inventory/main.py b/ceph/src/ceph-volume/ceph_volume/inventory/main.py index f4c732cab..1d821b602 100644 --- a/ceph/src/ceph-volume/ceph_volume/inventory/main.py +++ b/ceph/src/ceph-volume/ceph_volume/inventory/main.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- import argparse -import pprint +import json from ceph_volume.util.device import Devices, Device @@ -39,8 +39,8 @@ class Inventory(object): def format_report(self, inventory): if self.args.format == 'json': - print(inventory.json_report()) + print(json.dumps(inventory.json_report())) elif self.args.format == 'json-pretty': - pprint.pprint(inventory.json_report()) + print(json.dumps(inventory.json_report(), indent=4, sort_keys=True)) else: print(inventory.pretty_report()) diff --git a/ceph/src/ceph-volume/ceph_volume/tests/conftest.py b/ceph/src/ceph-volume/ceph_volume/tests/conftest.py index cf7dd5d8f..8ec99bb84 100644 --- a/ceph/src/ceph-volume/ceph_volume/tests/conftest.py +++ b/ceph/src/ceph-volume/ceph_volume/tests/conftest.py @@ -192,10 +192,11 @@ def tmpfile(tmpdir): @pytest.fixture def device_info(monkeypatch): - def apply(devices=None, lsblk=None, lv=None, blkid=None): + def apply(devices=None, lsblk=None, lv=None, blkid=None, udevadm=None): devices = devices if devices else {} lsblk = lsblk if lsblk else {} blkid = blkid if blkid else {} + udevadm = udevadm if udevadm else {} lv = Factory(**lv) if lv else None monkeypatch.setattr("ceph_volume.sys_info.devices", {}) monkeypatch.setattr("ceph_volume.util.device.disk.get_devices", lambda: devices) @@ -206,4 +207,5 @@ def device_info(monkeypatch): monkeypatch.setattr("ceph_volume.util.device.lvm.get_lv", lambda vg_name, lv_uuid: lv) monkeypatch.setattr("ceph_volume.util.device.disk.lsblk", lambda path: lsblk) monkeypatch.setattr("ceph_volume.util.device.disk.blkid", lambda path: blkid) + monkeypatch.setattr("ceph_volume.util.disk.udevadm_property", lambda *a, **kw: udevadm) return apply diff --git a/ceph/src/ceph-volume/ceph_volume/tests/devices/lvm/test_batch.py b/ceph/src/ceph-volume/ceph_volume/tests/devices/lvm/test_batch.py index d1f9046a0..50ef61b83 100644 --- a/ceph/src/ceph-volume/ceph_volume/tests/devices/lvm/test_batch.py +++ 
b/ceph/src/ceph-volume/ceph_volume/tests/devices/lvm/test_batch.py @@ -1,6 +1,13 @@ from ceph_volume.devices.lvm import batch +class TestBatchSmoke(object): + + def test_batch_instance(self, is_root): + b = batch.Batch([]) + b.main() + + class TestFilterDevices(object): def test_filter_used_device(self, factory): diff --git a/ceph/src/ceph-volume/ceph_volume/tests/devices/lvm/test_zap.py b/ceph/src/ceph-volume/ceph_volume/tests/devices/lvm/test_zap.py new file mode 100644 index 000000000..55daa4f87 --- /dev/null +++ b/ceph/src/ceph-volume/ceph_volume/tests/devices/lvm/test_zap.py @@ -0,0 +1,153 @@ +import pytest +from ceph_volume.api import lvm as api +from ceph_volume.devices.lvm import zap + + +class TestFindAssociatedDevices(object): + + def test_no_lvs_found_that_match_id(self, volumes, monkeypatch, device_info): + monkeypatch.setattr(zap.api, 'Volumes', lambda: volumes) + tags = 'ceph.osd_id=9,ceph.journal_uuid=x,ceph.type=data' + osd = api.Volume( + lv_name='volume1', lv_uuid='y', lv_path='/dev/VolGroup/lv', vg_name='vg', lv_tags=tags) + volumes.append(osd) + with pytest.raises(RuntimeError): + zap.find_associated_devices(osd_id=10) + + def test_no_lvs_found_that_match_fsid(self, volumes, monkeypatch, device_info): + monkeypatch.setattr(zap.api, 'Volumes', lambda: volumes) + tags = 'ceph.osd_id=9,ceph.osd_fsid=asdf-lkjh,ceph.journal_uuid=x,ceph.type=data' + osd = api.Volume( + lv_name='volume1', lv_uuid='y', lv_path='/dev/VolGroup/lv', vg_name='vg', lv_tags=tags) + volumes.append(osd) + with pytest.raises(RuntimeError): + zap.find_associated_devices(osd_fsid='aaaa-lkjh') + + def test_no_lvs_found_that_match_id_fsid(self, volumes, monkeypatch, device_info): + monkeypatch.setattr(zap.api, 'Volumes', lambda: volumes) + tags = 'ceph.osd_id=9,ceph.osd_fsid=asdf-lkjh,ceph.journal_uuid=x,ceph.type=data' + osd = api.Volume( + lv_name='volume1', lv_uuid='y', lv_path='/dev/VolGroup/lv', vg_name='vg', lv_tags=tags) + volumes.append(osd) + with pytest.raises(RuntimeError): + zap.find_associated_devices(osd_id='9', osd_fsid='aaaa-lkjh') + + def test_no_ceph_lvs_found(self, volumes, monkeypatch): + monkeypatch.setattr(zap.api, 'Volumes', lambda: volumes) + osd = api.Volume( + lv_name='volume1', lv_uuid='y', lv_path='/dev/VolGroup/lv', lv_tags='') + volumes.append(osd) + with pytest.raises(RuntimeError): + zap.find_associated_devices(osd_id=100) + + def test_lv_is_matched_id(self, volumes, monkeypatch): + monkeypatch.setattr(zap.api, 'Volumes', lambda: volumes) + tags = 'ceph.osd_id=0,ceph.journal_uuid=x,ceph.type=data' + osd = api.Volume( + lv_name='volume1', lv_uuid='y', vg_name='', lv_path='/dev/VolGroup/lv', lv_tags=tags) + volumes.append(osd) + result = zap.find_associated_devices(osd_id='0') + assert result[0].abspath == '/dev/VolGroup/lv' + + def test_lv_is_matched_fsid(self, volumes, monkeypatch): + monkeypatch.setattr(zap.api, 'Volumes', lambda: volumes) + tags = 'ceph.osd_id=0,ceph.osd_fsid=asdf-lkjh,ceph.journal_uuid=x,ceph.type=data' + osd = api.Volume( + lv_name='volume1', lv_uuid='y', vg_name='', lv_path='/dev/VolGroup/lv', lv_tags=tags) + volumes.append(osd) + result = zap.find_associated_devices(osd_fsid='asdf-lkjh') + assert result[0].abspath == '/dev/VolGroup/lv' + + def test_lv_is_matched_id_fsid(self, volumes, monkeypatch): + monkeypatch.setattr(zap.api, 'Volumes', lambda: volumes) + tags = 'ceph.osd_id=0,ceph.osd_fsid=asdf-lkjh,ceph.journal_uuid=x,ceph.type=data' + osd = api.Volume( + lv_name='volume1', lv_uuid='y', vg_name='', lv_path='/dev/VolGroup/lv', lv_tags=tags) + 
volumes.append(osd) + result = zap.find_associated_devices(osd_id='0', osd_fsid='asdf-lkjh') + assert result[0].abspath == '/dev/VolGroup/lv' + + +class TestEnsureAssociatedLVs(object): + + def test_nothing_is_found(self, volumes): + result = zap.ensure_associated_lvs(volumes) + assert result == [] + + def test_data_is_found(self, volumes): + tags = 'ceph.osd_id=0,ceph.osd_fsid=asdf-lkjh,ceph.journal_uuid=x,ceph.type=data' + osd = api.Volume( + lv_name='volume1', lv_uuid='y', vg_name='', lv_path='/dev/VolGroup/data', lv_tags=tags) + volumes.append(osd) + result = zap.ensure_associated_lvs(volumes) + assert result == ['/dev/VolGroup/data'] + + def test_block_is_found(self, volumes): + tags = 'ceph.osd_id=0,ceph.osd_fsid=asdf-lkjh,ceph.journal_uuid=x,ceph.type=block' + osd = api.Volume( + lv_name='volume1', lv_uuid='y', vg_name='', lv_path='/dev/VolGroup/block', lv_tags=tags) + volumes.append(osd) + result = zap.ensure_associated_lvs(volumes) + assert result == ['/dev/VolGroup/block'] + + def test_block_and_partition_are_found(self, volumes, monkeypatch): + monkeypatch.setattr(zap.disk, 'get_device_from_partuuid', lambda x: '/dev/sdb1') + tags = 'ceph.osd_id=0,ceph.osd_fsid=asdf-lkjh,ceph.journal_uuid=x,ceph.type=block' + osd = api.Volume( + lv_name='volume1', lv_uuid='y', vg_name='', lv_path='/dev/VolGroup/block', lv_tags=tags) + volumes.append(osd) + result = zap.ensure_associated_lvs(volumes) + assert '/dev/sdb1' in result + assert '/dev/VolGroup/block' in result + + def test_journal_is_found(self, volumes): + tags = 'ceph.osd_id=0,ceph.osd_fsid=asdf-lkjh,ceph.journal_uuid=x,ceph.type=journal' + osd = api.Volume( + lv_name='volume1', lv_uuid='y', vg_name='', lv_path='/dev/VolGroup/lv', lv_tags=tags) + volumes.append(osd) + result = zap.ensure_associated_lvs(volumes) + assert result == ['/dev/VolGroup/lv'] + + def test_multiple_journals_are_found(self, volumes): + tags = 'ceph.osd_id=0,ceph.osd_fsid=asdf-lkjh,ceph.journal_uuid=x,ceph.type=journal' + for i in range(3): + osd = api.Volume( + lv_name='volume%s' % i, lv_uuid='y', vg_name='', lv_path='/dev/VolGroup/lv%s' % i, lv_tags=tags) + volumes.append(osd) + result = zap.ensure_associated_lvs(volumes) + assert '/dev/VolGroup/lv0' in result + assert '/dev/VolGroup/lv1' in result + assert '/dev/VolGroup/lv2' in result + + def test_multiple_dbs_are_found(self, volumes): + tags = 'ceph.osd_id=0,ceph.osd_fsid=asdf-lkjh,ceph.journal_uuid=x,ceph.type=db' + for i in range(3): + osd = api.Volume( + lv_name='volume%s' % i, lv_uuid='y', vg_name='', lv_path='/dev/VolGroup/lv%s' % i, lv_tags=tags) + volumes.append(osd) + result = zap.ensure_associated_lvs(volumes) + assert '/dev/VolGroup/lv0' in result + assert '/dev/VolGroup/lv1' in result + assert '/dev/VolGroup/lv2' in result + + def test_multiple_wals_are_found(self, volumes): + tags = 'ceph.osd_id=0,ceph.osd_fsid=asdf-lkjh,ceph.wal_uuid=x,ceph.type=wal' + for i in range(3): + osd = api.Volume( + lv_name='volume%s' % i, lv_uuid='y', vg_name='', lv_path='/dev/VolGroup/lv%s' % i, lv_tags=tags) + volumes.append(osd) + result = zap.ensure_associated_lvs(volumes) + assert '/dev/VolGroup/lv0' in result + assert '/dev/VolGroup/lv1' in result + assert '/dev/VolGroup/lv2' in result + + def test_multiple_backing_devs_are_found(self, volumes): + for _type in ['journal', 'db', 'wal']: + tags = 'ceph.osd_id=0,ceph.osd_fsid=asdf-lkjh,ceph.wal_uuid=x,ceph.type=%s' % _type + osd = api.Volume( + lv_name='volume%s' % _type, lv_uuid='y', vg_name='', lv_path='/dev/VolGroup/lv%s' % _type, lv_tags=tags) + 
volumes.append(osd) + result = zap.ensure_associated_lvs(volumes) + assert '/dev/VolGroup/lvjournal' in result + assert '/dev/VolGroup/lvwal' in result + assert '/dev/VolGroup/lvdb' in result diff --git a/ceph/src/ceph-volume/ceph_volume/tests/devices/test_zap.py b/ceph/src/ceph-volume/ceph_volume/tests/devices/test_zap.py index 493c74c50..6333e3a4e 100644 --- a/ceph/src/ceph-volume/ceph_volume/tests/devices/test_zap.py +++ b/ceph/src/ceph-volume/ceph_volume/tests/devices/test_zap.py @@ -19,7 +19,9 @@ class TestZap(object): '/dev/mapper/foo', '/dev/dm-0', ]) - def test_can_not_zap_mapper_device(self, capsys, is_root, device_name): + def test_can_not_zap_mapper_device(self, monkeypatch, device_info, capsys, is_root, device_name): + monkeypatch.setattr('os.path.exists', lambda x: True) + device_info() with pytest.raises(SystemExit): lvm.zap.Zap(argv=[device_name]).main() stdout, stderr = capsys.readouterr() diff --git a/ceph/src/ceph-volume/ceph_volume/tests/functional/batch/centos7/bluestore/mixed-type-dmcrypt/test_zap.yml b/ceph/src/ceph-volume/ceph_volume/tests/functional/batch/centos7/bluestore/mixed-type-dmcrypt/test_zap.yml new file mode 120000 index 000000000..cb969fa1d --- /dev/null +++ b/ceph/src/ceph-volume/ceph_volume/tests/functional/batch/centos7/bluestore/mixed-type-dmcrypt/test_zap.yml @@ -0,0 +1 @@ +../../../playbooks/test_zap.yml \ No newline at end of file diff --git a/ceph/src/ceph-volume/ceph_volume/tests/functional/batch/centos7/bluestore/mixed-type/test_zap.yml b/ceph/src/ceph-volume/ceph_volume/tests/functional/batch/centos7/bluestore/mixed-type/test_zap.yml new file mode 120000 index 000000000..cb969fa1d --- /dev/null +++ b/ceph/src/ceph-volume/ceph_volume/tests/functional/batch/centos7/bluestore/mixed-type/test_zap.yml @@ -0,0 +1 @@ +../../../playbooks/test_zap.yml \ No newline at end of file diff --git a/ceph/src/ceph-volume/ceph_volume/tests/functional/batch/centos7/bluestore/single-type-dmcrypt/test_zap.yml b/ceph/src/ceph-volume/ceph_volume/tests/functional/batch/centos7/bluestore/single-type-dmcrypt/test_zap.yml new file mode 120000 index 000000000..cb969fa1d --- /dev/null +++ b/ceph/src/ceph-volume/ceph_volume/tests/functional/batch/centos7/bluestore/single-type-dmcrypt/test_zap.yml @@ -0,0 +1 @@ +../../../playbooks/test_zap.yml \ No newline at end of file diff --git a/ceph/src/ceph-volume/ceph_volume/tests/functional/batch/centos7/bluestore/single-type/test_zap.yml b/ceph/src/ceph-volume/ceph_volume/tests/functional/batch/centos7/bluestore/single-type/test_zap.yml new file mode 120000 index 000000000..cb969fa1d --- /dev/null +++ b/ceph/src/ceph-volume/ceph_volume/tests/functional/batch/centos7/bluestore/single-type/test_zap.yml @@ -0,0 +1 @@ +../../../playbooks/test_zap.yml \ No newline at end of file diff --git a/ceph/src/ceph-volume/ceph_volume/tests/functional/batch/centos7/filestore/mixed-type-dmcrypt/test_zap.yml b/ceph/src/ceph-volume/ceph_volume/tests/functional/batch/centos7/filestore/mixed-type-dmcrypt/test_zap.yml new file mode 120000 index 000000000..cb969fa1d --- /dev/null +++ b/ceph/src/ceph-volume/ceph_volume/tests/functional/batch/centos7/filestore/mixed-type-dmcrypt/test_zap.yml @@ -0,0 +1 @@ +../../../playbooks/test_zap.yml \ No newline at end of file diff --git a/ceph/src/ceph-volume/ceph_volume/tests/functional/batch/centos7/filestore/mixed-type/test_zap.yml b/ceph/src/ceph-volume/ceph_volume/tests/functional/batch/centos7/filestore/mixed-type/test_zap.yml new file mode 120000 index 000000000..cb969fa1d --- /dev/null +++ 
b/ceph/src/ceph-volume/ceph_volume/tests/functional/batch/centos7/filestore/mixed-type/test_zap.yml @@ -0,0 +1 @@ +../../../playbooks/test_zap.yml \ No newline at end of file diff --git a/ceph/src/ceph-volume/ceph_volume/tests/functional/batch/centos7/filestore/single-type-dmcrypt/test_zap.yml b/ceph/src/ceph-volume/ceph_volume/tests/functional/batch/centos7/filestore/single-type-dmcrypt/test_zap.yml new file mode 120000 index 000000000..cb969fa1d --- /dev/null +++ b/ceph/src/ceph-volume/ceph_volume/tests/functional/batch/centos7/filestore/single-type-dmcrypt/test_zap.yml @@ -0,0 +1 @@ +../../../playbooks/test_zap.yml \ No newline at end of file diff --git a/ceph/src/ceph-volume/ceph_volume/tests/functional/batch/centos7/filestore/single-type/test_zap.yml b/ceph/src/ceph-volume/ceph_volume/tests/functional/batch/centos7/filestore/single-type/test_zap.yml new file mode 120000 index 000000000..cb969fa1d --- /dev/null +++ b/ceph/src/ceph-volume/ceph_volume/tests/functional/batch/centos7/filestore/single-type/test_zap.yml @@ -0,0 +1 @@ +../../../playbooks/test_zap.yml \ No newline at end of file diff --git a/ceph/src/ceph-volume/ceph_volume/tests/functional/batch/playbooks/test_zap.yml b/ceph/src/ceph-volume/ceph_volume/tests/functional/batch/playbooks/test_zap.yml new file mode 100644 index 000000000..850ecc94e --- /dev/null +++ b/ceph/src/ceph-volume/ceph_volume/tests/functional/batch/playbooks/test_zap.yml @@ -0,0 +1,31 @@ + +- hosts: osds + become: yes + tasks: + + - name: stop ceph-osd daemons + service: + name: "ceph-osd@{{ item }}" + state: stopped + with_items: "{{ osd_ids }}" + + +- hosts: mons + become: yes + tasks: + + - name: purge osds + command: "ceph --cluster {{ cluster }} osd purge osd.{{ item }} --yes-i-really-mean-it" + with_items: "{{ osd_ids }}" + + +- hosts: osds + become: yes + tasks: + + - name: zap devices used for OSDs + command: "ceph-volume --cluster {{ cluster }} lvm zap --osd-id {{ item }} --destroy" + with_items: "{{ osd_ids }}" + environment: + CEPH_VOLUME_DEBUG: 1 + diff --git a/ceph/src/ceph-volume/ceph_volume/tests/functional/batch/tox.ini b/ceph/src/ceph-volume/ceph_volume/tests/functional/batch/tox.ini index c2725a09f..4c3af6811 100644 --- a/ceph/src/ceph-volume/ceph_volume/tests/functional/batch/tox.ini +++ b/ceph/src/ceph-volume/ceph_volume/tests/functional/batch/tox.ini @@ -63,4 +63,7 @@ commands= # retest to ensure cluster came back up correctly testinfra -n 4 --sudo -v --connection=ansible --ansible-inventory={changedir}/hosts {envdir}/tmp/ceph-ansible/tests/functional/tests + # test zap OSDs by ID + ansible-playbook -vv -i {changedir}/hosts {changedir}/test_zap.yml + vagrant destroy {env:VAGRANT_DESTROY_FLAGS:"--force"} diff --git a/ceph/src/ceph-volume/ceph_volume/tests/functional/batch/xenial/bluestore/single-type-dmcrypt/test_zap.yml b/ceph/src/ceph-volume/ceph_volume/tests/functional/batch/xenial/bluestore/single-type-dmcrypt/test_zap.yml new file mode 120000 index 000000000..cb969fa1d --- /dev/null +++ b/ceph/src/ceph-volume/ceph_volume/tests/functional/batch/xenial/bluestore/single-type-dmcrypt/test_zap.yml @@ -0,0 +1 @@ +../../../playbooks/test_zap.yml \ No newline at end of file diff --git a/ceph/src/ceph-volume/ceph_volume/tests/functional/batch/xenial/bluestore/single-type/test_zap.yml b/ceph/src/ceph-volume/ceph_volume/tests/functional/batch/xenial/bluestore/single-type/test_zap.yml new file mode 120000 index 000000000..cb969fa1d --- /dev/null +++ 
b/ceph/src/ceph-volume/ceph_volume/tests/functional/batch/xenial/bluestore/single-type/test_zap.yml @@ -0,0 +1 @@ +../../../playbooks/test_zap.yml \ No newline at end of file diff --git a/ceph/src/ceph-volume/ceph_volume/tests/functional/batch/xenial/filestore/single-type-dmcrypt/test_zap.yml b/ceph/src/ceph-volume/ceph_volume/tests/functional/batch/xenial/filestore/single-type-dmcrypt/test_zap.yml new file mode 120000 index 000000000..cb969fa1d --- /dev/null +++ b/ceph/src/ceph-volume/ceph_volume/tests/functional/batch/xenial/filestore/single-type-dmcrypt/test_zap.yml @@ -0,0 +1 @@ +../../../playbooks/test_zap.yml \ No newline at end of file diff --git a/ceph/src/ceph-volume/ceph_volume/tests/functional/batch/xenial/filestore/single-type/test_zap.yml b/ceph/src/ceph-volume/ceph_volume/tests/functional/batch/xenial/filestore/single-type/test_zap.yml new file mode 120000 index 000000000..cb969fa1d --- /dev/null +++ b/ceph/src/ceph-volume/ceph_volume/tests/functional/batch/xenial/filestore/single-type/test_zap.yml @@ -0,0 +1 @@ +../../../playbooks/test_zap.yml \ No newline at end of file diff --git a/ceph/src/ceph-volume/ceph_volume/tests/functional/lvm/centos7/bluestore/dmcrypt/test.yml b/ceph/src/ceph-volume/ceph_volume/tests/functional/lvm/centos7/bluestore/dmcrypt/test.yml index bebe6dc36..8caa1ce38 100644 --- a/ceph/src/ceph-volume/ceph_volume/tests/functional/lvm/centos7/bluestore/dmcrypt/test.yml +++ b/ceph/src/ceph-volume/ceph_volume/tests/functional/lvm/centos7/bluestore/dmcrypt/test.yml @@ -32,6 +32,17 @@ environment: CEPH_VOLUME_DEBUG: 1 + # partitions have been completely removed, so re-create them again + - name: re-create partition /dev/sdd for lvm data usage + parted: + device: /dev/sdd + number: 1 + part_start: 0% + part_end: 50% + unit: '%' + label: gpt + state: present + - name: redeploy osd.2 using /dev/sdd1 command: "ceph-volume --cluster {{ cluster }} lvm create --bluestore --data /dev/sdd1 --osd-id 2" environment: diff --git a/ceph/src/ceph-volume/ceph_volume/tests/functional/lvm/centos7/filestore/dmcrypt/test.yml b/ceph/src/ceph-volume/ceph_volume/tests/functional/lvm/centos7/filestore/dmcrypt/test.yml index c48e4bece..17b74d524 100644 --- a/ceph/src/ceph-volume/ceph_volume/tests/functional/lvm/centos7/filestore/dmcrypt/test.yml +++ b/ceph/src/ceph-volume/ceph_volume/tests/functional/lvm/centos7/filestore/dmcrypt/test.yml @@ -40,6 +40,27 @@ environment: CEPH_VOLUME_DEBUG: 1 + # partitions have been completely removed, so re-create them again + - name: re-create partition /dev/sdd for lvm data usage + parted: + device: /dev/sdd + number: 1 + part_start: 0% + part_end: 50% + unit: '%' + label: gpt + state: present + + - name: re-create partition /dev/sdd lvm journals + parted: + device: /dev/sdd + number: 2 + part_start: 50% + part_end: 100% + unit: '%' + state: present + label: gpt + - name: redeploy osd.2 using /dev/sdd1 command: "ceph-volume --cluster {{ cluster }} lvm create --filestore --data /dev/sdd1 --journal /dev/sdd2 --osd-id 2" environment: @@ -56,6 +77,16 @@ environment: CEPH_VOLUME_DEBUG: 1 + - name: re-create partition /dev/sdc1 + parted: + device: /dev/sdc + number: 1 + part_start: 0% + part_end: 50% + unit: '%' + state: present + label: gpt + - name: prepare osd.0 again using test_group/data-lv1 command: "ceph-volume --cluster {{ cluster }} lvm prepare --filestore --data test_group/data-lv1 --journal /dev/sdc1 --osd-id 0" environment: diff --git a/ceph/src/ceph-volume/ceph_volume/tests/functional/lvm/playbooks/test_bluestore.yml 
b/ceph/src/ceph-volume/ceph_volume/tests/functional/lvm/playbooks/test_bluestore.yml index e4e804a70..353df127c 100644 --- a/ceph/src/ceph-volume/ceph_volume/tests/functional/lvm/playbooks/test_bluestore.yml +++ b/ceph/src/ceph-volume/ceph_volume/tests/functional/lvm/playbooks/test_bluestore.yml @@ -35,6 +35,17 @@ environment: CEPH_VOLUME_DEBUG: 1 + # partitions have been completely removed, so re-create them again + - name: re-create partition /dev/sdd for lvm data usage + parted: + device: /dev/sdd + number: 1 + part_start: 0% + part_end: 50% + unit: '%' + label: gpt + state: present + - name: redeploy osd.2 using /dev/sdd1 command: "ceph-volume --cluster {{ cluster }} lvm create --bluestore --data /dev/sdd1 --osd-id 2" environment: @@ -51,6 +62,37 @@ environment: CEPH_VOLUME_DEBUG: 1 + - name: find all OSD directories + find: + paths: /var/lib/ceph/osd + recurse: no + file_type: directory + register: osd_directories + + - name: find all OSD symlinks + find: + paths: /var/lib/ceph/osd + recurse: yes + depth: 2 + file_type: link + register: osd_symlinks + + # set the OSD dir and the block/block.db links to root:root permissions, to + # ensure that the OSD will be able to activate regardless + - file: + path: "{{ item.path }}" + owner: root + group: root + with_items: + - "{{ osd_directories.files }}" + + - file: + path: "{{ item.path }}" + owner: root + group: root + with_items: + - "{{ osd_symlinks.files }}" + - name: activate all to start the previously prepared osd.0 command: "ceph-volume lvm activate --all" environment: diff --git a/ceph/src/ceph-volume/ceph_volume/tests/functional/lvm/playbooks/test_filestore.yml b/ceph/src/ceph-volume/ceph_volume/tests/functional/lvm/playbooks/test_filestore.yml index 4aa3cf19d..e896c41b0 100644 --- a/ceph/src/ceph-volume/ceph_volume/tests/functional/lvm/playbooks/test_filestore.yml +++ b/ceph/src/ceph-volume/ceph_volume/tests/functional/lvm/playbooks/test_filestore.yml @@ -41,6 +41,27 @@ environment: CEPH_VOLUME_DEBUG: 1 + # partitions have been completely removed, so re-create them again + - name: re-create partition /dev/sdd for lvm data usage + parted: + device: /dev/sdd + number: 1 + part_start: 0% + part_end: 50% + unit: '%' + label: gpt + state: present + + - name: re-create partition /dev/sdd lvm journals + parted: + device: /dev/sdd + number: 2 + part_start: 50% + part_end: 100% + unit: '%' + state: present + label: gpt + - name: redeploy osd.2 using /dev/sdd1 command: "ceph-volume --cluster {{ cluster }} lvm create --filestore --data /dev/sdd1 --journal /dev/sdd2 --osd-id 2" environment: @@ -65,6 +86,34 @@ environment: CEPH_VOLUME_DEBUG: 1 + - name: find all OSD paths + find: + paths: /var/lib/ceph/osd + recurse: no + file_type: directory + register: osd_paths + + # set all OSD paths to root:rootto ensure that the OSD will be able to + # activate regardless + - name: mangle permissions to root + file: + path: "{{ item.path }}" + owner: root + group: root + recurse: yes + with_items: + - "{{ osd_paths.files }}" + + - name: stop ceph-osd@2 daemon + service: + name: ceph-osd@2 + state: stopped + + - name: stop ceph-osd@1 daemon + service: + name: ceph-osd@1 + state: stopped + - name: activate all to start the previously prepared osd.0 command: "ceph-volume lvm activate --filestore --all" environment: diff --git a/ceph/src/ceph-volume/ceph_volume/tests/functional/lvm/xenial/bluestore/dmcrypt/test.yml b/ceph/src/ceph-volume/ceph_volume/tests/functional/lvm/xenial/bluestore/dmcrypt/test.yml index 19209b1d2..3e032e202 100644 --- 
a/ceph/src/ceph-volume/ceph_volume/tests/functional/lvm/xenial/bluestore/dmcrypt/test.yml +++ b/ceph/src/ceph-volume/ceph_volume/tests/functional/lvm/xenial/bluestore/dmcrypt/test.yml @@ -33,6 +33,17 @@ environment: CEPH_VOLUME_DEBUG: 1 + # partitions have been completely removed, so re-create them again + - name: re-create partition /dev/sdd for lvm data usage + parted: + device: /dev/sdd + number: 1 + part_start: 0% + part_end: 50% + unit: '%' + label: gpt + state: present + - name: redeploy osd.2 using /dev/sdd1 command: "ceph-volume --cluster {{ cluster }} lvm create --bluestore --data /dev/sdd1 --osd-id 2" environment: diff --git a/ceph/src/ceph-volume/ceph_volume/tests/functional/lvm/xenial/filestore/dmcrypt/test.yml b/ceph/src/ceph-volume/ceph_volume/tests/functional/lvm/xenial/filestore/dmcrypt/test.yml index c48e4bece..17b74d524 100644 --- a/ceph/src/ceph-volume/ceph_volume/tests/functional/lvm/xenial/filestore/dmcrypt/test.yml +++ b/ceph/src/ceph-volume/ceph_volume/tests/functional/lvm/xenial/filestore/dmcrypt/test.yml @@ -40,6 +40,27 @@ environment: CEPH_VOLUME_DEBUG: 1 + # partitions have been completely removed, so re-create them again + - name: re-create partition /dev/sdd for lvm data usage + parted: + device: /dev/sdd + number: 1 + part_start: 0% + part_end: 50% + unit: '%' + label: gpt + state: present + + - name: re-create partition /dev/sdd lvm journals + parted: + device: /dev/sdd + number: 2 + part_start: 50% + part_end: 100% + unit: '%' + state: present + label: gpt + - name: redeploy osd.2 using /dev/sdd1 command: "ceph-volume --cluster {{ cluster }} lvm create --filestore --data /dev/sdd1 --journal /dev/sdd2 --osd-id 2" environment: @@ -56,6 +77,16 @@ environment: CEPH_VOLUME_DEBUG: 1 + - name: re-create partition /dev/sdc1 + parted: + device: /dev/sdc + number: 1 + part_start: 0% + part_end: 50% + unit: '%' + state: present + label: gpt + - name: prepare osd.0 again using test_group/data-lv1 command: "ceph-volume --cluster {{ cluster }} lvm prepare --filestore --data test_group/data-lv1 --journal /dev/sdc1 --osd-id 0" environment: diff --git a/ceph/src/ceph-volume/ceph_volume/tests/functional/playbooks/deploy.yml b/ceph/src/ceph-volume/ceph_volume/tests/functional/playbooks/deploy.yml index 3564cf3cd..f46fcb1d4 100644 --- a/ceph/src/ceph-volume/ceph_volume/tests/functional/playbooks/deploy.yml +++ b/ceph/src/ceph-volume/ceph_volume/tests/functional/playbooks/deploy.yml @@ -75,10 +75,10 @@ become: True any_errors_fatal: true roles: - - role: ceph-defaults - tags: ['ceph_update_config'] - - role: ceph-handler - - role: ceph-common + - ceph-defaults + - ceph-facts + - ceph-handler + - ceph-common tasks: - name: rsync ceph-volume to test nodes on centos synchronize: diff --git a/ceph/src/ceph-volume/ceph_volume/tests/util/test_device.py b/ceph/src/ceph-volume/ceph_volume/tests/util/test_device.py index 99e1d494c..8be5f8e4b 100644 --- a/ceph/src/ceph-volume/ceph_volume/tests/util/test_device.py +++ b/ceph/src/ceph-volume/ceph_volume/tests/util/test_device.py @@ -18,12 +18,37 @@ class TestDevice(object): disk = device.Device("vg/lv") assert disk.is_lv - def test_is_device(self, device_info): + def test_vgs_is_empty(self, device_info, pvolumes, monkeypatch): + BarPVolume = api.PVolume(pv_name='/dev/sda', pv_uuid="0000", pv_tags={}) + pvolumes.append(BarPVolume) + monkeypatch.setattr(api, 'PVolumes', lambda: pvolumes) + lsblk = {"TYPE": "disk"} + device_info(lsblk=lsblk) + disk = device.Device("/dev/nvme0n1") + assert disk.vgs == [] + + def test_vgs_is_not_empty(self, 
device_info, pvolumes, monkeypatch): + BarPVolume = api.PVolume(vg_name='foo', lv_uuid='111', pv_name='/dev/nvme0n1', pv_uuid="0000", pv_tags={}) + pvolumes.append(BarPVolume) + monkeypatch.setattr(api, 'PVolumes', lambda: pvolumes) + lsblk = {"TYPE": "disk"} + device_info(lsblk=lsblk) + disk = device.Device("/dev/nvme0n1") + assert len(disk.vgs) == 1 + + def test_device_is_device(self, device_info, pvolumes): data = {"/dev/sda": {"foo": "bar"}} lsblk = {"TYPE": "device"} device_info(devices=data, lsblk=lsblk) disk = device.Device("/dev/sda") - assert disk.is_device + assert disk.is_device is True + + def test_disk_is_device(self, device_info, pvolumes): + data = {"/dev/sda": {"foo": "bar"}} + lsblk = {"TYPE": "disk"} + device_info(devices=data, lsblk=lsblk) + disk = device.Device("/dev/sda") + assert disk.is_device is True def test_is_partition(self, device_info, pvolumes): data = {"/dev/sda": {"foo": "bar"}} @@ -51,6 +76,11 @@ class TestDevice(object): disk = device.Device("/dev/mapper/foo") assert disk.is_mapper + def test_dm_is_mapper_device(self, device_info): + device_info() + disk = device.Device("/dev/dm-4") + assert disk.is_mapper + def test_is_not_mapper_device(self, device_info): device_info() disk = device.Device("/dev/sda") @@ -62,6 +92,14 @@ class TestDevice(object): disk = device.Device("/dev/sda") assert disk.is_ceph_disk_member + def test_is_ceph_disk_member_not_available(self, device_info): + lsblk = {"PARTLABEL": "ceph data"} + device_info(lsblk=lsblk) + disk = device.Device("/dev/sda") + assert disk.is_ceph_disk_member + assert not disk.available + assert "Used by ceph-disk" in disk.rejected_reasons + def test_is_not_ceph_disk_member_lsblk(self, device_info): lsblk = {"PARTLABEL": "gluster partition"} device_info(lsblk=lsblk) @@ -117,6 +155,125 @@ class TestDevice(object): disk = device.Device("/dev/sda") assert not disk.used_by_ceph + def test_get_device_id(self, device_info): + udev = {k:k for k in ['ID_VENDOR', 'ID_MODEL', 'ID_SCSI_SERIAL']} + device_info(udevadm=udev) + disk = device.Device("/dev/sda") + assert disk._get_device_id() == 'ID_VENDOR_ID_MODEL_ID_SCSI_SERIAL' + + + +class TestDeviceEncryption(object): + + def test_partition_is_not_encrypted_lsblk(self, device_info, pvolumes): + lsblk = {'TYPE': 'part', 'FSTYPE': 'xfs'} + device_info(lsblk=lsblk) + disk = device.Device("/dev/sda") + assert disk.is_encrypted is False + + def test_partition_is_encrypted_lsblk(self, device_info, pvolumes): + lsblk = {'TYPE': 'part', 'FSTYPE': 'crypto_LUKS'} + device_info(lsblk=lsblk) + disk = device.Device("/dev/sda") + assert disk.is_encrypted is True + + def test_partition_is_not_encrypted_blkid(self, device_info, pvolumes): + lsblk = {'TYPE': 'part'} + blkid = {'TYPE': 'ceph data'} + device_info(lsblk=lsblk, blkid=blkid) + disk = device.Device("/dev/sda") + assert disk.is_encrypted is False + + def test_partition_is_encrypted_blkid(self, device_info, pvolumes): + lsblk = {'TYPE': 'part'} + blkid = {'TYPE': 'crypto_LUKS'} + device_info(lsblk=lsblk, blkid=blkid) + disk = device.Device("/dev/sda") + assert disk.is_encrypted is True + + def test_mapper_is_encrypted_luks1(self, device_info, pvolumes, monkeypatch): + status = {'type': 'LUKS1'} + monkeypatch.setattr(device, 'encryption_status', lambda x: status) + lsblk = {'FSTYPE': 'xfs', 'TYPE': 'lvm'} + blkid = {'TYPE': 'mapper'} + device_info(lsblk=lsblk, blkid=blkid) + disk = device.Device("/dev/mapper/uuid") + assert disk.is_encrypted is True + + def test_mapper_is_encrypted_luks2(self, device_info, pvolumes, monkeypatch): 
+ status = {'type': 'LUKS2'} + monkeypatch.setattr(device, 'encryption_status', lambda x: status) + lsblk = {'FSTYPE': 'xfs', 'TYPE': 'lvm'} + blkid = {'TYPE': 'mapper'} + device_info(lsblk=lsblk, blkid=blkid) + disk = device.Device("/dev/mapper/uuid") + assert disk.is_encrypted is True + + def test_mapper_is_encrypted_plain(self, device_info, pvolumes, monkeypatch): + status = {'type': 'PLAIN'} + monkeypatch.setattr(device, 'encryption_status', lambda x: status) + lsblk = {'FSTYPE': 'xfs', 'TYPE': 'lvm'} + blkid = {'TYPE': 'mapper'} + device_info(lsblk=lsblk, blkid=blkid) + disk = device.Device("/dev/mapper/uuid") + assert disk.is_encrypted is True + + def test_mapper_is_not_encrypted_plain(self, device_info, pvolumes, monkeypatch): + monkeypatch.setattr(device, 'encryption_status', lambda x: {}) + lsblk = {'FSTYPE': 'xfs', 'TYPE': 'lvm'} + blkid = {'TYPE': 'mapper'} + device_info(lsblk=lsblk, blkid=blkid) + disk = device.Device("/dev/mapper/uuid") + assert disk.is_encrypted is False + + def test_lv_is_encrypted_blkid(self, device_info, pvolumes): + lsblk = {'TYPE': 'lvm'} + blkid = {'TYPE': 'crypto_LUKS'} + device_info(lsblk=lsblk, blkid=blkid) + disk = device.Device("/dev/sda") + disk.lv_api = {} + assert disk.is_encrypted is True + + def test_lv_is_not_encrypted_blkid(self, factory, device_info, pvolumes): + lsblk = {'TYPE': 'lvm'} + blkid = {'TYPE': 'xfs'} + device_info(lsblk=lsblk, blkid=blkid) + disk = device.Device("/dev/sda") + disk.lv_api = factory(encrypted=None) + assert disk.is_encrypted is False + + def test_lv_is_encrypted_lsblk(self, device_info, pvolumes): + lsblk = {'FSTYPE': 'crypto_LUKS', 'TYPE': 'lvm'} + blkid = {'TYPE': 'mapper'} + device_info(lsblk=lsblk, blkid=blkid) + disk = device.Device("/dev/sda") + disk.lv_api = {} + assert disk.is_encrypted is True + + def test_lv_is_not_encrypted_lsblk(self, factory, device_info, pvolumes): + lsblk = {'FSTYPE': 'xfs', 'TYPE': 'lvm'} + blkid = {'TYPE': 'mapper'} + device_info(lsblk=lsblk, blkid=blkid) + disk = device.Device("/dev/sda") + disk.lv_api = factory(encrypted=None) + assert disk.is_encrypted is False + + def test_lv_is_encrypted_lvm_api(self, factory, device_info, pvolumes): + lsblk = {'FSTYPE': 'xfs', 'TYPE': 'lvm'} + blkid = {'TYPE': 'mapper'} + device_info(lsblk=lsblk, blkid=blkid) + disk = device.Device("/dev/sda") + disk.lv_api = factory(encrypted=True) + assert disk.is_encrypted is True + + def test_lv_is_not_encrypted_lvm_api(self, factory, device_info, pvolumes): + lsblk = {'FSTYPE': 'xfs', 'TYPE': 'lvm'} + blkid = {'TYPE': 'mapper'} + device_info(lsblk=lsblk, blkid=blkid) + disk = device.Device("/dev/sda") + disk.lv_api = factory(encrypted=False) + assert disk.is_encrypted is False + class TestDeviceOrdering(object): diff --git a/ceph/src/ceph-volume/ceph_volume/tests/util/test_disk.py b/ceph/src/ceph-volume/ceph_volume/tests/util/test_disk.py index 5d1bd82b6..e40c982d1 100644 --- a/ceph/src/ceph-volume/ceph_volume/tests/util/test_disk.py +++ b/ceph/src/ceph-volume/ceph_volume/tests/util/test_disk.py @@ -45,6 +45,34 @@ class TestBlkid(object): assert result['UUID'] == '62416664-cbaf-40bd-9689-10bd337379c3' assert result['TYPE'] == 'xfs' +class TestUdevadmProperty(object): + + def test_good_output(self, stub_call): + output = """ID_MODEL=SK_hynix_SC311_SATA_512GB +ID_PART_TABLE_TYPE=gpt +ID_SERIAL_SHORT=MS83N71801150416A""".split() + stub_call((output, [], 0)) + result = disk.udevadm_property('dev/sda') + assert result['ID_MODEL'] == 'SK_hynix_SC311_SATA_512GB' + assert result['ID_PART_TABLE_TYPE'] == 'gpt' + 
assert result['ID_SERIAL_SHORT'] == 'MS83N71801150416A' + + def test_property_filter(self, stub_call): + output = """ID_MODEL=SK_hynix_SC311_SATA_512GB +ID_PART_TABLE_TYPE=gpt +ID_SERIAL_SHORT=MS83N71801150416A""".split() + stub_call((output, [], 0)) + result = disk.udevadm_property('dev/sda', ['ID_MODEL', + 'ID_SERIAL_SHORT']) + assert result['ID_MODEL'] == 'SK_hynix_SC311_SATA_512GB' + assert 'ID_PART_TABLE_TYPE' not in result + + def test_fail_on_broken_output(self, stub_call): + output = ["ID_MODEL:SK_hynix_SC311_SATA_512GB"] + stub_call((output, [], 0)) + with pytest.raises(ValueError): + disk.udevadm_property('dev/sda') + class TestDeviceFamily(object): @@ -239,6 +267,28 @@ class TestGetDevices(object): assert len(result) == 1 assert result == [ceph_data_path] + def test_sda1_partition(self, tmpfile, tmpdir): + block_path, dev_path, mapper_path = self.setup_paths(tmpdir) + block_sda_path = os.path.join(block_path, 'sda') + block_sda1_path = os.path.join(block_sda_path, 'sda1') + block_sda1_holders = os.path.join(block_sda1_path, 'holders') + dev_sda_path = os.path.join(dev_path, 'sda') + dev_sda1_path = os.path.join(dev_path, 'sda1') + os.makedirs(block_sda_path) + os.makedirs(block_sda1_path) + os.makedirs(dev_sda1_path) + os.makedirs(block_sda1_holders) + os.makedirs(dev_sda_path) + tmpfile('size', '1024', directory=block_sda_path) + tmpfile('partition', '1', directory=block_sda1_path) + result = disk.get_devices( + _sys_block_path=block_path, + _dev_path=dev_path, + _mapper_path=mapper_path) + assert dev_sda_path in list(result.keys()) + assert '/dev/sda1' in list(result.keys()) + assert result['/dev/sda1']['holders'] == [] + def test_sda_size(self, tmpfile, tmpdir): block_path, dev_path, mapper_path = self.setup_paths(tmpdir) block_sda_path = os.path.join(block_path, 'sda') diff --git a/ceph/src/ceph-volume/ceph_volume/tests/util/test_encryption.py b/ceph/src/ceph-volume/ceph_volume/tests/util/test_encryption.py index 8cca42689..e1420b440 100644 --- a/ceph/src/ceph-volume/ceph_volume/tests/util/test_encryption.py +++ b/ceph/src/ceph-volume/ceph_volume/tests/util/test_encryption.py @@ -33,3 +33,21 @@ class TestDmcryptClose(object): file_name = '/path/does/not/exist' encryption.dmcrypt_close(file_name) assert fake_run.calls == [] + + +class TestDmcryptKey(object): + + def test_dmcrypt_with_default_size(self, conf_ceph_stub): + conf_ceph_stub('[global]\nfsid=asdf-lkjh') + result = encryption.create_dmcrypt_key() + assert len(result) == 172 + + def test_dmcrypt_with_custom_size(self, conf_ceph_stub): + conf_ceph_stub(''' + [global] + fsid=asdf + [osd] + osd_dmcrypt_size=8 + ''') + result = encryption.create_dmcrypt_key() + assert len(result) == 172 diff --git a/ceph/src/ceph-volume/ceph_volume/tests/util/test_util.py b/ceph/src/ceph-volume/ceph_volume/tests/util/test_util.py index 82f2ef27f..1a094d33f 100644 --- a/ceph/src/ceph-volume/ceph_volume/tests/util/test_util.py +++ b/ceph/src/ceph-volume/ceph_volume/tests/util/test_util.py @@ -15,12 +15,27 @@ class TestAsBytes(object): class TestStrToInt(object): - def test_passing_a_float_str(self): - result = util.str_to_int("1.99") + def test_passing_a_float_str_comma(self): + result = util.str_to_int("1,99") assert result == 1 - def test_passing_a_float_does_not_round(self): - result = util.str_to_int("1.99", round_down=False) + def test_passing_a_float_does_not_round_comma(self): + result = util.str_to_int("1,99", round_down=False) + assert result == 2 + + @pytest.mark.parametrize("value", ['2', 2]) + def test_passing_an_int(self, 
value): + result = util.str_to_int(value) + assert result == 2 + + @pytest.mark.parametrize("value", ['1.99', 1.99]) + def test_passing_a_float(self, value): + result = util.str_to_int(value) + assert result == 1 + + @pytest.mark.parametrize("value", ['1.99', 1.99]) + def test_passing_a_float_does_not_round(self, value): + result = util.str_to_int(value, round_down=False) assert result == 2 def test_text_is_not_an_integer_like(self): @@ -28,6 +43,11 @@ class TestStrToInt(object): util.str_to_int("1.4GB") assert str(error.value) == "Unable to convert to integer: '1.4GB'" + def test_input_is_not_string(self): + with pytest.raises(RuntimeError) as error: + util.str_to_int(None) + assert str(error.value) == "Unable to convert to integer: 'None'" + def true_responses(upper_casing=False): if upper_casing: @@ -75,22 +95,22 @@ class TestPromptBool(object): def test_trueish(self, response): fake_input = lambda x: response qx = 'what the what?' - assert util.prompt_bool(qx, _raw_input=fake_input) is True + assert util.prompt_bool(qx, input_=fake_input) is True @pytest.mark.parametrize('response', false_responses()) def test_falseish(self, response): fake_input = lambda x: response qx = 'what the what?' - assert util.prompt_bool(qx, _raw_input=fake_input) is False + assert util.prompt_bool(qx, input_=fake_input) is False def test_try_again_true(self): responses = ['g', 'h', 'y'] fake_input = lambda x: responses.pop(0) qx = 'what the what?' - assert util.prompt_bool(qx, _raw_input=fake_input) is True + assert util.prompt_bool(qx, input_=fake_input) is True def test_try_again_false(self): responses = ['g', 'h', 'n'] fake_input = lambda x: responses.pop(0) qx = 'what the what?' - assert util.prompt_bool(qx, _raw_input=fake_input) is False + assert util.prompt_bool(qx, input_=fake_input) is False diff --git a/ceph/src/ceph-volume/ceph_volume/util/__init__.py b/ceph/src/ceph-volume/ceph_volume/util/__init__.py index cdcf3a5b0..43c9c9d68 100644 --- a/ceph/src/ceph-volume/ceph_volume/util/__init__.py +++ b/ceph/src/ceph-volume/ceph_volume/util/__init__.py @@ -2,6 +2,10 @@ import logging from math import floor from ceph_volume import terminal +try: + input = raw_input # pylint: disable=redefined-builtin +except NameError: + pass logger = logging.getLogger(__name__) @@ -31,10 +35,21 @@ def str_to_int(string, round_down=True): """ Parses a string number into an integer, optionally converting to a float and rounding down. + + Some LVM values may come with a comma instead of a dot to define decimals. + This function normalizes a comma into a dot """ error_msg = "Unable to convert to integer: '%s'" % str(string) try: - integer = float(string) + integer = float(string.replace(',', '.')) + except AttributeError: + # this might be a integer already, so try to use it, otherwise raise + # the original exception + if isinstance(string, (int, float)): + integer = string + else: + logger.exception(error_msg) + raise RuntimeError(error_msg) except (TypeError, ValueError): logger.exception(error_msg) raise RuntimeError(error_msg) @@ -68,12 +83,12 @@ def str_to_bool(val): raise ValueError("Invalid input value: %s" % val) -def prompt_bool(question, _raw_input=None): +def prompt_bool(question, input_=None): """ Interface to prompt a boolean (or boolean-like) response from a user. Usually a confirmation. 
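A condensed, standalone sketch of the normalization the updated str_to_int() performs: comma decimals (as LVM can emit) are turned into dots, and values that are already numeric are accepted as-is. The helper below is an illustration, not the ceph_volume.util function itself:

    from math import ceil, floor

    def str_to_int_sketch(value, round_down=True):
        error_msg = "Unable to convert to integer: '%s'" % value
        try:
            number = float(value.replace(',', '.'))   # '1,99' and '1.99' both become 1.99
        except AttributeError:
            # no .replace(): may already be an int or float
            if not isinstance(value, (int, float)):
                raise RuntimeError(error_msg)
            number = float(value)
        except (TypeError, ValueError):
            raise RuntimeError(error_msg)
        return int(floor(number)) if round_down else int(ceil(number))

    assert str_to_int_sketch('1,99') == 1
    assert str_to_int_sketch(1.99, round_down=False) == 2
    assert str_to_int_sketch(2) == 2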
""" - input_prompt = _raw_input or raw_input + input_prompt = input_ or input prompt_format = '--> {question} '.format(question=question) response = input_prompt(prompt_format) try: @@ -82,4 +97,4 @@ def prompt_bool(question, _raw_input=None): terminal.error('Valid true responses are: y, yes, ') terminal.error('Valid false responses are: n, no') terminal.error('That response was invalid, please try again') - return prompt_bool(question, _raw_input=input_prompt) + return prompt_bool(question, input_=input_prompt) diff --git a/ceph/src/ceph-volume/ceph_volume/util/arg_validators.py b/ceph/src/ceph-volume/ceph_volume/util/arg_validators.py index 534c9aa64..a04c19924 100644 --- a/ceph/src/ceph-volume/ceph_volume/util/arg_validators.py +++ b/ceph/src/ceph-volume/ceph_volume/util/arg_validators.py @@ -8,15 +8,22 @@ from ceph_volume.util.device import Device class ValidDevice(object): - def __init__(self, as_string=False): + def __init__(self, as_string=False, gpt_ok=False): self.as_string = as_string + self.gpt_ok = gpt_ok def __call__(self, string): device = Device(string) error = None if not device.exists: error = "Unable to proceed with non-existing device: %s" % string - elif device.has_gpt_headers: + # FIXME this is not a nice API, this validator was meant to catch any + # non-existing devices upfront, not check for gpt headers. Now this + # needs to optionally skip checking gpt headers which is beyond + # verifying if the device exists. The better solution would be to + # configure this with a list of checks that can be excluded/included on + # __init__ + elif device.has_gpt_headers and not self.gpt_ok: error = "GPT headers found, they must be removed on: %s" % string if error: diff --git a/ceph/src/ceph-volume/ceph_volume/util/device.py b/ceph/src/ceph-volume/ceph_volume/util/device.py index 181044886..06f90cd37 100644 --- a/ceph/src/ceph-volume/ceph_volume/util/device.py +++ b/ceph/src/ceph-volume/ceph_volume/util/device.py @@ -10,6 +10,16 @@ report_template = """ {dev:<25} {size:<12} {rot!s:<7} {available!s:<9} {model}""" +def encryption_status(abspath): + """ + Helper function to run ``encryption.status()``. It is done here to avoid + a circular import issue (encryption module imports from this module) and to + ease testing by allowing monkeypatching of this function. 
+ """ + from ceph_volume.util import encryption + return encryption.status(abspath) + + class Devices(object): """ A container for Device instances with reporting @@ -79,6 +89,7 @@ class Device(object): self._is_lvm_member = None self._parse() self.available, self.rejected_reasons = self._check_reject_reasons() + self.device_id = self._get_device_id() def __lt__(self, other): ''' @@ -172,6 +183,32 @@ class Device(object): output['lvs'] = [lv.report() for lv in self.lvs] return output + def _get_device_id(self): + """ + Please keep this implementation in sync with get_device_id() in + src/common/blkdev.cc + """ + props = ['ID_VENDOR','ID_MODEL','ID_SERIAL_SHORT', 'ID_SERIAL', + 'ID_SCSI_SERIAL'] + p = disk.udevadm_property(self.abspath, props) + if 'ID_VENDOR' in p and 'ID_MODEL' in p and 'ID_SCSI_SERIAL' in p: + dev_id = '_'.join([p['ID_VENDOR'], p['ID_MODEL'], + p['ID_SCSI_SERIAL']]) + elif 'ID_MODEL' in p and 'ID_SERIAL_SHORT' in p: + dev_id = '_'.join([p['ID_MODEL'], p['ID_SERIAL_SHORT']]) + elif 'ID_SERIAL' in p: + dev_id = p['ID_SERIAL'] + if dev_id.startswith('MTFD'): + # Micron NVMes hide the vendor + dev_id = 'Micron_' + dev_id + else: + # the else branch should fallback to using sysfs and ioctl to + # retrieve device_id on FreeBSD. Still figuring out if/how the + # python ioctl implementation does that on FreeBSD + dev_id = '' + dev_id.replace(' ', '_') + return dev_id + def _set_lvm_membership(self): if self._is_lvm_member is None: # this is contentious, if a PV is recognized by LVM but has no @@ -185,6 +222,7 @@ class Device(object): pvs.filter(pv_name=path) has_vgs = [pv.vg_name for pv in pvs if pv.vg_name] if has_vgs: + self.vgs = list(set(has_vgs)) # a pv can only be in one vg, so this should be safe self.vg_name = has_vgs[0] self._is_lvm_member = True @@ -194,6 +232,8 @@ class Device(object): lv = lvm.get_lv(vg_name=pv.vg_name, lv_uuid=pv.lv_uuid) if lv: self.lvs.append(lv) + else: + self.vgs = [] return self._is_lvm_member def _get_pv_paths(self): @@ -239,11 +279,18 @@ class Device(object): @property def is_ceph_disk_member(self): - return self.ceph_disk.is_member + is_member = self.ceph_disk.is_member + if self.sys_api.get("partitions"): + for part in self.sys_api.get("partitions").keys(): + part = Device("/dev/%s" % part) + if part.is_ceph_disk_member: + is_member = True + break + return is_member @property def is_mapper(self): - return self.path.startswith('/dev/mapper') + return self.path.startswith(('/dev/mapper', '/dev/dm-')) @property def is_lv(self): @@ -258,9 +305,40 @@ class Device(object): @property def is_device(self): if self.disk_api: - return self.disk_api['TYPE'] == 'device' + is_device = self.disk_api['TYPE'] == 'device' + is_disk = self.disk_api['TYPE'] == 'disk' + if is_device or is_disk: + return True return False + @property + def is_encrypted(self): + """ + Only correct for LVs, device mappers, and partitions. Will report a ``None`` + for raw devices. 
+ """ + crypt_reports = [self.blkid_api.get('TYPE', ''), self.disk_api.get('FSTYPE', '')] + if self.is_lv: + # if disk APIs are reporting this is encrypted use that: + if 'crypto_LUKS' in crypt_reports: + return True + # if ceph-volume created this, then a tag would let us know + elif self.lv_api.encrypted: + return True + return False + elif self.is_partition: + return 'crypto_LUKS' in crypt_reports + elif self.is_mapper: + active_mapper = encryption_status(self.abspath) + if active_mapper: + # normalize a bit to ensure same values regardless of source + encryption_type = active_mapper['type'].lower().strip('12') # turn LUKS1 or LUKS2 into luks + return True if encryption_type in ['plain', 'luks'] else False + else: + return False + else: + return None + @property def used_by_ceph(self): # only filter out data devices as journals could potentially be reused @@ -282,6 +360,9 @@ class Device(object): ] rejected = [reason for (k, v, reason) in reasons if self.sys_api.get(k, '') == v] + if self.is_ceph_disk_member: + rejected.append("Used by ceph-disk") + return len(rejected) == 0, rejected diff --git a/ceph/src/ceph-volume/ceph_volume/util/disk.py b/ceph/src/ceph-volume/ceph_volume/util/disk.py index ccc2ff7a1..c85d3be9a 100644 --- a/ceph/src/ceph-volume/ceph_volume/util/disk.py +++ b/ceph/src/ceph-volume/ceph_volume/util/disk.py @@ -127,6 +127,23 @@ def get_device_from_partuuid(partuuid): return ' '.join(out).strip() +def remove_partition(device): + """ + Removes a partition using parted + + :param device: A ``Device()`` object + """ + parent_device = '/dev/%s' % device.disk_api['PKNAME'] + udev_info = udevadm_property(device.abspath) + partition_number = udev_info.get('ID_PART_ENTRY_NUMBER') + if not partition_number: + raise RuntimeError('Unable to detect the partition number for device: %s' % device.abspath) + + process.run( + ['parted', parent_device, '--script', '--', 'rm', partition_number] + ) + + def _stat_is_device(stat_obj): """ Helper function that will interpret ``os.stat`` output directly, so that other @@ -170,6 +187,47 @@ def device_family(device): return devices +def udevadm_property(device, properties=[]): + """ + Query udevadm for information about device properties. + Optionally pass a list of properties to return. A requested property might + not be returned if not present. + + Expected output format:: + # udevadm info --query=property --name=/dev/sda :( + DEVNAME=/dev/sda + DEVTYPE=disk + ID_ATA=1 + ID_BUS=ata + ID_MODEL=SK_hynix_SC311_SATA_512GB + ID_PART_TABLE_TYPE=gpt + ID_PART_TABLE_UUID=c8f91d57-b26c-4de1-8884-0c9541da288c + ID_PATH=pci-0000:00:17.0-ata-3 + ID_PATH_TAG=pci-0000_00_17_0-ata-3 + ID_REVISION=70000P10 + ID_SERIAL=SK_hynix_SC311_SATA_512GB_MS83N71801150416A + TAGS=:systemd: + USEC_INITIALIZED=16117769 + ... + """ + out = _udevadm_info(device) + ret = {} + for line in out: + p, v = line.split('=', 1) + if not properties or p in properties: + ret[p] = v + return ret + + +def _udevadm_info(device): + """ + Call udevadm and return the output + """ + cmd = ['udevadm', 'info', '--query=property', device] + out, _err, _rc = process.call(cmd) + return out + + def lsblk(device, columns=None, abspath=False): """ Create a dictionary of identifying values for a device using ``lsblk``. 
@@ -631,7 +689,7 @@ def get_partitions_facts(sys_block_path): folder_path = os.path.join(sys_block_path, folder) if os.path.exists(os.path.join(folder_path, 'partition')): contents = get_file_contents(os.path.join(folder_path, 'partition')) - if '1' in contents: + if contents: part = {} partname = folder part_sys_block_path = os.path.join(sys_block_path, partname) @@ -645,6 +703,9 @@ def get_partitions_facts(sys_block_path): part['sectorsize'] = get_file_contents( part_sys_block_path + "/queue/hw_sector_size", 512) part['size'] = human_readable_size(float(part['sectors']) * 512) + part['holders'] = [] + for holder in os.listdir(part_sys_block_path + '/holders'): + part['holders'].append(holder) partition_metadata[partname] = part return partition_metadata @@ -754,5 +815,9 @@ def get_devices(_sys_block_path='/sys/block', _dev_path='/dev', _mapper_path='/d metadata['path'] = diskname metadata['locked'] = is_locked_raw_device(metadata['path']) + for part_name, part_metadata in metadata['partitions'].items(): + part_abspath = '/dev/%s' % part_name + device_facts[part_abspath] = part_metadata + device_facts[diskname] = metadata return device_facts diff --git a/ceph/src/ceph-volume/ceph_volume/util/encryption.py b/ceph/src/ceph-volume/ceph_volume/util/encryption.py index f6e3fdd7e..e2b3ca164 100644 --- a/ceph/src/ceph-volume/ceph_volume/util/encryption.py +++ b/ceph/src/ceph-volume/ceph_volume/util/encryption.py @@ -23,7 +23,7 @@ def create_dmcrypt_key(): ) # The size of the key is defined in bits, so we must transform that # value to bytes (dividing by 8) because we read in bytes, not bits - random_string = os.urandom(dmcrypt_key_size / 8) + random_string = os.urandom(int(dmcrypt_key_size / 8)) key = base64.b64encode(random_string).decode('utf-8') return key @@ -60,6 +60,7 @@ def plain_open(key, device, mapping): 'cryptsetup', '--key-file', '-', + '--allow-discards', # allow discards (aka TRIM) requests for device 'open', device, mapping, @@ -84,6 +85,7 @@ def luks_open(key, device, mapping): 'cryptsetup', '--key-file', '-', + '--allow-discards', # allow discards (aka TRIM) requests for device 'luksOpen', device, mapping, diff --git a/ceph/src/client/Client.cc b/ceph/src/client/Client.cc index fc99ad53b..9f78b24a5 100644 --- a/ceph/src/client/Client.cc +++ b/ceph/src/client/Client.cc @@ -456,6 +456,7 @@ void Client::dump_status(Formatter *f) f->dump_int("mds_epoch", mdsmap->get_epoch()); f->dump_int("osd_epoch", osd_epoch); f->dump_int("osd_epoch_barrier", cap_epoch_barrier); + f->dump_bool("blacklisted", blacklisted); } } @@ -2475,6 +2476,12 @@ void Client::handle_osd_map(MOSDMap *m) return o.is_blacklisted(myaddr);}); } + // Always subscribe to next osdmap for blacklisted client + // until this client is not blacklisted. 
+ if (blacklisted) { + objecter->maybe_request_map(); + } + if (objecter->osdmap_full_flag()) { _handle_full_flag(-1); } else { @@ -2611,13 +2618,14 @@ void Client::handle_fs_map_user(MFSMapUser *m) void Client::handle_mds_map(MMDSMap* m) { + mds_gid_t old_inc, new_inc; if (m->get_epoch() <= mdsmap->get_epoch()) { ldout(cct, 1) << "handle_mds_map epoch " << m->get_epoch() << " is identical to or older than our " << mdsmap->get_epoch() << dendl; m->put(); return; - } + } ldout(cct, 1) << "handle_mds_map epoch " << m->get_epoch() << dendl; @@ -2664,6 +2672,13 @@ void Client::handle_mds_map(MMDSMap* m) if (!mdsmap->is_up(mds)) { session->con->mark_down(); } else if (mdsmap->get_inst(mds) != session->inst) { + old_inc = oldmap->get_incarnation(mds); + new_inc = mdsmap->get_incarnation(mds); + if (old_inc != new_inc) { + ldout(cct, 1) << "mds incarnation changed from " + << old_inc << " to " << new_inc << dendl; + oldstate = MDSMap::STATE_NULL; + } session->con->mark_down(); session->inst = mdsmap->get_inst(mds); // When new MDS starts to take over, notify kernel to trim unused entries @@ -2674,6 +2689,11 @@ void Client::handle_mds_map(MMDSMap* m) continue; // no change session->mds_state = newstate; + if (old_inc != new_inc && newstate > MDSMap::STATE_RECONNECT) { + // missed reconnect close the session so that it can be reopened + _closed_mds_session(session); + continue; + } if (newstate == MDSMap::STATE_RECONNECT) { session->con = messenger->get_connection(session->inst); send_reconnect(session); @@ -4862,7 +4882,6 @@ void Client::handle_cap_export(MetaSession *session, Inode *in, MClientCaps *m) tcap->cap_id = m->peer.cap_id; tcap->seq = m->peer.seq - 1; tcap->issue_seq = tcap->seq; - tcap->mseq = m->peer.mseq; tcap->issued |= cap->issued; tcap->implemented |= cap->issued; if (cap == in->auth_cap) @@ -9196,6 +9215,8 @@ int Client::_preadv_pwritev(int fd, const struct iovec *iov, unsigned iovcnt, in int Client::_write(Fh *f, int64_t offset, uint64_t size, const char *buf, const struct iovec *iov, int iovcnt) { + uint64_t fpos = 0; + if ((uint64_t)(offset+size) > mdsmap->get_max_filesize()) //too large! return -EFBIG; @@ -9235,7 +9256,7 @@ int Client::_write(Fh *f, int64_t offset, uint64_t size, const char *buf, } } offset = f->pos; - f->pos = offset+size; + fpos = offset+size; unlock_fh_pos(f); } @@ -9385,6 +9406,11 @@ success: lat -= start; logger->tinc(l_c_wrlat, lat); + if (fpos) { + lock_fh_pos(f); + f->pos = fpos; + unlock_fh_pos(f); + } totalwritten = size; r = (int)totalwritten; diff --git a/ceph/src/cls/lock/cls_lock.cc b/ceph/src/cls/lock/cls_lock.cc index 6e2ae4bbd..1dab0dd72 100644 --- a/ceph/src/cls/lock/cls_lock.cc +++ b/ceph/src/cls/lock/cls_lock.cc @@ -34,7 +34,18 @@ CLS_NAME(lock) #define LOCK_PREFIX "lock." 
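
The _write() change above defers updating f->pos: the prospective position is computed while the fh position lock is held, and it is written back only once the write has succeeded. A self-contained sketch of that ordering, using std::mutex instead of the Client locking primitives, might look like this; FileHandle and append_write() are illustrative names only.

    #include <cstdint>
    #include <mutex>

    // Illustrative only: compute the prospective position under the lock,
    // perform the write without publishing it, and commit the new position
    // only on the success path.
    struct FileHandle {
      std::mutex pos_lock;
      uint64_t pos = 0;
    };

    template <typename WriteFn>
    int append_write(FileHandle& fh, uint64_t size, WriteFn do_write)
    {
      uint64_t offset, fpos;
      {
        std::lock_guard<std::mutex> l(fh.pos_lock);
        offset = fh.pos;
        fpos = offset + size;          // not yet visible to other callers
      }
      int r = do_write(offset, size);  // may fail; fh.pos is still unchanged
      if (r >= 0) {
        std::lock_guard<std::mutex> l(fh.pos_lock);
        fh.pos = fpos;                 // publish only after the data is written
      }
      return r;
    }
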
-static int read_lock(cls_method_context_t hctx, const string& name, lock_info_t *lock) +static int clean_lock(cls_method_context_t hctx) +{ + int r = cls_cxx_remove(hctx); + if (r < 0) + return r; + + return 0; +} + +static int read_lock(cls_method_context_t hctx, + const string& name, + lock_info_t *lock) { bufferlist bl; string key = LOCK_PREFIX; @@ -67,16 +78,20 @@ static int read_lock(cls_method_context_t hctx, const string& name, lock_info_t map::iterator iter = lock->lockers.begin(); while (iter != lock->lockers.end()) { - map::iterator next = iter; - ++next; - struct locker_info_t& info = iter->second; if (!info.expiration.is_zero() && info.expiration < now) { CLS_LOG(20, "expiring locker"); - lock->lockers.erase(iter); + iter = lock->lockers.erase(iter); + } else { + ++iter; } + } - iter = next; + if (lock->lockers.empty() && cls_lock_is_ephemeral(lock->lock_type)) { + r = clean_lock(hctx); + if (r < 0) { + CLS_ERR("error, on read, cleaning lock object %s", cpp_strerror(r).c_str()); + } } return 0; @@ -121,24 +136,35 @@ static int lock_obj(cls_method_context_t hctx, const string& cookie, const string& tag) { - bool exclusive = lock_type == LOCK_EXCLUSIVE; + bool exclusive = cls_lock_is_exclusive(lock_type); lock_info_t linfo; - bool fail_if_exists = (flags & LOCK_FLAG_RENEW) == 0; + bool fail_if_exists = (flags & LOCK_FLAG_MAY_RENEW) == 0; + bool fail_if_does_not_exist = flags & LOCK_FLAG_MUST_RENEW; - CLS_LOG(20, "requested lock_type=%s fail_if_exists=%d", cls_lock_type_str(lock_type), fail_if_exists); - if (lock_type != LOCK_EXCLUSIVE && - lock_type != LOCK_SHARED) + CLS_LOG(20, + "requested lock_type=%s fail_if_exists=%d fail_if_does_not_exist=%d", + cls_lock_type_str(lock_type), fail_if_exists, fail_if_does_not_exist); + if (!cls_lock_is_valid(lock_type)) { return -EINVAL; + } if (name.empty()) return -EINVAL; + if (!fail_if_exists && fail_if_does_not_exist) { + // at most one of LOCK_FLAG_MAY_RENEW and LOCK_FLAG_MUST_RENEW may + // be set since they have different implications if the lock does + // not already exist + return -EINVAL; + } + // see if there's already a locker int r = read_lock(hctx, name, &linfo); if (r < 0 && r != -ENOENT) { CLS_ERR("Could not read lock info: %s", cpp_strerror(r).c_str()); return r; } + map& lockers = linfo.lockers; map::iterator iter; @@ -160,11 +186,13 @@ static int lock_obj(cls_method_context_t hctx, CLS_LOG(20, "existing_lock_type=%s", cls_lock_type_str(existing_lock_type)); iter = lockers.find(id); if (iter != lockers.end()) { - if (fail_if_exists) { + if (fail_if_exists && !fail_if_does_not_exist) { return -EEXIST; } else { lockers.erase(iter); // remove old entry } + } else if (fail_if_does_not_exist) { + return -ENOENT; } if (!lockers.empty()) { @@ -235,9 +263,9 @@ static int lock_op(cls_method_context_t hctx, * entity or cookie is wrong), or -errno on other error. */ static int remove_lock(cls_method_context_t hctx, - const string& name, - entity_name_t& locker, - const string& cookie) + const string& name, + entity_name_t& locker, + const string& cookie) { // get current lockers lock_info_t linfo; @@ -257,7 +285,12 @@ static int remove_lock(cls_method_context_t hctx, } lockers.erase(iter); - r = write_lock(hctx, name, linfo); + if (cls_lock_is_ephemeral(linfo.lock_type)) { + ceph_assert(lockers.empty()); + r = clean_lock(hctx); + } else { + r = write_lock(hctx, name, linfo); + } return r; } @@ -301,7 +334,7 @@ static int unlock_op(cls_method_context_t hctx, * is wrong), or -errno on other (unexpected) error. 
*/ static int break_lock(cls_method_context_t hctx, - bufferlist *in, bufferlist *out) + bufferlist *in, bufferlist *out) { CLS_LOG(20, "break_lock"); cls_lock_break_op op; @@ -421,7 +454,7 @@ int assert_locked(cls_method_context_t hctx, bufferlist *in, bufferlist *out) return -EINVAL; } - if (op.type != LOCK_EXCLUSIVE && op.type != LOCK_SHARED) { + if (!cls_lock_is_valid(op.type)) { return -EINVAL; } @@ -493,7 +526,7 @@ int set_cookie(cls_method_context_t hctx, bufferlist *in, bufferlist *out) return -EINVAL; } - if (op.type != LOCK_EXCLUSIVE && op.type != LOCK_SHARED) { + if (!cls_lock_is_valid(op.type)) { return -EINVAL; } diff --git a/ceph/src/cls/lock/cls_lock_client.cc b/ceph/src/cls/lock/cls_lock_client.cc index 3a3cef367..776fe4889 100644 --- a/ceph/src/cls/lock/cls_lock_client.cc +++ b/ceph/src/cls/lock/cls_lock_client.cc @@ -208,14 +208,19 @@ namespace rados { rados_op->exec("lock", "set_cookie", in); } + void Lock::assert_locked_shared(ObjectOperation *op) + { + assert_locked(op, name, LOCK_SHARED, cookie, tag); + } + void Lock::assert_locked_exclusive(ObjectOperation *op) { assert_locked(op, name, LOCK_EXCLUSIVE, cookie, tag); } - void Lock::assert_locked_shared(ObjectOperation *op) + void Lock::assert_locked_exclusive_ephemeral(ObjectOperation *op) { - assert_locked(op, name, LOCK_SHARED, cookie, tag); + assert_locked(op, name, LOCK_EXCLUSIVE_EPHEMERAL, cookie, tag); } void Lock::lock_shared(ObjectWriteOperation *op) @@ -242,6 +247,18 @@ namespace rados { cookie, tag, description, duration, flags); } + void Lock::lock_exclusive_ephemeral(ObjectWriteOperation *op) + { + lock(op, name, LOCK_EXCLUSIVE_EPHEMERAL, + cookie, tag, description, duration, flags); + } + + int Lock::lock_exclusive_ephemeral(IoCtx *ioctx, const string& oid) + { + return lock(ioctx, oid, name, LOCK_EXCLUSIVE_EPHEMERAL, + cookie, tag, description, duration, flags); + } + void Lock::unlock(ObjectWriteOperation *op) { rados::cls::lock::unlock(op, name, cookie); diff --git a/ceph/src/cls/lock/cls_lock_client.h b/ceph/src/cls/lock/cls_lock_client.h index 7aa06238f..0066dc3c0 100644 --- a/ceph/src/cls/lock/cls_lock_client.h +++ b/ceph/src/cls/lock/cls_lock_client.h @@ -4,6 +4,8 @@ #ifndef CEPH_CLS_LOCK_CLIENT_H #define CEPH_CLS_LOCK_CLIENT_H +#include + #include "cls/lock/cls_lock_types.h" namespace librados { @@ -87,26 +89,53 @@ namespace rados { void set_tag(const std::string& t) { tag = t; } void set_description(const std::string& desc) { description = desc; } void set_duration(const utime_t& e) { duration = e; } - void set_renew(bool renew) { + void set_duration(const ceph::timespan& d) { + duration = utime_t(ceph::real_clock::time_point::min() + d); + } + + void set_may_renew(bool renew) { if (renew) { - flags |= LOCK_FLAG_RENEW; + flags |= LOCK_FLAG_MAY_RENEW; + flags &= ~LOCK_FLAG_MUST_RENEW; // if may then not must } else { - flags &= ~LOCK_FLAG_RENEW; + flags &= ~LOCK_FLAG_MAY_RENEW; + } + } + + void set_must_renew(bool renew) { + if (renew) { + flags |= LOCK_FLAG_MUST_RENEW; + flags &= ~LOCK_FLAG_MAY_RENEW; // if must then not may + } else { + flags &= ~LOCK_FLAG_MUST_RENEW; } } - void assert_locked_exclusive(librados::ObjectOperation *rados_op); void assert_locked_shared(librados::ObjectOperation *rados_op); + void assert_locked_exclusive(librados::ObjectOperation *rados_op); + void assert_locked_exclusive_ephemeral(librados::ObjectOperation *rados_op); /* ObjectWriteOperation */ - void lock_exclusive(librados::ObjectWriteOperation *ioctx); void lock_shared(librados::ObjectWriteOperation *ioctx); + 
void lock_exclusive(librados::ObjectWriteOperation *ioctx); + + // Be careful when using an exclusive ephemeral lock; it is + // intended strictly for cases when a lock object exists + // solely for a lock in a given process and the object is no + // longer needed when the lock is unlocked or expired, as the + // cls back-end will make an effort to delete it. + void lock_exclusive_ephemeral(librados::ObjectWriteOperation *ioctx); void unlock(librados::ObjectWriteOperation *ioctx); - void break_lock(librados::ObjectWriteOperation *ioctx, const entity_name_t& locker); + void break_lock(librados::ObjectWriteOperation *ioctx, + const entity_name_t& locker); /* IoCtx */ - int lock_exclusive(librados::IoCtx *ioctx, const std::string& oid); int lock_shared(librados::IoCtx *ioctx, const std::string& oid); + int lock_exclusive(librados::IoCtx *ioctx, const std::string& oid); + + // NB: see above comment on exclusive ephemeral locks + int lock_exclusive_ephemeral(librados::IoCtx *ioctx, + const std::string& oid); int unlock(librados::IoCtx *ioctx, const std::string& oid); int break_lock(librados::IoCtx *ioctx, const std::string& oid, const entity_name_t& locker); diff --git a/ceph/src/cls/lock/cls_lock_ops.cc b/ceph/src/cls/lock/cls_lock_ops.cc index 10d005900..96a2b1ae5 100644 --- a/ceph/src/cls/lock/cls_lock_ops.cc +++ b/ceph/src/cls/lock/cls_lock_ops.cc @@ -45,7 +45,7 @@ void cls_lock_lock_op::generate_test_instances(list& o) i->tag = "tag"; i->description = "description"; i->duration = utime_t(5, 0); - i->flags = LOCK_FLAG_RENEW; + i->flags = LOCK_FLAG_MAY_RENEW; o.push_back(i); o.push_back(new cls_lock_lock_op); } diff --git a/ceph/src/cls/lock/cls_lock_ops.h b/ceph/src/cls/lock/cls_lock_ops.h index dbdddfe21..b9388e788 100644 --- a/ceph/src/cls/lock/cls_lock_ops.h +++ b/ceph/src/cls/lock/cls_lock_ops.h @@ -1,3 +1,6 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + #ifndef CEPH_CLS_LOCK_OPS_H #define CEPH_CLS_LOCK_OPS_H diff --git a/ceph/src/cls/lock/cls_lock_types.h b/ceph/src/cls/lock/cls_lock_types.h index 36d39c890..5f44126b4 100644 --- a/ceph/src/cls/lock/cls_lock_types.h +++ b/ceph/src/cls/lock/cls_lock_types.h @@ -1,3 +1,6 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + #ifndef CEPH_CLS_LOCK_TYPES_H #define CEPH_CLS_LOCK_TYPES_H @@ -7,12 +10,14 @@ #include "msg/msg_types.h" /* lock flags */ -#define LOCK_FLAG_RENEW 0x1 /* idempotent lock acquire */ +#define LOCK_FLAG_MAY_RENEW 0x1 /* idempotent lock acquire */ +#define LOCK_FLAG_MUST_RENEW 0x2 /* lock must already be acquired */ enum ClsLockType { - LOCK_NONE = 0, - LOCK_EXCLUSIVE = 1, - LOCK_SHARED = 2, + LOCK_NONE = 0, + LOCK_EXCLUSIVE = 1, + LOCK_SHARED = 2, + LOCK_EXCLUSIVE_EPHEMERAL = 3, /* lock object is removed @ unlock */ }; static inline const char *cls_lock_type_str(ClsLockType type) @@ -24,11 +29,27 @@ static inline const char *cls_lock_type_str(ClsLockType type) return "exclusive"; case LOCK_SHARED: return "shared"; + case LOCK_EXCLUSIVE_EPHEMERAL: + return "exclusive-ephemeral"; default: return ""; } } +inline bool cls_lock_is_exclusive(ClsLockType type) { + return LOCK_EXCLUSIVE == type || LOCK_EXCLUSIVE_EPHEMERAL == type; +} + +inline bool cls_lock_is_ephemeral(ClsLockType type) { + return LOCK_EXCLUSIVE_EPHEMERAL == type; +} + +inline bool cls_lock_is_valid(ClsLockType type) { + return LOCK_SHARED == type || + LOCK_EXCLUSIVE == type || + LOCK_EXCLUSIVE_EPHEMERAL == type; +} + namespace rados { namespace cls { 
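
The new LOCK_EXCLUSIVE_EPHEMERAL type and the MAY_RENEW/MUST_RENEW split are consumed through the Lock helper declared in cls_lock_client.h. A hedged usage sketch follows; the lock name, cookie, and duration are illustrative, and error handling is reduced to the return codes the methods above produce.

    #include "cls/lock/cls_lock_client.h"
    #include "include/rados/librados.hpp"

    // Illustrative usage of the ephemeral exclusive lock: the backing object
    // is created on first acquisition and removed by cls_lock once the lock
    // is unlocked or expires.
    int take_ephemeral_lock(librados::IoCtx& ioctx, const std::string& oid)
    {
      rados::cls::lock::Lock l("example-lock");   // name is illustrative
      l.set_cookie("cookie-1");
      l.set_duration(utime_t(30, 0));             // expires if not renewed in 30s

      int r = l.lock_exclusive_ephemeral(&ioctx, oid);
      if (r < 0)
        return r;

      // MUST_RENEW turns a later acquire into a lease extension: it succeeds
      // only if this locker already holds the lock, otherwise -ENOENT.
      l.set_must_renew(true);
      return l.lock_exclusive_ephemeral(&ioctx, oid);
    }
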
namespace lock { diff --git a/ceph/src/cls/rgw/cls_rgw.cc b/ceph/src/cls/rgw/cls_rgw.cc index 13b3e92dc..fba47d460 100644 --- a/ceph/src/cls/rgw/cls_rgw.cc +++ b/ceph/src/cls/rgw/cls_rgw.cc @@ -1437,13 +1437,15 @@ static int rgw_bucket_link_olh(cls_method_context_t hctx, bufferlist *in, buffer if (ret < 0) { return ret; } + } + + removing = existed && op.delete_marker; + if (!removing) { ret = other_obj.unlink(); if (ret < 0) { return ret; } } - - removing = existed && op.delete_marker; } else { removing = (existed && !obj.is_delete_marker() && op.delete_marker); } @@ -3758,7 +3760,7 @@ static int rgw_set_bucket_resharding(cls_method_context_t hctx, bufferlist *in, static int rgw_clear_bucket_resharding(cls_method_context_t hctx, bufferlist *in, bufferlist *out) { - cls_rgw_set_bucket_resharding_op op; + cls_rgw_clear_bucket_resharding_op op; bufferlist::iterator in_iter = in->begin(); try { diff --git a/ceph/src/cls/rgw/cls_rgw_client.cc b/ceph/src/cls/rgw/cls_rgw_client.cc index 3c4ed919a..93ef2b522 100644 --- a/ceph/src/cls/rgw/cls_rgw_client.cc +++ b/ceph/src/cls/rgw/cls_rgw_client.cc @@ -92,14 +92,16 @@ bool BucketIndexAioManager::wait_for_completions(int valid_ret_code, return true; } -void cls_rgw_bucket_init(ObjectWriteOperation& o) +// note: currently only called by tesing code +void cls_rgw_bucket_init_index(ObjectWriteOperation& o) { bufferlist in; o.exec(RGW_CLASS, RGW_BUCKET_INIT_INDEX, in); } static bool issue_bucket_index_init_op(librados::IoCtx& io_ctx, - const string& oid, BucketIndexAioManager *manager) { + const string& oid, + BucketIndexAioManager *manager) { bufferlist in; librados::ObjectWriteOperation op; op.create(true); @@ -107,6 +109,15 @@ static bool issue_bucket_index_init_op(librados::IoCtx& io_ctx, return manager->aio_operate(io_ctx, oid, &op); } +static bool issue_bucket_index_clean_op(librados::IoCtx& io_ctx, + const string& oid, + BucketIndexAioManager *manager) { + bufferlist in; + librados::ObjectWriteOperation op; + op.remove(); + return manager->aio_operate(io_ctx, oid, &op); +} + static bool issue_bucket_set_tag_timeout_op(librados::IoCtx& io_ctx, const string& oid, uint64_t timeout, BucketIndexAioManager *manager) { bufferlist in; @@ -126,11 +137,16 @@ int CLSRGWIssueBucketIndexInit::issue_op(int shard_id, const string& oid) void CLSRGWIssueBucketIndexInit::cleanup() { // Do best effort removal - for (map::iterator citer = objs_container.begin(); citer != iter; ++citer) { + for (auto citer = objs_container.begin(); citer != iter; ++citer) { io_ctx.remove(citer->second); } } +int CLSRGWIssueBucketIndexClean::issue_op(int shard_id, const string& oid) +{ + return issue_bucket_index_clean_op(io_ctx, oid, &manager); +} + int CLSRGWIssueSetTagTimeout::issue_op(int shard_id, const string& oid) { return issue_bucket_set_tag_timeout_op(io_ctx, oid, tag_timeout, &manager); @@ -956,4 +972,3 @@ int CLSRGWIssueSetBucketResharding::issue_op(int shard_id, const string& oid) { return issue_set_bucket_resharding(io_ctx, oid, entry, &manager); } - diff --git a/ceph/src/cls/rgw/cls_rgw_client.h b/ceph/src/cls/rgw/cls_rgw_client.h index c4ab0f648..97a950cf0 100644 --- a/ceph/src/cls/rgw/cls_rgw_client.h +++ b/ceph/src/cls/rgw/cls_rgw_client.h @@ -1,3 +1,6 @@ +// -*- mode:C; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + #ifndef CEPH_CLS_RGW_CLIENT_H #define CEPH_CLS_RGW_CLIENT_H @@ -230,7 +233,7 @@ public: }; /* bucket index */ -void cls_rgw_bucket_init(librados::ObjectWriteOperation& o); +void 
cls_rgw_bucket_init_index(librados::ObjectWriteOperation& o); class CLSRGWConcurrentIO { protected: @@ -252,9 +255,15 @@ protected: virtual void reset_container(map& objs) {} public: - CLSRGWConcurrentIO(librados::IoCtx& ioc, map& _objs_container, - uint32_t _max_aio) : io_ctx(ioc), objs_container(_objs_container), max_aio(_max_aio) {} - virtual ~CLSRGWConcurrentIO() {} + + CLSRGWConcurrentIO(librados::IoCtx& ioc, + map& _objs_container, + uint32_t _max_aio) : + io_ctx(ioc), objs_container(_objs_container), max_aio(_max_aio) + {} + + virtual ~CLSRGWConcurrentIO() + {} int operator()() { int ret = 0; @@ -305,6 +314,23 @@ public: CLSRGWConcurrentIO(ioc, _bucket_objs, _max_aio) {} }; + +class CLSRGWIssueBucketIndexClean : public CLSRGWConcurrentIO { +protected: + int issue_op(int shard_id, const string& oid) override; + int valid_ret_code() override { + return -ENOENT; + } + +public: + CLSRGWIssueBucketIndexClean(librados::IoCtx& ioc, + map& _bucket_objs, + uint32_t _max_aio) : + CLSRGWConcurrentIO(ioc, _bucket_objs, _max_aio) + {} +}; + + class CLSRGWIssueSetTagTimeout : public CLSRGWConcurrentIO { uint64_t tag_timeout; protected: @@ -536,7 +562,7 @@ int cls_rgw_reshard_get(librados::IoCtx& io_ctx, const string& oid, cls_rgw_resh int cls_rgw_reshard_get_head(librados::IoCtx& io_ctx, const string& oid, cls_rgw_reshard_entry& entry); void cls_rgw_reshard_remove(librados::ObjectWriteOperation& op, const cls_rgw_reshard_entry& entry); -/* resharding attribute */ +/* resharding attribute on bucket index shard headers */ int cls_rgw_set_bucket_resharding(librados::IoCtx& io_ctx, const string& oid, const cls_rgw_bucket_instance_entry& entry); int cls_rgw_clear_bucket_resharding(librados::IoCtx& io_ctx, const string& oid); diff --git a/ceph/src/cls/rgw/cls_rgw_types.h b/ceph/src/cls/rgw/cls_rgw_types.h index 51107c325..baa61c9fb 100644 --- a/ceph/src/cls/rgw/cls_rgw_types.h +++ b/ceph/src/cls/rgw/cls_rgw_types.h @@ -1,3 +1,6 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + #ifndef CEPH_CLS_RGW_TYPES_H #define CEPH_CLS_RGW_TYPES_H @@ -609,6 +612,24 @@ enum cls_rgw_reshard_status { CLS_RGW_RESHARD_DONE = 2, }; +static inline std::string to_string(const enum cls_rgw_reshard_status status) +{ + switch (status) { + case CLS_RGW_RESHARD_NONE: + return "CLS_RGW_RESHARD_NONE"; + break; + case CLS_RGW_RESHARD_IN_PROGRESS: + return "CLS_RGW_RESHARD_IN_PROGRESS"; + break; + case CLS_RGW_RESHARD_DONE: + return "CLS_RGW_RESHARD_DONE"; + break; + default: + break; + }; + return "Unknown reshard status"; +} + struct cls_rgw_bucket_instance_entry { cls_rgw_reshard_status reshard_status{CLS_RGW_RESHARD_NONE}; string new_bucket_instance_id; diff --git a/ceph/src/common/Cond.h b/ceph/src/common/Cond.h index aa53b60f2..1777827e3 100644 --- a/ceph/src/common/Cond.h +++ b/ceph/src/common/Cond.h @@ -17,105 +17,7 @@ #define CEPH_COND_H #include "include/Context.h" - -class Cond { - // my bits - pthread_cond_t _c; - - Mutex *waiter_mutex; - - // don't allow copying. 
- void operator=(Cond &C); - Cond(const Cond &C); - - public: - Cond() : waiter_mutex(NULL) { - int r = pthread_cond_init(&_c,NULL); - assert(r == 0); - } - virtual ~Cond() { - pthread_cond_destroy(&_c); - } - - int Wait(Mutex &mutex) { - // make sure this cond is used with one mutex only - assert(waiter_mutex == NULL || waiter_mutex == &mutex); - waiter_mutex = &mutex; - - assert(mutex.is_locked()); - - mutex._pre_unlock(); - int r = pthread_cond_wait(&_c, &mutex._m); - mutex._post_lock(); - return r; - } - - int WaitUntil(Mutex &mutex, utime_t when) { - // make sure this cond is used with one mutex only - assert(waiter_mutex == NULL || waiter_mutex == &mutex); - waiter_mutex = &mutex; - - assert(mutex.is_locked()); - - struct timespec ts; - when.to_timespec(&ts); - - mutex._pre_unlock(); - int r = pthread_cond_timedwait(&_c, &mutex._m, &ts); - mutex._post_lock(); - - return r; - } - - int WaitInterval(Mutex &mutex, utime_t interval) { - utime_t when = ceph_clock_now(); - when += interval; - return WaitUntil(mutex, when); - } - - template - int WaitInterval(Mutex &mutex, Duration interval) { - ceph::real_time when(ceph::real_clock::now()); - when += interval; - - struct timespec ts = ceph::real_clock::to_timespec(when); - - mutex._pre_unlock(); - int r = pthread_cond_timedwait(&_c, &mutex._m, &ts); - mutex._post_lock(); - - return r; - } - - int SloppySignal() { - int r = pthread_cond_broadcast(&_c); - return r; - } - int Signal() { - // make sure signaler is holding the waiter's lock. - assert(waiter_mutex == NULL || - waiter_mutex->is_locked()); - - int r = pthread_cond_broadcast(&_c); - return r; - } - int SignalOne() { - // make sure signaler is holding the waiter's lock. - assert(waiter_mutex == NULL || - waiter_mutex->is_locked()); - - int r = pthread_cond_signal(&_c); - return r; - } - int SignalAll() { - // make sure signaler is holding the waiter's lock. - assert(waiter_mutex == NULL || - waiter_mutex->is_locked()); - - int r = pthread_cond_broadcast(&_c); - return r; - } -}; +#include "CondVar.h" /** * context to signal a cond diff --git a/ceph/src/common/CondVar.h b/ceph/src/common/CondVar.h new file mode 100644 index 000000000..c193b9988 --- /dev/null +++ b/ceph/src/common/CondVar.h @@ -0,0 +1,109 @@ +#ifndef CEPH_COND_VAR_H +#define CEPH_COND_VAR_H + +#include "include/utime.h" + +#include "Clock.h" +#include "Mutex.h" +#include "pthread.h" + +class Cond { + // my bits + pthread_cond_t _c; + + Mutex *waiter_mutex; + + // don't allow copying. 
+ void operator=(Cond &C); + Cond(const Cond &C); + + public: + Cond() : waiter_mutex(NULL) { + int r = pthread_cond_init(&_c,NULL); + assert(r == 0); + } + virtual ~Cond() { + pthread_cond_destroy(&_c); + } + + int Wait(Mutex &mutex) { + // make sure this cond is used with one mutex only + assert(waiter_mutex == NULL || waiter_mutex == &mutex); + waiter_mutex = &mutex; + + assert(mutex.is_locked()); + + mutex._pre_unlock(); + int r = pthread_cond_wait(&_c, &mutex._m); + mutex._post_lock(); + return r; + } + + int WaitUntil(Mutex &mutex, utime_t when) { + // make sure this cond is used with one mutex only + assert(waiter_mutex == NULL || waiter_mutex == &mutex); + waiter_mutex = &mutex; + + assert(mutex.is_locked()); + + struct timespec ts; + when.to_timespec(&ts); + + mutex._pre_unlock(); + int r = pthread_cond_timedwait(&_c, &mutex._m, &ts); + mutex._post_lock(); + + return r; + } + + int WaitInterval(Mutex &mutex, utime_t interval) { + utime_t when = ceph_clock_now(); + when += interval; + return WaitUntil(mutex, when); + } + + template + int WaitInterval(Mutex &mutex, Duration interval) { + ceph::real_time when(ceph::real_clock::now()); + when += interval; + + struct timespec ts = ceph::real_clock::to_timespec(when); + + mutex._pre_unlock(); + int r = pthread_cond_timedwait(&_c, &mutex._m, &ts); + mutex._post_lock(); + + return r; + } + + int SloppySignal() { + int r = pthread_cond_broadcast(&_c); + return r; + } + int Signal() { + // make sure signaler is holding the waiter's lock. + assert(waiter_mutex == NULL || + waiter_mutex->is_locked()); + + int r = pthread_cond_broadcast(&_c); + return r; + } + int SignalOne() { + // make sure signaler is holding the waiter's lock. + assert(waiter_mutex == NULL || + waiter_mutex->is_locked()); + + int r = pthread_cond_signal(&_c); + return r; + } + int SignalAll() { + // make sure signaler is holding the waiter's lock. 
+ assert(waiter_mutex == NULL || + waiter_mutex->is_locked()); + + int r = pthread_cond_broadcast(&_c); + return r; + } +}; + +#endif // CEPH_COND_VAR_H diff --git a/ceph/src/common/TrackedOp.cc b/ceph/src/common/TrackedOp.cc index 4ed2fa48b..788b29744 100644 --- a/ceph/src/common/TrackedOp.cc +++ b/ceph/src/common/TrackedOp.cc @@ -342,8 +342,10 @@ bool OpTracker::check_ops_in_flight(std::vector &warning_vector, int *sl while (i != sdata->ops_in_flight_sharded.end() && i->get_initiated() < too_old) { - if (!i->warn_interval_multiplier) + if (!i->warn_interval_multiplier) { + ++i; continue; + } (*slow)++; diff --git a/ceph/src/common/WeightedPriorityQueue.h b/ceph/src/common/WeightedPriorityQueue.h index 64ac120bf..fa463b4c4 100644 --- a/ceph/src/common/WeightedPriorityQueue.h +++ b/ceph/src/common/WeightedPriorityQueue.h @@ -67,8 +67,11 @@ class WeightedPriorityQueue : public OpQueue K key; // klass ListPairs lp; Klass(K& k) : - key(k) - {} + key(k) { + } + ~Klass() { + lp.clear_and_dispose(DelItem()); + } friend bool operator< (const Klass &a, const Klass &b) { return a.key < b.key; } friend bool operator> (const Klass &a, const Klass &b) @@ -129,8 +132,11 @@ class WeightedPriorityQueue : public OpQueue Kit next; SubQueue(unsigned& p) : key(p), - next(klasses.begin()) - {} + next(klasses.begin()) { + } + ~SubQueue() { + klasses.clear_and_dispose(DelItem()); + } friend bool operator< (const SubQueue &a, const SubQueue &b) { return a.key < b.key; } friend bool operator> (const SubQueue &a, const SubQueue &b) @@ -195,8 +201,11 @@ class WeightedPriorityQueue : public OpQueue Queue() : total_prio(0), max_cost(0), - size(0) - {} + size(0) { + } + ~Queue() { + queues.clear_and_dispose(DelItem()); + } bool empty() const { return !size; } diff --git a/ceph/src/common/buffer.cc b/ceph/src/common/buffer.cc index 09dcc67b2..cf63639a0 100644 --- a/ceph/src/common/buffer.cc +++ b/ceph/src/common/buffer.cc @@ -1723,6 +1723,32 @@ static std::atomic_flag buffer_debug_lock = ATOMIC_FLAG_INIT; } } + uint64_t buffer::list::get_wasted_space() const + { + if (_buffers.size() == 1) + return _buffers.back().wasted(); + + std::vector raw_vec; + raw_vec.reserve(_buffers.size()); + for (const auto& p : _buffers) + raw_vec.push_back(p.get_raw()); + std::sort(raw_vec.begin(), raw_vec.end()); + + uint64_t total = 0; + const raw *last = nullptr; + for (const auto r : raw_vec) { + if (r == last) + continue; + last = r; + total += r->len; + } + // If multiple buffers are sharing the same raw buffer and they overlap + // with each other, the wasted space will be underestimated. 
+ if (total <= length()) + return 0; + return total - length(); + } + void buffer::list::rebuild() { if (_len == 0) { diff --git a/ceph/src/common/ceph_context.cc b/ceph/src/common/ceph_context.cc index 0afdc3ac6..87194f7dd 100644 --- a/ceph/src/common/ceph_context.cc +++ b/ceph/src/common/ceph_context.cc @@ -34,7 +34,8 @@ namespace { class LockdepObs : public md_config_obs_t { public: - explicit LockdepObs(CephContext *cct) : m_cct(cct), m_registered(false) { + explicit LockdepObs(CephContext *cct) + : m_cct(cct), m_registered(false), lock("lock_dep_obs", false, true) { } ~LockdepObs() override { if (m_registered) { @@ -49,6 +50,7 @@ public: void handle_conf_change(const md_config_t *conf, const std::set &changed) override { + Mutex::Locker locker(lock); if (conf->lockdep && !m_registered) { lockdep_register_ceph_context(m_cct); m_registered = true; @@ -60,14 +62,17 @@ public: private: CephContext *m_cct; bool m_registered; + Mutex lock; }; class MempoolObs : public md_config_obs_t, public AdminSocketHook { CephContext *cct; + Mutex lock; public: - explicit MempoolObs(CephContext *cct) : cct(cct) { + explicit MempoolObs(CephContext *cct) + : cct(cct), lock("mem_pool_obs", false, true) { cct->_conf->add_observer(this); int r = cct->get_admin_socket()->register_command( "dump_mempools", @@ -92,6 +97,7 @@ public: void handle_conf_change(const md_config_t *conf, const std::set &changed) override { + Mutex::Locker locker(lock); if (changed.count("mempool_debug")) { mempool::set_debug_mode(cct->_conf->mempool_debug); } @@ -184,9 +190,12 @@ private: */ class LogObs : public md_config_obs_t { ceph::logging::Log *log; + Mutex lock; public: - explicit LogObs(ceph::logging::Log *l) : log(l) {} + explicit LogObs(ceph::logging::Log *l) + : log(l), lock("log_obs", false, true) { + } const char** get_tracked_conf_keys() const override { static const char *KEYS[] = { @@ -211,6 +220,7 @@ public: void handle_conf_change(const md_config_t *conf, const std::set &changed) override { + Mutex::Locker locker(lock); // stderr if (changed.count("log_to_stderr") || changed.count("err_to_stderr")) { int l = conf->log_to_stderr ? 99 : (conf->err_to_stderr ? 
-1 : -2); diff --git a/ceph/src/common/cmdparse.h b/ceph/src/common/cmdparse.h index 41495f555..38d6f98aa 100644 --- a/ceph/src/common/cmdparse.h +++ b/ceph/src/common/cmdparse.h @@ -46,31 +46,74 @@ void handle_bad_get(CephContext *cct, const std::string& k, const char *name); std::string cmd_vartype_stringify(const cmd_vartype& v); +struct bad_cmd_get : public std::exception { + std::string desc; + bad_cmd_get(const std::string& f, const cmdmap_t& cmdmap) { + desc = "bad or missing field '" + f + "'"; + } + const char *what() const throw() override { + return desc.c_str(); + } +}; + template -bool -cmd_getval(CephContext *cct, const cmdmap_t& cmdmap, const std::string& k, T& val) +bool cmd_getval(CephContext *cct, const cmdmap_t& cmdmap, const std::string& k, + T& val) { if (cmdmap.count(k)) { try { val = boost::get(cmdmap.find(k)->second); return true; - } catch (boost::bad_get) { + } catch (boost::bad_get&) { handle_bad_get(cct, k, typeid(T).name()); } } return false; } +template +bool cmd_getval_throws(CephContext *cct, const cmdmap_t& cmdmap, + const std::string& k, T& val) +{ + if (cmdmap.count(k)) { + try { + val = boost::get(cmdmap.find(k)->second); + return true; + } catch (boost::bad_get&) { + throw bad_cmd_get(k, cmdmap); + } + } + return false; +} + // with default template -void -cmd_getval(CephContext *cct, const cmdmap_t& cmdmap, const std::string& k, T& val, const T& defval) +void cmd_getval(CephContext *cct, const cmdmap_t& cmdmap, const std::string& k, + T& val, const T& defval) { if (!cmd_getval(cct, cmdmap, k, val)) val = defval; } +template +bool cmd_getval_throws( + CephContext *cct, const cmdmap_t& cmdmap, const std::string& k, + T& val, const T& defval) +{ + if (cmdmap.count(k)) { + try { + val = boost::get(cmdmap.find(k)->second); + return true; + } catch (boost::bad_get&) { + throw bad_cmd_get(k, cmdmap); + } + } else { + val = defval; + return true; + } +} + template void cmd_putval(CephContext *cct, cmdmap_t& cmdmap, const std::string& k, const T& val) diff --git a/ceph/src/common/config.cc b/ceph/src/common/config.cc index b3a98a595..ef348f95d 100644 --- a/ceph/src/common/config.cc +++ b/ceph/src/common/config.cc @@ -20,6 +20,7 @@ #include "osd/osd_types.h" #include "common/errno.h" #include "common/hostname.h" +#include "common/backport14.h" #include @@ -197,11 +198,16 @@ void md_config_t::add_observer(md_config_obs_t* observer_) obs_map_t::value_type val(*k, observer_); observers.insert(val); } + obs_call_gate.emplace(observer_, ceph::make_unique()); } void md_config_t::remove_observer(md_config_obs_t* observer_) { Mutex::Locker l(lock); + + call_gate_close(observer_); + obs_call_gate.erase(observer_); + bool found_obs = false; for (obs_map_t::iterator o = observers.begin(); o != observers.end(); ) { if (o->second == observer_) { @@ -665,12 +671,21 @@ int md_config_t::parse_injectargs(std::vector& args, void md_config_t::apply_changes(std::ostream *oss) { - Mutex::Locker l(lock); - /* - * apply changes until the cluster name is assigned - */ - if (cluster.size()) - _apply_changes(oss); + rev_obs_map_t rev_obs; + { + Mutex::Locker l(lock); + /* + * apply changes until the cluster name is assigned + */ + if (cluster.size()) { + for_each_change( + oss, [this, &rev_obs](md_config_obs_t *obs, const std::string &key) { + map_observer_changes(obs, key, &rev_obs); + }); + } + } + + call_observers(rev_obs); } bool md_config_t::_internal_field(const string& s) @@ -680,12 +695,8 @@ bool md_config_t::_internal_field(const string& s) return false; } -void 
md_config_t::_apply_changes(std::ostream *oss) +void md_config_t::for_each_change(std::ostream *oss, config_gather_cb callback) { - /* Maps observers to the configuration options that they care about which - * have changed. */ - typedef std::map < md_config_obs_t*, std::set > rev_obs_map_t; - expand_all_meta(); // expand_all_meta could have modified anything. Copy it all out again. @@ -697,9 +708,6 @@ void md_config_t::_apply_changes(std::ostream *oss) update_legacy_val(option, ptr); } - // create the reverse observer mapping, mapping observers to the set of - // changed keys that they'll get. - rev_obs_map_t robs; std::set empty_set; char buf[128]; char *bufptr = (char*)buf; @@ -717,71 +725,68 @@ void md_config_t::_apply_changes(std::ostream *oss) } } for (obs_map_t::iterator r = range.first; r != range.second; ++r) { - rev_obs_map_t::value_type robs_val(r->second, empty_set); - pair < rev_obs_map_t::iterator, bool > robs_ret(robs.insert(robs_val)); - std::set &keys(robs_ret.first->second); - keys.insert(key); + callback(r->second, key); } } changed.clear(); - - // Make any pending observer callbacks - for (rev_obs_map_t::const_iterator r = robs.begin(); r != robs.end(); ++r) { - md_config_obs_t *obs = r->first; - obs->handle_conf_change(this, r->second); - } - } void md_config_t::call_all_observers() { - std::map > obs; + rev_obs_map_t rev_obs; { Mutex::Locker l(lock); expand_all_meta(); for (auto r = observers.begin(); r != observers.end(); ++r) { - obs[r->second].insert(r->first); + map_observer_changes(r->second, r->first, &rev_obs); } } - for (auto p = obs.begin(); - p != obs.end(); - ++p) { - p->first->handle_conf_change(this, p->second); - } + + call_observers(rev_obs); } int md_config_t::injectargs(const std::string& s, std::ostream *oss) { int ret; - Mutex::Locker l(lock); - char b[s.length()+1]; - strcpy(b, s.c_str()); - std::vector nargs; - char *p = b; - while (*p) { - nargs.push_back(p); - while (*p && *p != ' ') p++; - if (!*p) - break; - *p++ = 0; - while (*p && *p == ' ') p++; - } - ret = parse_injectargs(nargs, oss); - if (!nargs.empty()) { - *oss << " failed to parse arguments: "; - std::string prefix; - for (std::vector::const_iterator i = nargs.begin(); - i != nargs.end(); ++i) { - *oss << prefix << *i; - prefix = ","; + rev_obs_map_t rev_obs; + { + Mutex::Locker l(lock); + + char b[s.length()+1]; + strcpy(b, s.c_str()); + std::vector nargs; + char *p = b; + while (*p) { + nargs.push_back(p); + while (*p && *p != ' ') p++; + if (!*p) + break; + *p++ = 0; + while (*p && *p == ' ') p++; + } + ret = parse_injectargs(nargs, oss); + if (!nargs.empty()) { + *oss << " failed to parse arguments: "; + std::string prefix; + for (std::vector::const_iterator i = nargs.begin(); + i != nargs.end(); ++i) { + *oss << prefix << *i; + prefix = ","; + } + *oss << "\n"; + ret = -EINVAL; } - *oss << "\n"; - ret = -EINVAL; + + for_each_change( + oss, [this, &rev_obs](md_config_obs_t *obs, const std::string &key) { + map_observer_changes(obs, key, &rev_obs); + }); } - _apply_changes(oss); + + call_observers(rev_obs); return ret; } @@ -1389,3 +1394,26 @@ void md_config_t::complain_about_parse_errors(CephContext *cct) ::complain_about_parse_errors(cct, &parse_errors); } +void md_config_t::call_observers(rev_obs_map_t &rev_obs) { + for (auto p : rev_obs) { + p.first->handle_conf_change(this, p.second); + // this can be done outside the lock as call_gate_enter() + // and remove_observer() are serialized via lock + call_gate_leave(p.first); + } +} + +void 
md_config_t::map_observer_changes(md_config_obs_t *obs, const std::string &key, + rev_obs_map_t *rev_obs) { + ceph_assert(lock.is_locked()); + + auto p = rev_obs->emplace(obs, std::set{}); + + p.first->second.emplace(key); + if (p.second) { + // this needs to be done under lock as once this lock is + // dropped (before calling observers) a remove_observer() + // can sneak in and cause havoc. + call_gate_enter(p.first->first); + } +} diff --git a/ceph/src/common/config.h b/ceph/src/common/config.h index 612f083d8..1145e12e3 100644 --- a/ceph/src/common/config.h +++ b/ceph/src/common/config.h @@ -19,6 +19,7 @@ #include "common/entity_name.h" #include "common/code_environment.h" #include "common/Mutex.h" +#include "common/CondVar.h" #include "log/SubsystemMap.h" #include "common/config_obs.h" #include "common/options.h" @@ -65,6 +66,62 @@ extern const char *CEPH_CONF_FILE_DEFAULT; * while another thread is reading them, either. */ struct md_config_t { +private: + class CallGate { + private: + uint32_t call_count = 0; + Mutex lock; + Cond cond; + public: + CallGate() + : lock("call::gate::lock", false, true) { + } + + void enter() { + Mutex::Locker locker(lock); + ++call_count; + } + void leave() { + Mutex::Locker locker(lock); + ceph_assert(call_count > 0); + if (--call_count == 0) { + cond.Signal(); + } + } + void close() { + Mutex::Locker locker(lock); + while (call_count != 0) { + cond.Wait(lock); + } + } + }; + + void call_gate_enter(md_config_obs_t *obs) { + auto p = obs_call_gate.find(obs); + ceph_assert(p != obs_call_gate.end()); + p->second->enter(); + } + void call_gate_leave(md_config_obs_t *obs) { + auto p = obs_call_gate.find(obs); + ceph_assert(p != obs_call_gate.end()); + p->second->leave(); + } + void call_gate_close(md_config_obs_t *obs) { + auto p = obs_call_gate.find(obs); + ceph_assert(p != obs_call_gate.end()); + p->second->close(); + } + + typedef std::unique_ptr CallGateRef; + std::map obs_call_gate; + + typedef std::map> rev_obs_map_t; + typedef std::function config_gather_cb; + + void call_observers(rev_obs_map_t &rev_obs); + void map_observer_changes(md_config_obs_t *obs, const std::string &key, + rev_obs_map_t *rev_obs); + public: typedef boost::variant' afterwards // max xattr kv pairs size for each dir/file OPTION(mds_max_xattr_pairs_size, OPT_U32) OPTION(mds_max_file_recover, OPT_U32) @@ -440,17 +439,15 @@ OPTION(mds_beacon_interval, OPT_FLOAT) OPTION(mds_beacon_grace, OPT_FLOAT) OPTION(mds_enforce_unique_name, OPT_BOOL) -OPTION(mds_session_timeout, OPT_FLOAT) // cap bits and leases time out if client unresponsive or not returning its caps OPTION(mds_session_blacklist_on_timeout, OPT_BOOL) // whether to blacklist clients whose sessions are dropped due to timeout OPTION(mds_session_blacklist_on_evict, OPT_BOOL) // whether to blacklist clients whose sessions are dropped via admin commands OPTION(mds_sessionmap_keys_per_op, OPT_U32) // how many sessions should I try to load/store in a single OMAP operation? 
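
The CallGate added to md_config_t above lets configuration observers be called outside the config lock while remove_observer() waits for in-flight callbacks to drain. A minimal, self-contained sketch of the same gate, using std::mutex/std::condition_variable rather than ceph::Mutex/Cond, is shown here.

    #include <condition_variable>
    #include <cstdint>
    #include <mutex>

    // Sketch of the call-gate idea: enter()/leave() bracket an observer
    // callback made without the config lock held; close() blocks until every
    // in-flight callback for that observer has finished.
    class CallGate {
      std::mutex lock;
      std::condition_variable cond;
      uint32_t call_count = 0;
    public:
      void enter() {
        std::lock_guard<std::mutex> l(lock);
        ++call_count;
      }
      void leave() {
        std::lock_guard<std::mutex> l(lock);
        if (--call_count == 0)
          cond.notify_all();
      }
      void close() {
        std::unique_lock<std::mutex> l(lock);
        cond.wait(l, [this] { return call_count == 0; });
      }
    };
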
OPTION(mds_recall_state_timeout, OPT_FLOAT) // detect clients which aren't trimming caps OPTION(mds_freeze_tree_timeout, OPT_FLOAT) // detecting freeze tree deadlock -OPTION(mds_session_autoclose, OPT_FLOAT) // autoclose idle session OPTION(mds_health_summarize_threshold, OPT_INT) // collapse N-client health metrics to a single 'many' OPTION(mds_reconnect_timeout, OPT_FLOAT) // seconds to wait for clients during mds restart - // make it (mds_session_timeout - mds_beacon_grace) + // make it (mdsmap.session_timeout - mds_beacon_grace) OPTION(mds_tick_interval, OPT_FLOAT) OPTION(mds_dirstat_min_interval, OPT_FLOAT) // try to avoid propagating more often than this OPTION(mds_scatter_nudge_interval, OPT_FLOAT) // how quickly dirstat changes propagate up the hierarchy @@ -467,7 +464,6 @@ OPTION(mds_bal_export_pin, OPT_BOOL) // allow clients to pin directory trees to OPTION(mds_bal_sample_interval, OPT_DOUBLE) // every 3 seconds OPTION(mds_bal_replicate_threshold, OPT_FLOAT) OPTION(mds_bal_unreplicate_threshold, OPT_FLOAT) -OPTION(mds_bal_frag, OPT_BOOL) OPTION(mds_bal_split_size, OPT_INT) OPTION(mds_bal_split_rd, OPT_FLOAT) OPTION(mds_bal_split_wr, OPT_FLOAT) @@ -665,7 +661,8 @@ OPTION(osd_peering_wq_threads, OPT_INT) OPTION(osd_peering_wq_batch_size, OPT_U64) OPTION(osd_op_pq_max_tokens_per_priority, OPT_U64) OPTION(osd_op_pq_min_cost, OPT_U64) -OPTION(osd_disk_threads, OPT_INT) +OPTION(osd_remove_threads, OPT_INT) +OPTION(osd_recovery_threads, OPT_INT) OPTION(osd_disk_thread_ioprio_class, OPT_STR) // rt realtime be best effort idle OPTION(osd_disk_thread_ioprio_priority, OPT_INT) // 0-7 OPTION(osd_recover_clone_overlap, OPT_BOOL) // preserve clone_overlap during recovery/migration @@ -847,6 +844,7 @@ OPTION(osd_op_history_duration, OPT_U32) // Oldest completed op to track OPTION(osd_op_history_slow_op_size, OPT_U32) // Max number of slow ops to track OPTION(osd_op_history_slow_op_threshold, OPT_DOUBLE) // track the op if over this threshold OPTION(osd_target_transaction_size, OPT_INT) // to adjust various transactions that batch smaller items +OPTION(osd_delete_sleep, OPT_FLOAT) // seconds to sleep between removal transactions OPTION(osd_failsafe_full_ratio, OPT_FLOAT) // what % full makes an OSD "full" (failsafe) OPTION(osd_fast_fail_on_connection_refused, OPT_BOOL) // immediately mark OSDs as down once they refuse to accept connections @@ -1003,6 +1001,9 @@ OPTION(bluestore_bluefs_max_ratio, OPT_FLOAT) // max fs free / total free OPTION(bluestore_bluefs_gift_ratio, OPT_FLOAT) // how much to add at a time OPTION(bluestore_bluefs_reclaim_ratio, OPT_FLOAT) // how much to reclaim at a time OPTION(bluestore_bluefs_balance_interval, OPT_FLOAT) // how often (sec) to balance free space between bluefs and bluestore +// how often (sec) to dump allocation failure happened during bluefs rebalance +OPTION(bluestore_bluefs_balance_failure_dump_interval, OPT_FLOAT) + // If you want to use spdk driver, you need to specify NVMe serial number here // with "spdk:" prefix. 
// Users can use 'lspci -vvv -d 8086:0953 | grep "Device Serial Number"' to @@ -1031,6 +1032,7 @@ OPTION(bluestore_block_preallocate_file, OPT_BOOL) //whether preallocate space i OPTION(bluestore_csum_type, OPT_STR) // none|xxhash32|xxhash64|crc32c|crc32c_16|crc32c_8 OPTION(bluestore_csum_min_block, OPT_U32) OPTION(bluestore_csum_max_block, OPT_U32) +OPTION(bluestore_retry_disk_reads, OPT_U64) OPTION(bluestore_min_alloc_size, OPT_U32) OPTION(bluestore_min_alloc_size_hdd, OPT_U32) OPTION(bluestore_min_alloc_size_ssd, OPT_U32) @@ -1124,6 +1126,7 @@ OPTION(bluestore_debug_omit_kv_commit, OPT_BOOL) OPTION(bluestore_debug_permit_any_bdev_label, OPT_BOOL) OPTION(bluestore_shard_finishers, OPT_BOOL) OPTION(bluestore_debug_random_read_err, OPT_DOUBLE) +OPTION(bluestore_debug_inject_csum_err_probability, OPT_FLOAT) OPTION(kstore_max_ops, OPT_U64) OPTION(kstore_max_bytes, OPT_U64) @@ -1540,6 +1543,7 @@ OPTION(rgw_shard_warning_threshold, OPT_DOUBLE) // pct of safe max OPTION(rgw_swift_versioning_enabled, OPT_BOOL) // whether swift object versioning feature is enabled +OPTION(rgw_trust_forwarded_https, OPT_BOOL) // trust Forwarded and X-Forwarded-Proto headers for ssl termination OPTION(rgw_crypt_require_ssl, OPT_BOOL) // requests including encryption key headers must be sent over ssl OPTION(rgw_crypt_default_encryption_key, OPT_STR) // base64 encoded key for encryption of rgw objects OPTION(rgw_crypt_s3_kms_encryption_keys, OPT_STR) // extra keys that may be used for aws:kms diff --git a/ceph/src/common/options.cc b/ceph/src/common/options.cc index ff3bb1a1b..231a7651b 100644 --- a/ceph/src/common/options.cc +++ b/ceph/src/common/options.cc @@ -898,7 +898,7 @@ std::vector