From a8e162989c7c62b2dc22e83129e3cbdaba113a4d Mon Sep 17 00:00:00 2001 From: Thomas Lamprecht Date: Fri, 12 Apr 2019 11:40:44 +0200 Subject: [PATCH] import ceph 12.2.12 Signed-off-by: Thomas Lamprecht --- Makefile | 2 +- ceph/CMakeLists.txt | 2 +- ceph/PendingReleaseNotes | 31 +- ceph/admin/doc-requirements.txt | 2 +- ceph/alpine/APKBUILD | 6 +- ceph/ceph.spec | 10 +- ceph/ceph.spec.in | 4 +- ceph/changelog.upstream | 6 + ceph/doc/api/libcephfs-java.rst | 9 + ceph/doc/ceph-volume/simple/scan.rst | 11 + ceph/doc/man/8/ceph-volume.rst | 6 +- ceph/doc/man/8/ceph.rst | 10 + ceph/doc/man/8/rbdmap.rst | 9 +- .../rados/configuration/mon-config-ref.rst | 2 +- .../rados/configuration/osd-config-ref.rst | 18 +- ceph/doc/rados/operations/health-checks.rst | 2 +- .../troubleshooting/troubleshooting-mon.rst | 4 +- ceph/doc/radosgw/index.rst | 1 + ceph/doc/radosgw/placement.rst | 180 +++ ceph/doc/radosgw/s3/authentication.rst | 121 ++ .../objectstore-ec/bluestore-bitmap.yaml | 1 + .../tasks/cfuse_workunit_suites_fsstress.yaml | 1 + ceph/qa/objectstore/bluestore-bitmap.yaml | 4 + .../bluestore-stupid.yaml} | 1 + ceph/qa/objectstore/bluestore.yaml | 38 - .../objectstore_cephfs/bluestore-bitmap.yaml | 1 + ceph/qa/objectstore_cephfs/bluestore.yaml | 1 - ceph/qa/overrides/short_pg_log.yaml | 4 +- ceph/qa/packages/packages.yaml | 4 - ceph/qa/run-standalone.sh | 2 + ceph/qa/standalone/ceph-helpers.sh | 16 +- ceph/qa/standalone/osd/osd-backfill-prio.sh | 504 ++++++ ceph/qa/standalone/osd/osd-markdown.sh | 5 +- ceph/qa/standalone/osd/osd-recovery-prio.sh | 500 ++++++ ceph/qa/standalone/scrub/osd-scrub-repair.sh | 89 +- .../special/ceph_objectstore_tool.py | 47 +- .../basic/objectstore/bluestore-bitmap.yaml | 1 + .../basic/objectstore/bluestore.yaml | 1 - .../objectstore/bluestore-bitmap.yaml | 1 + .../objectstore/bluestore.yaml | 1 - .../tasks/libcephfs_java.yaml | 14 - .../objectstore/bluestore-bitmap.yaml | 1 + .../objectstore/bluestore.yaml | 1 - .../client_trim_caps/tasks/trim-i22073.yaml | 1 - .../suites/fs/verify/validater/valgrind.yaml | 11 +- .../powercycle/osd/whitelist_health.yaml | 1 + .../monthrash/workloads/rados_api_tests.yaml | 1 + ceph/qa/suites/rados/rest/rest_test.yaml | 1 + .../objectstore/bluestore-bitmap.yaml | 1 + .../objectstore/bluestore.yaml | 1 - .../all/osd-recovery-incomplete.yaml | 1 + .../rados/singleton/all/osd-recovery.yaml | 2 +- .../rados/singleton/all/thrash-eio.yaml | 1 + .../bluestore-bitmap.yaml | 1 + .../bluestore.yaml | 1 - .../rados/verify/tasks/rados_api_tests.yaml | 1 + .../rbd_python_api_tests_old_format.yaml | 1 + .../objectstore/bluestore-bitmap.yaml | 1 + .../objectstore/bluestore.yaml | 1 - ceph/qa/suites/rgw/multisite/overrides.yaml | 2 + .../basic/objectstore/bluestore-bitmap.yaml | 1 + .../smoke/basic/objectstore/bluestore.yaml | 1 - .../objectstore/bluestore-bitmap.yaml | 1 + .../stress-split/objectstore/bluestore.yaml | 1 - ceph/qa/tasks/cephfs/test_client_limits.py | 38 +- ceph/qa/tasks/cephfs/test_misc.py | 3 + ceph/qa/tasks/radosbench.py | 4 +- ceph/qa/valgrind.supp | 622 ++++++++ ceph/qa/workunits/cephtool/test.sh | 8 +- ceph/qa/workunits/libcephfs-java/test.sh | 39 - .../workunits/rados/test_health_warnings.sh | 1 + ceph/src/.git_version | 4 +- ceph/src/CMakeLists.txt | 2 +- ceph/src/auth/Crypto.cc | 5 +- ceph/src/ceph-disk/run-tox.sh | 2 +- .../ceph_volume/devices/simple/activate.py | 34 +- .../ceph_volume/devices/simple/scan.py | 54 +- .../ceph_volume/systemd/systemctl.py | 23 + .../tests/devices/simple/test_activate.py | 20 + 
.../tests/devices/simple/test_scan.py | 8 - .../tests/functional/batch/tox.ini | 8 +- .../lvm/centos7/bluestore/dmcrypt/test.yml | 5 + .../lvm/centos7/filestore/dmcrypt/test.yml | 5 + .../lvm/playbooks/test_bluestore.yml | 5 + .../lvm/playbooks/test_filestore.yml | 5 + .../ceph_volume/tests/functional/lvm/tox.ini | 8 +- .../lvm/xenial/bluestore/dmcrypt/test.yml | 5 + .../lvm/xenial/filestore/dmcrypt/test.yml | 5 + .../centos7/bluestore/dmcrypt-luks/test.yml | 22 +- .../centos7/filestore/activate/test.yml | 4 +- .../tests/functional/simple/tox.ini | 6 +- .../simple/xenial/filestore/activate/test.yml | 22 +- .../tests/functional/tests/__init__.py | 0 .../tests/functional/tests/conftest.py | 103 ++ .../tests/functional/tests/osd/__init__.py | 0 .../tests/functional/tests/osd/test_osds.py | 60 + .../tests/systemd/test_systemctl.py | 21 + .../ceph_volume/tests/util/test_device.py | 36 + .../ceph_volume/tests/util/test_disk.py | 22 - .../ceph-volume/ceph_volume/util/device.py | 15 +- ceph/src/ceph-volume/ceph_volume/util/disk.py | 4 - ceph/src/ceph-volume/tox.ini | 2 +- ceph/src/ceph.in | 21 +- ceph/src/client/Client.cc | 153 +- ceph/src/client/Client.h | 7 +- ceph/src/common/AsyncReserver.h | 72 + ceph/src/common/ceph_crypto.cc | 119 ++ ceph/src/common/ceph_crypto.h | 18 +- ceph/src/common/ceph_timer.h | 2 + ceph/src/common/legacy_config_opts.h | 2 +- ceph/src/common/options.cc | 73 +- ceph/src/common/str_map.cc | 20 +- ceph/src/crush/CrushWrapper.cc | 103 +- ceph/src/crush/CrushWrapper.h | 16 +- ceph/src/journal/Journaler.cc | 4 +- ceph/src/log/test.cc | 20 +- ceph/src/mds/Beacon.cc | 53 +- ceph/src/mds/Beacon.h | 2 +- ceph/src/mds/CInode.cc | 10 +- ceph/src/mds/Capability.cc | 82 +- ceph/src/mds/Capability.h | 134 +- ceph/src/mds/Locker.cc | 236 +-- ceph/src/mds/Locker.h | 6 +- ceph/src/mds/MDBalancer.cc | 2 +- ceph/src/mds/MDCache.cc | 295 ++-- ceph/src/mds/MDCache.h | 41 +- ceph/src/mds/MDSDaemon.cc | 7 +- ceph/src/mds/MDSDaemon.h | 2 +- ceph/src/mds/MDSRank.cc | 137 +- ceph/src/mds/MDSRank.h | 6 +- ceph/src/mds/Migrator.cc | 5 +- ceph/src/mds/Server.cc | 308 ++-- ceph/src/mds/Server.h | 16 +- ceph/src/mds/SessionMap.cc | 108 +- ceph/src/mds/SessionMap.h | 119 +- ceph/src/mds/SimpleLock.h | 5 +- ceph/src/messages/MMDSFragmentNotify.h | 39 +- ceph/src/messages/MMDSFragmentNotifyAck.h | 57 + ceph/src/mgr/ActivePyModules.cc | 12 +- ceph/src/mgr/BaseMgrModule.cc | 13 + ceph/src/mgr/DaemonServer.cc | 3 + ceph/src/mon/AuthMonitor.cc | 46 +- ceph/src/mon/AuthMonitor.h | 15 +- ceph/src/mon/CMakeLists.txt | 1 + ceph/src/mon/FSCommands.cc | 10 + ceph/src/mon/MDSMonitor.cc | 7 +- ceph/src/mon/MgrStatMonitor.cc | 4 +- ceph/src/mon/PGMap.cc | 108 +- ceph/src/msg/Message.cc | 5 + ceph/src/msg/Message.h | 1 + ceph/src/msg/async/AsyncConnection.cc | 27 +- ceph/src/msg/async/AsyncConnection.h | 1 - ceph/src/msg/async/EventEpoll.cc | 14 +- ceph/src/msg/msg_types.h | 32 +- ceph/src/msg/simple/Pipe.cc | 6 +- ceph/src/os/CMakeLists.txt | 4 +- ceph/src/os/bluestore/Allocator.cc | 13 +- ceph/src/os/bluestore/Allocator.h | 17 +- ceph/src/os/bluestore/BitAllocator.cc | 1420 ----------------- ceph/src/os/bluestore/BitAllocator.h | 569 ------- ceph/src/os/bluestore/BitMapAllocator.cc | 220 --- ceph/src/os/bluestore/BitMapAllocator.h | 50 - ceph/src/os/bluestore/BitmapAllocator.cc | 101 ++ ceph/src/os/bluestore/BitmapAllocator.h | 50 + ceph/src/os/bluestore/BlockDevice.h | 2 +- ceph/src/os/bluestore/BlueFS.cc | 49 +- ceph/src/os/bluestore/BlueFS.h | 2 +- ceph/src/os/bluestore/BlueStore.cc | 167 +- 
ceph/src/os/bluestore/BlueStore.h | 11 +- ceph/src/os/bluestore/KernelDevice.cc | 6 +- ceph/src/os/bluestore/StupidAllocator.cc | 84 +- ceph/src/os/bluestore/StupidAllocator.h | 9 +- ceph/src/os/bluestore/bluefs_types.h | 7 +- ceph/src/os/bluestore/bluestore_types.cc | 30 +- ceph/src/os/bluestore/bluestore_types.h | 99 +- .../os/bluestore/fastbmap_allocator_impl.cc | 544 +++++++ .../os/bluestore/fastbmap_allocator_impl.h | 774 +++++++++ ceph/src/os/filestore/FileStore.cc | 26 +- ceph/src/os/filestore/LFNIndex.cc | 10 +- ceph/src/os/filestore/WBThrottle.cc | 9 +- ceph/src/osd/ECBackend.cc | 5 +- ceph/src/osd/OSD.cc | 99 +- ceph/src/osd/OSDMap.cc | 644 +++++--- ceph/src/osd/OSDMap.h | 1 + ceph/src/osd/PG.cc | 77 +- ceph/src/osd/PG.h | 23 +- ceph/src/osd/PrimaryLogPG.cc | 11 +- ceph/src/osd/osd_types.h | 7 +- ceph/src/osdc/Objecter.cc | 13 +- ceph/src/pybind/mgr/balancer/module.py | 45 +- ceph/src/pybind/mgr/dashboard/module.py | 15 + ceph/src/pybind/mgr/prometheus/module.py | 44 +- ceph/src/pybind/mgr/restful/api/crush.py | 6 +- ceph/src/pybind/mgr/restful/common.py | 54 +- ceph/src/pybind/mgr/restful/module.py | 7 +- ceph/src/rbdmap | 57 +- ceph/src/rgw/CMakeLists.txt | 8 +- ceph/src/rgw/rgw_admin.cc | 23 +- ceph/src/rgw/rgw_auth_s3.cc | 34 +- ceph/src/rgw/rgw_auth_s3.h | 18 +- ceph/src/rgw/rgw_bucket.cc | 51 +- ceph/src/rgw/rgw_common.cc | 3 - ceph/src/rgw/rgw_common.h | 28 +- ceph/src/rgw/rgw_cr_rados.cc | 4 +- ceph/src/rgw/rgw_cr_rest.h | 87 +- ceph/src/rgw/rgw_crypt.cc | 103 +- ceph/src/rgw/rgw_crypt.h | 4 + ceph/src/rgw/rgw_data_sync.cc | 83 +- ceph/src/rgw/rgw_data_sync.h | 8 +- ceph/src/rgw/rgw_es_query.cc | 34 +- ceph/src/rgw/rgw_file.h | 27 +- ceph/src/rgw/rgw_gc.cc | 1 + ceph/src/rgw/rgw_iam_policy.h | 2 + ceph/src/rgw/rgw_ldap.cc | 2 +- ceph/src/rgw/rgw_loadgen.cc | 1 + ceph/src/rgw/rgw_op.cc | 7 +- ceph/src/rgw/rgw_rados.cc | 178 ++- ceph/src/rgw/rgw_rados.h | 10 +- ceph/src/rgw/rgw_reshard.cc | 5 + ceph/src/rgw/rgw_rest_client.cc | 2 +- ceph/src/rgw/rgw_rest_conn.h | 42 +- ceph/src/rgw/rgw_rest_s3.cc | 56 +- ceph/src/rgw/rgw_rest_s3.h | 2 + ceph/src/rgw/rgw_sync_module.cc | 4 +- ceph/src/rgw/rgw_sync_module_es.cc | 329 +++- ceph/src/rgw/rgw_sync_module_es.h | 28 + ceph/src/rgw/rgw_sync_module_es_rest.cc | 17 +- ceph/src/test/cli/osdmaptool/upmap-out.t | 13 +- ceph/src/test/cli/osdmaptool/upmap.t | 11 +- ceph/src/test/cli/rbd/help.t | 10 +- ceph/src/test/common/test_str_map.cc | 12 + ceph/src/test/librbd/fsx.cc | 11 +- ceph/src/test/mds/TestSessionFilter.cc | 22 +- ceph/src/test/objectstore/Allocator_bench.cc | 340 ++++ ceph/src/test/objectstore/Allocator_test.cc | 221 +-- .../src/test/objectstore/BitAllocator_test.cc | 593 ------- ceph/src/test/objectstore/CMakeLists.txt | 25 +- .../objectstore/fastbmap_allocator_test.cc | 933 +++++++++++ .../test/objectstore/test_bluestore_types.cc | 18 +- ceph/src/test/osd/TestOSDMap.cc | 372 ++++- ceph/src/test/osd/TestPGLog.cc | 2 + ceph/src/test/rgw/rgw_multi/tests.py | 129 +- ceph/src/test/rgw/rgw_multi/zone_rados.py | 42 +- ceph/src/test/rgw/test_rgw_crypto.cc | 170 +- ceph/src/test/smoke.sh | 11 +- ceph/src/tools/ceph_monstore_tool.cc | 179 +-- ceph/src/tools/ceph_objectstore_tool.cc | 17 + ceph/src/tools/rbd/ArgumentTypes.cc | 4 +- ceph/src/tools/rbd_mirror/ImageReplayer.cc | 4 +- ceph/src/tools/rebuild_mondb.cc | 60 - ceph/src/valgrind.supp | 263 --- 251 files changed, 10165 insertions(+), 5421 deletions(-) create mode 100644 ceph/doc/radosgw/placement.rst create mode 120000 ceph/qa/cephfs/objectstore-ec/bluestore-bitmap.yaml rename 
ceph/qa/{cephfs/objectstore-ec/bluestore.yaml => objectstore/bluestore-stupid.yaml} (96%) delete mode 100644 ceph/qa/objectstore/bluestore.yaml create mode 120000 ceph/qa/objectstore_cephfs/bluestore-bitmap.yaml delete mode 120000 ceph/qa/objectstore_cephfs/bluestore.yaml create mode 100755 ceph/qa/standalone/osd/osd-backfill-prio.sh create mode 100755 ceph/qa/standalone/osd/osd-recovery-prio.sh create mode 120000 ceph/qa/suites/ceph-deploy/basic/objectstore/bluestore-bitmap.yaml delete mode 120000 ceph/qa/suites/ceph-deploy/basic/objectstore/bluestore.yaml create mode 120000 ceph/qa/suites/fs/basic_functional/objectstore/bluestore-bitmap.yaml delete mode 120000 ceph/qa/suites/fs/basic_functional/objectstore/bluestore.yaml delete mode 100644 ceph/qa/suites/fs/basic_functional/tasks/libcephfs_java.yaml create mode 120000 ceph/qa/suites/fs/bugs/client_trim_caps/objectstore/bluestore-bitmap.yaml delete mode 120000 ceph/qa/suites/fs/bugs/client_trim_caps/objectstore/bluestore.yaml create mode 120000 ceph/qa/suites/rados/singleton-bluestore/objectstore/bluestore-bitmap.yaml delete mode 120000 ceph/qa/suites/rados/singleton-bluestore/objectstore/bluestore.yaml create mode 120000 ceph/qa/suites/rados/thrash-erasure-code-overwrites/bluestore-bitmap.yaml delete mode 120000 ceph/qa/suites/rados/thrash-erasure-code-overwrites/bluestore.yaml create mode 120000 ceph/qa/suites/rbd/singleton-bluestore/objectstore/bluestore-bitmap.yaml delete mode 120000 ceph/qa/suites/rbd/singleton-bluestore/objectstore/bluestore.yaml create mode 120000 ceph/qa/suites/smoke/basic/objectstore/bluestore-bitmap.yaml delete mode 120000 ceph/qa/suites/smoke/basic/objectstore/bluestore.yaml create mode 120000 ceph/qa/suites/upgrade/kraken-x/stress-split/objectstore/bluestore-bitmap.yaml delete mode 120000 ceph/qa/suites/upgrade/kraken-x/stress-split/objectstore/bluestore.yaml create mode 100644 ceph/qa/valgrind.supp delete mode 100755 ceph/qa/workunits/libcephfs-java/test.sh create mode 100644 ceph/src/ceph-volume/ceph_volume/tests/functional/tests/__init__.py create mode 100644 ceph/src/ceph-volume/ceph_volume/tests/functional/tests/conftest.py create mode 100644 ceph/src/ceph-volume/ceph_volume/tests/functional/tests/osd/__init__.py create mode 100644 ceph/src/ceph-volume/ceph_volume/tests/functional/tests/osd/test_osds.py create mode 100644 ceph/src/ceph-volume/ceph_volume/tests/systemd/test_systemctl.py create mode 100644 ceph/src/messages/MMDSFragmentNotifyAck.h delete mode 100644 ceph/src/os/bluestore/BitAllocator.cc delete mode 100644 ceph/src/os/bluestore/BitAllocator.h delete mode 100644 ceph/src/os/bluestore/BitMapAllocator.cc delete mode 100644 ceph/src/os/bluestore/BitMapAllocator.h create mode 100755 ceph/src/os/bluestore/BitmapAllocator.cc create mode 100755 ceph/src/os/bluestore/BitmapAllocator.h create mode 100755 ceph/src/os/bluestore/fastbmap_allocator_impl.cc create mode 100755 ceph/src/os/bluestore/fastbmap_allocator_impl.h create mode 100755 ceph/src/test/objectstore/Allocator_bench.cc delete mode 100644 ceph/src/test/objectstore/BitAllocator_test.cc create mode 100755 ceph/src/test/objectstore/fastbmap_allocator_test.cc delete mode 100644 ceph/src/valgrind.supp diff --git a/Makefile b/Makefile index 582d27904..72c31d182 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ RELEASE=5.3 PACKAGE=ceph -VER=12.2.11 +VER=12.2.12 DEBREL=pve1 SRCDIR=ceph diff --git a/ceph/CMakeLists.txt b/ceph/CMakeLists.txt index 5403de8f4..bffe5649b 100644 --- a/ceph/CMakeLists.txt +++ b/ceph/CMakeLists.txt @@ -1,7 +1,7 @@ 
cmake_minimum_required(VERSION 2.8.11) project(ceph) -set(VERSION 12.2.11) +set(VERSION 12.2.12) if(POLICY CMP0046) # Tweak policies (this one disables "missing" dependency warning) diff --git a/ceph/PendingReleaseNotes b/ceph/PendingReleaseNotes index b75c79fb1..03daee301 100644 --- a/ceph/PendingReleaseNotes +++ b/ceph/PendingReleaseNotes @@ -1,5 +1,12 @@ ->= 12.2.11 ----------- +12.2.12 +------- +* In 12.2.9 and earlier releases, keyring caps were not checked for validity, + so the caps string could be anything. As of 12.2.10, caps strings are + validated and providing a keyring with an invalid caps string to, e.g., + "ceph auth add" will result in an error. + +12.2.11 +------- * `cephfs-journal-tool` makes rank argument (--rank) mandatory. Rank is of format `filesystem:rank`, where `filesystem` is the cephfs filesystem and `rank` is the MDS rank on which the operation is to be executed. To @@ -8,6 +15,26 @@ suffixed dump files. Importing journal information from dump files is disallowed if operation is targetted for all ranks. +* The MDS cache trimming is now throttled. Dropping the MDS cache + via the `ceph tell mds. cache drop` command or large reductions in the + cache size will no longer cause service unavailability. + +* The CephFS MDS behavior with recalling caps has been significantly improved + to not attempt recalling too many caps at once, leading to instability. + MDS with a large cache (64GB+) should be more stable. + +* MDS now provides a config option "mds_max_caps_per_client" (default: 1M) to + limit the number of caps a client session may hold. Long running client + sessions with a large number of caps have been a source of instability in the + MDS when all of these caps need to be processed during certain session + events. It is recommended to not unnecessarily increase this value. + +* The MDS config mds_recall_state_timeout has been removed. Late client recall + warnings are now generated based on the number of caps the MDS has recalled + which have not been released. The new configs mds_recall_warning_threshold + (default: 32K) and mds_recall_warning_decay_rate (default: 60s) sets the + threshold for this warning. 
+ >= 12.1.2 --------- * When running 'df' on a CephFS filesystem comprising exactly one data pool, diff --git a/ceph/admin/doc-requirements.txt b/ceph/admin/doc-requirements.txt index dc1411303..44920d4bc 100644 --- a/ceph/admin/doc-requirements.txt +++ b/ceph/admin/doc-requirements.txt @@ -1,3 +1,3 @@ Sphinx == 1.6.3 -e git+https://github.com/ceph/sphinx-ditaa.git@py3#egg=sphinx-ditaa --e git+https://github.com/michaeljones/breathe#egg=breathe +breathe == 4.11.1 diff --git a/ceph/alpine/APKBUILD b/ceph/alpine/APKBUILD index 220346e45..f29f0428b 100644 --- a/ceph/alpine/APKBUILD +++ b/ceph/alpine/APKBUILD @@ -1,7 +1,7 @@ # Contributor: John Coyle # Maintainer: John Coyle pkgname=ceph -pkgver=12.2.11 +pkgver=12.2.12 pkgrel=0 pkgdesc="Ceph is a distributed object store and file system" pkgusers="ceph" @@ -63,7 +63,7 @@ makedepends=" xmlstarlet yasm " -source="ceph-12.2.11.tar.bz2" +source="ceph-12.2.12.tar.bz2" subpackages=" $pkgname-base $pkgname-common @@ -116,7 +116,7 @@ _sysconfdir=/etc _udevrulesdir=/etc/udev/rules.d _python_sitelib=/usr/lib/python2.7/site-packages -builddir=$srcdir/ceph-12.2.11 +builddir=$srcdir/ceph-12.2.12 build() { export CEPH_BUILD_VIRTUALENV=$builddir diff --git a/ceph/ceph.spec b/ceph/ceph.spec index d10206738..9c42008a1 100644 --- a/ceph/ceph.spec +++ b/ceph/ceph.spec @@ -61,7 +61,7 @@ # main package definition ################################################################################# Name: ceph -Version: 12.2.11 +Version: 12.2.12 Release: 0%{?dist} %if 0%{?fedora} || 0%{?rhel} Epoch: 2 @@ -77,7 +77,7 @@ License: LGPL-2.1 and CC-BY-SA-3.0 and GPL-2.0 and BSL-1.0 and BSD-3-Clause and Group: System/Filesystems %endif URL: http://ceph.com/ -Source0: http://ceph.com/download/ceph-12.2.11.tar.bz2 +Source0: http://ceph.com/download/ceph-12.2.12.tar.bz2 %if 0%{?suse_version} %if 0%{?is_opensuse} ExclusiveArch: x86_64 aarch64 ppc64 ppc64le @@ -382,7 +382,7 @@ Summary: Ceph daemon for mirroring RBD images %if 0%{?suse_version} Group: System/Filesystems %endif -Requires: ceph-common = %{_epoch_prefix}%{version}-%{release} +Requires: ceph-base = %{_epoch_prefix}%{version}-%{release} Requires: librados2 = %{_epoch_prefix}%{version}-%{release} %description -n rbd-mirror Daemon for mirroring RBD images between Ceph clusters, streaming @@ -403,7 +403,7 @@ Summary: Rados REST gateway %if 0%{?suse_version} Group: System/Filesystems %endif -Requires: ceph-common = %{_epoch_prefix}%{version}-%{release} +Requires: ceph-base = %{_epoch_prefix}%{version}-%{release} %if 0%{with selinux} Requires: ceph-selinux = %{_epoch_prefix}%{version}-%{release} %endif @@ -788,7 +788,7 @@ python-rbd, python-rgw or python-cephfs instead. 
# common ################################################################################# %prep -%autosetup -p1 -n ceph-12.2.11 +%autosetup -p1 -n ceph-12.2.12 %build %if 0%{with cephfs_java} diff --git a/ceph/ceph.spec.in b/ceph/ceph.spec.in index fa34ade2d..aa3c47abe 100644 --- a/ceph/ceph.spec.in +++ b/ceph/ceph.spec.in @@ -382,7 +382,7 @@ Summary: Ceph daemon for mirroring RBD images %if 0%{?suse_version} Group: System/Filesystems %endif -Requires: ceph-common = %{_epoch_prefix}%{version}-%{release} +Requires: ceph-base = %{_epoch_prefix}%{version}-%{release} Requires: librados2 = %{_epoch_prefix}%{version}-%{release} %description -n rbd-mirror Daemon for mirroring RBD images between Ceph clusters, streaming @@ -403,7 +403,7 @@ Summary: Rados REST gateway %if 0%{?suse_version} Group: System/Filesystems %endif -Requires: ceph-common = %{_epoch_prefix}%{version}-%{release} +Requires: ceph-base = %{_epoch_prefix}%{version}-%{release} %if 0%{with selinux} Requires: ceph-selinux = %{_epoch_prefix}%{version}-%{release} %endif diff --git a/ceph/changelog.upstream b/ceph/changelog.upstream index eaed6bfbb..3f8f429e8 100644 --- a/ceph/changelog.upstream +++ b/ceph/changelog.upstream @@ -1,3 +1,9 @@ +ceph (12.2.12-1) stable; urgency=medium + + * New upstream release + + -- Ceph Release Team Thu, 11 Apr 2019 12:33:49 +0000 + ceph (12.2.11-1) stable; urgency=medium * New upstream release diff --git a/ceph/doc/api/libcephfs-java.rst b/ceph/doc/api/libcephfs-java.rst index 85b5c3112..83b5a6638 100644 --- a/ceph/doc/api/libcephfs-java.rst +++ b/ceph/doc/api/libcephfs-java.rst @@ -2,8 +2,17 @@ Libcephfs (JavaDoc) =================== +.. warning:: + + CephFS Java bindings are no longer tested by CI. They may not work properly + or corrupt data. + + Developers interested in reviving these bindings by fixing and writing tests + are encouraged to contribute! + .. The admin/build-docs script runs Ant to build the JavaDoc files, and copies them to api/libcephfs-java/javadoc/. + View the auto-generated `JavaDoc pages for the CephFS Java bindings `_. diff --git a/ceph/doc/ceph-volume/simple/scan.rst b/ceph/doc/ceph-volume/simple/scan.rst index 320fee8fb..2749b14b6 100644 --- a/ceph/doc/ceph-volume/simple/scan.rst +++ b/ceph/doc/ceph-volume/simple/scan.rst @@ -9,6 +9,7 @@ PLAIN formats is fully supported. The command has the ability to inspect a running OSD, by inspecting the directory where the OSD data is stored, or by consuming the data partition. +The command can also scan all running OSDs if no path or device is provided. Once scanned, information will (by default) persist the metadata as JSON in a file in ``/etc/ceph/osd``. This ``JSON`` file will use the naming convention @@ -31,6 +32,16 @@ the contents to ``stdout`` (no file will be written):: .. _ceph-volume-simple-scan-directory: +Running OSDs scan +----------------- +Using this command without providing an OSD directory or device will scan the +directories of any currently running OSDs. If a running OSD was not created +by ceph-disk it will be ignored and not scanned. + +To scan all running ceph-disk OSDs, the command would look like:: + + ceph-volume simple scan + Directory scan -------------- The directory scan will capture OSD file contents from interesting files. There diff --git a/ceph/doc/man/8/ceph-volume.rst b/ceph/doc/man/8/ceph-volume.rst index 9ad5a5237..5b1035ef7 100644 --- a/ceph/doc/man/8/ceph-volume.rst +++ b/ceph/doc/man/8/ceph-volume.rst @@ -280,6 +280,10 @@ directory as well. 
Optionally, the JSON blob can be sent to stdout for further inspection. +Usage on all running OSDs:: + + ceph-voume simple scan + Usage on data devices:: ceph-volume simple scan @@ -295,7 +299,7 @@ Optional arguments: * [--stdout] Send the JSON blob to stdout * [--force] If the JSON file exists at destination, overwrite it -Required Positional arguments: +Optional Positional arguments: * Actual data partition or a path to the running OSD diff --git a/ceph/doc/man/8/ceph.rst b/ceph/doc/man/8/ceph.rst index 32482a7d2..27efc67d1 100644 --- a/ceph/doc/man/8/ceph.rst +++ b/ceph/doc/man/8/ceph.rst @@ -1465,6 +1465,16 @@ Options reply to outfile. Only specific monitor commands (e.g. osd getmap) return a payload. +.. option:: --setuser user + + will apply the appropriate user ownership to the file specified by + the option '-o'. + +.. option:: --setgroup group + + will apply the appropriate group ownership to the file specified by + the option '-o'. + .. option:: -c ceph.conf, --conf=ceph.conf Use ceph.conf configuration file instead of the default diff --git a/ceph/doc/man/8/rbdmap.rst b/ceph/doc/man/8/rbdmap.rst index ba8001ff8..e6980ab7e 100644 --- a/ceph/doc/man/8/rbdmap.rst +++ b/ceph/doc/man/8/rbdmap.rst @@ -46,6 +46,8 @@ This will cause the script to issue an ``rbd map`` command like the following:: rbd map POOLNAME/IMAGENAME --PARAM1 VAL1 --PARAM2 VAL2 (See the ``rbd`` manpage for a full list of possible options.) +For parameters and values which contain commas or equality signs, a simple +apostrophe can be used to prevent replacing them. When run as ``rbdmap map``, the script parses the configuration file, and for each RBD image specified attempts to first map the image (using the ``rbd map`` @@ -77,11 +79,12 @@ sequence.) Examples ======== -Example ``/etc/ceph/rbdmap`` for two RBD images called "bar1" and "bar2", both -in pool "foopool":: +Example ``/etc/ceph/rbdmap`` for three RBD images called "bar1", "bar2" and "bar3", +which are in pool "foopool":: foopool/bar1 id=admin,keyring=/etc/ceph/ceph.client.admin.keyring foopool/bar2 id=admin,keyring=/etc/ceph/ceph.client.admin.keyring + foopool/bar3 id=admin,keyring=/etc/ceph/ceph.client.admin.keyring,options='lock_on_read,queue_depth=1024' Each line in the file contains two strings: the image spec and the options to be passed to ``rbd map``. These two lines get transformed into the following @@ -89,12 +92,14 @@ commands:: rbd map foopool/bar1 --id admin --keyring /etc/ceph/ceph.client.admin.keyring rbd map foopool/bar2 --id admin --keyring /etc/ceph/ceph.client.admin.keyring + rbd map foopool/bar2 --id admin --keyring /etc/ceph/ceph.client.admin.keyring --options lock_on_read,queue_depth=1024 If the images had XFS filesystems on them, the corresponding ``/etc/fstab`` entries might look like this:: /dev/rbd/foopool/bar1 /mnt/bar1 xfs noauto 0 0 /dev/rbd/foopool/bar2 /mnt/bar2 xfs noauto 0 0 + /dev/rbd/foopool/bar3 /mnt/bar3 xfs noauto 0 0 After creating the images and populating the ``/etc/ceph/rbdmap`` file, making the images get automatically mapped and mounted at boot is just a matter of diff --git a/ceph/doc/rados/configuration/mon-config-ref.rst b/ceph/doc/rados/configuration/mon-config-ref.rst index 6c8e92b17..640e38203 100644 --- a/ceph/doc/rados/configuration/mon-config-ref.rst +++ b/ceph/doc/rados/configuration/mon-config-ref.rst @@ -1193,7 +1193,7 @@ Miscellaneous will be splitted on all OSDs serving that pool. We want to avoid extreme multipliers on PG splits. 
:Type: Integer -:Default: 300 +:Default: 32 ``mon session timeout`` diff --git a/ceph/doc/rados/configuration/osd-config-ref.rst b/ceph/doc/rados/configuration/osd-config-ref.rst index f839122cf..15b78d583 100644 --- a/ceph/doc/rados/configuration/osd-config-ref.rst +++ b/ceph/doc/rados/configuration/osd-config-ref.rst @@ -850,30 +850,14 @@ Ceph performs well as the OSD map grows larger. :Description: The number of OSD maps to keep cached. :Type: 32-bit Integer -:Default: ``500`` - - -``osd map cache bl size`` - -:Description: The size of the in-memory OSD map cache in OSD daemons. -:Type: 32-bit Integer :Default: ``50`` -``osd map cache bl inc size`` - -:Description: The size of the in-memory OSD map cache incrementals in - OSD daemons. - -:Type: 32-bit Integer -:Default: ``100`` - - ``osd map message max`` :Description: The maximum map entries allowed per MOSDMap message. :Type: 32-bit Integer -:Default: ``100`` +:Default: ``40`` diff --git a/ceph/doc/rados/operations/health-checks.rst b/ceph/doc/rados/operations/health-checks.rst index c1e22004a..e141f6bcd 100644 --- a/ceph/doc/rados/operations/health-checks.rst +++ b/ceph/doc/rados/operations/health-checks.rst @@ -514,7 +514,7 @@ PG_NOT_DEEP_SCRUBBED ____________________ One or more PGs has not been deep scrubbed recently. PGs are normally -scrubbed every ``osd_deep_mon_scrub_interval`` seconds, and this warning +scrubbed every ``osd_deep_scrub_interval`` seconds, and this warning triggers when ``mon_warn_not_deep_scrubbed`` such intervals have elapsed without a scrub. diff --git a/ceph/doc/rados/troubleshooting/troubleshooting-mon.rst b/ceph/doc/rados/troubleshooting/troubleshooting-mon.rst index 642b2e07b..6b3ec15e2 100644 --- a/ceph/doc/rados/troubleshooting/troubleshooting-mon.rst +++ b/ceph/doc/rados/troubleshooting/troubleshooting-mon.rst @@ -460,12 +460,12 @@ Following information are not recoverable using the steps above: using ``ceph-monstore-tool``. But the MDS keyrings and other keyrings are missing in the recovered monitor store. You might need to re-add them manually. -- **pg settings**: the ``full ratio`` and ``nearfull ratio`` settings configured using - ``ceph pg set_full_ratio`` and ``ceph pg set_nearfull_ratio`` will be lost. +- **creating pools**: If any RADOS pools were in the process of being creating, that state is lost. The recovery tool assumes that all pools have been created. If there are PGs that are stuck in the 'unknown' after the recovery for a partially created pool, you can force creation of the *empty* PG with the ``ceph osd force-create-pg`` command. Note that this will create an *empty* PG, so only do this if you know the pool is empty. - **MDS Maps**: the MDS maps are lost. + Everything Failed! Now What? ============================= diff --git a/ceph/doc/radosgw/index.rst b/ceph/doc/radosgw/index.rst index 2e25fdbf1..eac49177f 100644 --- a/ceph/doc/radosgw/index.rst +++ b/ceph/doc/radosgw/index.rst @@ -40,6 +40,7 @@ you may write data with one API and retrieve it with the other. Manual Install w/Civetweb <../../install/install-ceph-gateway> HTTP Frontends + Pool Placement Multisite Configuration Configuring Pools Config Reference diff --git a/ceph/doc/radosgw/placement.rst b/ceph/doc/radosgw/placement.rst new file mode 100644 index 000000000..ce9ecbc2e --- /dev/null +++ b/ceph/doc/radosgw/placement.rst @@ -0,0 +1,180 @@ +============== +Pool Placement +============== + +.. contents:: + +Placement Targets +================= + +.. 
versionadded:: Jewel + +Placement targets control which `Pools`_ are associated with a particular +bucket. A bucket's placement target is selected on creation, and cannot be +modified. The ``radosgw-admin bucket stats`` command will display its +``placement_rule``. + +The zonegroup configuration contains a list of placement targets with an +initial target named ``default-placement``. The zone configuration then maps +each zonegroup placement target name onto its local storage. This zone +placement information includes the ``index_pool`` name for the bucket index, +the ``data_extra_pool`` name for metadata about incomplete multipart uploads, +and a ``data_pool`` name for object data. + +Zonegroup/Zone Configuration +============================ + +Placement configuration is performed with ``radosgw-admin`` commands on +the zonegroups and zones. + +The zonegroup placement configuration can be queried with: + +:: + + $ radosgw-admin zonegroup get + { + "id": "ab01123f-e0df-4f29-9d71-b44888d67cd5", + "name": "default", + "api_name": "default", + ... + "placement_targets": [ + { + "name": "default-placement", + "tags": [], + } + ], + "default_placement": "default-placement", + ... + } + +The zone placement configuration can be queried with: + +:: + + $ radosgw-admin zone get + { + "id": "557cdcee-3aae-4e9e-85c7-2f86f5eddb1f", + "name": "default", + "domain_root": "default.rgw.meta:root", + ... + "placement_pools": [ + { + "key": "default-placement", + "val": { + "index_pool": "default.rgw.buckets.index", + "data_pool": "default.rgw.buckets.data", + "data_extra_pool": "default.rgw.buckets.non-ec", + "index_type": 0 + } + } + ], + ... + } + +.. note:: If you have not done any previous `Multisite Configuration`_, + a ``default`` zone and zonegroup are created for you, and changes + to the zone/zonegroup will not take effect until the Ceph Object + Gateways are restarted. If you have created a realm for multisite, + the zone/zonegroup changes will take effect once the changes are + committed with ``radosgw-admin period update --commit``. + +Adding a Placement Target +------------------------- + +To create a new placement target named ``temporary``, start by adding it to +the zonegroup: + +:: + + $ radosgw-admin zonegroup placement add \ + --rgw-zonegroup default \ + --placement-id temporary + +Then provide the zone placement info for that target: + +:: + + $ radosgw-admin zone placement add \ + --rgw-zone default \ + --placement-id temporary \ + --data-pool default.rgw.temporary.data \ + --index-pool default.rgw.temporary.index \ + --data-extra-pool default.rgw.temporary.non-ec \ + --compression lz4 + +Customizing Placement +===================== + +Default Placement +----------------- + +By default, new buckets will use the zonegroup's ``default_placement`` target. +This zonegroup setting can be changed with: + +:: + + $ radosgw-admin zonegroup placement default \ + --rgw-zonegroup default \ + --placement-id new-placement + +User Placement +-------------- + +A Ceph Object Gateway user can override the zonegroup's default placement +target by setting a non-empty ``default_placement`` field in the user info. + +:: + + $ radosgw-admin user info --uid testid + { + ... + "default_placement": "", + "placement_tags": [], + ... + } + +If a zonegroup's placement target contains any ``tags``, users will be unable +to create buckets with that placement target unless their user info contains +at least one matching tag in its ``placement_tags`` field. 
This can be useful +to restrict access to certain types of storage. + +The ``radosgw-admin`` command cannot modify these fields directly, so the json +format must be edited manually: + +:: + + $ radosgw-admin metadata get user: > user.json + $ vi user.json + $ radosgw-admin metadata put user: < user.json + +S3 Bucket Placement +------------------- + +When creating a bucket with the S3 protocol, a placement target can be +provided as part of the LocationConstraint to override the default placement +targets from the user and zonegroup. + +Normally, the LocationConstraint must match the zonegroup's ``api_name``: + +:: + + default + +A custom placement target can be added to the ``api_name`` following a colon: + +:: + + default:new-placement + +Swift Bucket Placement +---------------------- + +When creating a bucket with the Swift protocol, a placement target can be +provided in the HTTP header ``X-Storage-Policy``: + +:: + + X-Storage-Policy: new-placement + +.. _`Pools`: ../pools +.. _`Multisite Configuration`: ../multisite diff --git a/ceph/doc/radosgw/s3/authentication.rst b/ceph/doc/radosgw/s3/authentication.rst index b1875385b..3cdacc495 100644 --- a/ceph/doc/radosgw/s3/authentication.rst +++ b/ceph/doc/radosgw/s3/authentication.rst @@ -71,5 +71,126 @@ an object: | ``FULL_CONTROL`` | Grantee has full permissions for object in the bucket. | Grantee can read or write to the object ACL. | +------------------+--------------------------------------------------------+----------------------------------------------+ +Internally, S3 operations are mapped to ACL permissions thus: + ++---------------------------------------+---------------+ +| Operation | Permission | ++=======================================+===============+ +| ``s3:GetObject`` | ``READ`` | ++---------------------------------------+---------------+ +| ``s3:GetObjectTorrent`` | ``READ`` | ++---------------------------------------+---------------+ +| ``s3:GetObjectVersion`` | ``READ`` | ++---------------------------------------+---------------+ +| ``s3:GetObjectVersionTorrent`` | ``READ`` | ++---------------------------------------+---------------+ +| ``s3:GetObjectTagging`` | ``READ`` | ++---------------------------------------+---------------+ +| ``s3:GetObjectVersionTagging`` | ``READ`` | ++---------------------------------------+---------------+ +| ``s3:ListAllMyBuckets`` | ``READ`` | ++---------------------------------------+---------------+ +| ``s3:ListBucket`` | ``READ`` | ++---------------------------------------+---------------+ +| ``s3:ListBucketMultipartUploads`` | ``READ`` | ++---------------------------------------+---------------+ +| ``s3:ListBucketVersions`` | ``READ`` | ++---------------------------------------+---------------+ +| ``s3:ListMultipartUploadParts`` | ``READ`` | ++---------------------------------------+---------------+ +| ``s3:AbortMultipartUpload`` | ``WRITE`` | ++---------------------------------------+---------------+ +| ``s3:CreateBucket`` | ``WRITE`` | ++---------------------------------------+---------------+ +| ``s3:DeleteBucket`` | ``WRITE`` | ++---------------------------------------+---------------+ +| ``s3:DeleteObject`` | ``WRITE`` | ++---------------------------------------+---------------+ +| ``s3:s3DeleteObjectVersion`` | ``WRITE`` | ++---------------------------------------+---------------+ +| ``s3:PutObject`` | ``WRITE`` | ++---------------------------------------+---------------+ +| ``s3:PutObjectTagging`` | ``WRITE`` | ++---------------------------------------+---------------+ +| 
``s3:PutObjectVersionTagging`` | ``WRITE`` | ++---------------------------------------+---------------+ +| ``s3:DeleteObjectTagging`` | ``WRITE`` | ++---------------------------------------+---------------+ +| ``s3:DeleteObjectVersionTagging`` | ``WRITE`` | ++---------------------------------------+---------------+ +| ``s3:RestoreObject`` | ``WRITE`` | ++---------------------------------------+---------------+ +| ``s3:GetAccelerateConfiguration`` | ``READ_ACP`` | ++---------------------------------------+---------------+ +| ``s3:GetBucketAcl`` | ``READ_ACP`` | ++---------------------------------------+---------------+ +| ``s3:GetBucketCORS`` | ``READ_ACP`` | ++---------------------------------------+---------------+ +| ``s3:GetBucketLocation`` | ``READ_ACP`` | ++---------------------------------------+---------------+ +| ``s3:GetBucketLogging`` | ``READ_ACP`` | ++---------------------------------------+---------------+ +| ``s3:GetBucketNotification`` | ``READ_ACP`` | ++---------------------------------------+---------------+ +| ``s3:GetBucketPolicy`` | ``READ_ACP`` | ++---------------------------------------+---------------+ +| ``s3:GetBucketRequestPayment`` | ``READ_ACP`` | ++---------------------------------------+---------------+ +| ``s3:GetBucketTagging`` | ``READ_ACP`` | ++---------------------------------------+---------------+ +| ``s3:GetBucketVersioning`` | ``READ_ACP`` | ++---------------------------------------+---------------+ +| ``s3:GetBucketWebsite`` | ``READ_ACP`` | ++---------------------------------------+---------------+ +| ``s3:GetLifecycleConfiguration`` | ``READ_ACP`` | ++---------------------------------------+---------------+ +| ``s3:GetObjectAcl`` | ``READ_ACP`` | ++---------------------------------------+---------------+ +| ``s3:GetObjectVersionAcl`` | ``READ_ACP`` | ++---------------------------------------+---------------+ +| ``s3:GetReplicationConfiguration`` | ``READ_ACP`` | ++---------------------------------------+---------------+ +| ``s3:DeleteBucketPolicy`` | ``WRITE_ACP`` | ++---------------------------------------+---------------+ +| ``s3:DeleteBucketWebsite`` | ``WRITE_ACP`` | ++---------------------------------------+---------------+ +| ``s3:DeleteReplicationConfiguration`` | ``WRITE_ACP`` | ++---------------------------------------+---------------+ +| ``s3:PutAccelerateConfiguration`` | ``WRITE_ACP`` | ++---------------------------------------+---------------+ +| ``s3:PutBucketAcl`` | ``WRITE_ACP`` | ++---------------------------------------+---------------+ +| ``s3:PutBucketCORS`` | ``WRITE_ACP`` | ++---------------------------------------+---------------+ +| ``s3:PutBucketLogging`` | ``WRITE_ACP`` | ++---------------------------------------+---------------+ +| ``s3:PutBucketNotification`` | ``WRITE_ACP`` | ++---------------------------------------+---------------+ +| ``s3:PutBucketPolicy`` | ``WRITE_ACP`` | ++---------------------------------------+---------------+ +| ``s3:PutBucketRequestPayment`` | ``WRITE_ACP`` | ++---------------------------------------+---------------+ +| ``s3:PutBucketTagging`` | ``WRITE_ACP`` | ++---------------------------------------+---------------+ +| ``s3:PutPutBucketVersioning`` | ``WRITE_ACP`` | ++---------------------------------------+---------------+ +| ``s3:PutBucketWebsite`` | ``WRITE_ACP`` | ++---------------------------------------+---------------+ +| ``s3:PutLifecycleConfiguration`` | ``WRITE_ACP`` | ++---------------------------------------+---------------+ +| ``s3:PutObjectAcl`` | ``WRITE_ACP`` | 
++---------------------------------------+---------------+ +| ``s3:PutObjectVersionAcl`` | ``WRITE_ACP`` | ++---------------------------------------+---------------+ +| ``s3:PutReplicationConfiguration`` | ``WRITE_ACP`` | ++---------------------------------------+---------------+ + +Some mappings, (e.g. ``s3:CreateBucket`` to ``WRITE``) are not +applicable to S3 operation, but are required to allow Swift and S3 to +access the same resources when things like Swift user ACLs are in +play. This is one of the many reasons that you should use S3 bucket +policies rather than S3 ACLs when possible. + + .. _RFC 2104: http://www.ietf.org/rfc/rfc2104.txt .. _HMAC: http://en.wikipedia.org/wiki/HMAC diff --git a/ceph/qa/cephfs/objectstore-ec/bluestore-bitmap.yaml b/ceph/qa/cephfs/objectstore-ec/bluestore-bitmap.yaml new file mode 120000 index 000000000..9fb86b9fe --- /dev/null +++ b/ceph/qa/cephfs/objectstore-ec/bluestore-bitmap.yaml @@ -0,0 +1 @@ +../../objectstore/bluestore-bitmap.yaml \ No newline at end of file diff --git a/ceph/qa/cephfs/tasks/cfuse_workunit_suites_fsstress.yaml b/ceph/qa/cephfs/tasks/cfuse_workunit_suites_fsstress.yaml index ddb18fb79..bae220292 100644 --- a/ceph/qa/cephfs/tasks/cfuse_workunit_suites_fsstress.yaml +++ b/ceph/qa/cephfs/tasks/cfuse_workunit_suites_fsstress.yaml @@ -1,5 +1,6 @@ tasks: - workunit: + timeout: 6h clients: all: - suites/fsstress.sh diff --git a/ceph/qa/objectstore/bluestore-bitmap.yaml b/ceph/qa/objectstore/bluestore-bitmap.yaml index 88dca3a21..b18e04bee 100644 --- a/ceph/qa/objectstore/bluestore-bitmap.yaml +++ b/ceph/qa/objectstore/bluestore-bitmap.yaml @@ -20,6 +20,8 @@ overrides: osd failsafe full ratio: .95 # this doesn't work with failures bc the log writes are not atomic across the two backends # bluestore bluefs env mirror: true + bdev enable discard: true + bdev async discard: true ceph-deploy: fs: xfs bluestore: yes @@ -36,4 +38,6 @@ overrides: mon osd backfillfull_ratio: .85 mon osd nearfull ratio: .8 osd failsafe full ratio: .95 + bdev enable discard: true + bdev async discard: true diff --git a/ceph/qa/cephfs/objectstore-ec/bluestore.yaml b/ceph/qa/objectstore/bluestore-stupid.yaml similarity index 96% rename from ceph/qa/cephfs/objectstore-ec/bluestore.yaml rename to ceph/qa/objectstore/bluestore-stupid.yaml index 19dfeb036..1d28ccbce 100644 --- a/ceph/qa/cephfs/objectstore-ec/bluestore.yaml +++ b/ceph/qa/objectstore/bluestore-stupid.yaml @@ -12,6 +12,7 @@ overrides: debug bluefs: 20 debug rocksdb: 10 bluestore fsck on mount: true + bluestore allocator: stupid # lower the full ratios since we can fill up a 100gb osd so quickly mon osd full ratio: .9 mon osd backfillfull_ratio: .85 diff --git a/ceph/qa/objectstore/bluestore.yaml b/ceph/qa/objectstore/bluestore.yaml deleted file mode 100644 index 19dfeb036..000000000 --- a/ceph/qa/objectstore/bluestore.yaml +++ /dev/null @@ -1,38 +0,0 @@ -overrides: - thrashosds: - bdev_inject_crash: 2 - bdev_inject_crash_probability: .5 - ceph: - fs: xfs - conf: - osd: - osd objectstore: bluestore - bluestore block size: 96636764160 - debug bluestore: 20 - debug bluefs: 20 - debug rocksdb: 10 - bluestore fsck on mount: true - # lower the full ratios since we can fill up a 100gb osd so quickly - mon osd full ratio: .9 - mon osd backfillfull_ratio: .85 - mon osd nearfull ratio: .8 - osd failsafe full ratio: .95 -# this doesn't work with failures bc the log writes are not atomic across the two backends -# bluestore bluefs env mirror: true - ceph-deploy: - fs: xfs - bluestore: yes - conf: - osd: - osd 
objectstore: bluestore - bluestore block size: 96636764160 - debug bluestore: 20 - debug bluefs: 20 - debug rocksdb: 10 - bluestore fsck on mount: true - # lower the full ratios since we can fill up a 100gb osd so quickly - mon osd full ratio: .9 - mon osd backfillfull_ratio: .85 - mon osd nearfull ratio: .8 - osd failsafe full ratio: .95 - diff --git a/ceph/qa/objectstore_cephfs/bluestore-bitmap.yaml b/ceph/qa/objectstore_cephfs/bluestore-bitmap.yaml new file mode 120000 index 000000000..951e65ac0 --- /dev/null +++ b/ceph/qa/objectstore_cephfs/bluestore-bitmap.yaml @@ -0,0 +1 @@ +../objectstore/bluestore-bitmap.yaml \ No newline at end of file diff --git a/ceph/qa/objectstore_cephfs/bluestore.yaml b/ceph/qa/objectstore_cephfs/bluestore.yaml deleted file mode 120000 index ad17c0eb7..000000000 --- a/ceph/qa/objectstore_cephfs/bluestore.yaml +++ /dev/null @@ -1 +0,0 @@ -../objectstore/bluestore.yaml \ No newline at end of file diff --git a/ceph/qa/overrides/short_pg_log.yaml b/ceph/qa/overrides/short_pg_log.yaml index 6ac1bca7f..20cc101de 100644 --- a/ceph/qa/overrides/short_pg_log.yaml +++ b/ceph/qa/overrides/short_pg_log.yaml @@ -2,5 +2,5 @@ overrides: ceph: conf: global: - osd_min_pg_log_entries: 300 - osd_max_pg_log_entries: 600 + osd_min_pg_log_entries: 1 + osd_max_pg_log_entries: 2 diff --git a/ceph/qa/packages/packages.yaml b/ceph/qa/packages/packages.yaml index 398656450..31fb66aa9 100644 --- a/ceph/qa/packages/packages.yaml +++ b/ceph/qa/packages/packages.yaml @@ -11,8 +11,6 @@ ceph: - python-ceph - libcephfs2 - libcephfs-dev - - libcephfs-java - - libcephfs-jni - librados2 - librbd1 - rbd-fuse @@ -40,8 +38,6 @@ ceph: - ceph - ceph-mgr - ceph-fuse - - cephfs-java - - libcephfs_jni1 - libcephfs2 - libcephfs-devel - librados2 diff --git a/ceph/qa/run-standalone.sh b/ceph/qa/run-standalone.sh index 2c7ceaa34..acb486e1a 100755 --- a/ceph/qa/run-standalone.sh +++ b/ceph/qa/run-standalone.sh @@ -36,6 +36,8 @@ trap finish TERM HUP INT PATH=$(pwd)/bin:$PATH +export LD_LIBRARY_PATH="$(pwd)/lib" + # TODO: Use getops dryrun=false if [[ "$1" = "--dry-run" ]]; then diff --git a/ceph/qa/standalone/ceph-helpers.sh b/ceph/qa/standalone/ceph-helpers.sh index 3883a6f58..9a4bae2a5 100755 --- a/ceph/qa/standalone/ceph-helpers.sh +++ b/ceph/qa/standalone/ceph-helpers.sh @@ -19,7 +19,7 @@ # TIMEOUT=300 PG_NUM=4 -TMPDIR=${TMPDIR:-/tmp} +TMPDIR=${TMPDIR:-${CEPH_BUILD_DIR}} CEPH_BUILD_VIRTUALENV=${TMPDIR} TESTDIR=${TESTDIR:-${TMPDIR}} @@ -389,6 +389,17 @@ function test_kill_daemons() { teardown $dir || return 1 } +# +# return a random TCP port which is not used yet +# +# please note, there could be racing if we use this function for +# a free port, and then try to bind on this port. 
+# +function get_unused_port() { + local ip=127.0.0.1 + python3 -c "import socket; s=socket.socket(); s.bind(('$ip', 0)); print(s.getsockname()[1]); s.close()" +} + ####################################################################### ## @@ -1411,6 +1422,7 @@ function test_get_timeout_delays() { # @return 0 if the cluster is clean, 1 otherwise # function wait_for_clean() { + local cmd=$1 local num_active_clean=-1 local cur_active_clean local -a delays=($(get_timeout_delays $TIMEOUT .1)) @@ -1436,6 +1448,8 @@ function wait_for_clean() { ceph report return 1 fi + # eval is a no-op if cmd is empty + eval $cmd sleep ${delays[$loop]} loop+=1 done diff --git a/ceph/qa/standalone/osd/osd-backfill-prio.sh b/ceph/qa/standalone/osd/osd-backfill-prio.sh new file mode 100755 index 000000000..248ac6fb0 --- /dev/null +++ b/ceph/qa/standalone/osd/osd-backfill-prio.sh @@ -0,0 +1,504 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2019 Red Hat +# +# Author: David Zafman +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + # Fix port???? + export CEPH_MON="127.0.0.1:7114" # git grep '\<7114\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON --osd_max_backfills=1 --debug_reserver=20 " + CEPH_ARGS+="--osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10 " + export objects=50 + export poolprefix=test + export FORCE_PRIO="254" # See OSD_BACKFILL_PRIORITY_FORCED + export DEGRADED_PRIO="140" # See OSD_BACKFILL_DEGRADED_PRIORITY_BASE + export NORMAL_PRIO="100" # See OSD_BACKFILL_PRIORITY_BASE + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + + +function TEST_backfill_priority() { + local dir=$1 + local pools=10 + local OSDS=5 + # size 2 -> 1 means degraded by 1, so add 1 to base prio + local degraded_prio=$(expr $DEGRADED_PRIO + 1) + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + for p in $(seq 1 $pools) + do + create_pool "${poolprefix}$p" 1 1 + ceph osd pool set "${poolprefix}$p" size 2 + done + sleep 5 + + wait_for_clean || return 1 + + ceph pg dump pgs + + # Find 3 pools with a pg with the same primaries but second + # replica on another osd. 
+ local PG1 + local POOLNUM1 + local pool1 + local chk_osd1_1 + local chk_osd1_2 + + local PG2 + local POOLNUM2 + local pool2 + local chk_osd2 + + local PG3 + local POOLNUM3 + local pool3 + + for p in $(seq 1 $pools) + do + ceph pg map ${p}.0 --format=json | jq '.acting[]' > $dir/acting + local test_osd1=$(head -1 $dir/acting) + local test_osd2=$(tail -1 $dir/acting) + if [ -z "$PG1" ]; + then + PG1="${p}.0" + POOLNUM1=$p + pool1="${poolprefix}$p" + chk_osd1_1=$test_osd1 + chk_osd1_2=$test_osd2 + elif [ -z "$PG2" -a $chk_osd1_1 = $test_osd1 -a $chk_osd1_2 != $test_osd2 ]; + then + PG2="${p}.0" + POOLNUM2=$p + pool2="${poolprefix}$p" + chk_osd2=$test_osd2 + elif [ -n "$PG2" -a $chk_osd1_1 = $test_osd1 -a $chk_osd1_2 != $test_osd2 -a "$chk_osd2" != $test_osd2 ]; + then + PG3="${p}.0" + POOLNUM3=$p + pool3="${poolprefix}$p" + break + fi + done + rm -f $dir/acting + + if [ "$pool2" = "" -o "pool3" = "" ]; + then + echo "Failure to find appropirate PGs" + return 1 + fi + + for p in $(seq 1 $pools) + do + if [ $p != $POOLNUM1 -a $p != $POOLNUM2 -a $p != $POOLNUM3 ]; + then + delete_pool ${poolprefix}$p + fi + done + + ceph osd pool set $pool2 size 1 + ceph osd pool set $pool3 size 1 + wait_for_clean || return 1 + + dd if=/dev/urandom of=$dir/data bs=1M count=10 + p=1 + for pname in $pool1 $pool2 $pool3 + do + for i in $(seq 1 $objects) + do + rados -p ${pname} put obj${i}-p${p} $dir/data + done + p=$(expr $p + 1) + done + + local otherosd=$(get_not_primary $pool1 obj1-p1) + + ceph pg dump pgs + ERRORS=0 + + ceph osd set nobackfill + ceph osd set noout + + # Get a pg to want to backfill and quickly force it + # to be preempted. + ceph osd pool set $pool3 size 2 + sleep 2 + + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_reservations || return 1 + + # 3. Item is in progress, adjust priority with no higher priority waiting + while(ceph pg force-backfill $PG3 2>&1 | grep -q "doesn't require backfilling") + do + sleep 2 + done + flush_pg_stats || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_reservations || return 1 + + ceph osd out osd.$chk_osd1_2 + sleep 2 + flush_pg_stats || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_reservations || return 1 + ceph pg dump pgs + + ceph osd pool set $pool2 size 2 + sleep 2 + flush_pg_stats || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_reservations > $dir/out || return 1 + cat $dir/out + ceph pg dump pgs + + PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG1}\")).prio") + if [ "$PRIO" != "$NORMAL_PRIO" ]; + then + echo "The normal PG ${PG1} doesn't have prio $NORMAL_PRIO queued waiting" + ERRORS=$(expr $ERRORS + 1) + fi + + # Using eval will strip double-quotes from item + eval ITEM=$(cat $dir/out | jq '.local_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG3} ]; + then + echo "The force-backfill PG $PG3 didn't become the in progress item" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/out | jq '.local_reservations.in_progress[0].prio') + if [ "$PRIO" != $FORCE_PRIO ]; + then + echo "The force-backfill PG ${PG3} doesn't have prio $FORCE_PRIO" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + # 1. 
Item is queued, re-queue with new priority + while(ceph pg force-backfill $PG2 2>&1 | grep -q "doesn't require backfilling") + do + sleep 2 + done + sleep 2 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_reservations > $dir/out || return 1 + cat $dir/out + PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG2}\")).prio") + if [ "$PRIO" != "$FORCE_PRIO" ]; + then + echo "The second force-backfill PG ${PG2} doesn't have prio $FORCE_PRIO" + ERRORS=$(expr $ERRORS + 1) + fi + flush_pg_stats || return 1 + + # 4. Item is in progress, if higher priority items waiting prempt item + ceph pg cancel-force-backfill $PG3 || return 1 + sleep 2 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_reservations > $dir/out || return 1 + cat $dir/out + PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG3}\")).prio") + if [ "$PRIO" != "$degraded_prio" ]; + then + echo "After cancel-force-backfill PG ${PG3} doesn't have prio $degraded_prio" + ERRORS=$(expr $ERRORS + 1) + fi + + eval ITEM=$(cat $dir/out | jq '.local_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG2} ]; + then + echo "The force-recovery PG $PG2 didn't become the in progress item" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/out | jq '.local_reservations.in_progress[0].prio') + if [ "$PRIO" != $FORCE_PRIO ]; + then + echo "The first force-recovery PG ${PG2} doesn't have prio $FORCE_PRIO" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + ceph pg cancel-force-backfill $PG2 || return 1 + sleep 5 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_reservations || return 1 + + # 2. Item is queued, re-queue and preempt because new priority higher than an in progress item + flush_pg_stats || return 1 + ceph pg force-backfill $PG3 || return 1 + sleep 2 + + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_reservations > $dir/out || return 1 + cat $dir/out + PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG2}\")).prio") + if [ "$PRIO" != "$degraded_prio" ]; + then + echo "After cancel-force-backfill PG ${PG2} doesn't have prio $degraded_prio" + ERRORS=$(expr $ERRORS + 1) + fi + + eval ITEM=$(cat $dir/out | jq '.local_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG3} ]; + then + echo "The force-backfill PG $PG3 didn't get promoted to an in progress item" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/out | jq '.local_reservations.in_progress[0].prio') + if [ "$PRIO" != $FORCE_PRIO ]; + then + echo "The force-backfill PG ${PG2} doesn't have prio $FORCE_PRIO" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + ceph osd unset noout + ceph osd unset nobackfill + + wait_for_clean "CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_reservations" || return 1 + + ceph pg dump pgs + + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_pgstate_history + + if [ $ERRORS != "0" ]; + then + echo "$ERRORS error(s) found" + else + echo TEST PASSED + fi + + delete_pool $pool1 + delete_pool $pool2 + delete_pool $pool3 + kill_daemons $dir || return 1 + return $ERRORS +} + +# +# Show that pool recovery_priority is added to the backfill priority +# +# Create 2 pools with 2 OSDs with different primarys +# pool 1 with recovery_priority 1 +# pool 2 with recovery_priority 2 +# +# Start backfill by changing the pool sizes from 1 to 2 +# Use dump_reservations to verify priorities +function 
TEST_backfill_pool_priority() { + local dir=$1 + local pools=3 # Don't assume the first 2 pools are exact what we want + local OSDS=2 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + for p in $(seq 1 $pools) + do + create_pool "${poolprefix}$p" 1 1 + ceph osd pool set "${poolprefix}$p" size 2 + done + sleep 5 + + wait_for_clean || return 1 + + ceph pg dump pgs + + # Find 2 pools with different primaries which + # means the replica must be on another osd. + local PG1 + local POOLNUM1 + local pool1 + local chk_osd1_1 + local chk_osd1_2 + + local PG2 + local POOLNUM2 + local pool2 + local chk_osd2_1 + local chk_osd2_2 + + for p in $(seq 1 $pools) + do + ceph pg map ${p}.0 --format=json | jq '.acting[]' > $dir/acting + local test_osd1=$(head -1 $dir/acting) + local test_osd2=$(tail -1 $dir/acting) + if [ -z "$PG1" ]; + then + PG1="${p}.0" + POOLNUM1=$p + pool1="${poolprefix}$p" + chk_osd1_1=$test_osd1 + chk_osd1_2=$test_osd2 + elif [ $chk_osd1_1 != $test_osd1 ]; + then + PG2="${p}.0" + POOLNUM2=$p + pool2="${poolprefix}$p" + chk_osd2_1=$test_osd1 + chk_osd2_2=$test_osd2 + break + fi + done + rm -f $dir/acting + + if [ "$pool2" = "" ]; + then + echo "Failure to find appropirate PGs" + return 1 + fi + + for p in $(seq 1 $pools) + do + if [ $p != $POOLNUM1 -a $p != $POOLNUM2 ]; + then + delete_pool ${poolprefix}$p + fi + done + + pool1_extra_prio=1 + pool2_extra_prio=2 + # size 2 -> 1 means degraded by 1, so add 1 to base prio + pool1_prio=$(expr $DEGRADED_PRIO + 1 + $pool1_extra_prio) + pool2_prio=$(expr $DEGRADED_PRIO + 1 + $pool2_extra_prio) + + ceph osd pool set $pool1 size 1 + ceph osd pool set $pool1 recovery_priority $pool1_extra_prio + ceph osd pool set $pool2 size 1 + ceph osd pool set $pool2 recovery_priority $pool2_extra_prio + wait_for_clean || return 1 + + dd if=/dev/urandom of=$dir/data bs=1M count=10 + p=1 + for pname in $pool1 $pool2 + do + for i in $(seq 1 $objects) + do + rados -p ${pname} put obj${i}-p${p} $dir/data + done + p=$(expr $p + 1) + done + + local otherosd=$(get_not_primary $pool1 obj1-p1) + + ceph pg dump pgs + ERRORS=0 + + ceph osd pool set $pool1 size 2 + ceph osd pool set $pool2 size 2 + sleep 5 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_reservations > $dir/dump.${chk_osd1_1}.out + echo osd.${chk_osd1_1} + cat $dir/dump.${chk_osd1_1}.out + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_2}) dump_reservations > $dir/dump.${chk_osd1_2}.out + echo osd.${chk_osd1_2} + cat $dir/dump.${chk_osd1_2}.out + + # Using eval will strip double-quotes from item + eval ITEM=$(cat $dir/dump.${chk_osd1_1}.out | jq '.local_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG1} ]; + then + echo "The primary PG ${PG1} didn't become the in progress item" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/dump.${chk_osd1_1}.out | jq '.local_reservations.in_progress[0].prio') + if [ "$PRIO" != $pool1_prio ]; + then + echo "The primary PG ${PG1} doesn't have prio $pool1_prio" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + # Using eval will strip double-quotes from item + eval ITEM=$(cat $dir/dump.${chk_osd1_2}.out | jq '.remote_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG1} ]; + then + echo "The primary PG ${PG1} didn't become the in progress item on remote" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/dump.${chk_osd1_2}.out | jq '.remote_reservations.in_progress[0].prio') + if [ "$PRIO" != 
$pool1_prio ]; + then + echo "The primary PG ${PG1} doesn't have prio $pool1_prio on remote" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + # Using eval will strip double-quotes from item + eval ITEM=$(cat $dir/dump.${chk_osd2_1}.out | jq '.local_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG2} ]; + then + echo "The primary PG ${PG2} didn't become the in progress item" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/dump.${chk_osd2_1}.out | jq '.local_reservations.in_progress[0].prio') + if [ "$PRIO" != $pool2_prio ]; + then + echo "The primary PG ${PG2} doesn't have prio $pool2_prio" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + # Using eval will strip double-quotes from item + eval ITEM=$(cat $dir/dump.${chk_osd2_2}.out | jq '.remote_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG2} ]; + then + echo "The primary PG $PG2 didn't become the in progress item on remote" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/dump.${chk_osd2_2}.out | jq '.remote_reservations.in_progress[0].prio') + if [ "$PRIO" != $pool2_prio ]; + then + echo "The primary PG ${PG2} doesn't have prio $pool2_prio on remote" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + wait_for_clean || return 1 + + if [ $ERRORS != "0" ]; + then + echo "$ERRORS error(s) found" + else + echo TEST PASSED + fi + + delete_pool $pool1 + delete_pool $pool2 + kill_daemons $dir || return 1 + return $ERRORS +} + +main osd-backfill-prio "$@" + +# Local Variables: +# compile-command: "make -j4 && ../qa/run-standalone.sh osd-backfill-prio.sh" +# End: diff --git a/ceph/qa/standalone/osd/osd-markdown.sh b/ceph/qa/standalone/osd/osd-markdown.sh index 6a28a305c..64157537d 100755 --- a/ceph/qa/standalone/osd/osd-markdown.sh +++ b/ceph/qa/standalone/osd/osd-markdown.sh @@ -45,7 +45,10 @@ function markdown_N_impl() { ceph osd tree ceph osd tree | grep osd.0 |grep up || return 1 # mark the OSD down. - ceph osd down 0 + # override any dup setting in the environment to ensure we do this + # exactly once (modulo messenger failures, at least; we can't *actually* + # provide exactly-once semantics for mon commands). + CEPH_CLI_TEST_DUP_COMMAND=0 ceph osd down 0 sleep $sleeptime done } diff --git a/ceph/qa/standalone/osd/osd-recovery-prio.sh b/ceph/qa/standalone/osd/osd-recovery-prio.sh new file mode 100755 index 000000000..25ecb2651 --- /dev/null +++ b/ceph/qa/standalone/osd/osd-recovery-prio.sh @@ -0,0 +1,500 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2019 Red Hat +# +# Author: David Zafman +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + # Fix port???? 
+ export CEPH_MON="127.0.0.1:7114" # git grep '\<7114\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON --osd_max_backfills=1 --debug_reserver=20" + export objects=200 + export poolprefix=test + export FORCE_PRIO="255" # See OSD_RECOVERY_PRIORITY_FORCED + export NORMAL_PRIO="180" # See OSD_RECOVERY_PRIORITY_BASE + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + + +function TEST_recovery_priority() { + local dir=$1 + local pools=10 + local OSDS=5 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + for p in $(seq 1 $pools) + do + create_pool "${poolprefix}$p" 1 1 + ceph osd pool set "${poolprefix}$p" size 2 + done + sleep 5 + + wait_for_clean || return 1 + + ceph pg dump pgs + + # Find 3 pools with a pg with the same primaries but second + # replica on another osd. + local PG1 + local POOLNUM1 + local pool1 + local chk_osd1_1 + local chk_osd1_2 + + local PG2 + local POOLNUM2 + local pool2 + local chk_osd2 + + local PG3 + local POOLNUM3 + local pool3 + + for p in $(seq 1 $pools) + do + ceph pg map ${p}.0 --format=json | jq '.acting[]' > $dir/acting + local test_osd1=$(head -1 $dir/acting) + local test_osd2=$(tail -1 $dir/acting) + if [ -z "$PG1" ]; + then + PG1="${p}.0" + POOLNUM1=$p + pool1="${poolprefix}$p" + chk_osd1_1=$test_osd1 + chk_osd1_2=$test_osd2 + elif [ -z "$PG2" -a $chk_osd1_1 = $test_osd1 -a $chk_osd1_2 != $test_osd2 ]; + then + PG2="${p}.0" + POOLNUM2=$p + pool2="${poolprefix}$p" + chk_osd2=$test_osd2 + elif [ -n "$PG2" -a $chk_osd1_1 = $test_osd1 -a $chk_osd1_2 != $test_osd2 -a "$chk_osd2" != $test_osd2 ]; + then + PG3="${p}.0" + POOLNUM3=$p + pool3="${poolprefix}$p" + break + fi + done + rm -f $dir/acting + + if [ "$pool2" = "" -o "pool3" = "" ]; + then + echo "Failure to find appropirate PGs" + return 1 + fi + + for p in $(seq 1 $pools) + do + if [ $p != $POOLNUM1 -a $p != $POOLNUM2 -a $p != $POOLNUM3 ]; + then + delete_pool ${poolprefix}$p + fi + done + + ceph osd pool set $pool2 size 1 + ceph osd pool set $pool3 size 1 + wait_for_clean || return 1 + + dd if=/dev/urandom of=$dir/data bs=1M count=10 + p=1 + for pname in $pool1 $pool2 $pool3 + do + for i in $(seq 1 $objects) + do + rados -p ${pname} put obj${i}-p${p} $dir/data + done + p=$(expr $p + 1) + done + + local otherosd=$(get_not_primary $pool1 obj1-p1) + + ceph pg dump pgs + ERRORS=0 + + ceph osd set norecover + ceph osd set noout + + # Get a pg to want to recover and quickly force it + # to be preempted. + ceph osd pool set $pool3 size 2 + sleep 2 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_reservations || return 1 + + # 3. 
Item is in progress, adjust priority with no higher priority waiting + while(ceph pg force-recovery $PG3 2>&1 | grep -q "doesn't require recovery") + do + sleep 2 + done + flush_pg_stats || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_reservations || return 1 + + ceph osd out osd.$chk_osd1_2 + sleep 2 + flush_pg_stats || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_reservations || return 1 + ceph pg dump pgs + + ceph osd pool set $pool2 size 2 + sleep 2 + flush_pg_stats || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_reservations > $dir/out || return 1 + cat $dir/out + ceph pg dump pgs + + PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG1}\")).prio") + if [ "$PRIO" != "$NORMAL_PRIO" ]; + then + echo "The normal PG ${PG1} doesn't have prio $NORMAL_PRIO queued waiting" + ERRORS=$(expr $ERRORS + 1) + fi + + # Using eval will strip double-quotes from item + eval ITEM=$(cat $dir/out | jq '.local_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG3} ]; + then + echo "The first force-recovery PG $PG3 didn't become the in progress item" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/out | jq '.local_reservations.in_progress[0].prio') + if [ "$PRIO" != $FORCE_PRIO ]; + then + echo "The first force-recovery PG ${PG3} doesn't have prio $FORCE_PRIO" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + # 1. Item is queued, re-queue with new priority + while(ceph pg force-recovery $PG2 2>&1 | grep -q "doesn't require recovery") + do + sleep 2 + done + sleep 2 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_reservations > $dir/out || return 1 + cat $dir/out + PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG2}\")).prio") + if [ "$PRIO" != "$FORCE_PRIO" ]; + then + echo "The second force-recovery PG ${PG2} doesn't have prio $FORCE_PRIO" + ERRORS=$(expr $ERRORS + 1) + fi + flush_pg_stats || return 1 + + # 4. Item is in progress, if higher priority items waiting prempt item + #ceph osd unset norecover + ceph pg cancel-force-recovery $PG3 || return 1 + sleep 2 + #ceph osd set norecover + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_reservations > $dir/out || return 1 + cat $dir/out + PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG3}\")).prio") + if [ "$PRIO" != "$NORMAL_PRIO" ]; + then + echo "After cancel-recovery PG ${PG3} doesn't have prio $NORMAL_PRIO" + ERRORS=$(expr $ERRORS + 1) + fi + + eval ITEM=$(cat $dir/out | jq '.local_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG2} ]; + then + echo "The force-recovery PG $PG2 didn't become the in progress item" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/out | jq '.local_reservations.in_progress[0].prio') + if [ "$PRIO" != $FORCE_PRIO ]; + then + echo "The first force-recovery PG ${PG2} doesn't have prio $FORCE_PRIO" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + ceph pg cancel-force-recovery $PG2 || return 1 + sleep 5 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_reservations || return 1 + + # 2. 
Item is queued, re-queue and preempt because new priority higher than an in progress item + flush_pg_stats || return 1 + ceph pg force-recovery $PG3 || return 1 + sleep 2 + + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_reservations > $dir/out || return 1 + cat $dir/out + PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG2}\")).prio") + if [ "$PRIO" != "$NORMAL_PRIO" ]; + then + echo "After cancel-force-recovery PG ${PG3} doesn't have prio $NORMAL_PRIO" + ERRORS=$(expr $ERRORS + 1) + fi + + eval ITEM=$(cat $dir/out | jq '.local_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG3} ]; + then + echo "The force-recovery PG $PG3 didn't get promoted to an in progress item" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/out | jq '.local_reservations.in_progress[0].prio') + if [ "$PRIO" != $FORCE_PRIO ]; + then + echo "The force-recovery PG ${PG2} doesn't have prio $FORCE_PRIO" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + ceph osd unset noout + ceph osd unset norecover + + wait_for_clean "CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_reservations" || return 1 + + ceph pg dump pgs + + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_pgstate_history + + if [ $ERRORS != "0" ]; + then + echo "$ERRORS error(s) found" + else + echo TEST PASSED + fi + + delete_pool $pool1 + delete_pool $pool2 + delete_pool $pool3 + kill_daemons $dir || return 1 + return $ERRORS +} + +# +# Show that pool recovery_priority is added to recovery priority +# +# Create 2 pools with 2 OSDs with different primarys +# pool 1 with recovery_priority 1 +# pool 2 with recovery_priority 2 +# +# Start recovery by changing the pool sizes from 1 to 2 +# Use dump_reservations to verify priorities +function TEST_recovery_pool_priority() { + local dir=$1 + local pools=3 # Don't assume the first 2 pools are exact what we want + local OSDS=2 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + for p in $(seq 1 $pools) + do + create_pool "${poolprefix}$p" 1 1 + ceph osd pool set "${poolprefix}$p" size 2 + done + sleep 5 + + wait_for_clean || return 1 + + ceph pg dump pgs + + # Find 2 pools with different primaries which + # means the replica must be on another osd. 
+ local PG1 + local POOLNUM1 + local pool1 + local chk_osd1_1 + local chk_osd1_2 + + local PG2 + local POOLNUM2 + local pool2 + local chk_osd2_1 + local chk_osd2_2 + + for p in $(seq 1 $pools) + do + ceph pg map ${p}.0 --format=json | jq '.acting[]' > $dir/acting + local test_osd1=$(head -1 $dir/acting) + local test_osd2=$(tail -1 $dir/acting) + if [ -z "$PG1" ]; + then + PG1="${p}.0" + POOLNUM1=$p + pool1="${poolprefix}$p" + chk_osd1_1=$test_osd1 + chk_osd1_2=$test_osd2 + elif [ $chk_osd1_1 != $test_osd1 ]; + then + PG2="${p}.0" + POOLNUM2=$p + pool2="${poolprefix}$p" + chk_osd2_1=$test_osd1 + chk_osd2_2=$test_osd2 + break + fi + done + rm -f $dir/acting + + if [ "$pool2" = "" ]; + then + echo "Failure to find appropirate PGs" + return 1 + fi + + for p in $(seq 1 $pools) + do + if [ $p != $POOLNUM1 -a $p != $POOLNUM2 ]; + then + delete_pool ${poolprefix}$p + fi + done + + pool1_extra_prio=1 + pool2_extra_prio=2 + pool1_prio=$(expr $NORMAL_PRIO + $pool1_extra_prio) + pool2_prio=$(expr $NORMAL_PRIO + $pool2_extra_prio) + + ceph osd pool set $pool1 size 1 + ceph osd pool set $pool1 recovery_priority $pool1_extra_prio + ceph osd pool set $pool2 size 1 + ceph osd pool set $pool2 recovery_priority $pool2_extra_prio + wait_for_clean || return 1 + + dd if=/dev/urandom of=$dir/data bs=1M count=10 + p=1 + for pname in $pool1 $pool2 + do + for i in $(seq 1 $objects) + do + rados -p ${pname} put obj${i}-p${p} $dir/data + done + p=$(expr $p + 1) + done + + local otherosd=$(get_not_primary $pool1 obj1-p1) + + ceph pg dump pgs + ERRORS=0 + + ceph osd pool set $pool1 size 2 + ceph osd pool set $pool2 size 2 + sleep 10 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_reservations > $dir/dump.${chk_osd1_1}.out + echo osd.${chk_osd1_1} + cat $dir/dump.${chk_osd1_1}.out + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_2}) dump_reservations > $dir/dump.${chk_osd1_2}.out + echo osd.${chk_osd1_2} + cat $dir/dump.${chk_osd1_2}.out + + # Using eval will strip double-quotes from item + eval ITEM=$(cat $dir/dump.${chk_osd1_1}.out | jq '.local_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG1} ]; + then + echo "The primary PG for $pool1 didn't become the in progress item" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/dump.${chk_osd1_1}.out | jq '.local_reservations.in_progress[0].prio') + if [ "$PRIO" != $pool1_prio ]; + then + echo "The primary PG ${PG1} doesn't have prio $pool1_prio" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + # Using eval will strip double-quotes from item + eval ITEM=$(cat $dir/dump.${chk_osd1_2}.out | jq '.remote_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG1} ]; + then + echo "The primary PG for $pool1 didn't become the in progress item on remote" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/dump.${chk_osd1_2}.out | jq '.remote_reservations.in_progress[0].prio') + if [ "$PRIO" != $pool1_prio ]; + then + echo "The primary PG ${PG1} doesn't have prio $pool1_prio on remote" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + # Using eval will strip double-quotes from item + eval ITEM=$(cat $dir/dump.${chk_osd2_1}.out | jq '.local_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG2} ]; + then + echo "The primary PG for $pool2 didn't become the in progress item" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/dump.${chk_osd2_1}.out | jq '.local_reservations.in_progress[0].prio') + if [ "$PRIO" != $pool2_prio ]; + then + echo "The primary PG ${PG2} doesn't have prio $pool2_prio" + ERRORS=$(expr $ERRORS + 1) + fi + fi 
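
Each of the checks in this block repeats the same jq/eval pattern: dump the OSD's reservation state over the admin socket, take the first in_progress entry on the local (primary) or remote (replica) side, and compare its item and prio against the expected PG and priority, where the expected value is the base recovery priority plus the pool's recovery_priority. The Python sketch below restates that pattern once for readability; it is not part of the patch, it assumes only the JSON layout implied by the test's own jq expressions, and the helper names dump_reservations and check_in_progress are invented for illustration.

    # Illustrative sketch (not part of the patch): the jq/eval checks used by this
    # test, expressed once in Python. Assumes the dump_reservations JSON shape the
    # test queries: {"local_reservations": {"in_progress": [{"item": ..., "prio": ...}],
    #                "queues": [...]}, "remote_reservations": {...}}.
    import json
    import os
    import subprocess

    def dump_reservations(asok_path):
        # Same call the test issues: CEPH_ARGS='' ceph --admin-daemon <asok> dump_reservations
        out = subprocess.check_output(
            ['ceph', '--admin-daemon', asok_path, 'dump_reservations'],
            env=dict(os.environ, CEPH_ARGS=''))
        return json.loads(out)

    def check_in_progress(dump, side, expected_pg, expected_prio):
        # side is 'local_reservations' (primary) or 'remote_reservations' (replica);
        # returns 1 on a mismatch so it can be added to an error counter.
        entry = dump[side]['in_progress'][0]
        if entry['item'] != expected_pg:
            print("PG %s didn't become the in progress item" % expected_pg)
            return 1
        if entry['prio'] != expected_prio:
            print("PG %s doesn't have prio %s" % (expected_pg, expected_prio))
            return 1
        return 0

    # Expected priority here is NORMAL_PRIO (180) plus the pool's recovery_priority;
    # the analogous backfill test earlier uses DEGRADED_PRIO + 1 + recovery_priority.
    # errors += check_in_progress(dump_reservations(asok1), 'local_reservations', '1.0', 181)
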
+ + # Using eval will strip double-quotes from item + eval ITEM=$(cat $dir/dump.${chk_osd2_2}.out | jq '.remote_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG2} ]; + then + echo "The primary PG $PG2 didn't become the in progress item on remote" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/dump.${chk_osd2_2}.out | jq '.remote_reservations.in_progress[0].prio') + if [ "$PRIO" != $pool2_prio ]; + then + echo "The primary PG ${PG2} doesn't have prio $pool2_prio on remote" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + wait_for_clean || return 1 + + if [ $ERRORS != "0" ]; + then + echo "$ERRORS error(s) found" + else + echo TEST PASSED + fi + + delete_pool $pool1 + delete_pool $pool2 + kill_daemons $dir || return 1 + return $ERRORS +} + +main osd-recovery-prio "$@" + +# Local Variables: +# compile-command: "make -j4 && ../qa/run-standalone.sh osd-recovery-prio.sh" +# End: diff --git a/ceph/qa/standalone/scrub/osd-scrub-repair.sh b/ceph/qa/standalone/scrub/osd-scrub-repair.sh index b6d541bb3..8b228784e 100755 --- a/ceph/qa/standalone/scrub/osd-scrub-repair.sh +++ b/ceph/qa/standalone/scrub/osd-scrub-repair.sh @@ -194,7 +194,7 @@ function create_ec_pool() { local pool_name=$1 local allow_overwrites=$2 - ceph osd erasure-code-profile set myprofile crush-failure-domain=osd $3 $4 $5 $6 $7 || return 1 + ceph osd erasure-code-profile set myprofile crush-failure-domain=osd "$@" || return 1 create_pool "$poolname" 1 1 erasure myprofile || return 1 @@ -5245,7 +5245,7 @@ function TEST_periodic_scrub_replicated() { # Can't upgrade with this set ceph osd set nodeep-scrub # Let map change propagate to OSDs - flush pg_stats + flush_pg_stats sleep 5 # Fake a schedule scrub @@ -5274,6 +5274,91 @@ function TEST_periodic_scrub_replicated() { rados list-inconsistent-obj $pg | jq '.' 
| grep -qv $objname || return 1 } +function TEST_scrub_warning() { + local dir=$1 + local poolname=psr_pool + local objname=POBJ + local scrubs=5 + local deep_scrubs=5 + local i1_day=86400 + local i7_days=$(calc $i1_day \* 7) + local i14_days=$(calc $i1_day \* 14) + local overdue=$i1_day + local conf_overdue_seconds=$(calc $i7_days + $overdue ) + local pool_overdue_seconds=$(calc $i14_days + $overdue ) + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=1 || return 1 + run_mgr $dir x --mon_warn_not_scrubbed=${overdue} --mon_warn_not_deep_scrubbed=${overdue} || return 1 + run_osd $dir 0 $ceph_osd_args --osd_scrub_backoff_ratio=0 || return 1 + + for i in $(seq 1 $(expr $scrubs + $deep_scrubs)) + do + create_pool $poolname-$i 1 1 || return 1 + wait_for_clean || return 1 + if [ $i = "1" ]; + then + ceph osd pool set $poolname-$i scrub_max_interval $i14_days + fi + if [ $i = $(expr $scrubs + 1) ]; + then + ceph osd pool set $poolname-$i deep_scrub_interval $i14_days + fi + done + + # Only 1 osd + local primary=0 + + ceph osd set noscrub || return 1 + ceph osd set nodeep-scrub || return 1 + ceph config set global osd_scrub_interval_randomize_ratio 0 + ceph config set global osd_deep_scrub_randomize_ratio 0 + ceph config set global osd_scrub_max_interval ${i7_days} + ceph config set global osd_deep_scrub_interval ${i7_days} + + # Fake schedule scrubs + for i in $(seq 1 $scrubs) + do + if [ $i = "1" ]; + then + overdue_seconds=$pool_overdue_seconds + else + overdue_seconds=$conf_overdue_seconds + fi + CEPH_ARGS='' ceph daemon $(get_asok_path osd.${primary}) \ + trigger_scrub ${i}.0 $(expr ${overdue_seconds} + ${i}00) || return 1 + done + # Fake schedule deep scrubs + for i in $(seq $(expr $scrubs + 1) $(expr $scrubs + $deep_scrubs)) + do + if [ $i = "$(expr $scrubs + 1)" ]; + then + overdue_seconds=$pool_overdue_seconds + else + overdue_seconds=$conf_overdue_seconds + fi + CEPH_ARGS='' ceph daemon $(get_asok_path osd.${primary}) \ + trigger_deep_scrub ${i}.0 $(expr ${overdue_seconds} + ${i}00) || return 1 + done + flush_pg_stats + + ceph health + ceph health detail + ceph health | grep -q "$deep_scrubs pgs not deep-scrubbed in time" || return 1 + ceph health | grep -q "$scrubs pgs not scrubbed in time" || return 1 + COUNT=$(ceph health detail | grep "not scrubbed since" | wc -l) + if [ "$COUNT" != $scrubs ]; then + ceph health detail | grep "not scrubbed since" + return 1 + fi + COUNT=$(ceph health detail | grep "not deep-scrubbed since" | wc -l) + if [ "$COUNT" != $deep_scrubs ]; then + ceph health detail | grep "not deep-scrubbed since" + return 1 + fi + return 0 +} + # # Corrupt snapset in replicated pool # diff --git a/ceph/qa/standalone/special/ceph_objectstore_tool.py b/ceph/qa/standalone/special/ceph_objectstore_tool.py index 0c6097c1f..1bde02b76 100755 --- a/ceph/qa/standalone/special/ceph_objectstore_tool.py +++ b/ceph/qa/standalone/special/ceph_objectstore_tool.py @@ -686,8 +686,8 @@ def main(argv): EC_NAME = "ECobject" if len(argv) > 0 and argv[0] == 'large': PG_COUNT = 12 - NUM_REP_OBJECTS = 800 - NUM_CLONED_REP_OBJECTS = 100 + NUM_REP_OBJECTS = 200 + NUM_CLONED_REP_OBJECTS = 50 NUM_EC_OBJECTS = 12 NUM_NSPACES = 4 # Larger data sets for first object per namespace @@ -1470,7 +1470,7 @@ def main(argv): for basename in db[nspace].keys(): file = os.path.join(DATADIR, nspace + "-" + basename + "__head") JSON = db[nspace][basename]['json'] - GETNAME = "/tmp/getbytes.{pid}".format(pid=pid) + jsondict = json.loads(JSON) for pg in OBJREPPGS: OSDS = get_osds(pg, OSDDIR) for osd 
in OSDS: @@ -1481,12 +1481,33 @@ def main(argv): continue if int(basename.split(REP_NAME)[1]) > int(NUM_CLONED_REP_OBJECTS): continue + logging.debug("REPobject " + JSON) cmd = (CFSD_PREFIX + " '{json}' dump | grep '\"snap\": 1,' > /dev/null").format(osd=osd, json=JSON) logging.debug(cmd) ret = call(cmd, shell=True) if ret != 0: logging.error("Invalid dump for {json}".format(json=JSON)) ERRORS += 1 + if 'shard_id' in jsondict[1]: + logging.debug("ECobject " + JSON) + for pg in OBJECPGS: + OSDS = get_osds(pg, OSDDIR) + jsondict = json.loads(JSON) + for osd in OSDS: + DIR = os.path.join(OSDDIR, os.path.join(osd, os.path.join("current", "{pg}_head".format(pg=pg)))) + fnames = [f for f in os.listdir(DIR) if os.path.isfile(os.path.join(DIR, f)) + and f.split("_")[0] == basename and f.split("_")[4] == nspace] + if not fnames: + continue + if int(basename.split(EC_NAME)[1]) > int(NUM_EC_OBJECTS): + continue + # Fix shard_id since we only have one json instance for each object + jsondict[1]['shard_id'] = int(pg.split('s')[1]) + cmd = (CFSD_PREFIX + " '{json}' dump | grep '\"hinfo\": [{{]' > /dev/null").format(osd=osd, json=json.dumps((pg, jsondict[1]))) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("Invalid dump for {json}".format(json=JSON)) print("Test list-attrs get-attr") ATTRFILE = r"/tmp/attrs.{pid}".format(pid=pid) @@ -1497,16 +1518,16 @@ def main(argv): JSON = db[nspace][basename]['json'] jsondict = json.loads(JSON) - if 'shard_id' in jsondict: + if 'shard_id' in jsondict[1]: logging.debug("ECobject " + JSON) found = 0 for pg in OBJECPGS: OSDS = get_osds(pg, OSDDIR) # Fix shard_id since we only have one json instance for each object - jsondict['shard_id'] = int(pg.split('s')[1]) - JSON = json.dumps(jsondict) + jsondict[1]['shard_id'] = int(pg.split('s')[1]) + JSON = json.dumps((pg, jsondict[1])) for osd in OSDS: - cmd = (CFSD_PREFIX + "--pgid {pg} '{json}' get-attr hinfo_key").format(osd=osd, pg=pg, json=JSON) + cmd = (CFSD_PREFIX + " '{json}' get-attr hinfo_key").format(osd=osd, json=JSON) logging.debug("TRY: " + cmd) try: out = check_output(cmd, shell=True, stderr=subprocess.STDOUT) @@ -1522,12 +1543,12 @@ def main(argv): for pg in ALLPGS: # Make sure rep obj with rep pg or ec obj with ec pg - if ('shard_id' in jsondict) != (pg.find('s') > 0): + if ('shard_id' in jsondict[1]) != (pg.find('s') > 0): continue - if 'shard_id' in jsondict: + if 'shard_id' in jsondict[1]: # Fix shard_id since we only have one json instance for each object - jsondict['shard_id'] = int(pg.split('s')[1]) - JSON = json.dumps(jsondict) + jsondict[1]['shard_id'] = int(pg.split('s')[1]) + JSON = json.dumps((pg, jsondict[1])) OSDS = get_osds(pg, OSDDIR) for osd in OSDS: DIR = os.path.join(OSDDIR, os.path.join(osd, os.path.join("current", "{pg}_head".format(pg=pg)))) @@ -1536,7 +1557,7 @@ def main(argv): if not fnames: continue afd = open(ATTRFILE, "wb") - cmd = (CFSD_PREFIX + "--pgid {pg} '{json}' list-attrs").format(osd=osd, pg=pg, json=JSON) + cmd = (CFSD_PREFIX + " '{json}' list-attrs").format(osd=osd, json=JSON) logging.debug(cmd) ret = call(cmd, shell=True, stdout=afd) afd.close() @@ -1556,7 +1577,7 @@ def main(argv): continue exp = values.pop(key) vfd = open(VALFILE, "wb") - cmd = (CFSD_PREFIX + "--pgid {pg} '{json}' get-attr {key}").format(osd=osd, pg=pg, json=JSON, key="_" + key) + cmd = (CFSD_PREFIX + " '{json}' get-attr {key}").format(osd=osd, json=JSON, key="_" + key) logging.debug(cmd) ret = call(cmd, shell=True, stdout=vfd) vfd.close() diff --git 
a/ceph/qa/suites/ceph-deploy/basic/objectstore/bluestore-bitmap.yaml b/ceph/qa/suites/ceph-deploy/basic/objectstore/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/ceph/qa/suites/ceph-deploy/basic/objectstore/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml \ No newline at end of file diff --git a/ceph/qa/suites/ceph-deploy/basic/objectstore/bluestore.yaml b/ceph/qa/suites/ceph-deploy/basic/objectstore/bluestore.yaml deleted file mode 120000 index bd7d7e004..000000000 --- a/ceph/qa/suites/ceph-deploy/basic/objectstore/bluestore.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../objectstore/bluestore.yaml \ No newline at end of file diff --git a/ceph/qa/suites/fs/basic_functional/objectstore/bluestore-bitmap.yaml b/ceph/qa/suites/fs/basic_functional/objectstore/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/ceph/qa/suites/fs/basic_functional/objectstore/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml \ No newline at end of file diff --git a/ceph/qa/suites/fs/basic_functional/objectstore/bluestore.yaml b/ceph/qa/suites/fs/basic_functional/objectstore/bluestore.yaml deleted file mode 120000 index bd7d7e004..000000000 --- a/ceph/qa/suites/fs/basic_functional/objectstore/bluestore.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../objectstore/bluestore.yaml \ No newline at end of file diff --git a/ceph/qa/suites/fs/basic_functional/tasks/libcephfs_java.yaml b/ceph/qa/suites/fs/basic_functional/tasks/libcephfs_java.yaml deleted file mode 100644 index aaffa0338..000000000 --- a/ceph/qa/suites/fs/basic_functional/tasks/libcephfs_java.yaml +++ /dev/null @@ -1,14 +0,0 @@ - -os_type: ubuntu -os_version: "14.04" - -overrides: - ceph-fuse: - disabled: true - kclient: - disabled: true -tasks: -- workunit: - clients: - client.0: - - libcephfs-java/test.sh diff --git a/ceph/qa/suites/fs/bugs/client_trim_caps/objectstore/bluestore-bitmap.yaml b/ceph/qa/suites/fs/bugs/client_trim_caps/objectstore/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/ceph/qa/suites/fs/bugs/client_trim_caps/objectstore/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml \ No newline at end of file diff --git a/ceph/qa/suites/fs/bugs/client_trim_caps/objectstore/bluestore.yaml b/ceph/qa/suites/fs/bugs/client_trim_caps/objectstore/bluestore.yaml deleted file mode 120000 index 1728accf9..000000000 --- a/ceph/qa/suites/fs/bugs/client_trim_caps/objectstore/bluestore.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../../cephfs/objectstore-ec/bluestore.yaml \ No newline at end of file diff --git a/ceph/qa/suites/fs/bugs/client_trim_caps/tasks/trim-i22073.yaml b/ceph/qa/suites/fs/bugs/client_trim_caps/tasks/trim-i22073.yaml index 410606225..f0ed3366c 100644 --- a/ceph/qa/suites/fs/bugs/client_trim_caps/tasks/trim-i22073.yaml +++ b/ceph/qa/suites/fs/bugs/client_trim_caps/tasks/trim-i22073.yaml @@ -10,7 +10,6 @@ overrides: tasks: - exec: mon.a: - - "ceph tell mds.* config set mds_max_ratio_caps_per_client 1" - "ceph tell mds.* config set mds_min_caps_per_client 1" - background_exec: mon.a: diff --git a/ceph/qa/suites/fs/verify/validater/valgrind.yaml b/ceph/qa/suites/fs/verify/validater/valgrind.yaml index 6982cedfc..b25b71487 100644 --- a/ceph/qa/suites/fs/verify/validater/valgrind.yaml +++ b/ceph/qa/suites/fs/verify/validater/valgrind.yaml @@ -1,21 +1,20 @@ # see http://tracker.ceph.com/issues/20360 and http://tracker.ceph.com/issues/18126 os_type: centos 
-# Valgrind makes everything slow, so ignore slow requests -overrides: - ceph: - log-whitelist: - - slow requests are blocked - overrides: install: ceph: flavor: notcmalloc debuginfo: true ceph: + # Valgrind makes everything slow, so ignore slow requests and extend heartbeat grace + log-whitelist: + - slow requests are blocked conf: global: osd heartbeat grace: 40 + mds: + mds heartbeat grace: 60 mon: mon osd crush smoke test: false valgrind: diff --git a/ceph/qa/suites/powercycle/osd/whitelist_health.yaml b/ceph/qa/suites/powercycle/osd/whitelist_health.yaml index f9ab0a62b..f724302a4 100644 --- a/ceph/qa/suites/powercycle/osd/whitelist_health.yaml +++ b/ceph/qa/suites/powercycle/osd/whitelist_health.yaml @@ -3,4 +3,5 @@ overrides: log-whitelist: - \(MDS_TRIM\) - \(MDS_SLOW_REQUEST\) + - MDS_SLOW_METADATA_IO - Behind on trimming diff --git a/ceph/qa/suites/rados/monthrash/workloads/rados_api_tests.yaml b/ceph/qa/suites/rados/monthrash/workloads/rados_api_tests.yaml index 3b821bc0c..def613b67 100644 --- a/ceph/qa/suites/rados/monthrash/workloads/rados_api_tests.yaml +++ b/ceph/qa/suites/rados/monthrash/workloads/rados_api_tests.yaml @@ -11,6 +11,7 @@ overrides: - \(PG_ - \(POOL_APP_NOT_ENABLED\) - \(SMALLER_PGP_NUM\) + - slow request conf: global: debug objecter: 20 diff --git a/ceph/qa/suites/rados/rest/rest_test.yaml b/ceph/qa/suites/rados/rest/rest_test.yaml index 0fdb9dc6a..bc8eb8360 100644 --- a/ceph/qa/suites/rados/rest/rest_test.yaml +++ b/ceph/qa/suites/rados/rest/rest_test.yaml @@ -32,6 +32,7 @@ tasks: - \(SLOW_OPS\) - \(TOO_FEW_PGS\) - but it is still running + - slow request conf: client.rest0: debug ms: 1 diff --git a/ceph/qa/suites/rados/singleton-bluestore/objectstore/bluestore-bitmap.yaml b/ceph/qa/suites/rados/singleton-bluestore/objectstore/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/ceph/qa/suites/rados/singleton-bluestore/objectstore/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml \ No newline at end of file diff --git a/ceph/qa/suites/rados/singleton-bluestore/objectstore/bluestore.yaml b/ceph/qa/suites/rados/singleton-bluestore/objectstore/bluestore.yaml deleted file mode 120000 index bd7d7e004..000000000 --- a/ceph/qa/suites/rados/singleton-bluestore/objectstore/bluestore.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../objectstore/bluestore.yaml \ No newline at end of file diff --git a/ceph/qa/suites/rados/singleton/all/osd-recovery-incomplete.yaml b/ceph/qa/suites/rados/singleton/all/osd-recovery-incomplete.yaml index a63400be3..01088ab89 100644 --- a/ceph/qa/suites/rados/singleton/all/osd-recovery-incomplete.yaml +++ b/ceph/qa/suites/rados/singleton/all/osd-recovery-incomplete.yaml @@ -22,6 +22,7 @@ tasks: - \(PG_ - \(OBJECT_ - \(REQUEST_SLOW\) + - slow request conf: osd: osd min pg log entries: 5 diff --git a/ceph/qa/suites/rados/singleton/all/osd-recovery.yaml b/ceph/qa/suites/rados/singleton/all/osd-recovery.yaml index 5479f79b7..fa8fa704b 100644 --- a/ceph/qa/suites/rados/singleton/all/osd-recovery.yaml +++ b/ceph/qa/suites/rados/singleton/all/osd-recovery.yaml @@ -20,8 +20,8 @@ tasks: - \(OSD_ - \(PG_ - \(OBJECT_DEGRADED\) - - \(SLOW_OPS\) - \(REQUEST_SLOW\) + - slow request conf: osd: osd min pg log entries: 5 diff --git a/ceph/qa/suites/rados/singleton/all/thrash-eio.yaml b/ceph/qa/suites/rados/singleton/all/thrash-eio.yaml index e184d911d..f4db6ca18 100644 --- a/ceph/qa/suites/rados/singleton/all/thrash-eio.yaml +++ b/ceph/qa/suites/rados/singleton/all/thrash-eio.yaml @@ -30,6 +30,7 @@ 
tasks: - \(PG_ - \(OBJECT_MISPLACED\) - \(OSD_ + - slow request - thrashosds: op_delay: 30 clean_interval: 120 diff --git a/ceph/qa/suites/rados/thrash-erasure-code-overwrites/bluestore-bitmap.yaml b/ceph/qa/suites/rados/thrash-erasure-code-overwrites/bluestore-bitmap.yaml new file mode 120000 index 000000000..635085f7f --- /dev/null +++ b/ceph/qa/suites/rados/thrash-erasure-code-overwrites/bluestore-bitmap.yaml @@ -0,0 +1 @@ +../thrash-erasure-code/objectstore/bluestore-bitmap.yaml \ No newline at end of file diff --git a/ceph/qa/suites/rados/thrash-erasure-code-overwrites/bluestore.yaml b/ceph/qa/suites/rados/thrash-erasure-code-overwrites/bluestore.yaml deleted file mode 120000 index 1249ffda0..000000000 --- a/ceph/qa/suites/rados/thrash-erasure-code-overwrites/bluestore.yaml +++ /dev/null @@ -1 +0,0 @@ -../thrash-erasure-code/objectstore/bluestore.yaml \ No newline at end of file diff --git a/ceph/qa/suites/rados/verify/tasks/rados_api_tests.yaml b/ceph/qa/suites/rados/verify/tasks/rados_api_tests.yaml index 4a06055b5..9fd048c91 100644 --- a/ceph/qa/suites/rados/verify/tasks/rados_api_tests.yaml +++ b/ceph/qa/suites/rados/verify/tasks/rados_api_tests.yaml @@ -10,6 +10,7 @@ overrides: - \(CACHE_POOL_NEAR_FULL\) - \(POOL_APP_NOT_ENABLED\) - \(PG_AVAILABILITY\) + - slow request conf: client: debug ms: 1 diff --git a/ceph/qa/suites/rbd/basic/tasks/rbd_python_api_tests_old_format.yaml b/ceph/qa/suites/rbd/basic/tasks/rbd_python_api_tests_old_format.yaml index 7ab3185ec..32283b6ca 100644 --- a/ceph/qa/suites/rbd/basic/tasks/rbd_python_api_tests_old_format.yaml +++ b/ceph/qa/suites/rbd/basic/tasks/rbd_python_api_tests_old_format.yaml @@ -2,6 +2,7 @@ overrides: ceph: log-whitelist: - \(REQUEST_SLOW\) + - slow request tasks: - workunit: clients: diff --git a/ceph/qa/suites/rbd/singleton-bluestore/objectstore/bluestore-bitmap.yaml b/ceph/qa/suites/rbd/singleton-bluestore/objectstore/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/ceph/qa/suites/rbd/singleton-bluestore/objectstore/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml \ No newline at end of file diff --git a/ceph/qa/suites/rbd/singleton-bluestore/objectstore/bluestore.yaml b/ceph/qa/suites/rbd/singleton-bluestore/objectstore/bluestore.yaml deleted file mode 120000 index bd7d7e004..000000000 --- a/ceph/qa/suites/rbd/singleton-bluestore/objectstore/bluestore.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../objectstore/bluestore.yaml \ No newline at end of file diff --git a/ceph/qa/suites/rgw/multisite/overrides.yaml b/ceph/qa/suites/rgw/multisite/overrides.yaml index 7fbd27605..0c2cd2e19 100644 --- a/ceph/qa/suites/rgw/multisite/overrides.yaml +++ b/ceph/qa/suites/rgw/multisite/overrides.yaml @@ -7,5 +7,7 @@ overrides: rgw crypt s3 kms encryption keys: testkey-1=YmluCmJvb3N0CmJvb3N0LWJ1aWxkCmNlcGguY29uZgo= rgw crypt require ssl: false rgw sync log trim interval: 0 + rgw md log max shards: 4 + rgw data log num shards: 4 rgw: compression type: random diff --git a/ceph/qa/suites/smoke/basic/objectstore/bluestore-bitmap.yaml b/ceph/qa/suites/smoke/basic/objectstore/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/ceph/qa/suites/smoke/basic/objectstore/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml \ No newline at end of file diff --git a/ceph/qa/suites/smoke/basic/objectstore/bluestore.yaml b/ceph/qa/suites/smoke/basic/objectstore/bluestore.yaml deleted file mode 120000 index bd7d7e004..000000000 --- 
a/ceph/qa/suites/smoke/basic/objectstore/bluestore.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../objectstore/bluestore.yaml \ No newline at end of file diff --git a/ceph/qa/suites/upgrade/kraken-x/stress-split/objectstore/bluestore-bitmap.yaml b/ceph/qa/suites/upgrade/kraken-x/stress-split/objectstore/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/ceph/qa/suites/upgrade/kraken-x/stress-split/objectstore/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml \ No newline at end of file diff --git a/ceph/qa/suites/upgrade/kraken-x/stress-split/objectstore/bluestore.yaml b/ceph/qa/suites/upgrade/kraken-x/stress-split/objectstore/bluestore.yaml deleted file mode 120000 index d6445987d..000000000 --- a/ceph/qa/suites/upgrade/kraken-x/stress-split/objectstore/bluestore.yaml +++ /dev/null @@ -1 +0,0 @@ -../../../../../objectstore/bluestore.yaml \ No newline at end of file diff --git a/ceph/qa/tasks/cephfs/test_client_limits.py b/ceph/qa/tasks/cephfs/test_client_limits.py index 1f1d54670..bc029cd8a 100644 --- a/ceph/qa/tasks/cephfs/test_client_limits.py +++ b/ceph/qa/tasks/cephfs/test_client_limits.py @@ -42,12 +42,14 @@ class TestClientLimits(CephFSTestCase): cache_size = open_files/2 self.set_conf('mds', 'mds cache size', cache_size) + self.set_conf('mds', 'mds_recall_max_caps', open_files/2) + self.set_conf('mds', 'mds_recall_warning_threshold', open_files) self.fs.mds_fail_restart() self.fs.wait_for_daemons() mds_min_caps_per_client = int(self.fs.get_config("mds_min_caps_per_client")) + mds_recall_warning_decay_rate = self.fs.get_config("mds_recall_warning_decay_rate") self.assertTrue(open_files >= mds_min_caps_per_client) - mds_max_ratio_caps_per_client = float(self.fs.get_config("mds_max_ratio_caps_per_client")) mount_a_client_id = self.mount_a.get_global_id() path = "subdir/mount_a" if use_subdir else "mount_a" @@ -64,13 +66,11 @@ class TestClientLimits(CephFSTestCase): # MDS should not be happy about that, as the client is failing to comply # with the SESSION_RECALL messages it is being sent - mds_recall_state_timeout = float(self.fs.get_config("mds_recall_state_timeout")) - self.wait_for_health("MDS_CLIENT_RECALL", mds_recall_state_timeout+10) + self.wait_for_health("MDS_CLIENT_RECALL", mds_recall_warning_decay_rate*2) # We can also test that the MDS health warning for oversized # cache is functioning as intended. 
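
The updated assertions below drop the old ratio-based expectation and reduce the check to two bounds: the client's cap count must never fall below mds_min_caps_per_client, and it must settle either at that floor or under an upper limit (the MDS cache size in _test_client_cache_size, mds_max_caps_per_client in the new test_client_max_caps). A minimal stand-alone sketch of that predicate-plus-poll idiom follows; it is not the test code itself, and get_num_caps is a placeholder for the session query (self.get_session(...)['num_caps']) the real test performs.

    # Editor's sketch (not part of the patch): the bounded cap-count check as a
    # generic predicate plus poll loop.
    import time

    def caps_ok(num_caps, min_caps, upper_bound):
        # Never allowed to drop below the per-client minimum.
        if num_caps < min_caps:
            raise RuntimeError("client caps fell below min!")
        # Either trimmed all the way down to the floor, or within the upper bound
        # (the real checks use "< cache_size" in one place and
        # "<= mds_max_caps_per_client" in the other; "<=" is shown here).
        return num_caps == min_caps or num_caps <= upper_bound

    def wait_until(predicate, timeout, interval=5):
        deadline = time.time() + timeout
        while time.time() < deadline:
            if predicate():
                return True
            time.sleep(interval)
        raise RuntimeError("timed out waiting for client caps to drop")

    # Example usage (names hypothetical):
    # wait_until(lambda: caps_ok(get_num_caps(), mds_min_caps_per_client,
    #                            mds_max_caps_per_client), timeout=60)
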
- self.wait_for_health("MDS_CACHE_OVERSIZED", - mds_recall_state_timeout + 10) + self.wait_for_health("MDS_CACHE_OVERSIZED", mds_recall_warning_decay_rate*2) # When the client closes the files, it should retain only as many caps as allowed # under the SESSION_RECALL policy @@ -84,14 +84,13 @@ class TestClientLimits(CephFSTestCase): # The remaining caps should comply with the numbers sent from MDS in SESSION_RECALL message, # which depend on the caps outstanding, cache size and overall ratio - recall_expected_value = int((1.0-mds_max_ratio_caps_per_client)*(open_files+2)) def expected_caps(): num_caps = self.get_session(mount_a_client_id)['num_caps'] if num_caps < mds_min_caps_per_client: raise RuntimeError("client caps fell below min!") elif num_caps == mds_min_caps_per_client: return True - elif recall_expected_value*.95 <= num_caps <= recall_expected_value*1.05: + elif num_caps < cache_size: return True else: return False @@ -237,3 +236,28 @@ class TestClientLimits(CephFSTestCase): def test_client_cache_size(self): self._test_client_cache_size(False) self._test_client_cache_size(True) + + def test_client_max_caps(self): + """ + That the MDS will not let a client sit above mds_max_caps_per_client caps. + """ + + mds_min_caps_per_client = int(self.fs.get_config("mds_min_caps_per_client")) + mds_max_caps_per_client = 2*mds_min_caps_per_client + self.set_conf('mds', 'mds_max_caps_per_client', mds_max_caps_per_client) + self.fs.mds_fail_restart() + self.fs.wait_for_daemons() + + self.mount_a.create_n_files("foo/", 3*mds_max_caps_per_client, sync=True) + + mount_a_client_id = self.mount_a.get_global_id() + def expected_caps(): + num_caps = self.get_session(mount_a_client_id)['num_caps'] + if num_caps < mds_min_caps_per_client: + raise RuntimeError("client caps fell below min!") + elif num_caps <= mds_max_caps_per_client: + return True + else: + return False + + self.wait_until_true(expected_caps, timeout=60) diff --git a/ceph/qa/tasks/cephfs/test_misc.py b/ceph/qa/tasks/cephfs/test_misc.py index c27278008..9c44e6c09 100644 --- a/ceph/qa/tasks/cephfs/test_misc.py +++ b/ceph/qa/tasks/cephfs/test_misc.py @@ -52,6 +52,9 @@ class TestMisc(CephFSTestCase): self.assertGreaterEqual(rctime, t-10) def test_fs_new(self): + self.mount_a.umount_wait() + self.mount_b.umount_wait() + data_pool_name = self.fs.get_data_pool_name() self.fs.mds_stop() diff --git a/ceph/qa/tasks/radosbench.py b/ceph/qa/tasks/radosbench.py index 530a6f149..dd1f85dee 100644 --- a/ceph/qa/tasks/radosbench.py +++ b/ceph/qa/tasks/radosbench.py @@ -76,12 +76,12 @@ def task(ctx, config): else: pool = manager.create_pool_with_unique_name(erasure_code_profile_name=profile_name) - osize = config.get('objectsize', 0) + osize = config.get('objectsize', 65536) if osize is 0: objectsize = [] else: objectsize = ['-o', str(osize)] - size = ['-b', str(config.get('size', 4<<20))] + size = ['-b', str(config.get('size', 65536))] # If doing a reading run then populate data if runtype != "write": proc = remote.run( diff --git a/ceph/qa/valgrind.supp b/ceph/qa/valgrind.supp new file mode 100644 index 000000000..cbd41a29e --- /dev/null +++ b/ceph/qa/valgrind.supp @@ -0,0 +1,622 @@ +{ + older boost mersenne twister uses uninitialized memory for randomness + Memcheck:Cond + ... + fun:*Monitor::prepare_new_fingerprint* + ... +} +{ + older boost mersenne twister uses uninitialized memory for randomness + Memcheck:Value8 + ... + fun:*Monitor::prepare_new_fingerprint* + ... 
+} +{ + apparent TLS leak in eglibc + Memcheck:Leak + fun:calloc + fun:_dl_allocate_tls + fun:pthread_create* + ... +} +{ + osd: ignore ec plugin loading (FIXME SOMEDAY) + Memcheck:Leak + ... + fun:*ErasureCodePluginRegistry*load* + ... +} +{ + osd: ignore ec plugin factory (FIXME SOMEDAY) + Memcheck:Leak + ... + fun:*ErasureCodePluginRegistry*factory* + ... +} +{ + tcmalloc: libboost_thread-mt.so.1.53 is linked with tcmalloc + Memcheck:Param + msync(start) + obj:/usr/lib64/libpthread-2.17.so + obj:/usr/lib64/libunwind.so.8.0.1 + obj:/usr/lib64/libunwind.so.8.0.1 + obj:/usr/lib64/libunwind.so.8.0.1 + obj:/usr/lib64/libunwind.so.8.0.1 + ... + fun:*tcmalloc*ThreadCache* + ... + obj:/usr/lib64/libboost_thread-mt.so.1.53.0 +} +{ + tcmalloc: msync heap allocation points to uninit bytes (centos 6.5) + Memcheck:Param + msync(start) + obj:/lib64/libpthread-2.12.so + obj:/usr/lib64/libunwind.so.8.0.1 + obj:/usr/lib64/libunwind.so.8.0.1 + obj:/usr/lib64/libunwind.so.8.0.1 + obj:/usr/lib64/libunwind.so.8.0.1 + fun:_ULx86_64_step + fun:_Z13GetStackTracePPvii + fun:_ZN8tcmalloc8PageHeap8GrowHeapEm + fun:_ZN8tcmalloc8PageHeap3NewEm +} +{ + tcmalloc: msync heap allocation points to unaddressible bytes (centos 6.5 #2) + Memcheck:Param + msync(start) + obj:/lib64/libpthread-2.12.so + obj:/usr/lib64/libunwind.so.7.0.0 + fun:_ULx86_64_step + fun:_Z13GetStackTracePPvii + fun:_ZN8tcmalloc8PageHeap8GrowHeapEm + fun:_ZN8tcmalloc8PageHeap3NewEm +} +{ + tcmalloc: msync heap allocation points to uninit bytes (rhel7) + Memcheck:Param + msync(start) + obj:/usr/lib64/libpthread-2.17.so + obj:/usr/lib64/libunwind.so.8.0.1 + obj:/usr/lib64/libunwind.so.8.0.1 + obj:/usr/lib64/libunwind.so.8.0.1 + obj:/usr/lib64/libunwind.so.8.0.1 + fun:_ULx86_64_step + fun:_Z13GetStackTracePPvii + fun:_ZN8tcmalloc8PageHeap8GrowHeapEm + fun:_ZN8tcmalloc8PageHeap3NewEm +} +{ + tcmalloc: msync heap allocation points to uninit bytes (rhel7 #2) + Memcheck:Param + msync(start) + obj:/usr/lib64/libpthread-2.17.so + obj:/usr/lib64/libunwind.so.8.0.1 + obj:/usr/lib64/libunwind.so.8.0.1 + obj:/usr/lib64/libunwind.so.8.0.1 + obj:/usr/lib64/libunwind.so.8.0.1 + fun:_ULx86_64_step + obj:/usr/lib64/libtcmalloc.so.4.2.6 + fun:_Z13GetStackTracePPvii + fun:_ZN8tcmalloc8PageHeap8GrowHeapEm + fun:_ZN8tcmalloc8PageHeap3NewEm +} +{ + tcmalloc: msync heap allocation points to uninit bytes (wheezy) + Memcheck:Param + msync(start) + obj:/lib/x86_64-linux-gnu/libpthread-2.13.so + obj:/usr/lib/libunwind.so.7.0.0 + fun:_ULx86_64_step + fun:_Z13GetStackTracePPvii + fun:_ZN8tcmalloc8PageHeap8GrowHeapEm + fun:_ZN8tcmalloc8PageHeap3NewEm +} +{ + tcmalloc: msync heap allocation points to uninit bytes (precise) + Memcheck:Param + msync(start) + obj:/lib/x86_64-linux-gnu/libpthread-2.15.so + obj:/usr/lib/libunwind.so.7.0.0 + fun:_ULx86_64_step + fun:_Z13GetStackTracePPvii + fun:_ZN8tcmalloc8PageHeap8GrowHeapEm + fun:_ZN8tcmalloc8PageHeap3NewEm + obj:/usr/lib/libtcmalloc.so.0.1.0 +} +{ + tcmalloc: msync heap allocation points to uninit bytes (trusty) + Memcheck:Param + msync(start) + obj:/lib/x86_64-linux-gnu/libpthread-2.19.so + obj:/usr/lib/x86_64-linux-gnu/libunwind.so.8.0.1 + obj:/usr/lib/x86_64-linux-gnu/libunwind.so.8.0.1 + obj:/usr/lib/x86_64-linux-gnu/libunwind.so.8.0.1 + obj:/usr/lib/x86_64-linux-gnu/libunwind.so.8.0.1 + fun:_ULx86_64_step + fun:_Z13GetStackTracePPvii + fun:_ZN8tcmalloc8PageHeap8GrowHeapEm + fun:_ZN8tcmalloc8PageHeap3NewEm +} +{ + tcmalloc: msync heap allocation points to uninit bytes 2 (trusty) + Memcheck:Param + msync(start) + 
fun:__msync_nocancel + obj:/usr/lib/x86_64-linux-gnu/libunwind.so.8.0.1 + obj:/usr/lib/x86_64-linux-gnu/libunwind.so.8.0.1 + obj:/usr/lib/x86_64-linux-gnu/libunwind.so.8.0.1 + obj:/usr/lib/x86_64-linux-gnu/libunwind.so.8.0.1 + fun:_ULx86_64_step + fun:_Z13GetStackTracePPvii + fun:_ZN8tcmalloc8PageHeap8GrowHeapEm + fun:_ZN8tcmalloc8PageHeap3NewEm + fun:_ZN8tcmalloc15CentralFreeList8PopulateEv + fun:_ZN8tcmalloc15CentralFreeList18FetchFromSpansSafeEv + fun:_ZN8tcmalloc15CentralFreeList11RemoveRangeEPPvS2_i +} +{ + tcmalloc: msync (xenial) + Memcheck:Param + msync(start) + fun:__msync_nocancel + obj:/usr/lib/x86_64-linux-gnu/libunwind.so.8.0.1 + obj:/usr/lib/x86_64-linux-gnu/libunwind.so.8.0.1 + obj:/usr/lib/x86_64-linux-gnu/libunwind.so.8.0.1 + obj:/usr/lib/x86_64-linux-gnu/libunwind.so.8.0.1 + obj:*tcmalloc* + fun:*GetStackTrace* +} +{ + tcmalloc: string + Memcheck:Leak + ... + obj:*tcmalloc* + fun:call_init* + ... +} +{ + ceph global: deliberate onexit leak + Memcheck:Leak + ... + fun:*set_flush_on_exit* + ... +} +{ + libleveldb: ignore all static leveldb leaks + Memcheck:Leak + ... + fun:*leveldb* + ... +} +{ + libleveldb: ignore all dynamic libleveldb leaks + Memcheck:Leak + ... + obj:*libleveldb.so* + ... +} +{ + libcurl: ignore libcurl leaks + Memcheck:Leak + ... + fun:*curl_global_init +} +{ + ignore gnutls leaks + Memcheck:Leak + ... + fun:gnutls_global_init +} +{ + ignore libfcgi leak; OS_LibShutdown has no callers! + Memcheck:Leak + ... + fun:OS_LibInit + fun:FCGX_Init +} +{ + ignore libnss3 leaks + Memcheck:Leak + ... + obj:*libnss3* + ... +} +{ + strptime suckage + Memcheck:Cond + fun:__GI___strncasecmp_l + fun:__strptime_internal + ... +} +{ + strptime suckage 2 + Memcheck:Value8 + fun:__GI___strncasecmp_l + fun:__strptime_internal + ... +} +{ + strptime suckage 3 + Memcheck:Addr8 + fun:__GI___strncasecmp_l + fun:__strptime_internal + ... +} +{ + inet_ntop does something lame on local stack + Memcheck:Value8 + ... + fun:inet_ntop + ... +} +{ + inet_ntop does something lame on local stack + Memcheck:Addr8 + ... + fun:inet_ntop + ... +} +{ + dl-lookup.c thing .. Invalid write of size 8 + Memcheck:Value8 + fun:do_lookup_x + ... + fun:_dl_lookup_symbol_x + ... +} +{ + dl-lookup.c thing .. Invalid write of size 8 + Memcheck:Addr8 + fun:do_lookup_x + ... + fun:_dl_lookup_symbol_x + ... +} +{ + weird thing from libc + Memcheck:Leak + ... + fun:*sub_I_comparator* + fun:__libc_csu_init + ... +} +{ + libfuse leak + Memcheck:Leak + ... + fun:fuse_parse_cmdline + ... +} +{ + boost thread leaks on exit + Memcheck:Leak + ... + fun:*boost*detail* + ... + fun:exit +} +{ + lttng appears to not clean up state + Memcheck:Leak + ... + fun:lttng_ust_baddr_statedump_init + fun:lttng_ust_init + fun:call_init.part.0 + ... +} +{ + fun:PK11_CreateContextBySymKey race + Helgrind:Race + obj:/usr/*lib*/libfreebl*3.so + ... + obj:/usr/*lib*/libsoftokn3.so + ... + obj:/usr/*lib*/libnss3.so + fun:PK11_CreateContextBySymKey + ... +} +{ + thread init race + Helgrind:Race + fun:mempcpy + fun:_dl_allocate_tls_init + ... + fun:pthread_create@* + ... +} +{ + thread_local memory is falsely detected (https://svn.boost.org/trac/boost/ticket/3296) + Memcheck:Leak + ... + fun:*boost*detail*get_once_per_thread_epoch* + fun:*boost*call_once* + fun:*boost*detail*get_current_thread_data* + ... +} +{ + rocksdb thread local singletons + Memcheck:Leak + ... + fun:rocksdb::Env::Default() + ... +} +{ + rocksdb column thread local leaks + Memcheck:Leak + ... 
+ fun:rocksdb::ThreadLocalPtr::StaticMeta::SetHandler* + fun:rocksdb::ColumnFamilyData::ColumnFamilyData* + ... +} +{ + rocksdb thread crap + Memcheck:Leak + ... + fun:*ThreadLocalPtr* + ... +} +{ + rocksdb singleton Env leak, blech + Memcheck:Leak + ... + fun:CreateThreadStatusUpdater + fun:PosixEnv + ... +} +{ + rocksdb::Env::Default() + Memcheck:Leak + ... + fun:*rocksdb*Env*Default* + ... +} +{ + rocksdb BGThreadWrapper + Memcheck:Leak + ... + fun:*BGThreadWrapper* + ... +} +{ + libstdc++ leak on xenial + Memcheck:Leak + fun:malloc + ... + fun:call_init.part.0 + fun:call_init + fun:_dl_init + ... +} +{ + strange leak of std::string memory from md_config_t seen in radosgw + Memcheck:Leak + ... + fun:_ZNSs4_Rep9_S_createEmmRKSaIcE + fun:_ZNSs12_S_constructIPKcEEPcT_S3_RKSaIcESt20forward_iterator_tag + ... + fun:_ZN11md_config_tC1Ev + fun:_ZN11CephContextC1Eji + ... +} +{ + python does not reset the member field when dealloc an object + Memcheck:Leak + match-leak-kinds: all + ... + fun:Py_InitializeEx + ... +} +{ + statically allocated python types don't get members freed + Memcheck:Leak + match-leak-kinds: all + ... + fun:PyType_Ready + ... +} +{ + manually constructed python module members don't get freed + Memcheck:Leak + match-leak-kinds: all + ... + fun:Py_InitModule4_64 + ... +} +{ + manually constructed python module members don't get freed + Memcheck:Leak + match-leak-kinds: all + ... + fun:PyModule_AddObject + ... +} +{ + python subinterpreters may not clean up properly + Memcheck:Leak + match-leak-kinds: all + ... + fun:Py_NewInterpreter + ... +} +{ + python should be able to take care of itself + Memcheck:Leak + match-leak-kinds: all + ... + fun:PyEval_EvalCode +} +{ + python should be able to take care of itself + Memcheck:Leak + match-leak-kinds: all + ... + fun:PyImport_ImportModuleLevel +} +{ + python-owned threads may not full clean up after themselves + Memcheck:Leak + match-leak-kinds: all + ... + fun:PyEval_CallObjectWithKeywords +} +{ + python should be able to take care of itself + Memcheck:Leak + match-leak-kinds: all + ... + fun:PyEval_EvalFrameEx + ... + obj:/usr/lib64/libpython2.7.so.1.0 +} +{ + python should be able to take care of itself + Memcheck:Leak + match-leak-kinds: all + ... + fun:PyObject_Call +} + +{ + rados cython constants + Memcheck:Leak + match-leak-kinds: definite + fun:malloc + fun:PyObject_Malloc + fun:PyCode_New + fun:__Pyx_InitCachedConstants + fun:initrados + fun:_PyImport_LoadDynamicModule + ... + fun:PyImport_ImportModuleLevel + ... + fun:PyObject_Call + fun:PyEval_CallObjectWithKeywords + fun:PyEval_EvalFrameEx +} + +{ + rbd cython constants + Memcheck:Leak + match-leak-kinds: definite + fun:malloc + fun:PyObject_Malloc + fun:PyCode_New + fun:__Pyx_InitCachedConstants + fun:initrbd + fun:_PyImport_LoadDynamicModule + ... + fun:PyImport_ImportModuleLevel + ... + fun:PyObject_Call + fun:PyEval_CallObjectWithKeywords + fun:PyEval_EvalFrameEx +} + +{ + dlopen() with -lceph-common https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=700899 + Memcheck:Leak + match-leak-kinds: reachable + fun:*alloc + ... + fun:_dlerror_run + fun:dlopen@@GLIBC_2.2.5 +} + +{ + ethdev_init_log thing + Memcheck:Leak + match-leak-kinds: reachable + ... + fun:ethdev_init_log + ... +} + +{ + rte_log_init() in DPDK fails to reset strdup()'ed string at exit + Memcheck:Leak + match-leak-kinds: reachable + fun:*alloc + ... + fun:rte_log_init + fun:__libc_csu_init +} + +{ + libc_csu_init (strdup, rte_log_register, etc.) + Memcheck:Leak + match-leak-kinds: reachable + ... 
+ fun:__libc_csu_init + ... +} + +{ + Boost.Thread fails to call tls_destructor() when the thread exists + Memcheck:Leak + match-leak-kinds: reachable + ... + fun:*boost*detail*make_external_thread_data* + fun:*boost*detail*add_new_tss_node* + fun:*boost*detail*set_tss_data* + ... +} + +{ + ignore *all* ceph-mgr python crap. this is overkill, but better than nothing + Memcheck:Leak + match-leak-kinds: all + ... + fun:Py* + ... +} + +{ + something in glibc + Memcheck:Leak + match-leak-kinds: all + ... + fun:strdup + fun:__trans_list_add + ... + fun:_dl_init + ... +} + +# "Conditional jump or move depends on uninitialised value(s)" in OpenSSL +# while using aes-128-gcm with AES-NI enabled. Not observed while running +# with `OPENSSL_ia32cap="~0x200000200000000"`. +{ + + Memcheck:Cond + ... + fun:EVP_DecryptFinal_ex + fun:_ZN4ceph6crypto6onwire25AES128GCM_OnWireRxHandler34authenticated_decrypt_update_finalEONS_6buffer7v14_2_04listEj + fun:_ZN10ProtocolV231handle_read_frame_epilogue_mainEOSt10unique_ptrIN4ceph6buffer7v14_2_08ptr_nodeENS4_8disposerEEi + fun:_ZN10ProtocolV216run_continuationER2CtIS_E + ... + fun:_ZN15AsyncConnection7processEv + fun:_ZN11EventCenter14process_eventsEjPNSt6chrono8durationImSt5ratioILl1ELl1000000000EEEE + fun:operator() + fun:_ZNSt17_Function_handlerIFvvEZN12NetworkStack10add_threadEjEUlvE_E9_M_invokeERKSt9_Any_data + fun:execute_native_thread_routine + fun:start_thread + fun:clone +} + +{ + + Memcheck:Cond + fun:_ZN4ceph6crypto6onwire25AES128GCM_OnWireRxHandler34authenticated_decrypt_update_finalEONS_6buffer7v14_2_04listEj + fun:_ZN10ProtocolV231handle_read_frame_epilogue_mainEOSt10unique_ptrIN4ceph6buffer7v14_2_08ptr_nodeENS4_8disposerEEi + fun:_ZN10ProtocolV216run_continuationER2CtIS_E + ... + fun:_ZN15AsyncConnection7processEv + fun:_ZN11EventCenter14process_eventsEjPNSt6chrono8durationImSt5ratioILl1ELl1000000000EEEE + fun:operator() + fun:_ZNSt17_Function_handlerIFvvEZN12NetworkStack10add_threadEjEUlvE_E9_M_invokeERKSt9_Any_data + fun:execute_native_thread_routine + fun:start_thread + fun:clone +} diff --git a/ceph/qa/workunits/cephtool/test.sh b/ceph/qa/workunits/cephtool/test.sh index 36f9dc43e..b0ab5c051 100755 --- a/ceph/qa/workunits/cephtool/test.sh +++ b/ceph/qa/workunits/cephtool/test.sh @@ -49,7 +49,7 @@ function expect_false() } -TEMP_DIR=$(mktemp -d ${TMPDIR-/tmp}/cephtool.XXX) +TEMP_DIR=$(mktemp -d ${TMPDIR:-/tmp}/cephtool.XXX) trap "rm -fr $TEMP_DIR" 0 TMPFILE=$(mktemp $TEMP_DIR/test_invalid.XXX) @@ -578,7 +578,9 @@ function test_tiering_9() function test_auth() { - ceph auth add client.xx mon allow osd "allow *" + expect_false ceph auth add client.xx mon 'invalid' osd "allow *" + expect_false ceph auth add client.xx mon 'allow *' osd "allow *" invalid "allow *" + ceph auth add client.xx mon 'allow *' osd "allow *" ceph auth export client.xx >client.xx.keyring ceph auth add client.xx -i client.xx.keyring rm -f client.xx.keyring @@ -602,7 +604,7 @@ function test_auth() expect_false ceph auth get client.xx # (almost) interactive mode - echo -e 'auth add client.xx mon allow osd "allow *"\n' | ceph + echo -e 'auth add client.xx mon "allow *" osd "allow *"\n' | ceph ceph auth get client.xx # script mode echo 'auth del client.xx' | ceph diff --git a/ceph/qa/workunits/libcephfs-java/test.sh b/ceph/qa/workunits/libcephfs-java/test.sh deleted file mode 100755 index f299e9597..000000000 --- a/ceph/qa/workunits/libcephfs-java/test.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/sh -e - -echo "starting libcephfs-java tests" -# configure CEPH_CONF and LD_LIBRARY_PATH 
if they're not already set -conf="$CEPH_CONF" -if [ -z "$conf" ] ; then - echo "Setting conf to /etc/ceph/ceph.conf" - conf="/etc/ceph/ceph.conf" -else - echo "conf is set to $conf" -fi - -ld_lib_path="$LD_LIBRARY_PATH" -if [ -z "$ld_lib_path" ] ; then - echo "Setting ld_lib_path to /usr/lib/jni:/usr/lib64" - ld_lib_path="/usr/lib/jni:/usr/lib64" -else - echo "ld_lib_path was set to $ld_lib_path" -fi - -ceph_java="$CEPH_JAVA_PATH" -if [ -z "$ceph_java" ] ; then - echo "Setting ceph_java to /usr/share/java" - ceph_java="/usr/share/java" -else - echo "ceph_java was set to $ceph_java" -fi - -command="java -DCEPH_CONF_FILE=$conf -Djava.library.path=$ld_lib_path -cp /usr/share/java/junit4.jar:$ceph_java/libcephfs.jar:$ceph_java/libcephfs-test.jar org.junit.runner.JUnitCore com.ceph.fs.CephAllTests" - -echo "----------------------" -echo $command -echo "----------------------" - -$command - -echo "completed libcephfs-java tests" - -exit 0 diff --git a/ceph/qa/workunits/rados/test_health_warnings.sh b/ceph/qa/workunits/rados/test_health_warnings.sh index a4a9c11c6..19dec9a84 100755 --- a/ceph/qa/workunits/rados/test_health_warnings.sh +++ b/ceph/qa/workunits/rados/test_health_warnings.sh @@ -7,6 +7,7 @@ crushtool -o crushmap --build --num_osds 10 host straw 2 rack straw 2 row straw ceph osd setcrushmap -i crushmap ceph osd tree ceph tell osd.* injectargs --osd_max_markdown_count 1024 --osd_max_markdown_period 1 +ceph osd set noout wait_for_healthy() { while ceph health | grep down diff --git a/ceph/src/.git_version b/ceph/src/.git_version index 268e02b63..63186701d 100644 --- a/ceph/src/.git_version +++ b/ceph/src/.git_version @@ -1,2 +1,2 @@ -26dc3775efc7bb286a1d6d66faee0ba30ea23eee -v12.2.11 +1436006594665279fe734b4c15d7e08c13ebd777 +v12.2.12 diff --git a/ceph/src/CMakeLists.txt b/ceph/src/CMakeLists.txt index b0837ab1d..4ff19154f 100644 --- a/ceph/src/CMakeLists.txt +++ b/ceph/src/CMakeLists.txt @@ -742,7 +742,7 @@ install(TARGETS librados-config DESTINATION bin) # virtualenv base directory for ceph-disk and ceph-detect-init set(CEPH_BUILD_VIRTUALENV $ENV{TMPDIR}) if(NOT CEPH_BUILD_VIRTUALENV) - set(CEPH_BUILD_VIRTUALENV /tmp) + set(CEPH_BUILD_VIRTUALENV ${CMAKE_BINARY_DIR}) endif() add_subdirectory(pybind) diff --git a/ceph/src/auth/Crypto.cc b/ceph/src/auth/Crypto.cc index 150052bfe..626367858 100644 --- a/ceph/src/auth/Crypto.cc +++ b/ceph/src/auth/Crypto.cc @@ -291,8 +291,9 @@ public: keyItem.type = siBuffer; keyItem.data = (unsigned char*)secret.c_str(); keyItem.len = secret.length(); - key = PK11_ImportSymKey(slot, mechanism, PK11_OriginUnwrap, CKA_ENCRYPT, - &keyItem, NULL); + using ceph::crypto::PK11_ImportSymKey_FIPS; + key = PK11_ImportSymKey_FIPS(slot, mechanism, PK11_OriginUnwrap, CKA_ENCRYPT, + &keyItem, NULL); if (!key) { err << "cannot convert AES key for NSS: " << PR_GetError(); return -1; diff --git a/ceph/src/ceph-disk/run-tox.sh b/ceph/src/ceph-disk/run-tox.sh index 76935b9e1..5c51d149a 100755 --- a/ceph/src/ceph-disk/run-tox.sh +++ b/ceph/src/ceph-disk/run-tox.sh @@ -16,7 +16,7 @@ # # run from the ceph-disk directory or from its parent -: ${CEPH_DISK_VIRTUALENV:=/tmp/ceph-disk-virtualenv} +: ${CEPH_DISK_VIRTUALENV:=$CEPH_BUILD_DIR/ceph-disk-virtualenv} test -d ceph-disk && cd ceph-disk if [ -e tox.ini ]; then diff --git a/ceph/src/ceph-volume/ceph_volume/devices/simple/activate.py b/ceph/src/ceph-volume/ceph_volume/devices/simple/activate.py index 814c6fe37..3cf414fdc 100644 --- a/ceph/src/ceph-volume/ceph_volume/devices/simple/activate.py +++ 
b/ceph/src/ceph-volume/ceph_volume/devices/simple/activate.py @@ -1,6 +1,7 @@ from __future__ import print_function import argparse import base64 +import glob import json import logging import os @@ -230,6 +231,12 @@ class Activate(object): nargs='?', help='The FSID of the OSD, similar to a SHA1' ) + parser.add_argument( + '--all', + help='Activate all OSDs with a OSD JSON config', + action='store_true', + default=False, + ) parser.add_argument( '--file', help='The path to a JSON file, from a scanned OSD' @@ -244,7 +251,7 @@ class Activate(object): print(sub_command_help) return args = parser.parse_args(self.argv) - if not args.file: + if not args.file and not args.all: if not args.osd_id and not args.osd_fsid: terminal.error('ID and FSID are required to find the right OSD to activate') terminal.error('from a scanned OSD location in /etc/ceph/osd/') @@ -253,13 +260,22 @@ class Activate(object): # implicitly indicate that it would be possible to activate a json file # at a non-default location which would not work at boot time if the # custom location is not exposed through an ENV var + self.skip_systemd = args.skip_systemd json_dir = os.environ.get('CEPH_VOLUME_SIMPLE_JSON_DIR', '/etc/ceph/osd/') - if args.file: - json_config = args.file + if args.all: + if args.file or args.osd_id: + mlogger.warn('--all was passed, ignoring --file and ID/FSID arguments') + json_configs = glob.glob('{}/*.json'.format(json_dir)) + for json_config in json_configs: + mlogger.info('activating OSD specified in {}'.format(json_config)) + args.json_config = json_config + self.activate(args) else: - json_config = os.path.join(json_dir, '%s-%s.json' % (args.osd_id, args.osd_fsid)) - if not os.path.exists(json_config): - raise RuntimeError('Expected JSON config path not found: %s' % json_config) - args.json_config = json_config - self.skip_systemd = args.skip_systemd - self.activate(args) + if args.file: + json_config = args.file + else: + json_config = os.path.join(json_dir, '%s-%s.json' % (args.osd_id, args.osd_fsid)) + if not os.path.exists(json_config): + raise RuntimeError('Expected JSON config path not found: %s' % json_config) + args.json_config = json_config + self.activate(args) diff --git a/ceph/src/ceph-volume/ceph_volume/devices/simple/scan.py b/ceph/src/ceph-volume/ceph_volume/devices/simple/scan.py index f2f7d3dc9..78a1493bd 100644 --- a/ceph/src/ceph-volume/ceph_volume/devices/simple/scan.py +++ b/ceph/src/ceph-volume/ceph_volume/devices/simple/scan.py @@ -7,6 +7,7 @@ import os from textwrap import dedent from ceph_volume import decorators, terminal, conf from ceph_volume.api import lvm +from ceph_volume.systemd import systemctl from ceph_volume.util import arg_validators, system, disk, encryption from ceph_volume.util.device import Device @@ -40,7 +41,7 @@ def parse_keyring(file_contents): class Scan(object): - help = 'Capture metadata from an OSD data partition or directory' + help = 'Capture metadata from all running ceph-disk OSDs, OSD data partition or directory' def __init__(self, argv): self.argv = argv @@ -283,7 +284,7 @@ class Scan(object): def main(self): sub_command_help = dedent(""" - Scan an OSD directory (or data device) for files and configurations + Scan running OSDs, an OSD directory (or data device) for files and configurations that will allow to take over the management of the OSD. 
Scanned OSDs will get their configurations stored in @@ -298,13 +299,19 @@ class Scan(object): /etc/ceph/osd/0-a9d50838-e823-43d6-b01f-2f8d0a77afc2.json - To a scan an existing, running, OSD: + To scan all running OSDs: + + ceph-volume simple scan + + To a scan a specific running OSD: ceph-volume simple scan /var/lib/ceph/osd/{cluster}-{osd id} And to scan a device (mounted or unmounted) that has OSD data in it, for example /dev/sda1 ceph-volume simple scan /dev/sda1 + + Scanning a device or directory that belongs to an OSD not created by ceph-disk will be ingored. """) parser = argparse.ArgumentParser( prog='ceph-volume simple scan', @@ -329,25 +336,40 @@ class Scan(object): metavar='OSD_PATH', type=arg_validators.OSDPath(), nargs='?', + default=None, help='Path to an existing OSD directory or OSD data partition' ) - if len(self.argv) == 0: - print(sub_command_help) - return - args = parser.parse_args(self.argv) - device = Device(args.osd_path) - if device.is_partition: - if device.ceph_disk.type != 'data': - label = device.ceph_disk.partlabel - msg = 'Device must be the ceph data partition, but PARTLABEL reported: "%s"' % label - raise RuntimeError(msg) + paths = [] + if args.osd_path: + paths.append(args.osd_path) + else: + osd_ids = systemctl.get_running_osd_ids() + for osd_id in osd_ids: + paths.append("/var/lib/ceph/osd/{}-{}".format( + conf.cluster, + osd_id, + )) # Capture some environment status, so that it can be reused all over self.device_mounts = system.get_mounts(devices=True) self.path_mounts = system.get_mounts(paths=True) - self.encryption_metadata = encryption.legacy_encrypted(args.osd_path) - self.is_encrypted = self.encryption_metadata['encrypted'] - self.scan(args) + for path in paths: + args.osd_path = path + device = Device(args.osd_path) + if device.is_partition: + if device.ceph_disk.type != 'data': + label = device.ceph_disk.partlabel + msg = 'Device must be the ceph data partition, but PARTLABEL reported: "%s"' % label + raise RuntimeError(msg) + + self.encryption_metadata = encryption.legacy_encrypted(args.osd_path) + self.is_encrypted = self.encryption_metadata['encrypted'] + + device = Device(self.encryption_metadata['device']) + if not device.is_ceph_disk_member: + terminal.warning("Ignoring %s because it's not a ceph-disk created osd." 
% path) + else: + self.scan(args) diff --git a/ceph/src/ceph-volume/ceph_volume/systemd/systemctl.py b/ceph/src/ceph-volume/ceph_volume/systemd/systemctl.py index 41dbbc19e..778ad1479 100644 --- a/ceph/src/ceph-volume/ceph_volume/systemd/systemctl.py +++ b/ceph/src/ceph-volume/ceph_volume/systemd/systemctl.py @@ -1,8 +1,11 @@ """ Utilities to control systemd units """ +import logging + from ceph_volume import process +logger = logging.getLogger(__name__) def start(unit): process.run(['systemctl', 'start', unit]) @@ -34,6 +37,26 @@ def is_active(unit): ) return rc == 0 +def get_running_osd_ids(): + out, err, rc = process.call([ + 'systemctl', + 'show', + '--no-pager', + '--property=Id', + '--state=running', + 'ceph-osd@*', + ]) + osd_ids = [] + if rc == 0: + for line in out: + if line: + # example line looks like: Id=ceph-osd@1.service + try: + osd_id = line.split("@")[1].split(".service")[0] + osd_ids.append(osd_id) + except (IndexError, TypeError): + logger.warning("Failed to parse output from systemctl: %s", line) + return osd_ids def start_osd(id_): return start(osd_unit % id_) diff --git a/ceph/src/ceph-volume/ceph_volume/tests/devices/simple/test_activate.py b/ceph/src/ceph-volume/ceph_volume/tests/devices/simple/test_activate.py index a275bdd00..885a6ec25 100644 --- a/ceph/src/ceph-volume/ceph_volume/tests/devices/simple/test_activate.py +++ b/ceph/src/ceph-volume/ceph_volume/tests/devices/simple/test_activate.py @@ -22,6 +22,26 @@ class TestActivate(object): stdout, stderr = capsys.readouterr() assert 'Activate OSDs by mounting devices previously configured' in stdout + def test_activate_all(self, is_root, monkeypatch): + ''' + make sure Activate calls activate for each file returned by glob + ''' + mocked_glob = [] + def mock_glob(glob): + path = os.path.dirname(glob) + mocked_glob.extend(['{}/{}.json'.format(path, file_) for file_ in + ['1', '2', '3']]) + return mocked_glob + activate_files = [] + def mock_activate(self, args): + activate_files.append(args.json_config) + monkeypatch.setattr('glob.glob', mock_glob) + monkeypatch.setattr(activate.Activate, 'activate', mock_activate) + activate.Activate(['--all']).main() + assert activate_files == mocked_glob + + + class TestEnableSystemdUnits(object): diff --git a/ceph/src/ceph-volume/ceph_volume/tests/devices/simple/test_scan.py b/ceph/src/ceph-volume/ceph_volume/tests/devices/simple/test_scan.py index 08ca37f66..118493625 100644 --- a/ceph/src/ceph-volume/ceph_volume/tests/devices/simple/test_scan.py +++ b/ceph/src/ceph-volume/ceph_volume/tests/devices/simple/test_scan.py @@ -3,14 +3,6 @@ import pytest from ceph_volume.devices.simple import scan -class TestScan(object): - - def test_main_spits_help_with_no_arguments(self, capsys): - scan.Scan([]).main() - stdout, stderr = capsys.readouterr() - assert 'Scan an OSD directory (or data device) for files' in stdout - - class TestGetContents(object): def test_multiple_lines_are_left_as_is(self, tmpfile): diff --git a/ceph/src/ceph-volume/ceph_volume/tests/functional/batch/tox.ini b/ceph/src/ceph-volume/ceph_volume/tests/functional/batch/tox.ini index 4c3af6811..db9652436 100644 --- a/ceph/src/ceph-volume/ceph_volume/tests/functional/batch/tox.ini +++ b/ceph/src/ceph-volume/ceph_volume/tests/functional/batch/tox.ini @@ -48,20 +48,20 @@ commands= # prepare nodes for testing with testinfra ansible-playbook -vv -i {changedir}/hosts {envdir}/tmp/ceph-ansible/tests/functional/setup.yml - # test cluster state using ceph-ansible tests - testinfra -n 4 --sudo -v --connection=ansible 
--ansible-inventory={changedir}/hosts {envdir}/tmp/ceph-ansible/tests/functional/tests + # test cluster state using testinfra + py.test -n 4 --sudo -v --connection=ansible --ansible-inventory={changedir}/hosts {toxinidir}/../tests # reboot all vms - attempt bash {toxinidir}/../scripts/vagrant_reload.sh {env:VAGRANT_UP_FLAGS:"--no-provision"} {posargs:--provider=virtualbox} # retest to ensure cluster came back up correctly after rebooting - testinfra -n 4 --sudo -v --connection=ansible --ansible-inventory={changedir}/hosts {envdir}/tmp/ceph-ansible/tests/functional/tests + py.test -n 4 --sudo -v --connection=ansible --ansible-inventory={changedir}/hosts {toxinidir}/../tests # destroy an OSD, zap it's device and recreate it using it's ID ansible-playbook -vv -i {changedir}/hosts {changedir}/test.yml # retest to ensure cluster came back up correctly - testinfra -n 4 --sudo -v --connection=ansible --ansible-inventory={changedir}/hosts {envdir}/tmp/ceph-ansible/tests/functional/tests + py.test -n 4 --sudo -v --connection=ansible --ansible-inventory={changedir}/hosts {toxinidir}/../tests # test zap OSDs by ID ansible-playbook -vv -i {changedir}/hosts {changedir}/test_zap.yml diff --git a/ceph/src/ceph-volume/ceph_volume/tests/functional/lvm/centos7/bluestore/dmcrypt/test.yml b/ceph/src/ceph-volume/ceph_volume/tests/functional/lvm/centos7/bluestore/dmcrypt/test.yml index 8caa1ce38..bbd5b45d3 100644 --- a/ceph/src/ceph-volume/ceph_volume/tests/functional/lvm/centos7/bluestore/dmcrypt/test.yml +++ b/ceph/src/ceph-volume/ceph_volume/tests/functional/lvm/centos7/bluestore/dmcrypt/test.yml @@ -93,6 +93,11 @@ environment: CEPH_VOLUME_DEBUG: 1 + - name: node inventory + command: "ceph-volume inventory" + environment: + CEPH_VOLUME_DEBUG: 1 + - name: list all OSDs command: "ceph-volume lvm list" environment: diff --git a/ceph/src/ceph-volume/ceph_volume/tests/functional/lvm/centos7/filestore/dmcrypt/test.yml b/ceph/src/ceph-volume/ceph_volume/tests/functional/lvm/centos7/filestore/dmcrypt/test.yml index 17b74d524..91c9a1b84 100644 --- a/ceph/src/ceph-volume/ceph_volume/tests/functional/lvm/centos7/filestore/dmcrypt/test.yml +++ b/ceph/src/ceph-volume/ceph_volume/tests/functional/lvm/centos7/filestore/dmcrypt/test.yml @@ -97,6 +97,11 @@ environment: CEPH_VOLUME_DEBUG: 1 + - name: node inventory + command: "ceph-volume inventory" + environment: + CEPH_VOLUME_DEBUG: 1 + - name: list all OSDs command: "ceph-volume lvm list" environment: diff --git a/ceph/src/ceph-volume/ceph_volume/tests/functional/lvm/playbooks/test_bluestore.yml b/ceph/src/ceph-volume/ceph_volume/tests/functional/lvm/playbooks/test_bluestore.yml index 353df127c..1e9b8c3e0 100644 --- a/ceph/src/ceph-volume/ceph_volume/tests/functional/lvm/playbooks/test_bluestore.yml +++ b/ceph/src/ceph-volume/ceph_volume/tests/functional/lvm/playbooks/test_bluestore.yml @@ -98,6 +98,11 @@ environment: CEPH_VOLUME_DEBUG: 1 + - name: node inventory + command: "ceph-volume inventory" + environment: + CEPH_VOLUME_DEBUG: 1 + - name: list all OSDs command: "ceph-volume lvm list" environment: diff --git a/ceph/src/ceph-volume/ceph_volume/tests/functional/lvm/playbooks/test_filestore.yml b/ceph/src/ceph-volume/ceph_volume/tests/functional/lvm/playbooks/test_filestore.yml index e896c41b0..4e43839e8 100644 --- a/ceph/src/ceph-volume/ceph_volume/tests/functional/lvm/playbooks/test_filestore.yml +++ b/ceph/src/ceph-volume/ceph_volume/tests/functional/lvm/playbooks/test_filestore.yml @@ -119,6 +119,11 @@ environment: CEPH_VOLUME_DEBUG: 1 + - name: node inventory + 
command: "ceph-volume inventory" + environment: + CEPH_VOLUME_DEBUG: 1 + - name: list all OSDs command: "ceph-volume lvm list" environment: diff --git a/ceph/src/ceph-volume/ceph_volume/tests/functional/lvm/tox.ini b/ceph/src/ceph-volume/ceph_volume/tests/functional/lvm/tox.ini index d2432c8a8..d61c23719 100644 --- a/ceph/src/ceph-volume/ceph_volume/tests/functional/lvm/tox.ini +++ b/ceph/src/ceph-volume/ceph_volume/tests/functional/lvm/tox.ini @@ -56,19 +56,19 @@ commands= # prepare nodes for testing with testinfra ansible-playbook -vv -i {changedir}/hosts {envdir}/tmp/ceph-ansible/tests/functional/setup.yml - # test cluster state using ceph-ansible tests - testinfra -n 4 --sudo -v --connection=ansible --ansible-inventory={changedir}/hosts {envdir}/tmp/ceph-ansible/tests/functional/tests + # test cluster state using testinfra + py.test -n 4 --sudo -v --connection=ansible --ansible-inventory={changedir}/hosts {toxinidir}/../tests # reboot all vms - attempt bash {toxinidir}/../scripts/vagrant_reload.sh {env:VAGRANT_UP_FLAGS:"--no-provision"} {posargs:--provider=virtualbox} # retest to ensure cluster came back up correctly after rebooting - testinfra -n 4 --sudo -v --connection=ansible --ansible-inventory={changedir}/hosts {envdir}/tmp/ceph-ansible/tests/functional/tests + py.test -n 4 --sudo -v --connection=ansible --ansible-inventory={changedir}/hosts {toxinidir}/../tests # destroy an OSD, zap it's device and recreate it using it's ID ansible-playbook -vv -i {changedir}/hosts {changedir}/test.yml # retest to ensure cluster came back up correctly - testinfra -n 4 --sudo -v --connection=ansible --ansible-inventory={changedir}/hosts {envdir}/tmp/ceph-ansible/tests/functional/tests + py.test -n 4 --sudo -v --connection=ansible --ansible-inventory={changedir}/hosts {toxinidir}/../tests vagrant destroy --force diff --git a/ceph/src/ceph-volume/ceph_volume/tests/functional/lvm/xenial/bluestore/dmcrypt/test.yml b/ceph/src/ceph-volume/ceph_volume/tests/functional/lvm/xenial/bluestore/dmcrypt/test.yml index 3e032e202..27290d933 100644 --- a/ceph/src/ceph-volume/ceph_volume/tests/functional/lvm/xenial/bluestore/dmcrypt/test.yml +++ b/ceph/src/ceph-volume/ceph_volume/tests/functional/lvm/xenial/bluestore/dmcrypt/test.yml @@ -93,6 +93,11 @@ environment: CEPH_VOLUME_DEBUG: 1 + - name: node inventory + command: "ceph-volume inventory" + environment: + CEPH_VOLUME_DEBUG: 1 + - name: list all OSDs command: "ceph-volume lvm list" environment: diff --git a/ceph/src/ceph-volume/ceph_volume/tests/functional/lvm/xenial/filestore/dmcrypt/test.yml b/ceph/src/ceph-volume/ceph_volume/tests/functional/lvm/xenial/filestore/dmcrypt/test.yml index 17b74d524..91c9a1b84 100644 --- a/ceph/src/ceph-volume/ceph_volume/tests/functional/lvm/xenial/filestore/dmcrypt/test.yml +++ b/ceph/src/ceph-volume/ceph_volume/tests/functional/lvm/xenial/filestore/dmcrypt/test.yml @@ -97,6 +97,11 @@ environment: CEPH_VOLUME_DEBUG: 1 + - name: node inventory + command: "ceph-volume inventory" + environment: + CEPH_VOLUME_DEBUG: 1 + - name: list all OSDs command: "ceph-volume lvm list" environment: diff --git a/ceph/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/dmcrypt-luks/test.yml b/ceph/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/dmcrypt-luks/test.yml index 24e2c0353..55ae7cc8e 100644 --- a/ceph/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/dmcrypt-luks/test.yml +++ b/ceph/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/dmcrypt-luks/test.yml @@ 
-4,28 +4,12 @@ become: yes tasks: - - name: list all OSD directories - find: - paths: /var/lib/ceph/osd - file_type: directory - register: osd_paths - - - name: scan all OSD directories - command: "ceph-volume --cluster={{ cluster }} simple scan {{ item.path }}" + - name: scan all running OSDs + command: "ceph-volume --cluster={{ cluster }} simple scan" environment: CEPH_VOLUME_DEBUG: 1 - with_items: - - "{{ osd_paths.files }}" - - - name: list all OSD JSON files - find: - paths: /etc/ceph/osd - file_type: file - register: osd_configs - name: activate all scanned OSDs - command: "ceph-volume --cluster={{ cluster }} simple activate --file {{ item.path }}" + command: "ceph-volume --cluster={{ cluster }} simple activate --all" environment: CEPH_VOLUME_DEBUG: 1 - with_items: - - "{{ osd_configs.files }}" diff --git a/ceph/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/filestore/activate/test.yml b/ceph/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/filestore/activate/test.yml index 24e2c0353..0745f2571 100644 --- a/ceph/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/filestore/activate/test.yml +++ b/ceph/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/filestore/activate/test.yml @@ -24,8 +24,6 @@ register: osd_configs - name: activate all scanned OSDs - command: "ceph-volume --cluster={{ cluster }} simple activate --file {{ item.path }}" + command: "ceph-volume --cluster={{ cluster }} simple activate --all" environment: CEPH_VOLUME_DEBUG: 1 - with_items: - - "{{ osd_configs.files }}" diff --git a/ceph/src/ceph-volume/ceph_volume/tests/functional/simple/tox.ini b/ceph/src/ceph-volume/ceph_volume/tests/functional/simple/tox.ini index 391fb4ae9..2856d9ad0 100644 --- a/ceph/src/ceph-volume/ceph_volume/tests/functional/simple/tox.ini +++ b/ceph/src/ceph-volume/ceph_volume/tests/functional/simple/tox.ini @@ -46,8 +46,8 @@ commands= # prepare nodes for testing with testinfra ansible-playbook -vv -i {changedir}/hosts {envdir}/tmp/ceph-ansible/tests/functional/setup.yml - # test cluster state using ceph-ansible tests - testinfra -n 4 --sudo -v --connection=ansible --ansible-inventory={changedir}/hosts {envdir}/tmp/ceph-ansible/tests/functional/tests + # test cluster state testinfra + py.test -n 4 --sudo -v --connection=ansible --ansible-inventory={changedir}/hosts {toxinidir}/../tests # make ceph-volume simple take over all the OSDs that got deployed, disabling ceph-disk ansible-playbook -vv -i {changedir}/hosts {changedir}/test.yml @@ -59,6 +59,6 @@ commands= sleep 120 # retest to ensure cluster came back up correctly after rebooting - testinfra -n 4 --sudo -v --connection=ansible --ansible-inventory={changedir}/hosts {envdir}/tmp/ceph-ansible/tests/functional/tests + py.test -n 4 --sudo -v --connection=ansible --ansible-inventory={changedir}/hosts {toxinidir}/../tests vagrant destroy --force diff --git a/ceph/src/ceph-volume/ceph_volume/tests/functional/simple/xenial/filestore/activate/test.yml b/ceph/src/ceph-volume/ceph_volume/tests/functional/simple/xenial/filestore/activate/test.yml index 24e2c0353..55ae7cc8e 100644 --- a/ceph/src/ceph-volume/ceph_volume/tests/functional/simple/xenial/filestore/activate/test.yml +++ b/ceph/src/ceph-volume/ceph_volume/tests/functional/simple/xenial/filestore/activate/test.yml @@ -4,28 +4,12 @@ become: yes tasks: - - name: list all OSD directories - find: - paths: /var/lib/ceph/osd - file_type: directory - register: osd_paths - - - name: scan all OSD directories - command: "ceph-volume --cluster={{ cluster }} 
simple scan {{ item.path }}" + - name: scan all running OSDs + command: "ceph-volume --cluster={{ cluster }} simple scan" environment: CEPH_VOLUME_DEBUG: 1 - with_items: - - "{{ osd_paths.files }}" - - - name: list all OSD JSON files - find: - paths: /etc/ceph/osd - file_type: file - register: osd_configs - name: activate all scanned OSDs - command: "ceph-volume --cluster={{ cluster }} simple activate --file {{ item.path }}" + command: "ceph-volume --cluster={{ cluster }} simple activate --all" environment: CEPH_VOLUME_DEBUG: 1 - with_items: - - "{{ osd_configs.files }}" diff --git a/ceph/src/ceph-volume/ceph_volume/tests/functional/tests/__init__.py b/ceph/src/ceph-volume/ceph_volume/tests/functional/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/ceph/src/ceph-volume/ceph_volume/tests/functional/tests/conftest.py b/ceph/src/ceph-volume/ceph_volume/tests/functional/tests/conftest.py new file mode 100644 index 000000000..05c9aa521 --- /dev/null +++ b/ceph/src/ceph-volume/ceph_volume/tests/functional/tests/conftest.py @@ -0,0 +1,103 @@ +import pytest +import os + + +@pytest.fixture() +def node(host, request): + """ This fixture represents a single node in the ceph cluster. Using the + host.ansible fixture provided by testinfra it can access all the ansible + variables provided to it by the specific test scenario being ran. + + You must include this fixture on any tests that operate on specific type + of node because it contains the logic to manage which tests a node + should run. + """ + ansible_vars = host.ansible.get_variables() + # tox/jenkins/user will pass in this environment variable. we need to do it this way + # because testinfra does not collect and provide ansible config passed in + # from using --extra-vars + ceph_dev_branch = os.environ.get("CEPH_DEV_BRANCH", "master") + group_names = ansible_vars["group_names"] + num_osd_ports = 4 + if ceph_dev_branch in ['luminous', 'mimic']: + num_osd_ports = 2 + + # capture the initial/default state + test_is_applicable = False + for marker in request.node.iter_markers(): + if marker.name in group_names or marker.name == 'all': + test_is_applicable = True + break + # Check if any markers on the test method exist in the nodes group_names. + # If they do not, this test is not valid for the node being tested. + if not test_is_applicable: + reason = "%s: Not a valid test for node type: %s" % ( + request.function, group_names) + pytest.skip(reason) + + osd_ids = [] + osds = [] + cluster_address = "" + # I can assume eth1 because I know all the vagrant + # boxes we test with use that interface + address = host.interface("eth1").addresses[0] + subnet = ".".join(ansible_vars["public_network"].split(".")[0:-1]) + num_mons = len(ansible_vars["groups"]["mons"]) + num_osds = len(ansible_vars.get("devices", [])) + if not num_osds: + num_osds = len(ansible_vars.get("lvm_volumes", [])) + osds_per_device = ansible_vars.get("osds_per_device", 1) + num_osds = num_osds * osds_per_device + + # If number of devices doesn't map to number of OSDs, allow tests to define + # that custom number, defaulting it to ``num_devices`` + num_osds = ansible_vars.get('num_osds', num_osds) + cluster_name = ansible_vars.get("cluster", "ceph") + conf_path = "/etc/ceph/{}.conf".format(cluster_name) + if "osds" in group_names: + # I can assume eth2 because I know all the vagrant + # boxes we test with use that interface. OSDs are the only + # nodes that have this interface. 
+ cluster_address = host.interface("eth2").addresses[0] + cmd = host.run('sudo ls /var/lib/ceph/osd/ | sed "s/.*-//"') + if cmd.rc == 0: + osd_ids = cmd.stdout.rstrip("\n").split("\n") + osds = osd_ids + + data = dict( + address=address, + subnet=subnet, + vars=ansible_vars, + osd_ids=osd_ids, + num_mons=num_mons, + num_osds=num_osds, + num_osd_ports=num_osd_ports, + cluster_name=cluster_name, + conf_path=conf_path, + cluster_address=cluster_address, + osds=osds, + ) + return data + + +def pytest_collection_modifyitems(session, config, items): + for item in items: + test_path = item.location[0] + if "mon" in test_path: + item.add_marker(pytest.mark.mons) + elif "osd" in test_path: + item.add_marker(pytest.mark.osds) + elif "mds" in test_path: + item.add_marker(pytest.mark.mdss) + elif "mgr" in test_path: + item.add_marker(pytest.mark.mgrs) + elif "rbd-mirror" in test_path: + item.add_marker(pytest.mark.rbdmirrors) + elif "rgw" in test_path: + item.add_marker(pytest.mark.rgws) + elif "nfs" in test_path: + item.add_marker(pytest.mark.nfss) + elif "iscsi" in test_path: + item.add_marker(pytest.mark.iscsigws) + else: + item.add_marker(pytest.mark.all) diff --git a/ceph/src/ceph-volume/ceph_volume/tests/functional/tests/osd/__init__.py b/ceph/src/ceph-volume/ceph_volume/tests/functional/tests/osd/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/ceph/src/ceph-volume/ceph_volume/tests/functional/tests/osd/test_osds.py b/ceph/src/ceph-volume/ceph_volume/tests/functional/tests/osd/test_osds.py new file mode 100644 index 000000000..6d12babdb --- /dev/null +++ b/ceph/src/ceph-volume/ceph_volume/tests/functional/tests/osd/test_osds.py @@ -0,0 +1,60 @@ +import json + + +class TestOSDs(object): + + def test_ceph_osd_package_is_installed(self, node, host): + assert host.package("ceph-osd").is_installed + + def test_osds_listen_on_public_network(self, node, host): + # TODO: figure out way to paramaterize this test + nb_port = (node["num_osds"] * node["num_osd_ports"]) + assert host.check_output( + "netstat -lntp | grep ceph-osd | grep %s | wc -l" % (node["address"])) == str(nb_port) # noqa E501 + + def test_osds_listen_on_cluster_network(self, node, host): + # TODO: figure out way to paramaterize this test + nb_port = (node["num_osds"] * node["num_osd_ports"]) + assert host.check_output("netstat -lntp | grep ceph-osd | grep %s | wc -l" % # noqa E501 + (node["cluster_address"])) == str(nb_port) + + def test_osd_services_are_running(self, node, host): + # TODO: figure out way to paramaterize node['osds'] for this test + for osd in node["osds"]: + assert host.service("ceph-osd@%s" % osd).is_running + + def test_osd_are_mounted(self, node, host): + # TODO: figure out way to paramaterize node['osd_ids'] for this test + for osd_id in node["osd_ids"]: + osd_path = "/var/lib/ceph/osd/{cluster}-{osd_id}".format( + cluster=node["cluster_name"], + osd_id=osd_id, + ) + assert host.mount_point(osd_path).exists + + def test_ceph_volume_is_installed(self, node, host): + host.exists('ceph-volume') + + def test_ceph_volume_systemd_is_installed(self, node, host): + host.exists('ceph-volume-systemd') + + def _get_osd_id_from_host(self, node, osd_tree): + children = [] + for n in osd_tree['nodes']: + if n['name'] == node['vars']['inventory_hostname'] and n['type'] == 'host': # noqa E501 + children = n['children'] + return children + + def _get_nb_up_osds_from_ids(self, node, osd_tree): + nb_up = 0 + ids = self._get_osd_id_from_host(node, osd_tree) + for n in osd_tree['nodes']: + if n['id'] in ids and 
n['status'] == 'up': + nb_up += 1 + return nb_up + + def test_all_osds_are_up_and_in(self, node, host): + cmd = "sudo ceph --cluster={cluster} --connect-timeout 5 --keyring /var/lib/ceph/bootstrap-osd/{cluster}.keyring -n client.bootstrap-osd osd tree -f json".format( # noqa E501 + cluster=node["cluster_name"]) + output = json.loads(host.check_output(cmd)) + assert node["num_osds"] == self._get_nb_up_osds_from_ids(node, output) diff --git a/ceph/src/ceph-volume/ceph_volume/tests/systemd/test_systemctl.py b/ceph/src/ceph-volume/ceph_volume/tests/systemd/test_systemctl.py new file mode 100644 index 000000000..8eec4a3d4 --- /dev/null +++ b/ceph/src/ceph-volume/ceph_volume/tests/systemd/test_systemctl.py @@ -0,0 +1,21 @@ +import pytest +from ceph_volume.systemd import systemctl + +class TestSystemctl(object): + + @pytest.mark.parametrize("stdout,expected", [ + (['Id=ceph-osd@1.service', '', 'Id=ceph-osd@2.service'], ['1','2']), + (['Id=ceph-osd1.service',], []), + (['Id=ceph-osd@1'], ['1']), + ([], []), + ]) + def test_get_running_osd_ids(self, stub_call, stdout, expected): + stub_call((stdout, [], 0)) + osd_ids = systemctl.get_running_osd_ids() + assert osd_ids == expected + + def test_returns_empty_list_on_nonzero_return_code(self, stub_call): + stdout = ['Id=ceph-osd@1.service', '', 'Id=ceph-osd@2.service'] + stub_call((stdout, [], 1)) + osd_ids = systemctl.get_running_osd_ids() + assert osd_ids == [] diff --git a/ceph/src/ceph-volume/ceph_volume/tests/util/test_device.py b/ceph/src/ceph-volume/ceph_volume/tests/util/test_device.py index 8be5f8e4b..00cb5a885 100644 --- a/ceph/src/ceph-volume/ceph_volume/tests/util/test_device.py +++ b/ceph/src/ceph-volume/ceph_volume/tests/util/test_device.py @@ -43,6 +43,42 @@ class TestDevice(object): disk = device.Device("/dev/sda") assert disk.is_device is True + def test_device_is_rotational(self, device_info, pvolumes): + data = {"/dev/sda": {"rotational": "1"}} + lsblk = {"TYPE": "device"} + device_info(devices=data, lsblk=lsblk) + disk = device.Device("/dev/sda") + assert disk.rotational + + def test_device_is_not_rotational(self, device_info, pvolumes): + data = {"/dev/sda": {"rotational": "0"}} + lsblk = {"TYPE": "device"} + device_info(devices=data, lsblk=lsblk) + disk = device.Device("/dev/sda") + assert not disk.rotational + + def test_device_is_rotational_lsblk(self, device_info, pvolumes): + data = {"/dev/sda": {"foo": "bar"}} + lsblk = {"TYPE": "device", "ROTA": "1"} + device_info(devices=data, lsblk=lsblk) + disk = device.Device("/dev/sda") + assert disk.rotational + + def test_device_is_not_rotational_lsblk(self, device_info, pvolumes): + data = {"/dev/sda": {"rotational": "0"}} + lsblk = {"TYPE": "device", "ROTA": "0"} + device_info(devices=data, lsblk=lsblk) + disk = device.Device("/dev/sda") + assert not disk.rotational + + def test_device_is_rotational_defaults_true(self, device_info, pvolumes): + # rotational will default true if no info from sys_api or lsblk is found + data = {"/dev/sda": {"foo": "bar"}} + lsblk = {"TYPE": "device", "foo": "bar"} + device_info(devices=data, lsblk=lsblk) + disk = device.Device("/dev/sda") + assert disk.rotational + def test_disk_is_device(self, device_info, pvolumes): data = {"/dev/sda": {"foo": "bar"}} lsblk = {"TYPE": "disk"} diff --git a/ceph/src/ceph-volume/ceph_volume/tests/util/test_disk.py b/ceph/src/ceph-volume/ceph_volume/tests/util/test_disk.py index e40c982d1..3fae20094 100644 --- a/ceph/src/ceph-volume/ceph_volume/tests/util/test_disk.py +++ 
b/ceph/src/ceph-volume/ceph_volume/tests/util/test_disk.py @@ -267,28 +267,6 @@ class TestGetDevices(object): assert len(result) == 1 assert result == [ceph_data_path] - def test_sda1_partition(self, tmpfile, tmpdir): - block_path, dev_path, mapper_path = self.setup_paths(tmpdir) - block_sda_path = os.path.join(block_path, 'sda') - block_sda1_path = os.path.join(block_sda_path, 'sda1') - block_sda1_holders = os.path.join(block_sda1_path, 'holders') - dev_sda_path = os.path.join(dev_path, 'sda') - dev_sda1_path = os.path.join(dev_path, 'sda1') - os.makedirs(block_sda_path) - os.makedirs(block_sda1_path) - os.makedirs(dev_sda1_path) - os.makedirs(block_sda1_holders) - os.makedirs(dev_sda_path) - tmpfile('size', '1024', directory=block_sda_path) - tmpfile('partition', '1', directory=block_sda1_path) - result = disk.get_devices( - _sys_block_path=block_path, - _dev_path=dev_path, - _mapper_path=mapper_path) - assert dev_sda_path in list(result.keys()) - assert '/dev/sda1' in list(result.keys()) - assert result['/dev/sda1']['holders'] == [] - def test_sda_size(self, tmpfile, tmpdir): block_path, dev_path, mapper_path = self.setup_paths(tmpdir) block_sda_path = os.path.join(block_path, 'sda') diff --git a/ceph/src/ceph-volume/ceph_volume/util/device.py b/ceph/src/ceph-volume/ceph_volume/util/device.py index 06f90cd37..29a01effa 100644 --- a/ceph/src/ceph-volume/ceph_volume/util/device.py +++ b/ceph/src/ceph-volume/ceph_volume/util/device.py @@ -110,6 +110,14 @@ class Device(object): if not sys_info.devices: sys_info.devices = disk.get_devices() self.sys_api = sys_info.devices.get(self.abspath, {}) + if not self.sys_api: + # if no device was found check if we are a partition + partname = self.abspath.split('/')[-1] + for device, info in sys_info.devices.items(): + part = info['partitions'].get(partname, {}) + if part: + self.sys_api = part + break # start with lvm since it can use an absolute or relative path lv = lvm.get_lv_from_argument(self.path) @@ -257,7 +265,12 @@ class Device(object): @property def rotational(self): - return self.sys_api['rotational'] == '1' + rotational = self.sys_api.get('rotational') + if rotational is None: + # fall back to lsblk if not found in sys_api + # default to '1' if no value is found with lsblk either + rotational = self.disk_api.get('ROTA', '1') + return rotational == '1' @property def model(self): diff --git a/ceph/src/ceph-volume/ceph_volume/util/disk.py b/ceph/src/ceph-volume/ceph_volume/util/disk.py index c85d3be9a..da6411329 100644 --- a/ceph/src/ceph-volume/ceph_volume/util/disk.py +++ b/ceph/src/ceph-volume/ceph_volume/util/disk.py @@ -815,9 +815,5 @@ def get_devices(_sys_block_path='/sys/block', _dev_path='/dev', _mapper_path='/d metadata['path'] = diskname metadata['locked'] = is_locked_raw_device(metadata['path']) - for part_name, part_metadata in metadata['partitions'].items(): - part_abspath = '/dev/%s' % part_name - device_facts[part_abspath] = part_metadata - device_facts[diskname] = metadata return device_facts diff --git a/ceph/src/ceph-volume/tox.ini b/ceph/src/ceph-volume/tox.ini index 514d208fa..fce465def 100644 --- a/ceph/src/ceph-volume/tox.ini +++ b/ceph/src/ceph-volume/tox.ini @@ -4,7 +4,7 @@ envlist = py27, py35, py36, flake8 [testenv] deps= pytest -commands=py.test -v {posargs:ceph_volume/tests} +commands=py.test -v {posargs:ceph_volume/tests} --ignore=ceph_volume/tests/functional [testenv:flake8] deps=flake8 diff --git a/ceph/src/ceph.in b/ceph/src/ceph.in index 7c1eda2c0..bde104763 100755 --- a/ceph/src/ceph.in +++ 
b/ceph/src/ceph.in @@ -21,7 +21,9 @@ Foundation. See file COPYING. from __future__ import print_function import codecs +import grp import os +import pwd import sys import platform @@ -270,7 +272,10 @@ def parse_cmdargs(args=None, target=''): help='input file, or "-" for stdin') parser.add_argument('-o', '--out-file', dest='output_file', help='output file, or "-" for stdout') - + parser.add_argument('--setuser', dest='setuser', + help='set user file permission') + parser.add_argument('--setgroup', dest='setgroup', + help='set group file permission') parser.add_argument('--id', '--user', dest='client_id', help='client id for authentication') parser.add_argument('--name', '-n', dest='client_name', @@ -990,6 +995,20 @@ def main(): except Exception as e: print('Can\'t open output file {0}: {1}'.format(parsed_args.output_file, e), file=sys.stderr) return 1 + if parsed_args.setuser: + try: + ownerid = pwd.getpwnam(parsed_args.setuser).pw_uid + os.fchown(outf.fileno(), ownerid, -1) + except OSError as e: + print('Failed to change user ownership of {0} to {1}: {2}'.format(outf, parsed_args.setuser, e)) + return 1 + if parsed_args.setgroup: + try: + groupid = grp.getgrnam(parsed_args.setgroup).gr_gid + os.fchown(outf.fileno(), -1, groupid) + except OSError as e: + print('Failed to change group ownership of {0} to {1}: {2}'.format(outf, parsed_args.setgroup, e)) + return 1 # -s behaves like a command (ceph status). if parsed_args.status: diff --git a/ceph/src/client/Client.cc b/ceph/src/client/Client.cc index 9f78b24a5..0b240cbf7 100644 --- a/ceph/src/client/Client.cc +++ b/ceph/src/client/Client.cc @@ -909,9 +909,9 @@ Inode * Client::add_update_inode(InodeStat *st, utime_t from, return in; // as with readdir returning indoes in different snaprealms (no caps!) 
if (in->snapid == CEPH_NOSNAP) { - add_update_cap(in, session, st->cap.cap_id, st->cap.caps, st->cap.seq, - st->cap.mseq, inodeno_t(st->cap.realm), st->cap.flags, - request_perms); + add_update_cap(in, session, st->cap.cap_id, st->cap.caps, st->cap.wanted, + st->cap.seq, st->cap.mseq, inodeno_t(st->cap.realm), + st->cap.flags, request_perms); if (in->auth_cap && in->auth_cap->session == session) { in->max_size = st->max_size; in->rstat = st->rstat; @@ -2087,9 +2087,11 @@ void Client::handle_client_session(MClientSession *m) case CEPH_SESSION_RENEWCAPS: if (session->cap_renew_seq == m->get_seq()) { + bool was_stale = ceph_clock_now() >= session->cap_ttl; session->cap_ttl = session->last_cap_renew_request + mdsmap->get_session_timeout(); - wake_inode_waiters(session); + if (was_stale) + wake_up_session_caps(session, false); } break; @@ -2106,6 +2108,14 @@ void Client::handle_client_session(MClientSession *m) break; case CEPH_SESSION_FLUSHMSG: + /* flush cap release */ + { + auto& m = session->release; + if (m) { + session->con->send_message(std::move(m)); + m = nullptr; + } + } session->con->send_message(new MClientSession(CEPH_SESSION_FLUSHMSG_ACK, m->get_seq())); break; @@ -2703,8 +2713,7 @@ void Client::handle_mds_map(MMDSMap* m) kick_requests(session); kick_flushing_caps(session); signal_context_list(session->waiting_for_open); - kick_maxsize_requests(session); - wake_inode_waiters(session); + wake_up_session_caps(session, true); } connect_mds_targets(mds); } else if (newstate == MDSMap::STATE_NULL && @@ -3255,10 +3264,8 @@ int Client::get_caps(Inode *in, int need, int want, int *phave, loff_t endoff) return ret; continue; } - if ((mds_wanted & file_wanted) == - (file_wanted & (CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR))) { + if (!(file_wanted & ~mds_wanted)) in->flags &= ~I_CAP_DROPPED; - } } if (waitfor_caps) @@ -3424,23 +3431,30 @@ void Client::check_caps(Inode *in, unsigned flags) unsigned used = get_caps_used(in); unsigned cap_used; - if (in->is_dir() && (in->flags & I_COMPLETE)) { - // we do this here because we don't want to drop to Fs (and then - // drop the Fs if we do a create!) if that alone makes us send lookups - // to the MDS. Doing it in in->caps_wanted() has knock-on effects elsewhere - wanted |= CEPH_CAP_FILE_EXCL; - } - int implemented; int issued = in->caps_issued(&implemented); int revoking = implemented & ~issued; int retain = wanted | used | CEPH_CAP_PIN; - if (!unmounting) { - if (wanted) + if (!unmounting && in->nlink > 0) { + if (wanted) { retain |= CEPH_CAP_ANY; - else + } else if (in->is_dir() && + (issued & CEPH_CAP_FILE_SHARED) && + (in->flags & I_COMPLETE)) { + // we do this here because we don't want to drop to Fs (and then + // drop the Fs if we do a create!) if that alone makes us send lookups + // to the MDS. Doing it in in->caps_wanted() has knock-on effects elsewhere + wanted = CEPH_CAP_ANY_SHARED | CEPH_CAP_FILE_EXCL; + retain |= wanted; + } else { retain |= CEPH_CAP_ANY_SHARED; + // keep RD only if we didn't have the file open RW, + // because then the mds would revoke it anyway to + // journal max_size=0. + if (in->max_size == 0) + retain |= CEPH_CAP_ANY_RD; + } } ldout(cct, 10) << "check_caps on " << *in @@ -3520,9 +3534,8 @@ void Client::check_caps(Inode *in, unsigned flags) if (!revoking && unmounting && (cap_used == 0)) goto ack; - if (wanted == cap->wanted && // mds knows what we want. 
- ((cap->issued & ~retain) == 0) &&// and we don't have anything we wouldn't like - !in->dirty_caps) // and we have no dirty caps + if ((cap->issued & ~retain) == 0 && // and we don't have anything we wouldn't like + !in->dirty_caps) // and we have no dirty caps continue; if (now < in->hold_caps_until) { @@ -3743,12 +3756,26 @@ void Client::signal_context_list(list& ls) } } -void Client::wake_inode_waiters(MetaSession *s) +void Client::wake_up_session_caps(MetaSession *s, bool reconnect) { xlist::iterator iter = s->caps.begin(); while (!iter.end()){ - signal_cond_list((*iter)->inode->waitfor_caps); + auto cap = *iter; + auto in = cap->inode; ++iter; + if (reconnect) { + in->requested_max_size = 0; + in->wanted_max_size = 0; + } else { + if (cap->gen < s->cap_gen) { + // mds did not re-issue stale cap. + cap->issued = cap->implemented = CEPH_CAP_PIN; + // make sure mds knows what we want. + if (in->caps_file_wanted() & ~cap->wanted) + in->flags |= I_CAP_DROPPED; + } + } + signal_cond_list(in->waitfor_caps); } } @@ -3912,13 +3939,16 @@ void Client::check_cap_issue(Inode *in, Cap *cap, unsigned issued) } void Client::add_update_cap(Inode *in, MetaSession *mds_session, uint64_t cap_id, - unsigned issued, unsigned seq, unsigned mseq, inodeno_t realm, - int flags, const UserPerm& cap_perms) + unsigned issued, unsigned wanted, unsigned seq, unsigned mseq, + inodeno_t realm, int flags, const UserPerm& cap_perms) { Cap *cap = 0; mds_rank_t mds = mds_session->mds_num; - if (in->caps.count(mds)) { - cap = in->caps[mds]; + auto it = in->caps.find(mds); + if (it != in->caps.end()) { + cap = it->second; + if (cap->gen < mds_session->cap_gen) + cap->issued = cap->implemented = CEPH_CAP_PIN; /* * auth mds of the inode changed. we received the cap export @@ -3971,15 +4001,17 @@ void Client::add_update_cap(Inode *in, MetaSession *mds_session, uint64_t cap_id cap->cap_id = cap_id; cap->issued = issued; cap->implemented |= issued; + if (ceph_seq_cmp(mseq, cap->mseq) > 0) + cap->wanted = wanted; + else + cap->wanted |= wanted; cap->seq = seq; cap->issue_seq = seq; cap->mseq = mseq; cap->gen = mds_session->cap_gen; cap->latest_perms = cap_perms; ldout(cct, 10) << "add_update_cap issued " << ccap_string(old_caps) << " -> " << ccap_string(cap->issued) - << " from mds." << mds - << " on " << *in - << dendl; + << " from mds." << mds << " on " << *in << dendl; if ((issued & ~old_caps) && in->auth_cap == cap) { // non-auth MDS is revoking the newly grant caps ? 
@@ -4055,10 +4087,10 @@ void Client::remove_session_caps(MetaSession *s) dirty_caps = in->dirty_caps | in->flushing_caps; in->wanted_max_size = 0; in->requested_max_size = 0; - in->flags |= I_CAP_DROPPED; } + if (cap->wanted | cap->issued) + in->flags |= I_CAP_DROPPED; remove_cap(cap, false); - signal_cond_list(in->waitfor_caps); if (cap_snaps) { InodeRef tmp_ref(in); in->cap_snaps.clear(); @@ -4073,6 +4105,7 @@ void Client::remove_session_caps(MetaSession *s) in->mark_caps_clean(); put_inode(in); } + signal_cond_list(in->waitfor_caps); } s->flushing_caps_tids.clear(); sync_cond.Signal(); @@ -4425,17 +4458,6 @@ void Client::early_kick_flushing_caps(MetaSession *session) } } -void Client::kick_maxsize_requests(MetaSession *session) -{ - xlist::iterator iter = session->caps.begin(); - while (!iter.end()){ - (*iter)->inode->requested_max_size = 0; - (*iter)->inode->wanted_max_size = 0; - signal_cond_list((*iter)->inode->waitfor_caps); - ++iter; - } -} - void SnapRealm::build_snap_context() { set snaps; @@ -4840,8 +4862,8 @@ void Client::handle_cap_import(MetaSession *session, Inode *in, MClientCaps *m) update_snap_trace(m->snapbl, &realm); add_update_cap(in, session, m->get_cap_id(), - m->get_caps(), m->get_seq(), m->get_mseq(), m->get_realm(), - CEPH_CAP_FLAG_AUTH, cap_perms); + m->get_caps(), m->get_wanted(), m->get_seq(), m->get_mseq(), + m->get_realm(), CEPH_CAP_FLAG_AUTH, cap_perms); if (cap && cap->cap_id == m->peer.cap_id) { remove_cap(cap, (m->peer.flags & CEPH_CAP_FLAG_RELEASE)); @@ -4870,10 +4892,9 @@ void Client::handle_cap_export(MetaSession *session, Inode *in, MClientCaps *m) if (in->caps.count(mds)) cap = in->caps[mds]; - const mds_rank_t peer_mds = mds_rank_t(m->peer.mds); - if (cap && cap->cap_id == m->get_cap_id()) { if (m->peer.cap_id) { + const mds_rank_t peer_mds = mds_rank_t(m->peer.mds); MetaSession *tsession = _get_or_open_mds_session(peer_mds); if (in->caps.count(peer_mds)) { Cap *tcap = in->caps[peer_mds]; @@ -4890,13 +4911,13 @@ void Client::handle_cap_export(MetaSession *session, Inode *in, MClientCaps *m) adjust_session_flushing_caps(in, session, tsession); } } else { - add_update_cap(in, tsession, m->peer.cap_id, cap->issued, + add_update_cap(in, tsession, m->peer.cap_id, cap->issued, 0, m->peer.seq - 1, m->peer.mseq, (uint64_t)-1, cap == in->auth_cap ? CEPH_CAP_FLAG_AUTH : 0, cap->latest_perms); } } else { - if (cap == in->auth_cap) + if (cap->wanted | cap->issued) in->flags |= I_CAP_DROPPED; } @@ -5106,15 +5127,21 @@ void Client::handle_cap_grant(MetaSession *session, Inode *in, Cap *cap, MClient int used = get_caps_used(in); int wanted = in->caps_wanted(); - const int old_caps = cap->issued; - const int new_caps = m->get_caps(); + const unsigned new_caps = m->get_caps(); + const bool was_stale = session->cap_gen > cap->gen; ldout(cct, 5) << "handle_cap_grant on in " << m->get_ino() << " mds." << mds << " seq " << m->get_seq() << " caps now " << ccap_string(new_caps) - << " was " << ccap_string(old_caps) << dendl; + << " was " << ccap_string(cap->issued) + << (was_stale ? 
"" : " (stale)") << dendl; + + if (was_stale) + cap->issued = cap->implemented = CEPH_CAP_PIN; cap->seq = m->get_seq(); cap->gen = session->cap_gen; + check_cap_issue(in, cap, new_caps); + // update inode int issued; in->caps_issued(&issued); @@ -5181,13 +5208,21 @@ void Client::handle_cap_grant(MetaSession *session, Inode *in, Cap *cap, MClient } bool check = false; - if (m->get_op() == CEPH_CAP_OP_IMPORT && m->get_wanted() != wanted) + if ((was_stale || m->get_op() == CEPH_CAP_OP_IMPORT) && + (wanted & ~(cap->wanted | new_caps))) { + // If mds is importing cap, prior cap messages that update 'wanted' + // may get dropped by mds (migrate seq mismatch). + // + // We don't send cap message to update 'wanted' if what we want are + // already issued. If mds revokes caps, cap message that releases caps + // also tells mds what we want. But if caps got revoked by mds forcedly + // (session stale). We may haven't told mds what we want. check = true; + } - check_cap_issue(in, cap, new_caps); // update caps - int revoked = old_caps & ~new_caps; + auto revoked = cap->issued & ~new_caps; if (revoked) { ldout(cct, 10) << " revocation of " << ccap_string(revoked) << dendl; cap->issued = new_caps; @@ -5209,10 +5244,10 @@ void Client::handle_cap_grant(MetaSession *session, Inode *in, Cap *cap, MClient cap->wanted = 0; // don't let check_caps skip sending a response to MDS check = true; } - } else if (old_caps == new_caps) { - ldout(cct, 10) << " caps unchanged at " << ccap_string(old_caps) << dendl; + } else if (cap->issued == new_caps) { + ldout(cct, 10) << " caps unchanged at " << ccap_string(cap->issued) << dendl; } else { - ldout(cct, 10) << " grant, new caps are " << ccap_string(new_caps & ~old_caps) << dendl; + ldout(cct, 10) << " grant, new caps are " << ccap_string(new_caps & ~cap->issued) << dendl; cap->issued = new_caps; cap->implemented |= new_caps; diff --git a/ceph/src/client/Client.h b/ceph/src/client/Client.h index dd9e70ace..4a304466a 100644 --- a/ceph/src/client/Client.h +++ b/ceph/src/client/Client.h @@ -483,7 +483,7 @@ protected: Mutex client_lock; // helpers - void wake_inode_waiters(MetaSession *s); + void wake_up_session_caps(MetaSession *s, bool reconnect); void wait_on_context_list(list& ls); void signal_context_list(list& ls); @@ -630,8 +630,8 @@ protected: // file caps void check_cap_issue(Inode *in, Cap *cap, unsigned issued); void add_update_cap(Inode *in, MetaSession *session, uint64_t cap_id, - unsigned issued, unsigned seq, unsigned mseq, inodeno_t realm, - int flags, const UserPerm& perms); + unsigned issued, unsigned wanted, unsigned seq, unsigned mseq, + inodeno_t realm, int flags, const UserPerm& perms); void remove_cap(Cap *cap, bool queue_release); void remove_all_caps(Inode *in); void remove_session_caps(MetaSession *session); @@ -641,7 +641,6 @@ protected: void flush_caps(Inode *in, MetaSession *session, bool sync=false); void kick_flushing_caps(MetaSession *session); void early_kick_flushing_caps(MetaSession *session); - void kick_maxsize_requests(MetaSession *session); int get_caps(Inode *in, int need, int want, int *have, loff_t endoff); int get_caps_used(Inode *in); diff --git a/ceph/src/common/AsyncReserver.h b/ceph/src/common/AsyncReserver.h index d5c7a852d..6695b7d29 100644 --- a/ceph/src/common/AsyncReserver.h +++ b/ceph/src/common/AsyncReserver.h @@ -143,6 +143,78 @@ public: do_queues(); } + /** + * Update the priority of a reservation + * + * Note, on_reserved may be called following update_priority. Thus, + * the callback must be safe in that case. 
Callback will be called + * with no locks held. cancel_reservation must be called to release the + * reservation slot. + * + * Cases + * 1. Item is queued, re-queue with new priority + * 2. Item is queued, re-queue and preempt if new priority higher than an in progress item + * 3. Item is in progress, just adjust priority if no higher priority waiting + * 4. Item is in progress, adjust priority if higher priority items waiting preempt item + * + */ + void update_priority(T item, unsigned newprio) { + Mutex::Locker l(lock); + auto i = queue_pointers.find(item); + if (i != queue_pointers.end()) { + unsigned prio = i->second.first; + if (newprio == prio) + return; + Reservation r = *i->second.second; + rdout(10) << __func__ << " update " << r << " (was queued)" << dendl; + // Like cancel_reservation() without preempting + queues[prio].erase(i->second.second); + if (queues[prio].empty()) { + queues.erase(prio); + } + queue_pointers.erase(i); + + // Like request_reservation() to re-queue it but with new priority + assert(!queue_pointers.count(item) && + !in_progress.count(item)); + r.prio = newprio; + queues[newprio].push_back(r); + queue_pointers.insert(make_pair(item, + make_pair(newprio,--(queues[newprio]).end()))); + } else { + auto p = in_progress.find(item); + if (p != in_progress.end()) { + if (p->second.prio == newprio) + return; + rdout(10) << __func__ << " update " << p->second + << " (in progress)" << dendl; + // We want to preempt if priority goes down + // and smaller then highest priority waiting + if (p->second.preempt) { + if (newprio < p->second.prio && !queues.empty()) { + // choose highest priority queue + auto it = queues.end(); + --it; + assert(!it->second.empty()); + if (it->first > newprio) { + rdout(10) << __func__ << " update " << p->second + << " lowered priority let do_queues() preempt it" << dendl; + } + } + preempt_by_prio.erase(make_pair(p->second.prio, p->second.item)); + p->second.prio = newprio; + preempt_by_prio.insert(make_pair(p->second.prio, p->second.item)); + } else { + p->second.prio = newprio; + } + } else { + rdout(10) << __func__ << " update " << item << " (not found)" << dendl; + } + } + do_queues(); + return; + } + void dump(Formatter *f) { Mutex::Locker l(lock); _dump(f); diff --git a/ceph/src/common/ceph_crypto.cc b/ceph/src/common/ceph_crypto.cc index a0aa8767e..77454fb60 100644 --- a/ceph/src/common/ceph_crypto.cc +++ b/ceph/src/common/ceph_crypto.cc @@ -14,6 +14,7 @@ #include "common/config.h" #include "ceph_crypto.h" +#include "include/scope_guard.h" #ifdef USE_CRYPTOPP void ceph::crypto::init(CephContext *cct) @@ -44,6 +45,124 @@ static uint32_t crypto_refs = 0; static NSSInitContext *crypto_context = NULL; static pid_t crypto_init_pid = 0; +PK11SymKey *ceph::crypto::PK11_ImportSymKey_FIPS( + PK11SlotInfo * const slot, + const CK_MECHANISM_TYPE type, + const PK11Origin origin, + const CK_ATTRIBUTE_TYPE operation, + SECItem * const raw_key, + void * const wincx) +{ + if (PK11_IsFIPS() == PR_FALSE) { + // This isn't the FIPS mode, and thus PK11_ImportSymKey is available. Let's + // make use of it to avoid overhead related to e.g. creating extra PK11Ctx. + PK11SymKey *ret_key = nullptr; + ret_key = PK11_ImportSymKey(slot, type, origin, operation, raw_key, wincx); + + return ret_key; + } + + ceph_assert_always(wincx == nullptr); + + std::vector wrapped_key; + + // getting 306 on my system which is CKM_DES3_ECB. + const CK_MECHANISM_TYPE wrap_mechanism = PK11_GetBestWrapMechanism(slot); + + // Generate a wrapping key. 
It will be used exactly twice over the scope: + // * to encrypt raw_key giving wrapped_key, + // * to decrypt wrapped_key in the internals of PK11_UnwrapSymKey(). + PK11SymKey * const wrapping_key = PK11_KeyGen( + slot, + wrap_mechanism, + nullptr, + PK11_GetBestKeyLength(slot, wrap_mechanism), + nullptr); + if (wrapping_key == nullptr) { + return nullptr; + } + auto wk_guard = make_scope_guard([wrapping_key] { + PK11_FreeSymKey(wrapping_key); + }); + + // Prepare a PK11 context for the raw_key -> wrapped_key encryption. + SECItem tmp_sec_item; + ::memset(&tmp_sec_item, 0, sizeof(tmp_sec_item)); + PK11Context * const wrap_key_crypt_context = PK11_CreateContextBySymKey( + wrap_mechanism, + CKA_ENCRYPT, + wrapping_key, + &tmp_sec_item); + if (wrap_key_crypt_context == nullptr) { + return nullptr; + } + auto wkcc_guard = make_scope_guard([wrap_key_crypt_context] { + PK11_DestroyContext(wrap_key_crypt_context, PR_TRUE); + }); + + + // Finally wrap the key. Important note is that the wrapping mechanism + // selection (read: just grabbing a cipher) offers, at least in my NSS + // copy, mostly CKM_*_ECB ciphers (with 3DES as the leading one, see + // wrapMechanismList[] in pk11mech.c). There is no CKM_*_*_PAD variant + // which means that plaintext we are providing to PK11_CipherOp() must + // be aligned to cipher's block size. For 3DES it's 64 bits. + { + const auto block_size = PK11_GetBlockSize(wrap_mechanism, nullptr); + SECItem * const raw_key_aligned = PK11_BlockData(raw_key, block_size); + if (raw_key_aligned == nullptr) { + return nullptr; + } + auto rka_guard = make_scope_guard([raw_key_aligned] { + SECITEM_FreeItem(raw_key_aligned, PR_TRUE); + }); + + // PARANOIA: always add space for one extra cipher's block. This seems + // unnecessary at the moment as padding is never used (see the comment + // above) but let's assume it can change in the future. Just in case. + wrapped_key.resize(raw_key_aligned->len + block_size, 0x0); + int out_len = 0; + + int ret = PK11_CipherOp( + wrap_key_crypt_context, + wrapped_key.data(), + &out_len, + wrapped_key.size(), // max space + raw_key_aligned->data, + raw_key_aligned->len); + if (ret != SECSuccess) { + return nullptr; + } + + ret = PK11_Finalize(wrap_key_crypt_context); + if (ret != SECSuccess) { + return nullptr; + } + + ceph_assert(out_len <= static_cast(wrapped_key.size())); + wrapped_key.resize(out_len); + } + + // Key is wrapped now so we can acquire the ultimate PK11SymKey through + // unwrapping it. Of course these two opposite operations form NOP with + // a side effect: FIPS level 1 compatibility. 
+ ::memset(&tmp_sec_item, 0, sizeof(tmp_sec_item)); + + SECItem wrapped_key_item; + ::memset(&wrapped_key_item, 0, sizeof(wrapped_key_item)); + wrapped_key_item.data = wrapped_key.data(); + wrapped_key_item.len = wrapped_key.size(); + + return PK11_UnwrapSymKey( + wrapping_key, + wrap_mechanism, + &tmp_sec_item, + &wrapped_key_item, + type, + operation, + raw_key->len); +} + void ceph::crypto::init(CephContext *cct) { pid_t pid = getpid(); diff --git a/ceph/src/common/ceph_crypto.h b/ceph/src/common/ceph_crypto.h index 9c3023929..c58f1d0b5 100644 --- a/ceph/src/common/ceph_crypto.h +++ b/ceph/src/common/ceph_crypto.h @@ -67,6 +67,20 @@ namespace ceph { // ugly bit of CryptoPP that we have to emulate here :( typedef unsigned char byte; +namespace ceph { + namespace crypto { + // workaround for no PK11_ImportSymKey in FIPS mode + PK11SymKey *PK11_ImportSymKey_FIPS( + PK11SlotInfo *slot, + CK_MECHANISM_TYPE type, + PK11Origin origin, + CK_ATTRIBUTE_TYPE operation, + SECItem *key, + void *wincx); + } // namespace crypto +} // namespace + + namespace ceph { namespace crypto { void assert_init(); @@ -136,8 +150,8 @@ namespace ceph { keyItem.type = siBuffer; keyItem.data = (unsigned char*)key; keyItem.len = length; - symkey = PK11_ImportSymKey(slot, cktype, PK11_OriginUnwrap, - CKA_SIGN, &keyItem, NULL); + symkey = PK11_ImportSymKey_FIPS(slot, cktype, PK11_OriginUnwrap, + CKA_SIGN, &keyItem, NULL); assert(symkey); SECItem param; param.type = siBuffer; diff --git a/ceph/src/common/ceph_timer.h b/ceph/src/common/ceph_timer.h index 4b7438672..8e9330122 100644 --- a/ceph/src/common/ceph_timer.h +++ b/ceph/src/common/ceph_timer.h @@ -138,6 +138,8 @@ namespace ceph { } // Otherwise the event requeued itself } + if (suspended) + break; if (schedule.empty()) cond.wait(l); else diff --git a/ceph/src/common/legacy_config_opts.h b/ceph/src/common/legacy_config_opts.h index 828697758..7dac8782d 100644 --- a/ceph/src/common/legacy_config_opts.h +++ b/ceph/src/common/legacy_config_opts.h @@ -443,7 +443,6 @@ OPTION(mds_session_blacklist_on_timeout, OPT_BOOL) // whether to blacklist cl OPTION(mds_session_blacklist_on_evict, OPT_BOOL) // whether to blacklist clients whose sessions are dropped via admin commands OPTION(mds_sessionmap_keys_per_op, OPT_U32) // how many sessions should I try to load/store in a single OMAP operation? -OPTION(mds_recall_state_timeout, OPT_FLOAT) // detect clients which aren't trimming caps OPTION(mds_freeze_tree_timeout, OPT_FLOAT) // detecting freeze tree deadlock OPTION(mds_health_summarize_threshold, OPT_INT) // collapse N-client health metrics to a single 'many' OPTION(mds_reconnect_timeout, OPT_FLOAT) // seconds to wait for clients during mds restart @@ -1099,6 +1098,7 @@ OPTION(bluestore_fsck_on_umount_deep, OPT_BOOL) OPTION(bluestore_fsck_on_mkfs, OPT_BOOL) OPTION(bluestore_fsck_on_mkfs_deep, OPT_BOOL) OPTION(bluestore_sync_submit_transaction, OPT_BOOL) // submit kv txn in queueing thread (not kv_sync_thread) +OPTION(bluestore_fsck_read_bytes_cap, OPT_U64) OPTION(bluestore_throttle_bytes, OPT_U64) OPTION(bluestore_throttle_deferred_bytes, OPT_U64) OPTION(bluestore_throttle_cost_per_io_hdd, OPT_U64) diff --git a/ceph/src/common/options.cc b/ceph/src/common/options.cc index 231a7651b..fdbc23312 100644 --- a/ceph/src/common/options.cc +++ b/ceph/src/common/options.cc @@ -444,7 +444,7 @@ std::vector