From eafe8130898c3d7229e1c84c100c2e62e32be0d0 Mon Sep 17 00:00:00 2001 From: Thomas Lamprecht Date: Tue, 10 Dec 2019 12:49:26 +0100 Subject: [PATCH] import ceph 14.2.5 Signed-off-by: Thomas Lamprecht --- ceph/.github/pull_request_template.md | 17 +- ceph/CMakeLists.txt | 38 +- ceph/PendingReleaseNotes | 94 +- ceph/admin/build-doc | 5 +- ceph/admin/doc-requirements.txt | 9 +- ceph/alpine/APKBUILD | 6 +- ceph/ceph.spec | 80 +- ceph/ceph.spec.in | 74 +- ceph/changelog.upstream | 10 +- ceph/cmake/modules/BuildDPDK.cmake | 4 +- ceph/cmake/modules/CheckCxxAtomic.cmake | 8 +- ceph/cmake/modules/FindBoost.cmake | 2 +- ceph/cmake/modules/Findgenl.cmake | 23 + ceph/debian/control | 22 +- ceph/doc/cephfs/fs-volumes.rst | 155 + ceph/doc/cephfs/fstab.rst | 7 +- ceph/doc/cephfs/index.rst | 1 + ceph/doc/cephfs/kernel.rst | 14 +- ceph/doc/cephfs/posix.rst | 19 + .../osd_internals/recovery_reservation.rst | 8 + ceph/doc/dev/osd_internals/scrub.rst | 13 +- ceph/doc/man/8/ceph-bluestore-tool.rst | 14 + ceph/doc/man/8/ceph-kvstore-tool.rst | 5 + ceph/doc/man/8/mount.ceph.rst | 22 +- ceph/doc/man/8/rados.rst | 16 +- ceph/doc/man/8/rbd.rst | 4 +- ceph/doc/mgr/crash.rst | 25 +- ceph/doc/mgr/dashboard.rst | 72 +- ceph/doc/mgr/orchestrator_cli.rst | 16 +- ceph/doc/mgr/telemetry.rst | 72 +- .../rados/command/list-inconsistent-obj.json | 3 +- .../configuration/bluestore-config-ref.rst | 17 +- .../rados/configuration/mon-config-ref.rst | 45 +- .../configuration/mon-osd-interaction.rst | 15 +- ceph/doc/rados/operations/balancer.rst | 4 +- ceph/doc/rados/operations/health-checks.rst | 145 +- .../rados/operations/monitoring-osd-pg.rst | 5 + ceph/doc/rados/operations/monitoring.rst | 108 + ceph/doc/rados/operations/pg-states.rst | 4 +- .../doc/rados/operations/placement-groups.rst | 16 +- ceph/doc/radosgw/index.rst | 1 + ceph/doc/radosgw/multisite.rst | 6 +- ceph/doc/radosgw/notifications.rst | 291 + ceph/doc/radosgw/pubsub-module.rst | 446 +- .../radosgw/s3-notification-compatibility.rst | 122 + ceph/doc/radosgw/s3.rst | 3 +- ceph/doc/radosgw/s3/bucketops.rst | 294 + ceph/doc/radosgw/s3/objectops.rst | 100 +- ceph/doc/rbd/qemu-rbd.rst | 2 +- ceph/doc/rbd/rbd-mirroring.rst | 104 +- ceph/doc_deps.deb.txt | 8 +- ceph/install-deps.sh | 39 +- ceph/make-dist | 25 +- .../grafana/dashboards/cephfs-overview.json | 4 +- .../grafana/dashboards/host-details.json | 45 +- .../grafana/dashboards/hosts-overview.json | 8 +- .../dashboards/osd-device-details.json | 28 +- .../grafana/dashboards/osds-overview.json | 6 +- .../grafana/dashboards/pool-overview.json | 18 +- .../grafana/dashboards/radosgw-detail.json | 33 +- .../grafana/dashboards/rbd-overview.json | 6 +- ceph/qa/clusters/2-node-mgr.yaml | 10 + ceph/qa/debug/mgr.yaml | 16 + ceph/qa/rbd/krbd_discard.t | 74 +- ceph/qa/rbd/krbd_discard_4M.t | 74 +- ceph/qa/run-standalone.sh | 2 +- ceph/qa/standalone/ceph-helpers.sh | 51 +- .../erasure-code/test-erasure-eio.sh | 1 - ceph/qa/standalone/mgr/balancer.sh | 125 + ceph/qa/standalone/misc/network-ping.sh | 145 + ceph/qa/standalone/mon/mon-osdmap-prune.sh | 5 - ceph/qa/standalone/mon/osd-pool-create.sh | 5 +- ceph/qa/standalone/osd/divergent-priors.sh | 840 ++ .../qa/standalone/osd/ec-error-rollforward.sh | 2 +- ceph/qa/standalone/osd/osd-backfill-prio.sh | 24 +- ceph/qa/standalone/osd/osd-backfill-space.sh | 15 + .../standalone/osd/osd-bluefs-volume-ops.sh | 32 +- ceph/qa/standalone/osd/osd-dup.sh | 8 +- ceph/qa/standalone/osd/osd-recovery-prio.sh | 24 +- ceph/qa/standalone/osd/osd-recovery-space.sh | 179 + 
ceph/qa/standalone/osd/osd-rep-recov-eio.sh | 7 +- ceph/qa/standalone/scrub/osd-scrub-dump.sh | 173 + ceph/qa/standalone/scrub/osd-scrub-repair.sh | 246 +- ceph/qa/standalone/scrub/osd-scrub-snaps.sh | 18 +- .../special/ceph_objectstore_tool.py | 20 + .../tasks/client-recovery.yaml | 2 + .../fs/bugs/{ => client_trim_caps}/conf | 0 .../thrash/msgr-failures/osd-mds-delay.yaml | 2 + .../old_client/tasks/2-upgrade.yaml | 40 +- .../upgraded_client/tasks/2-upgrade.yaml | 40 +- .../striping/default/msgr-failures/few.yaml | 2 + .../striping/default/msgr-failures/many.yaml | 2 + .../fsx/striping/fancy/msgr-failures/few.yaml | 2 + .../krbd/rbd-nomount/msgr-failures/few.yaml | 2 + .../krbd/rbd-nomount/msgr-failures/many.yaml | 2 + .../tasks/krbd_udev_enumerate.yaml | 5 + .../tasks/krbd_udev_netlink_enobufs.yaml | 10 + .../qa/suites/krbd/rbd/msgr-failures/few.yaml | 2 + .../suites/krbd/rbd/msgr-failures/many.yaml | 2 + .../krbd/singleton/msgr-failures/few.yaml | 2 + .../krbd/singleton/msgr-failures/many.yaml | 2 + ceph/qa/suites/krbd/unmap/ceph/ceph.yaml | 5 + .../krbd/wac/wac/verify/many-resets.yaml | 2 + .../suites/rados/basic/msgr-failures/few.yaml | 2 + .../rados/basic/msgr-failures/many.yaml | 2 + ceph/qa/suites/rados/dashboard/% | 0 ceph/qa/suites/rados/dashboard/.qa | 1 + ceph/qa/suites/rados/dashboard/clusters/+ | 0 ceph/qa/suites/rados/dashboard/clusters/.qa | 1 + .../rados/dashboard/clusters/2-node-mgr.yaml | 1 + ceph/qa/suites/rados/dashboard/debug/.qa | 1 + ceph/qa/suites/rados/dashboard/debug/mgr.yaml | 1 + ceph/qa/suites/rados/dashboard/objectstore | 1 + .../rados/dashboard/supported-random-distro$ | 1 + ceph/qa/suites/rados/dashboard/tasks/.qa | 1 + .../{mgr => dashboard}/tasks/dashboard.yaml | 1 + .../suites/rados/mgr/clusters/2-node-mgr.yaml | 7 +- .../suites/rados/mgr/clusters/openstack.yaml | 4 - ceph/qa/suites/rados/mgr/debug/mgr.yaml | 17 +- ceph/qa/suites/rados/mgr/tasks/crash.yaml | 1 + ceph/qa/suites/rados/mgr/tasks/insights.yaml | 1 + .../rados/mgr/tasks/module_selftest.yaml | 1 + .../rados/monthrash/msgr-failures/few.yaml | 2 + .../monthrash/msgr-failures/mon-delay.yaml | 2 + .../rados/multimon/msgr-failures/few.yaml | 2 + .../rados/multimon/msgr-failures/many.yaml | 2 + .../msgr-failures/few.yaml | 2 + .../msgr-failures/many.yaml | 2 + .../rados/singleton-nomsgr/all/balancer.yaml | 10 + ...mon-memory-target-compliance.yaml.disabled | 152 + .../rados/singleton/all/test-crash.yaml | 1 + .../rados/singleton/msgr-failures/few.yaml | 2 + .../rados/singleton/msgr-failures/many.yaml | 2 + .../rados/standalone/workloads/mgr.yaml | 18 + .../msgr-failures/fastclose.yaml | 2 + .../thrash-old-clients/msgr-failures/few.yaml | 2 + .../msgr-failures/osd-delay.yaml | 2 + .../rados/thrash/msgr-failures/fastclose.yaml | 2 + .../rados/thrash/msgr-failures/few.yaml | 2 + .../rados/thrash/msgr-failures/osd-delay.yaml | 2 + .../rados/verify/msgr-failures/few.yaml | 2 + .../suites/rbd/basic/msgr-failures/few.yaml | 2 + ceph/qa/suites/rbd/cli/msgr-failures/few.yaml | 2 + .../suites/rbd/cli_v1/msgr-failures/few.yaml | 2 + .../rbd-mirror-bootstrap-workunit.yaml | 11 + .../qa/suites/rbd/qemu/msgr-failures/few.yaml | 1 + .../suites/rbd/thrash/msgr-failures/few.yaml | 2 + .../realms/three-zone-plus-pubsub.yaml | 23 + .../suites/rgw/verify/msgr-failures/few.yaml | 2 + .../suites/smoke/basic/tasks/mon_thrash.yaml | 1 + .../suites/smoke/basic/tasks/rados_bench.yaml | 1 + ceph/qa/suites/smoke/basic/tasks/rbd_fsx.yaml | 1 + .../suites/tgt/basic/msgr-failures/few.yaml | 2 + 
.../suites/tgt/basic/msgr-failures/many.yaml | 2 + ceph/qa/tasks/cbt.py | 15 + ceph/qa/tasks/ceph.conf.template | 4 + ceph/qa/tasks/ceph.py | 14 +- ceph/qa/tasks/cephfs/fuse_mount.py | 1 + ceph/qa/tasks/cephfs/kernel_mount.py | 64 +- ceph/qa/tasks/cephfs/mount.py | 8 + ceph/qa/tasks/cephfs/test_client_recovery.py | 40 + ceph/qa/tasks/cephfs/test_failover.py | 8 +- ceph/qa/tasks/cephfs/test_volume_client.py | 89 +- ceph/qa/tasks/cephfs/test_volumes.py | 195 +- ceph/qa/tasks/kclient.py | 8 - .../qa/tasks/mgr/dashboard/test_mgr_module.py | 69 +- ceph/qa/tasks/mgr/dashboard/test_pool.py | 5 +- ceph/qa/tasks/mgr/dashboard/test_rbd.py | 6 +- ceph/qa/tasks/mgr/dashboard/test_rgw.py | 25 +- ceph/qa/tasks/mgr/test_dashboard.py | 44 + ceph/qa/tasks/mgr/test_insights.py | 23 - ceph/qa/tasks/mgr/test_module_selftest.py | 4 + ceph/qa/tasks/rgw_multisite.py | 10 +- ceph/qa/tasks/rgw_multisite_tests.py | 13 +- ceph/qa/tasks/vstart_runner.py | 7 +- ceph/qa/valgrind.supp | 16 +- ceph/qa/workunits/ceph-helpers-root.sh | 51 + ceph/qa/workunits/cephtool/test.sh | 56 + ceph/qa/workunits/mon/pool_ops.sh | 10 + ceph/qa/workunits/rados/test_crash.sh | 8 +- .../rados/test_envlibrados_for_rocksdb.sh | 27 +- .../qa/workunits/rados/test_librados_build.sh | 4 +- ceph/qa/workunits/rbd/cli_generic.sh | 10 + ceph/qa/workunits/rbd/krbd_udev_enumerate.sh | 66 + .../rbd/krbd_udev_netlink_enobufs.sh | 24 + ceph/qa/workunits/rbd/rbd_mirror.sh | 29 + ceph/qa/workunits/rbd/rbd_mirror_bootstrap.sh | 49 + ceph/qa/workunits/rbd/rbd_mirror_helpers.sh | 75 +- ceph/src/.git_version | 4 +- ceph/src/CMakeLists.txt | 6 +- ceph/src/auth/cephx/CephxClientHandler.cc | 26 +- ceph/src/auth/cephx/CephxProtocol.cc | 108 +- ceph/src/auth/cephx/CephxServiceHandler.cc | 27 +- ceph/src/auth/cephx/CephxSessionHandler.cc | 54 +- ceph/src/ceph-crash.in | 39 +- ceph/src/ceph-volume/ceph_volume/api/lvm.py | 1392 +-- .../ceph-volume/ceph_volume/configuration.py | 28 +- .../ceph_volume/devices/lvm/listing.py | 19 +- .../ceph_volume/devices/lvm/zap.py | 44 +- .../ceph-volume/ceph_volume/systemd/main.py | 2 +- .../ceph_volume/tests/api/test_lvm.py | 17 +- .../ceph-volume/ceph_volume/tests/conftest.py | 10 + .../devices/lvm/strategies/test_bluestore.py | 4 +- .../tests/devices/lvm/test_listing.py | 17 + .../ceph_volume/tests/devices/lvm/test_zap.py | 40 + .../tests/functional/playbooks/deploy.yml | 14 + .../ceph_volume/tests/test_configuration.py | 10 +- .../ceph_volume/tests/test_inventory.py | 4 +- .../ceph_volume/tests/util/test_device.py | 28 +- .../ceph_volume/tests/util/test_system.py | 62 + .../ceph-volume/ceph_volume/util/device.py | 4 +- .../ceph_volume/util/encryption.py | 3 +- .../ceph-volume/ceph_volume/util/system.py | 40 +- ceph/src/ceph-volume/shell_tox.ini | 11 + ceph/src/ceph-volume/tox.ini | 1 + ceph/src/ceph.in | 19 +- ceph/src/client/Client.cc | 294 +- ceph/src/client/Client.h | 9 +- ceph/src/client/Inode.h | 1 + ceph/src/client/MetaSession.h | 1 - ceph/src/cls/rbd/cls_rbd.cc | 17 +- ceph/src/cls/rbd/cls_rbd_client.cc | 4 +- ceph/src/cls/rbd/cls_rbd_client.h | 3 +- ceph/src/cls/rgw/cls_rgw_types.cc | 4 +- ceph/src/cls/rgw/cls_rgw_types.h | 2 +- ceph/src/cls/user/cls_user.cc | 30 +- ceph/src/common/CMakeLists.txt | 6 +- ceph/src/common/Checksummer.h | 21 +- ceph/src/common/Formatter.cc | 26 + ceph/src/common/Formatter.h | 2 + ceph/src/common/PriorityCache.cc | 349 +- ceph/src/common/PriorityCache.h | 86 +- ceph/src/common/Thread.h | 15 +- ceph/src/common/WorkQueue.h | 7 +- ceph/src/common/admin_socket.cc | 102 +- 
ceph/src/common/admin_socket.h | 1 + ceph/src/common/ceph_context.cc | 3 + ceph/src/common/config_proxy.h | 84 +- ceph/src/common/legacy_config_opts.h | 19 +- ceph/src/common/options.cc | 109 +- ceph/src/common/perf_counters.cc | 4 +- ceph/src/common/perf_counters.h | 12 +- ceph/src/common/safe_io.c | 5 +- ceph/src/common/safe_io.h | 5 +- ceph/src/common/scrub_types.h | 3 + ceph/src/common/secret.c | 4 +- ceph/src/common/simple_cache.hpp | 39 + ceph/src/common/subsys.h | 1 + ceph/src/crimson/os/Transaction.h | 114 +- ceph/src/crimson/os/cyan_store.cc | 2 +- ceph/src/dmclock/CMakeLists.txt | 8 +- ceph/src/global/global_init.cc | 31 +- ceph/src/include/byteorder.h | 18 +- ceph/src/include/ceph_fs.h | 28 + ceph/src/include/compat.h | 3 + ceph/src/include/config-h.in.cmake | 2 +- ceph/src/include/denc.h | 14 +- ceph/src/include/msgr.h | 13 + ceph/src/include/rados.h | 13 + ceph/src/include/rados/librados.hpp | 3 + ceph/src/include/rados/rados_types.hpp | 7 +- ceph/src/include/rados/rgw_file.h | 4 +- ceph/src/include/rbd/librbd.h | 18 + ceph/src/include/rbd/librbd.hpp | 12 + ceph/src/include/rbd_types.h | 21 +- ceph/src/include/types.h | 10 - ceph/src/krbd.cc | 390 +- ceph/src/kv/CMakeLists.txt | 13 +- ceph/src/kv/RocksDBStore.cc | 2 + ceph/src/kv/rocksdb_cache/BinnedLRUCache.cc | 14 +- ceph/src/librados/librados_cxx.cc | 14 + ceph/src/librbd/CMakeLists.txt | 1 + ceph/src/librbd/DeepCopyRequest.cc | 12 + ceph/src/librbd/ImageCtx.cc | 18 +- ceph/src/librbd/ImageCtx.h | 2 +- ceph/src/librbd/api/DiffIterate.cc | 6 + ceph/src/librbd/api/Image.cc | 20 +- ceph/src/librbd/api/Image.h | 2 + ceph/src/librbd/api/Migration.cc | 3 +- ceph/src/librbd/api/Mirror.cc | 503 +- ceph/src/librbd/api/Mirror.h | 8 + ceph/src/librbd/api/Snapshot.cc | 2 +- ceph/src/librbd/api/Trash.cc | 46 +- ceph/src/librbd/api/Trash.h | 7 +- .../cache/ObjectCacherObjectDispatch.cc | 1 + .../src/librbd/deep_copy/ObjectCopyRequest.cc | 2 + ceph/src/librbd/image/AttachChildRequest.cc | 3 +- ceph/src/librbd/image/DetachChildRequest.cc | 2 + ceph/src/librbd/image/OpenRequest.cc | 17 +- ceph/src/librbd/image/PreRemoveRequest.cc | 48 +- ceph/src/librbd/image/PreRemoveRequest.h | 19 +- ceph/src/librbd/image/RefreshRequest.cc | 8 +- ceph/src/librbd/image/RemoveRequest.cc | 5 + ceph/src/librbd/image/RemoveRequest.h | 2 +- ceph/src/librbd/io/CopyupRequest.cc | 1 + ceph/src/librbd/io/ImageRequestWQ.cc | 9 + ceph/src/librbd/io/ObjectRequest.cc | 1 + ceph/src/librbd/librbd.cc | 92 +- ceph/src/librbd/operation/ObjectMapIterate.cc | 11 + ceph/src/librbd/operation/ResizeRequest.cc | 4 +- .../librbd/operation/SnapshotCreateRequest.cc | 9 + .../librbd/operation/SnapshotRemoveRequest.cc | 5 + ceph/src/librbd/operation/SparsifyRequest.cc | 5 + ceph/src/librbd/operation/TrimRequest.cc | 9 + ceph/src/librbd/trash/RemoveRequest.cc | 171 + ceph/src/librbd/trash/RemoveRequest.h | 119 + ceph/src/mds/CInode.h | 5 +- ceph/src/mds/InoTable.cc | 15 +- ceph/src/mds/Locker.cc | 52 +- ceph/src/mds/MDBalancer.cc | 12 +- ceph/src/mds/MDCache.cc | 199 +- ceph/src/mds/MDCache.h | 58 +- ceph/src/mds/MDSContext.cc | 1 + ceph/src/mds/MDSRank.cc | 13 +- ceph/src/mds/MDSRank.h | 7 + ceph/src/mds/SnapRealm.h | 7 +- ceph/src/mds/locks.c | 11 +- ceph/src/mds/mdstypes.h | 12 +- ceph/src/messages/MBackfillReserve.h | 22 +- ceph/src/messages/MMonSubscribe.h | 4 +- ceph/src/mgr/ActivePyModules.cc | 29 +- ceph/src/mgr/BaseMgrModule.cc | 8 +- ceph/src/mgr/BaseMgrStandbyModule.cc | 2 + ceph/src/mgr/ClusterState.cc | 198 +- ceph/src/mgr/ClusterState.h | 7 +- 
ceph/src/mgr/DaemonHealthMetricCollector.cc | 17 + ceph/src/mgr/DaemonHealthMetricCollector.h | 2 +- ceph/src/mgr/DaemonServer.cc | 7 +- ceph/src/mgr/DaemonState.h | 4 + ceph/src/mgr/Mgr.cc | 2 + ceph/src/mgr/PyModuleRegistry.cc | 4 +- ceph/src/mgr/PyOSDMap.cc | 2 + ceph/src/mon/CMakeLists.txt | 5 +- ceph/src/mon/MonCap.cc | 19 +- ceph/src/mon/MonClient.cc | 48 +- ceph/src/mon/MonClient.h | 1 + ceph/src/mon/MonCommands.h | 2 +- ceph/src/mon/MonMap.cc | 11 +- ceph/src/mon/Monitor.cc | 2 +- ceph/src/mon/MonitorDBStore.h | 8 +- ceph/src/mon/OSDMonitor.cc | 451 +- ceph/src/mon/OSDMonitor.h | 33 +- ceph/src/mon/PGMap.cc | 164 +- ceph/src/mount/CMakeLists.txt | 6 +- ceph/src/mount/conf.cc | 95 + ceph/src/mount/mount.ceph.c | 455 +- ceph/src/mount/mount.ceph.h | 44 + ceph/src/msg/async/ProtocolV1.cc | 3 + ceph/src/msg/async/ProtocolV2.cc | 15 +- ceph/src/msg/async/frames_v2.h | 8 +- ceph/src/msg/msg_types.h | 32 +- ceph/src/msg/simple/Pipe.cc | 1 + ceph/src/msg/xio/XioMsg.h | 8 +- ceph/src/os/CMakeLists.txt | 5 +- ceph/src/os/ObjectStore.cc | 2 +- ceph/src/os/ObjectStore.h | 117 +- ceph/src/os/bluestore/Allocator.cc | 156 +- ceph/src/os/bluestore/Allocator.h | 13 +- ceph/src/os/bluestore/BitmapAllocator.cc | 14 +- ceph/src/os/bluestore/BitmapAllocator.h | 3 +- ceph/src/os/bluestore/BlueFS.cc | 410 +- ceph/src/os/bluestore/BlueFS.h | 15 + ceph/src/os/bluestore/BlueStore.cc | 2322 +++-- ceph/src/os/bluestore/BlueStore.h | 167 +- ceph/src/os/bluestore/KernelDevice.cc | 18 +- ceph/src/os/bluestore/KernelDevice.h | 4 +- ceph/src/os/bluestore/StupidAllocator.cc | 14 +- ceph/src/os/bluestore/StupidAllocator.h | 3 +- ceph/src/os/bluestore/bluefs_types.h | 1 + ceph/src/os/bluestore/bluestore_tool.cc | 74 +- ceph/src/os/bluestore/bluestore_types.h | 6 +- .../os/bluestore/fastbmap_allocator_impl.cc | 85 + .../os/bluestore/fastbmap_allocator_impl.h | 7 + ceph/src/os/filestore/DBObjectMap.h | 4 + ceph/src/os/filestore/FileStore.cc | 4 +- ceph/src/os/filestore/HashIndex.cc | 7 +- ceph/src/osd/DynamicPerfStats.h | 11 +- ceph/src/osd/OSD.cc | 462 +- ceph/src/osd/OSD.h | 45 +- ceph/src/osd/OSDCap.cc | 2 +- ceph/src/osd/OSDMap.cc | 111 +- ceph/src/osd/OSDMap.h | 16 +- ceph/src/osd/PG.cc | 109 +- ceph/src/osd/PG.h | 38 +- ceph/src/osd/PGBackend.cc | 24 +- ceph/src/osd/PGBackend.h | 2 +- ceph/src/osd/PGLog.cc | 16 +- ceph/src/osd/PGLog.h | 54 +- ceph/src/osd/PGPeeringEvent.h | 2 +- ceph/src/osd/PrimaryLogPG.cc | 14 +- ceph/src/osd/PrimaryLogPG.h | 2 +- ceph/src/osd/ReplicatedBackend.cc | 3 +- ceph/src/osd/osd_types.cc | 134 +- ceph/src/osd/osd_types.h | 15 + ceph/src/pybind/ceph_volume_client.py | 8 +- ceph/src/pybind/cephfs/cephfs.pyx | 66 +- ceph/src/pybind/mgr/balancer/module.py | 55 +- ceph/src/pybind/mgr/crash/module.py | 234 +- ceph/src/pybind/mgr/dashboard/CMakeLists.txt | 35 +- ceph/src/pybind/mgr/dashboard/HACKING.rst | 31 +- ceph/src/pybind/mgr/dashboard/__init__.py | 1 + .../mgr/dashboard/controllers/__init__.py | 36 +- .../pybind/mgr/dashboard/controllers/auth.py | 1 - .../pybind/mgr/dashboard/controllers/home.py | 115 + .../pybind/mgr/dashboard/controllers/iscsi.py | 258 +- .../pybind/mgr/dashboard/controllers/pool.py | 47 +- .../pybind/mgr/dashboard/controllers/rgw.py | 22 +- .../pybind/mgr/dashboard/controllers/saml2.py | 3 +- .../mgr/dashboard/frontend/angular.json | 100 +- .../frontend/dist/2.23eee776b9ebe5035afa.js | 1 - .../frontend/dist/6.2177204066439fcd8170.js | 1 - .../frontend/dist/7.a311be12e7802be9438f.js | 1 - .../frontend/dist/8.0ff37488868257bf7364.js | 1 - 
.../frontend/dist/9.f4b21c687db5d9cef117.js | 1 - .../dist/en-US/2.03f9acb8077098945447.js | 1 + .../dist/{ => en-US}/3rdpartylicenses.txt | 0 .../dist/en-US/6.3d63c004c315603efec1.js | 1 + .../dist/en-US/7.05e89b87dac0b3fdcc5c.js | 1 + .../dist/en-US/8.4a6a002d073ab0275cb2.js | 1 + .../dist/en-US/9.383a20140e10eb51367c.js | 1 + ...120411_fa_228x228.1ed169ccc35367a2dab2.png | Bin .../assets/1280px-Mimic_Octopus2.jpg | Bin .../assets/1280px-Nautilus_Octopus.jpg | Bin ...eph_Logo_Stacked_RGB_120411_fa_228x228.png | Bin ...eph_Logo_Stacked_RGB_120411_fa_348x348.png | Bin ...go_Stacked_RGB_White_120411_fa_256x256.png | Bin ...Ceph_Logo_Standard_RGB_White_120411_fa.png | Bin .../dist/{ => en-US}/assets/loading.gif | Bin .../dist/{ => en-US}/assets/logo-mini.png | Bin .../{ => en-US}/assets/notification-icons.png | Bin .../{ => en-US}/assets/prometheus_logo.svg | 0 .../common.8a53d98b04768bd15706.js | 0 .../frontend/dist/{ => en-US}/favicon.ico | Bin ...rkawesome-webfont.35e77a38ca9d85c4e897.eot | Bin ...awesome-webfont.3a9e014c2469ffa65a0e.woff2 | Bin ...kawesome-webfont.44bbdbbfb5a10ba2d1ce.woff | Bin ...rkawesome-webfont.78dcc9c4999659b8026a.svg | 0 ...rkawesome-webfont.fc46f3dae03b2b2e1cee.ttf | Bin ...lflings-regular.448c34a56d699c29117a.woff2 | Bin ...halflings-regular.89889688147bd7575d63.svg | 0 ...halflings-regular.e18bbf611f2a2e43afc0.ttf | Bin ...halflings-regular.f4769f9bdb7466be6508.eot | Bin ...alflings-regular.fa2772327f55d8198301.woff | Bin .../frontend/dist/{ => en-US}/index.html | 4 +- .../dist/en-US/main.4cca9699be2afd74460d.js | 1 + .../polyfills.f31db31652a3fd9f4bca.js | 0 .../prometheus_logo.074db273ef932a67d91b.svg | 0 .../en-US/runtime.e449ada3d1974ef4aa64.js | 1 + .../scripts.fc88ef4a23399c760d0b.js | 0 .../styles.f5317b15474518dffebc.css} | 2 +- .../dist/main.7ef9e458bdf2879bd531.js | 1 - .../dist/runtime.c2fa00b158659de6ccaa.js | 1 - .../frontend/e2e/block/images.e2e-spec.ts | 2 +- .../frontend/e2e/block/iscsi.e2e-spec.ts | 2 +- .../frontend/e2e/block/mirroring.e2e-spec.ts | 2 +- .../frontend/e2e/cluster/alerts.e2e-spec.ts | 2 +- .../e2e/cluster/configuration.e2e-spec.ts | 2 +- .../e2e/cluster/crush-map.e2e-spec.ts | 2 +- .../frontend/e2e/cluster/hosts.e2e-spec.ts | 2 +- .../frontend/e2e/cluster/logs.e2e-spec.ts | 2 +- .../e2e/cluster/mgr-modules.e2e-spec.ts | 2 +- .../frontend/e2e/cluster/monitors.e2e-spec.ts | 2 +- .../frontend/e2e/cluster/osds.e2e-spec.ts | 2 +- .../e2e/filesystems/filesystems.e2e-spec.ts | 2 +- .../mgr/dashboard/frontend/e2e/helper.po.ts | 10 +- .../frontend/e2e/nfs/nfs.e2e-spec.ts | 2 +- .../frontend/e2e/pools/pools.e2e-spec.ts | 2 +- .../dashboard/frontend/environment.build.js | 11 + .../mgr/dashboard/frontend/i18n.config.json | 12 + .../mgr/dashboard/frontend/package-lock.json | 5602 +++++------ .../mgr/dashboard/frontend/package.json | 30 +- .../frontend/src/app/app-routing.module.ts | 12 +- .../dashboard/frontend/src/app/app.module.ts | 28 +- .../src/app/ceph/block/block.module.ts | 6 +- .../iscsi-setting.component.html | 58 + .../iscsi-setting.component.scss | 0 .../iscsi-setting.component.spec.ts | 37 + .../iscsi-setting/iscsi-setting.component.ts | 31 + .../iscsi-target-details.component.ts | 83 +- .../iscsi-target-form.component.html | 148 +- .../iscsi-target-form.component.spec.ts | 89 +- .../iscsi-target-form.component.ts | 158 +- ...target-image-settings-modal.component.html | 126 +- ...get-image-settings-modal.component.spec.ts | 44 +- ...i-target-image-settings-modal.component.ts | 56 +- ...i-target-iqn-settings-modal.component.html | 
30 +- ...arget-iqn-settings-modal.component.spec.ts | 22 +- ...csi-target-iqn-settings-modal.component.ts | 15 +- .../iscsi-target-list.component.html | 1 + .../iscsi-target-list.component.spec.ts | 30 +- .../iscsi-target-list.component.ts | 37 +- .../pool-list/pool-list.component.ts | 1 + .../block/rbd-form/rbd-form.component.spec.ts | 115 +- .../ceph/block/rbd-form/rbd-form.component.ts | 43 +- .../block/rbd-list/rbd-list.component.spec.ts | 35 +- .../ceph/block/rbd-list/rbd-list.component.ts | 7 +- .../rbd-snapshot-actions.model.ts | 19 +- .../rbd-snapshot-list.component.spec.ts | 24 +- .../rbd-snapshot-list.component.ts | 7 +- .../rbd-trash-list.component.spec.ts | 14 +- .../rbd-trash-list.component.ts | 9 +- .../cephfs-chart/cephfs-chart.component.html | 6 +- .../cephfs-chart.component.spec.ts | 50 + .../cephfs-chart/cephfs-chart.component.ts | 238 +- .../configuration/configuration.component.ts | 6 +- .../osd/osd-list/osd-list.component.ts | 22 +- .../silence-list/silence-list.component.ts | 1 + .../dashboard/health/health.component.spec.ts | 3 +- .../ceph/dashboard/health/health.component.ts | 22 +- .../ceph/nfs/nfs-form/nfs-form.component.html | 24 +- .../nfs/nfs-form/nfs-form.component.spec.ts | 10 +- .../ceph/nfs/nfs-form/nfs-form.component.ts | 40 +- .../nfs/nfs-list/nfs-list.component.spec.ts | 33 +- .../ceph/nfs/nfs-list/nfs-list.component.ts | 28 +- .../pool-details/pool-details.component.html | 2 +- .../pool-details.component.spec.ts | 20 +- .../pool-details/pool-details.component.ts | 5 + .../pool/pool-form/pool-form.component.html | 4 - .../pool-form/pool-form.component.spec.ts | 36 +- .../pool/pool-form/pool-form.component.ts | 9 +- .../pool-list/pool-list.component.spec.ts | 185 +- .../pool/pool-list/pool-list.component.ts | 12 +- .../frontend/src/app/ceph/pool/pool.ts | 7 +- .../rgw-bucket-list.component.ts | 1 + .../rgw-user-list/rgw-user-list.component.ts | 1 + .../auth/role-list/role-list.component.ts | 1 + .../sso-not-found.component.html | 2 +- .../auth/user-list/user-list.component.ts | 1 + .../core/not-found/not-found.component.html | 2 +- .../frontend/src/app/locale.helper.ts | 122 - .../src/app/shared/api/iscsi.service.ts | 55 +- ...critical-confirmation-modal.component.html | 14 +- ...critical-confirmation-modal.component.scss | 4 +- .../critical-confirmation-modal.component.ts | 1 + .../language-selector.component.spec.ts | 7 +- .../language-selector.component.ts | 25 +- .../src/app/shared/constants/app.constants.ts | 31 +- .../datatable/table/table.component.html | 2 +- .../shared/services/language.service.spec.ts | 34 + .../app/shared/services/language.service.ts | 24 + .../services/module-status-guard.service.ts | 2 +- .../shared/services/task-list.service.spec.ts | 25 +- .../app/shared/services/task-list.service.ts | 7 +- .../src/environments/environment.tpl.ts | 1 + .../frontend/src/locale/messages.en-US.xlf | 8390 +++++++++++++++++ .../pybind/mgr/dashboard/frontend/src/main.ts | 5 +- .../mgr/dashboard/frontend/src/styles.scss | 3 +- .../frontend/src/testing/unit-test-helper.ts | 10 + ceph/src/pybind/mgr/dashboard/module.py | 108 +- .../mgr/dashboard/run-backend-api-tests.sh | 166 +- .../mgr/dashboard/run-frontend-unittests.sh | 4 +- .../mgr/dashboard/services/access_control.py | 6 + .../mgr/dashboard/services/iscsi_client.py | 34 +- .../pybind/mgr/dashboard/tests/__init__.py | 22 +- .../pybind/mgr/dashboard/tests/test_home.py | 33 + .../pybind/mgr/dashboard/tests/test_iscsi.py | 84 +- .../pybind/mgr/dashboard/tests/test_pool.py | 117 + 
ceph/src/pybind/mgr/dashboard/tools.py | 7 +- ceph/src/pybind/mgr/dashboard/tox.ini | 2 +- ceph/src/pybind/mgr/devicehealth/module.py | 23 +- ceph/src/pybind/mgr/k8sevents/README.md | 81 + ceph/src/pybind/mgr/k8sevents/__init__.py | 1 + ceph/src/pybind/mgr/k8sevents/module.py | 1435 +++ .../src/pybind/mgr/k8sevents/rbac_sample.yaml | 45 + ceph/src/pybind/mgr/mgr_module.py | 3 +- ceph/src/pybind/mgr/mgr_util.py | 70 + .../src/pybind/mgr/orchestrator_cli/module.py | 8 +- ceph/src/pybind/mgr/pg_autoscaler/module.py | 9 +- ceph/src/pybind/mgr/prometheus/module.py | 9 +- ceph/src/pybind/mgr/rbd_support/module.py | 8 +- ceph/src/pybind/mgr/restful/api/request.py | 8 + ceph/src/pybind/mgr/restful/common.py | 15 +- ceph/src/pybind/mgr/telemetry/module.py | 665 +- ceph/src/pybind/mgr/volumes/fs/purge_queue.py | 35 +- ceph/src/pybind/mgr/volumes/fs/subvolspec.py | 16 +- ceph/src/pybind/mgr/volumes/fs/subvolume.py | 97 +- ceph/src/pybind/mgr/volumes/fs/volume.py | 119 +- ceph/src/pybind/mgr/volumes/module.py | 60 +- ceph/src/pybind/mgr/zabbix/module.py | 4 +- .../src/pybind/mgr/zabbix/zabbix_template.xml | 6 +- ceph/src/pybind/rados/rados.pyx | 4 +- ceph/src/pybind/rbd/rbd.pyx | 148 +- ceph/src/pybind/rgw/rgw.pyx | 15 +- ceph/src/rgw/CMakeLists.txt | 8 +- ceph/src/rgw/rgw_admin.cc | 80 +- ceph/src/rgw/rgw_aio_throttle.cc | 4 + ceph/src/rgw/rgw_amqp.cc | 165 +- ceph/src/rgw/rgw_amqp.h | 8 + ceph/src/rgw/rgw_arn.cc | 385 + ceph/src/rgw/rgw_arn.h | 121 + ceph/src/rgw/rgw_asio_frontend.cc | 26 +- ceph/src/rgw/rgw_auth_s3.cc | 3 +- ceph/src/rgw/rgw_basic_types.cc | 12 +- ceph/src/rgw/rgw_basic_types.h | 1 + ceph/src/rgw/rgw_bucket.cc | 32 +- ceph/src/rgw/rgw_bucket.h | 2 - ceph/src/rgw/rgw_common.cc | 9 +- ceph/src/rgw/rgw_common.h | 83 +- ceph/src/rgw/rgw_compression.cc | 2 +- ceph/src/rgw/rgw_coroutine.cc | 7 +- ceph/src/rgw/rgw_cr_rados.cc | 2 +- ceph/src/rgw/rgw_cr_rados.h | 3 + ceph/src/rgw/rgw_data_sync.cc | 39 +- ceph/src/rgw/rgw_file.cc | 73 +- ceph/src/rgw/rgw_file.h | 61 +- ceph/src/rgw/rgw_http_client.cc | 54 +- ceph/src/rgw/rgw_http_client.h | 7 +- ceph/src/rgw/rgw_iam_policy.cc | 361 +- ceph/src/rgw/rgw_iam_policy.h | 141 +- ceph/src/rgw/rgw_json_enc.cc | 24 +- ceph/src/rgw/rgw_lc.cc | 72 +- ceph/src/rgw/rgw_lc.h | 2 +- ceph/src/rgw/rgw_main.cc | 17 +- ceph/src/rgw/rgw_metadata.cc | 10 +- ceph/src/rgw/rgw_notify.cc | 137 + ceph/src/rgw/rgw_notify.h | 24 + ceph/src/rgw/rgw_notify_event_type.cc | 82 + ceph/src/rgw/rgw_notify_event_type.h | 35 + ceph/src/rgw/rgw_object_lock.cc | 96 + ceph/src/rgw/rgw_object_lock.h | 221 + ceph/src/rgw/rgw_op.cc | 526 +- ceph/src/rgw/rgw_op.h | 126 +- ceph/src/rgw/rgw_perf_counters.cc | 1 + ceph/src/rgw/rgw_perf_counters.h | 1 + ceph/src/rgw/rgw_pubsub.cc | 480 +- ceph/src/rgw/rgw_pubsub.h | 496 +- ceph/src/rgw/rgw_pubsub_push.cc | 322 +- ceph/src/rgw/rgw_pubsub_push.h | 21 +- ceph/src/rgw/rgw_rados.cc | 81 +- ceph/src/rgw/rgw_rados.h | 7 +- ceph/src/rgw/rgw_realm_watcher.cc | 6 +- ceph/src/rgw/rgw_rest.cc | 54 +- ceph/src/rgw/rgw_rest.h | 42 +- ceph/src/rgw/rgw_rest_pubsub.cc | 722 ++ ceph/src/rgw/rgw_rest_pubsub.h | 41 + ceph/src/rgw/rgw_rest_pubsub_common.cc | 204 + ceph/src/rgw/rgw_rest_pubsub_common.h | 281 + ceph/src/rgw/rgw_rest_role.cc | 8 +- ceph/src/rgw/rgw_rest_s3.cc | 608 +- ceph/src/rgw/rgw_rest_s3.h | 101 +- ceph/src/rgw/rgw_rest_sts.cc | 14 +- ceph/src/rgw/rgw_rest_sts.h | 6 +- ceph/src/rgw/rgw_rest_user_policy.cc | 8 +- ceph/src/rgw/rgw_sts.cc | 12 +- ceph/src/rgw/rgw_sts.h | 2 +- ceph/src/rgw/rgw_sync_log_trim.cc | 15 +- 
ceph/src/rgw/rgw_sync_module.h | 6 + ceph/src/rgw/rgw_sync_module_es.cc | 47 +- ceph/src/rgw/rgw_sync_module_pubsub.cc | 482 +- ceph/src/rgw/rgw_sync_module_pubsub.h | 4 + ceph/src/rgw/rgw_sync_module_pubsub_rest.cc | 750 +- ceph/src/rgw/rgw_sync_module_pubsub_rest.h | 7 +- ceph/src/rgw/rgw_user.cc | 1 + ceph/src/rgw/rgw_website.cc | 7 +- ceph/src/rgw/rgw_website.h | 6 +- ceph/src/rgw/rgw_xml_enc.cc | 2 + ceph/src/rgw/rgw_zone.cc | 2 +- ceph/src/script/ceph-backport.sh | 25 + ceph/src/seastar/fmt/.travis.yml | 71 +- ceph/src/seastar/fmt/CMakeLists.txt | 41 +- ceph/src/seastar/fmt/README.rst | 37 +- ceph/src/seastar/fmt/doc/api.rst | 61 +- ceph/src/seastar/fmt/doc/build.py | 1 + ceph/src/seastar/fmt/doc/index.rst | 4 +- ceph/src/seastar/fmt/doc/usage.rst | 7 +- ceph/src/seastar/fmt/include/fmt/chrono.h | 406 + ceph/src/seastar/fmt/include/fmt/color.h | 321 +- ceph/src/seastar/fmt/include/fmt/core.h | 859 +- ceph/src/seastar/fmt/include/fmt/format-inl.h | 502 +- ceph/src/seastar/fmt/include/fmt/format.h | 1012 +- ceph/src/seastar/fmt/include/fmt/locale.h | 77 + ceph/src/seastar/fmt/include/fmt/ostream.h | 20 +- ceph/src/seastar/fmt/include/fmt/posix.h | 4 +- ceph/src/seastar/fmt/include/fmt/printf.h | 228 +- ceph/src/seastar/fmt/include/fmt/time.h | 25 +- ceph/src/seastar/fmt/src/format.cc | 26 +- .../seastar/fmt/support/AndroidManifest.xml | 2 +- ceph/src/seastar/fmt/support/build.gradle | 27 +- .../src/seastar/fmt/support/cmake/cxx14.cmake | 9 +- ceph/src/seastar/fmt/support/cmake/fmt.pc.in | 11 + ceph/src/seastar/fmt/test/CMakeLists.txt | 13 +- ceph/src/seastar/fmt/test/chrono-test.cc | 170 + ceph/src/seastar/fmt/test/core-test.cc | 167 +- .../seastar/fmt/test/custom-formatter-test.cc | 4 +- ceph/src/seastar/fmt/test/format-impl-test.cc | 46 +- ceph/src/seastar/fmt/test/format-test.cc | 278 +- ceph/src/seastar/fmt/test/gtest-extra-test.cc | 12 +- ceph/src/seastar/fmt/test/gtest-extra.h | 2 +- ceph/src/seastar/fmt/test/locale-test.cc | 34 + ceph/src/seastar/fmt/test/mock-allocator.h | 4 +- ceph/src/seastar/fmt/test/ostream-test.cc | 9 +- ceph/src/seastar/fmt/test/posix-mock-test.cc | 16 +- ceph/src/seastar/fmt/test/posix-test.cc | 24 +- ceph/src/seastar/fmt/test/printf-test.cc | 179 +- ceph/src/seastar/fmt/test/ranges-test.cc | 3 +- ceph/src/seastar/fmt/test/time-test.cc | 15 +- ceph/src/seastar/fmt/test/util.h | 2 +- .../telemetry/server/ceph_telemetry/app.py | 3 +- .../server/ceph_telemetry/rest/device.py | 40 + .../test/cli-integration/balancer/misplaced.t | 28 + ceph/src/test/cli/rbd/help.t | 43 +- ceph/src/test/cls_rbd/test_cls_rbd.cc | 25 +- ceph/src/test/common/test_bloom_filter.cc | 38 +- ceph/src/test/common/test_time.cc | 2 +- ceph/src/test/fs/test_trim_caps.cc | 2 + ceph/src/test/libcephfs/access.cc | 2 +- ceph/src/test/libcephfs/acl.cc | 2 + ceph/src/test/libcephfs/caps.cc | 2 + ceph/src/test/libcephfs/ceph_pthread_self.h | 31 + ceph/src/test/libcephfs/deleg.cc | 2 +- ceph/src/test/libcephfs/flock.cc | 61 +- ceph/src/test/libcephfs/multiclient.cc | 2 + ceph/src/test/libcephfs/reclaim.cc | 6 +- ceph/src/test/libcephfs/recordlock.cc | 64 +- ceph/src/test/libcephfs/test.cc | 3 +- ceph/src/test/librados/aio.cc | 4 +- .../librados_test_stub/LibradosTestStub.cc | 13 + ceph/src/test/librbd/CMakeLists.txt | 1 + ceph/src/test/librbd/fsx.cc | 1 + .../image/test_mock_PreRemoveRequest.cc | 82 +- .../librbd/image/test_mock_RefreshRequest.cc | 2 +- ceph/src/test/librbd/mock/MockImageCtx.h | 2 +- .../object_map/test_mock_LockRequest.cc | 11 +- ceph/src/test/librbd/test_Trash.cc | 16 + 
ceph/src/test/librbd/test_internal.cc | 69 + ceph/src/test/librbd/test_mirroring.cc | 40 + .../librbd/trash/test_mock_RemoveRequest.cc | 231 + ceph/src/test/librgw_file.cc | 6 +- ceph/src/test/librgw_file_aw.cc | 4 +- ceph/src/test/librgw_file_gp.cc | 5 +- ceph/src/test/librgw_file_marker.cc | 7 +- ceph/src/test/librgw_file_nfsns.cc | 40 +- ceph/src/test/mon/CMakeLists.txt | 24 + ceph/src/test/mon/test_log_rss_usage.cc | 101 + ceph/src/test/mon/test_mon_memory_target.cc | 78 + ceph/src/test/mon/test_mon_rss_usage.cc | 72 + ceph/src/test/objectstore/Allocator_test.cc | 62 + ceph/src/test/objectstore/store_test.cc | 207 +- .../test/objectstore/test_bluestore_types.cc | 26 +- ceph/src/test/osd/TestOSDMap.cc | 86 + ceph/src/test/osd/TestPGLog.cc | 2 - ceph/src/test/pybind/test_rbd.py | 29 +- ceph/src/test/rbd_mirror/CMakeLists.txt | 2 +- .../image_deleter/test_mock_RemoveRequest.cc | 251 - .../test_mock_TrashMoveRequest.cc | 1 + .../test_mock_TrashRemoveRequest.cc | 452 + ceph/src/test/rgw/CMakeLists.txt | 8 +- ceph/src/test/rgw/amqp_mock.cc | 66 +- ceph/src/test/rgw/amqp_mock.h | 2 + ceph/src/test/rgw/rgw_multi/multisite.py | 8 +- ceph/src/test/rgw/rgw_multi/tests.py | 10 + ceph/src/test/rgw/rgw_multi/tests_ps.py | 3013 +++++- ceph/src/test/rgw/rgw_multi/zone_ps.py | 225 +- ceph/src/test/rgw/test_multi.md | 12 + ceph/src/test/rgw/test_multi.py | 36 +- ceph/src/test/rgw/test_rgw_amqp.cc | 246 +- ceph/src/test/rgw/test_rgw_arn.cc | 107 + .../test/rgw/test_rgw_dmclock_scheduler.cc | 2 + ceph/src/test/rgw/test_rgw_iam_policy.cc | 15 +- ceph/src/test/rgw/test_rgw_xml.cc | 28 + ceph/src/tools/ceph_objectstore_tool.cc | 4 +- ceph/src/tools/cephfs/Resetter.cc | 4 +- ceph/src/tools/cephfs/cephfs-shell | 351 +- ceph/src/tools/rados/rados.cc | 22 +- ceph/src/tools/rbd/action/MirrorPool.cc | 275 +- ceph/src/tools/rbd_ggate/main.cc | 6 +- ceph/src/tools/rbd_mirror/CMakeLists.txt | 2 +- ceph/src/tools/rbd_mirror/ImageDeleter.cc | 5 +- ceph/src/tools/rbd_mirror/ImageReplayer.cc | 3 +- ceph/src/tools/rbd_mirror/PoolReplayer.cc | 3 +- .../rbd_mirror/image_deleter/RemoveRequest.cc | 151 - .../image_deleter/TrashMoveRequest.cc | 2 +- .../image_deleter/TrashRemoveRequest.cc | 265 + .../{RemoveRequest.h => TrashRemoveRequest.h} | 51 +- ceph/src/tools/rbd_nbd/CMakeLists.txt | 3 +- ceph/src/tools/rbd_nbd/nbd-netlink.h | 70 + ceph/src/tools/rbd_nbd/rbd-nbd.cc | 733 +- ceph/src/tools/rebuild_mondb.cc | 18 +- 778 files changed, 45991 insertions(+), 13970 deletions(-) create mode 100644 ceph/cmake/modules/Findgenl.cmake create mode 100644 ceph/doc/cephfs/fs-volumes.rst create mode 100644 ceph/doc/radosgw/notifications.rst create mode 100644 ceph/doc/radosgw/s3-notification-compatibility.rst create mode 100644 ceph/qa/clusters/2-node-mgr.yaml create mode 100644 ceph/qa/debug/mgr.yaml create mode 100755 ceph/qa/standalone/mgr/balancer.sh create mode 100755 ceph/qa/standalone/misc/network-ping.sh create mode 100755 ceph/qa/standalone/osd/divergent-priors.sh create mode 100755 ceph/qa/standalone/osd/osd-recovery-space.sh create mode 100755 ceph/qa/standalone/scrub/osd-scrub-dump.sh rename ceph/qa/suites/fs/bugs/{ => client_trim_caps}/conf (100%) create mode 100644 ceph/qa/suites/krbd/rbd-nomount/tasks/krbd_udev_enumerate.yaml create mode 100644 ceph/qa/suites/krbd/rbd-nomount/tasks/krbd_udev_netlink_enobufs.yaml create mode 100644 ceph/qa/suites/rados/dashboard/% create mode 120000 ceph/qa/suites/rados/dashboard/.qa create mode 100644 ceph/qa/suites/rados/dashboard/clusters/+ create mode 120000 
ceph/qa/suites/rados/dashboard/clusters/.qa create mode 120000 ceph/qa/suites/rados/dashboard/clusters/2-node-mgr.yaml create mode 120000 ceph/qa/suites/rados/dashboard/debug/.qa create mode 120000 ceph/qa/suites/rados/dashboard/debug/mgr.yaml create mode 120000 ceph/qa/suites/rados/dashboard/objectstore create mode 120000 ceph/qa/suites/rados/dashboard/supported-random-distro$ create mode 120000 ceph/qa/suites/rados/dashboard/tasks/.qa rename ceph/qa/suites/rados/{mgr => dashboard}/tasks/dashboard.yaml (97%) mode change 100644 => 120000 ceph/qa/suites/rados/mgr/clusters/2-node-mgr.yaml delete mode 100644 ceph/qa/suites/rados/mgr/clusters/openstack.yaml mode change 100644 => 120000 ceph/qa/suites/rados/mgr/debug/mgr.yaml create mode 100644 ceph/qa/suites/rados/singleton-nomsgr/all/balancer.yaml create mode 100644 ceph/qa/suites/rados/singleton/all/mon-memory-target-compliance.yaml.disabled create mode 100644 ceph/qa/suites/rados/standalone/workloads/mgr.yaml create mode 100644 ceph/qa/suites/rbd/mirror/workloads/rbd-mirror-bootstrap-workunit.yaml create mode 100644 ceph/qa/suites/rgw/multisite/realms/three-zone-plus-pubsub.yaml create mode 100755 ceph/qa/workunits/rbd/krbd_udev_enumerate.sh create mode 100755 ceph/qa/workunits/rbd/krbd_udev_netlink_enobufs.sh create mode 100755 ceph/qa/workunits/rbd/rbd_mirror_bootstrap.sh create mode 100644 ceph/src/ceph-volume/shell_tox.ini create mode 100644 ceph/src/librbd/trash/RemoveRequest.cc create mode 100644 ceph/src/librbd/trash/RemoveRequest.h create mode 100644 ceph/src/mount/conf.cc create mode 100644 ceph/src/mount/mount.ceph.h create mode 100644 ceph/src/pybind/mgr/dashboard/controllers/home.py delete mode 100644 ceph/src/pybind/mgr/dashboard/frontend/dist/2.23eee776b9ebe5035afa.js delete mode 100644 ceph/src/pybind/mgr/dashboard/frontend/dist/6.2177204066439fcd8170.js delete mode 100644 ceph/src/pybind/mgr/dashboard/frontend/dist/7.a311be12e7802be9438f.js delete mode 100644 ceph/src/pybind/mgr/dashboard/frontend/dist/8.0ff37488868257bf7364.js delete mode 100644 ceph/src/pybind/mgr/dashboard/frontend/dist/9.f4b21c687db5d9cef117.js create mode 100644 ceph/src/pybind/mgr/dashboard/frontend/dist/en-US/2.03f9acb8077098945447.js rename ceph/src/pybind/mgr/dashboard/frontend/dist/{ => en-US}/3rdpartylicenses.txt (100%) create mode 100644 ceph/src/pybind/mgr/dashboard/frontend/dist/en-US/6.3d63c004c315603efec1.js create mode 100644 ceph/src/pybind/mgr/dashboard/frontend/dist/en-US/7.05e89b87dac0b3fdcc5c.js create mode 100644 ceph/src/pybind/mgr/dashboard/frontend/dist/en-US/8.4a6a002d073ab0275cb2.js create mode 100644 ceph/src/pybind/mgr/dashboard/frontend/dist/en-US/9.383a20140e10eb51367c.js rename ceph/src/pybind/mgr/dashboard/frontend/dist/{ => en-US}/Ceph_Logo_Stacked_RGB_120411_fa_228x228.1ed169ccc35367a2dab2.png (100%) rename ceph/src/pybind/mgr/dashboard/frontend/dist/{ => en-US}/assets/1280px-Mimic_Octopus2.jpg (100%) rename ceph/src/pybind/mgr/dashboard/frontend/dist/{ => en-US}/assets/1280px-Nautilus_Octopus.jpg (100%) rename ceph/src/pybind/mgr/dashboard/frontend/dist/{ => en-US}/assets/Ceph_Logo_Stacked_RGB_120411_fa_228x228.png (100%) rename ceph/src/pybind/mgr/dashboard/frontend/dist/{ => en-US}/assets/Ceph_Logo_Stacked_RGB_120411_fa_348x348.png (100%) rename ceph/src/pybind/mgr/dashboard/frontend/dist/{ => en-US}/assets/Ceph_Logo_Stacked_RGB_White_120411_fa_256x256.png (100%) rename ceph/src/pybind/mgr/dashboard/frontend/dist/{ => en-US}/assets/Ceph_Logo_Standard_RGB_White_120411_fa.png (100%) rename 
ceph/src/pybind/mgr/dashboard/frontend/dist/{ => en-US}/assets/loading.gif (100%) rename ceph/src/pybind/mgr/dashboard/frontend/dist/{ => en-US}/assets/logo-mini.png (100%) rename ceph/src/pybind/mgr/dashboard/frontend/dist/{ => en-US}/assets/notification-icons.png (100%) rename ceph/src/pybind/mgr/dashboard/frontend/dist/{ => en-US}/assets/prometheus_logo.svg (100%) rename ceph/src/pybind/mgr/dashboard/frontend/dist/{ => en-US}/common.8a53d98b04768bd15706.js (100%) rename ceph/src/pybind/mgr/dashboard/frontend/dist/{ => en-US}/favicon.ico (100%) rename ceph/src/pybind/mgr/dashboard/frontend/dist/{ => en-US}/forkawesome-webfont.35e77a38ca9d85c4e897.eot (100%) rename ceph/src/pybind/mgr/dashboard/frontend/dist/{ => en-US}/forkawesome-webfont.3a9e014c2469ffa65a0e.woff2 (100%) rename ceph/src/pybind/mgr/dashboard/frontend/dist/{ => en-US}/forkawesome-webfont.44bbdbbfb5a10ba2d1ce.woff (100%) rename ceph/src/pybind/mgr/dashboard/frontend/dist/{ => en-US}/forkawesome-webfont.78dcc9c4999659b8026a.svg (100%) rename ceph/src/pybind/mgr/dashboard/frontend/dist/{ => en-US}/forkawesome-webfont.fc46f3dae03b2b2e1cee.ttf (100%) rename ceph/src/pybind/mgr/dashboard/frontend/dist/{ => en-US}/glyphicons-halflings-regular.448c34a56d699c29117a.woff2 (100%) rename ceph/src/pybind/mgr/dashboard/frontend/dist/{ => en-US}/glyphicons-halflings-regular.89889688147bd7575d63.svg (100%) rename ceph/src/pybind/mgr/dashboard/frontend/dist/{ => en-US}/glyphicons-halflings-regular.e18bbf611f2a2e43afc0.ttf (100%) rename ceph/src/pybind/mgr/dashboard/frontend/dist/{ => en-US}/glyphicons-halflings-regular.f4769f9bdb7466be6508.eot (100%) rename ceph/src/pybind/mgr/dashboard/frontend/dist/{ => en-US}/glyphicons-halflings-regular.fa2772327f55d8198301.woff (100%) rename ceph/src/pybind/mgr/dashboard/frontend/dist/{ => en-US}/index.html (83%) create mode 100644 ceph/src/pybind/mgr/dashboard/frontend/dist/en-US/main.4cca9699be2afd74460d.js rename ceph/src/pybind/mgr/dashboard/frontend/dist/{ => en-US}/polyfills.f31db31652a3fd9f4bca.js (100%) rename ceph/src/pybind/mgr/dashboard/frontend/dist/{ => en-US}/prometheus_logo.074db273ef932a67d91b.svg (100%) create mode 100644 ceph/src/pybind/mgr/dashboard/frontend/dist/en-US/runtime.e449ada3d1974ef4aa64.js rename ceph/src/pybind/mgr/dashboard/frontend/dist/{ => en-US}/scripts.fc88ef4a23399c760d0b.js (100%) rename ceph/src/pybind/mgr/dashboard/frontend/dist/{styles.e201ff296d50970399c4.css => en-US/styles.f5317b15474518dffebc.css} (77%) delete mode 100644 ceph/src/pybind/mgr/dashboard/frontend/dist/main.7ef9e458bdf2879bd531.js delete mode 100644 ceph/src/pybind/mgr/dashboard/frontend/dist/runtime.c2fa00b158659de6ccaa.js create mode 100644 ceph/src/pybind/mgr/dashboard/frontend/i18n.config.json create mode 100644 ceph/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/iscsi-setting/iscsi-setting.component.html create mode 100644 ceph/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/iscsi-setting/iscsi-setting.component.scss create mode 100644 ceph/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/iscsi-setting/iscsi-setting.component.spec.ts create mode 100644 ceph/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/iscsi-setting/iscsi-setting.component.ts delete mode 100644 ceph/src/pybind/mgr/dashboard/frontend/src/app/locale.helper.ts create mode 100644 ceph/src/pybind/mgr/dashboard/frontend/src/app/shared/services/language.service.spec.ts create mode 100644 ceph/src/pybind/mgr/dashboard/frontend/src/app/shared/services/language.service.ts create mode 100644 
ceph/src/pybind/mgr/dashboard/frontend/src/locale/messages.en-US.xlf create mode 100644 ceph/src/pybind/mgr/dashboard/tests/test_home.py create mode 100644 ceph/src/pybind/mgr/dashboard/tests/test_pool.py create mode 100644 ceph/src/pybind/mgr/k8sevents/README.md create mode 100644 ceph/src/pybind/mgr/k8sevents/__init__.py create mode 100644 ceph/src/pybind/mgr/k8sevents/module.py create mode 100644 ceph/src/pybind/mgr/k8sevents/rbac_sample.yaml create mode 100644 ceph/src/rgw/rgw_arn.cc create mode 100644 ceph/src/rgw/rgw_arn.h create mode 100644 ceph/src/rgw/rgw_notify.cc create mode 100644 ceph/src/rgw/rgw_notify.h create mode 100644 ceph/src/rgw/rgw_notify_event_type.cc create mode 100644 ceph/src/rgw/rgw_notify_event_type.h create mode 100644 ceph/src/rgw/rgw_object_lock.cc create mode 100644 ceph/src/rgw/rgw_object_lock.h create mode 100644 ceph/src/rgw/rgw_rest_pubsub.cc create mode 100644 ceph/src/rgw/rgw_rest_pubsub.h create mode 100644 ceph/src/rgw/rgw_rest_pubsub_common.cc create mode 100644 ceph/src/rgw/rgw_rest_pubsub_common.h create mode 100644 ceph/src/seastar/fmt/include/fmt/chrono.h create mode 100644 ceph/src/seastar/fmt/include/fmt/locale.h create mode 100644 ceph/src/seastar/fmt/support/cmake/fmt.pc.in create mode 100644 ceph/src/seastar/fmt/test/chrono-test.cc create mode 100644 ceph/src/seastar/fmt/test/locale-test.cc create mode 100644 ceph/src/telemetry/server/ceph_telemetry/rest/device.py create mode 100644 ceph/src/test/cli-integration/balancer/misplaced.t create mode 100644 ceph/src/test/libcephfs/ceph_pthread_self.h create mode 100644 ceph/src/test/librbd/trash/test_mock_RemoveRequest.cc create mode 100644 ceph/src/test/mon/test_log_rss_usage.cc create mode 100644 ceph/src/test/mon/test_mon_memory_target.cc create mode 100644 ceph/src/test/mon/test_mon_rss_usage.cc delete mode 100644 ceph/src/test/rbd_mirror/image_deleter/test_mock_RemoveRequest.cc create mode 100644 ceph/src/test/rbd_mirror/image_deleter/test_mock_TrashRemoveRequest.cc create mode 100644 ceph/src/test/rgw/test_rgw_arn.cc delete mode 100644 ceph/src/tools/rbd_mirror/image_deleter/RemoveRequest.cc create mode 100644 ceph/src/tools/rbd_mirror/image_deleter/TrashRemoveRequest.cc rename ceph/src/tools/rbd_mirror/image_deleter/{RemoveRequest.h => TrashRemoveRequest.h} (50%) create mode 100644 ceph/src/tools/rbd_nbd/nbd-netlink.h diff --git a/ceph/.github/pull_request_template.md b/ceph/.github/pull_request_template.md index bcd25ac68..69a755f04 100644 --- a/ceph/.github/pull_request_template.md +++ b/ceph/.github/pull_request_template.md @@ -30,8 +30,23 @@ This was just a quick overview. More information for contributors is available https://raw.githubusercontent.com/ceph/ceph/master/SubmittingPatches.rst --> - +## Checklist - [ ] References tracker ticket - [ ] Updates documentation if necessary - [ ] Includes tests for new functionality or reproducer for bug +--- + +
+<details> +<summary>Show available Jenkins commands</summary> + +- `jenkins retest this please` +- `jenkins test make check` +- `jenkins test make check arm64` +- `jenkins test submodules` +- `jenkins test dashboard` +- `jenkins test dashboard backend` +- `jenkins test docs` +- `jenkins render docs` + +</details>
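A patch in this mbox-style format can be previewed and applied with standard git tooling before it is imported into a packaging branch; a minimal sketch, assuming the mail has been saved under an illustrative name such as 0001-import-ceph-14.2.5.patch:

    # Show the diffstat and mode changes without touching the working tree
    git apply --stat --summary 0001-import-ceph-14.2.5.patch

    # Verify that the patch applies cleanly against the current checkout
    git apply --check 0001-import-ceph-14.2.5.patch

    # Apply it as a commit, keeping the author, date and Signed-off-by from the header
    git am --3way 0001-import-ceph-14.2.5.patch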
diff --git a/ceph/CMakeLists.txt b/ceph/CMakeLists.txt index 4e187f261..1e9ebfbb4 100644 --- a/ceph/CMakeLists.txt +++ b/ceph/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.5.1) project(ceph CXX C ASM) -set(VERSION 14.2.4) +set(VERSION 14.2.5) if(POLICY CMP0028) cmake_policy(SET CMP0028 NEW) @@ -210,20 +210,33 @@ endif() find_package(Backtrace) +# remote block storage +option(WITH_RBD "Remote block storage is here" ON) + if(LINUX) find_package(udev REQUIRED) set(HAVE_UDEV ${UDEV_FOUND}) find_package(blkid REQUIRED) set(HAVE_BLKID ${BLKID_FOUND}) + if(WITH_RBD) + find_package(genl REQUIRED) + set(HAVE_GENL $GENL_FOUND) + endif() + find_package(keyutils REQUIRED) + set(HAVE_KEYUTILS ${KEYUTILS_FOUND}) elseif(FREEBSD) set(HAVE_UDEV OFF) set(HAVE_LIBAIO OFF) set(HAVE_BLKID OFF) + set(HAVE_GENL OFF) + set(HAVE_KEYUTILS OFF) else() set(HAVE_UDEV OFF) message(STATUS "Not using udev") set(HAVE_BLKID OFF) message(STATUS "Not using BLKID") + set(HAVE_GENL OFF) + message(STATUS "Not using GENL") endif(LINUX) option(WITH_OPENLDAP "OPENLDAP is here" ON) @@ -267,11 +280,7 @@ if(WITH_BLUESTORE) endif() endif() -if(CMAKE_SYSTEM_PROCESSOR MATCHES "i386|i686|amd64|x86_64|AMD64|aarch64") - option(WITH_SPDK "Enable SPDK" ON) -else() - option(WITH_SPDK "Enable SPDK" OFF) -endif() +option(WITH_SPDK "Enable SPDK" OFF) if(WITH_SPDK) if(NOT WITH_BLUESTORE) message(SEND_ERROR "Please enable WITH_BLUESTORE for using SPDK") @@ -311,15 +320,19 @@ option(WITH_LIBCEPHFS "libcephfs client library" ON) # key-value store option(WITH_KVS "Key value store is here" ON) -# remote block storage -option(WITH_RBD "Remote block storage is here" ON) - # KERNEL remote block storage option(WITH_KRBD "Kernel Remote block storage is here" ON) if(WITH_KRBD AND WITHOUT_RBD) message(FATAL_ERROR "Cannot have WITH_KRBD with WITH_RBD.") endif() +if(LINUX) + if(WITH_LIBCEPHFS OR WITH_KRBD) + # keyutils is only used when talking to the Linux Kernel key store + find_package(keyutils REQUIRED) + set(HAVE_KEYUTILS ${KEYUTILS_FOUND}) + endif() +endif() option(WITH_LEVELDB "LevelDB is here" ON) if(WITH_LEVELDB) @@ -386,10 +399,6 @@ else() set(EXE_LINKER_USE_PIE ${ENABLE_SHARED}) endif() -if(WITH_LIBCEPHFS OR WITH_KRBD) - find_package(keyutils REQUIRED) -endif() - find_package(CURL REQUIRED) set(CMAKE_REQUIRED_INCLUDES ${CURL_INCLUDE_DIRS}) set(CMAKE_REQUIRED_LIBRARIES ${CURL_LIBRARIES}) @@ -681,6 +690,9 @@ if(WITH_SYSTEM_NPM) message(FATAL_ERROR "Can't find npm.") endif() endif() +set(DASHBOARD_FRONTEND_LANGS "" CACHE STRING + "List of comma separated ceph-dashboard frontend languages to build. \ + Use value `ALL` to build all languages") include_directories(SYSTEM ${PROJECT_BINARY_DIR}/include) diff --git a/ceph/PendingReleaseNotes b/ceph/PendingReleaseNotes index 131801d14..d4fb7f981 100644 --- a/ceph/PendingReleaseNotes +++ b/ceph/PendingReleaseNotes @@ -1,4 +1,13 @@ ->=14.2.2 +14.2.4 +------ + +* In the Zabbix Mgr Module there was a typo in the key being sent + to Zabbix for PGs in backfill_wait state. The key that was sent + was 'wait_backfill' and the correct name is 'backfill_wait'. + Update your Zabbix template accordingly so that it accepts the + new key being sent to Zabbix. + +14.2.3 -------- * Nautilus-based librbd clients can now open images on Jewel clusters. @@ -8,6 +17,12 @@ multiply your current "objecter_inflight_ops" and "objecter_inflight_op_bytes" parameters by the old "num_rados_handles" to get the same throttle behavior.
+ +* The ``bluestore_no_per_pool_stats_tolerance`` config option has been + replaced with ``bluestore_fsck_error_on_no_per_pool_stats`` + (default: false). The overall default behavior has not changed: + fsck will warn but not fail on legacy stores, and repair will + convert to per-pool stats. 14.2.2 ------ @@ -24,3 +39,80 @@ bucket reshard in earlier versions of RGW. One subcommand lists such objects and the other deletes them. Read the troubleshooting section of the dynamic resharding docs for details. + +14.2.5 +------ + +* The telemetry module now has a 'device' channel, enabled by default, that + will report anonymized hard disk and SSD health metrics to telemetry.ceph.com + in order to build and improve device failure prediction algorithms. Because + the content of telemetry reports has changed, you will need to re-opt-in + with:: + + ceph telemetry on + + You can view exactly what information will be reported first with:: + + ceph telemetry show + ceph telemetry show device # specifically show the device channel + + If you are not comfortable sharing device metrics, you can disable that + channel first before re-opting-in:: + + ceph config set mgr mgr/telemetry/channel_device false + ceph telemetry on + +* The telemetry module now reports more information about CephFS file systems, + including: + + - how many MDS daemons (in total and per file system) + - which features are (or have been) enabled + - how many data pools + - approximate file system age (year + month of creation) + - how many files, bytes, and snapshots + - how much metadata is being cached + + We have also added: + + - which Ceph release the monitors are running + - whether msgr v1 or v2 addresses are used for the monitors + - whether IPv4 or IPv6 addresses are used for the monitors + - whether RADOS cache tiering is enabled (and which mode) + - whether pools are replicated or erasure coded, and + which erasure code profile plugin and parameters are in use + - how many hosts are in the cluster, and how many hosts have each type of daemon + - whether a separate OSD cluster network is being used + - how many RBD pools and images are in the cluster, and how many pools have RBD mirroring enabled + - how many RGW daemons, zones, and zonegroups are present; which RGW frontends are in use + - aggregate stats about the CRUSH map, like which algorithms are used, how big buckets are, how many rules are defined, and what tunables are in use + + If you had telemetry enabled, you will need to re-opt-in with:: + + ceph telemetry on + + You can view exactly what information will be reported first with:: + + ceph telemetry show # see everything + ceph telemetry show basic # basic cluster info (including all of the new info) + +* A health warning is now generated if the average OSD heartbeat ping + time exceeds a configurable threshold for any of the intervals + computed. The OSD computes 1 minute, 5 minute and 15 minute + intervals with average, minimum and maximum values. New configuration + option ``mon_warn_on_slow_ping_ratio`` specifies a percentage of + ``osd_heartbeat_grace`` to determine the threshold. A value of zero + disables the warning. New configuration option + ``mon_warn_on_slow_ping_time``, specified in milliseconds, overrides the + computed value and causes a warning + when OSD heartbeat pings take longer than the specified amount.
+ New admin command ``ceph daemon mgr.# dump_osd_network [threshold]`` will + list all connections with a ping time longer than the specified threshold or + value determined by the config options, for the average of any of the 3 intervals. + New admin command ``ceph daemon osd.# dump_osd_network [threshold]`` will + do the same, but only for heartbeats initiated by the specified OSD. + +* New OSD daemon command ``dump_recovery_reservations`` reveals the + recovery locks held (in_progress) and waiting in priority queues. + +* New OSD daemon command ``dump_scrub_reservations`` reveals the + scrub reservations that are held for local (primary) and remote (replica) PGs. diff --git a/ceph/admin/build-doc b/ceph/admin/build-doc index 6e94d459d..7a23c7d4b 100755 --- a/ceph/admin/build-doc +++ b/ceph/admin/build-doc @@ -20,7 +20,8 @@ if command -v dpkg >/dev/null; then exit 1 fi elif command -v yum >/dev/null; then - for package in python-devel python-pip python-virtualenv doxygen ditaa ant libxml2-devel libxslt-devel Cython graphviz; do + python_package="python$(rpm --eval '%{python3_pkgversion}')" + for package in "$python_package"-devel "$python_package"-pip "$python_package"-virtualenv doxygen ditaa ant libxml2-devel libxslt-devel "$python_package"-Cython graphviz; do if ! rpm -q --whatprovides $package >/dev/null ; then missing="${missing:+$missing }$package" fi @@ -57,7 +58,7 @@ cd build-doc [ -z "$vdir" ] && vdir="$TOPDIR/build-doc/virtualenv" if [ ! -e $vdir ]; then - virtualenv --system-site-packages $vdir + virtualenv --python=python3 --system-site-packages $vdir fi $vdir/bin/pip install --quiet -r $TOPDIR/admin/doc-requirements.txt diff --git a/ceph/admin/doc-requirements.txt b/ceph/admin/doc-requirements.txt index 0939b6813..0af3bde92 100644 --- a/ceph/admin/doc-requirements.txt +++ b/ceph/admin/doc-requirements.txt @@ -1,7 +1,4 @@ -Sphinx == 1.8.3 +Sphinx == 2.1.2 git+https://github.com/ceph/sphinx-ditaa.git@py3#egg=sphinx-ditaa -# newer versions of breathe will require Sphinx >= 2.0.0 and are Python3 only -breathe==4.12.0 -# 4.2 is not yet release at the time of writing, to address CVE-2017-18342, -# we have to use its beta release. -pyyaml>=4.2b1 +breathe == 4.13.1 +pyyaml >= 5.1.2 diff --git a/ceph/alpine/APKBUILD b/ceph/alpine/APKBUILD index 116118724..8f05680d6 100644 --- a/ceph/alpine/APKBUILD +++ b/ceph/alpine/APKBUILD @@ -1,7 +1,7 @@ # Contributor: John Coyle # Maintainer: John Coyle pkgname=ceph -pkgver=14.2.4 +pkgver=14.2.5 pkgrel=0 pkgdesc="Ceph is a distributed object store and file system" pkgusers="ceph" @@ -64,7 +64,7 @@ makedepends=" xmlstarlet yasm " -source="ceph-14.2.4.tar.bz2" +source="ceph-14.2.5.tar.bz2" subpackages=" $pkgname-base $pkgname-common @@ -117,7 +117,7 @@ _sysconfdir=/etc _udevrulesdir=/etc/udev/rules.d _python_sitelib=/usr/lib/python2.7/site-packages -builddir=$srcdir/ceph-14.2.4 +builddir=$srcdir/ceph-14.2.5 build() { export CEPH_BUILD_VIRTUALENV=$builddir diff --git a/ceph/ceph.spec b/ceph/ceph.spec index 4b93d8052..e6ad065e9 100644 --- a/ceph/ceph.spec +++ b/ceph/ceph.spec @@ -22,6 +22,7 @@ # bcond syntax!
################################################################################# %bcond_with make_check +%bcond_without ceph_test_package %ifarch s390 s390x %bcond_with tcmalloc %else @@ -29,7 +30,6 @@ %endif %if 0%{?fedora} || 0%{?rhel} %bcond_without selinux -%bcond_without ceph_test_package %bcond_without cephfs_java %bcond_without lttng %bcond_without libradosstriper @@ -39,7 +39,6 @@ %endif %if 0%{?suse_version} %bcond_with selinux -%bcond_with ceph_test_package %bcond_with cephfs_java %bcond_with amqp_endpoint #Compat macro for new _fillupdir macro introduced in Nov 2017 @@ -86,6 +85,7 @@ %{!?_udevrulesdir: %global _udevrulesdir /lib/udev/rules.d} %{!?tmpfiles_create: %global tmpfiles_create systemd-tmpfiles --create} %{!?python3_pkgversion: %global python3_pkgversion 3} +%{!?python3_version_nodots: %global python3_version_nodots 3} %{!?python3_version: %global python3_version 3} # define _python_buildid macro which will expand to the empty string when # building with python2 @@ -101,7 +101,7 @@ # main package definition ################################################################################# Name: ceph -Version: 14.2.4 +Version: 14.2.5 Release: 0%{?dist} %if 0%{?fedora} || 0%{?rhel} Epoch: 2 @@ -117,7 +117,7 @@ License: LGPL-2.1 and CC-BY-SA-3.0 and GPL-2.0 and BSL-1.0 and BSD-3-Clause and Group: System/Filesystems %endif URL: http://ceph.com/ -Source0: %{?_remote_tarball_prefix}ceph-14.2.4.tar.bz2 +Source0: %{?_remote_tarball_prefix}ceph-14.2.5.tar.bz2 %if 0%{?suse_version} # _insert_obs_source_lines_here ExclusiveArch: x86_64 aarch64 ppc64le s390x @@ -149,7 +149,11 @@ BuildRequires: fuse-devel %if 0%{?rhel} == 7 # devtoolset offers newer make and valgrind-devel, but the old ones are good # enough. +%ifarch x86_64 +BuildRequires: devtoolset-8-gcc-c++ >= 8.2.1 +%else BuildRequires: devtoolset-7-gcc-c++ >= 7.3.1-5.13 +%endif %else BuildRequires: gcc-c++ %endif @@ -166,14 +170,16 @@ BuildRequires: leveldb-devel > 1.2 BuildRequires: libaio-devel BuildRequires: libblkid-devel >= 2.17 BuildRequires: libcurl-devel +BuildRequires: libcap-ng-devel BuildRequires: libudev-devel +BuildRequires: libnl3-devel BuildRequires: liboath-devel BuildRequires: libtool BuildRequires: libxml2-devel -BuildRequires: libuuid-devel BuildRequires: make BuildRequires: ncurses-devel BuildRequires: parted +BuildRequires: patch BuildRequires: perl BuildRequires: pkgconfig BuildRequires: procps @@ -194,6 +200,7 @@ BuildRequires: librabbitmq-devel %endif %if 0%{with make_check} BuildRequires: jq +BuildRequires: libuuid-devel BuildRequires: python%{_python_buildid}-bcrypt BuildRequires: python%{_python_buildid}-coverage BuildRequires: python%{_python_buildid}-nose @@ -228,6 +235,7 @@ BuildRequires: pkgconfig(systemd) BuildRequires: systemd-rpm-macros %{?systemd_requires} PreReq: %fillup_prereq +BuildRequires: fdupes BuildRequires: net-tools BuildRequires: libbz2-devel BuildRequires: mozilla-nss-devel @@ -264,7 +272,11 @@ BuildRequires: python2-Cython %endif BuildRequires: python%{python3_pkgversion}-devel BuildRequires: python%{python3_pkgversion}-setuptools +%if 0%{?rhel} +BuildRequires: python%{python3_version_nodots}-Cython +%else BuildRequires: python%{python3_pkgversion}-Cython +%endif BuildRequires: python%{_python_buildid}-prettytable BuildRequires: python%{_python_buildid}-sphinx BuildRequires: lz4-devel >= 1.7 @@ -443,6 +455,7 @@ Recommends: ceph-mgr-dashboard = %{_epoch_prefix}%{version}-%{release} Recommends: ceph-mgr-diskprediction-local = %{_epoch_prefix}%{version}-%{release} Recommends: 
ceph-mgr-diskprediction-cloud = %{_epoch_prefix}%{version}-%{release} Recommends: ceph-mgr-rook = %{_epoch_prefix}%{version}-%{release} +Recommends: ceph-mgr-k8sevents = %{_epoch_prefix}%{version}-%{release} Recommends: ceph-mgr-ssh = %{_epoch_prefix}%{version}-%{release} %endif %if 0%{?rhel} == 7 @@ -533,6 +546,18 @@ Requires: python%{_python_buildid}-kubernetes ceph-mgr-rook is a ceph-mgr plugin for orchestration functions using a Rook backend. +%package mgr-k8sevents +BuildArch: noarch +Summary: Ceph Manager plugin to orchestrate ceph-events to kubernetes' events API +%if 0%{?suse_version} +Group: System/Filesystems +%endif +Requires: ceph-mgr = %{_epoch_prefix}%{version}-%{release} +Requires: python%{_python_buildid}-kubernetes +%description mgr-k8sevents +ceph-mgr-k8sevents is a ceph-mgr plugin that sends every ceph-events +to kubernetes' events API + %package mgr-ssh Summary: ceph-mgr ssh module BuildArch: noarch @@ -705,6 +730,7 @@ Group: Development/Libraries/Python %endif Requires: librgw2 = %{_epoch_prefix}%{version}-%{release} Requires: python-rados = %{_epoch_prefix}%{version}-%{release} +%{?python_provide:%python_provide python-rgw} Obsoletes: python-ceph < %{_epoch_prefix}%{version}-%{release} %description -n python-rgw This package contains Python 2 libraries for interacting with Cephs RADOS @@ -718,7 +744,7 @@ Group: Development/Libraries/Python %endif Requires: librgw2 = %{_epoch_prefix}%{version}-%{release} Requires: python%{python3_pkgversion}-rados = %{_epoch_prefix}%{version}-%{release} -Provides: python3-rgw = %{_epoch_prefix}%{version}-%{release} +%{?python_provide:%python_provide python%{python3_pkgversion}-rgw} %if 0%{without python2} Provides: python-rgw = %{_epoch_prefix}%{version}-%{release} Obsoletes: python-rgw < %{_epoch_prefix}%{version}-%{release} @@ -734,6 +760,7 @@ Summary: Python 2 libraries for the RADOS object store Group: Development/Libraries/Python %endif Requires: librados2 = %{_epoch_prefix}%{version}-%{release} +%{?python_provide:%python_provide python-rados} Obsoletes: python-ceph < %{_epoch_prefix}%{version}-%{release} %description -n python-rados This package contains Python 2 libraries for interacting with Cephs RADOS @@ -747,7 +774,7 @@ Group: Development/Libraries/Python %endif Requires: python%{python3_pkgversion} Requires: librados2 = %{_epoch_prefix}%{version}-%{release} -Provides: python3-rados = %{_epoch_prefix}%{version}-%{release} +%{?python_provide:%python_provide python%{python3_pkgversion}-rados} %if 0%{without python2} Provides: python-rados = %{_epoch_prefix}%{version}-%{release} Obsoletes: python-rados < %{_epoch_prefix}%{version}-%{release} @@ -825,6 +852,7 @@ Group: Development/Libraries/Python %endif Requires: librbd1 = %{_epoch_prefix}%{version}-%{release} Requires: python-rados = %{_epoch_prefix}%{version}-%{release} +%{?python_provide:%python_provide python-rbd} Obsoletes: python-ceph < %{_epoch_prefix}%{version}-%{release} %description -n python-rbd This package contains Python 2 libraries for interacting with Cephs RADOS @@ -838,6 +866,7 @@ Group: Development/Libraries/Python %endif Requires: librbd1 = %{_epoch_prefix}%{version}-%{release} Requires: python%{python3_pkgversion}-rados = %{_epoch_prefix}%{version}-%{release} +%{?python_provide:%python_provide python%{python3_pkgversion}-rbd} Provides: python3-rbd = %{_epoch_prefix}%{version}-%{release} %if 0%{without python2} Provides: python-rbd = %{_epoch_prefix}%{version}-%{release} @@ -886,6 +915,7 @@ Group: Development/Libraries/Python Requires: libcephfs2 = 
%{_epoch_prefix}%{version}-%{release} Requires: python-rados = %{_epoch_prefix}%{version}-%{release} Requires: python-ceph-argparse = %{_epoch_prefix}%{version}-%{release} +%{?python_provide:%python_provide python-cephfs} Obsoletes: python-ceph < %{_epoch_prefix}%{version}-%{release} %description -n python-cephfs This package contains Python 2 libraries for interacting with Cephs distributed @@ -900,7 +930,7 @@ Group: Development/Libraries/Python Requires: libcephfs2 = %{_epoch_prefix}%{version}-%{release} Requires: python%{python3_pkgversion}-rados = %{_epoch_prefix}%{version}-%{release} Requires: python%{python3_pkgversion}-ceph-argparse = %{_epoch_prefix}%{version}-%{release} -Provides: python3-cephfs = %{_epoch_prefix}%{version}-%{release} +%{?python_provide:%python_provide python%{python3_pkgversion}-cephfs} %if 0%{without python2} Provides: python-cephfs = %{_epoch_prefix}%{version}-%{release} Obsoletes: python-cephfs < %{_epoch_prefix}%{version}-%{release} @@ -927,7 +957,7 @@ Summary: Python 3 utility libraries for Ceph CLI %if 0%{?suse_version} Group: Development/Libraries/Python %endif -Provides: python3-ceph-argparse = %{_epoch_prefix}%{version}-%{release} +%{?python_provide:%python_provide python%{python3_pkgversion}-ceph-argparse} %description -n python%{python3_pkgversion}-ceph-argparse This package contains types and routines for Python 3 used by the Ceph CLI as well as the RESTful interface. These have to do with querying the daemons for @@ -1075,7 +1105,7 @@ This package provides Ceph’s default alerts for Prometheus. # common ################################################################################# %prep -%autosetup -p1 -n ceph-14.2.4 +%autosetup -p1 -n ceph-14.2.5 %build # LTO can be enabled as soon as the following GCC bug is fixed: @@ -1083,7 +1113,7 @@ This package provides Ceph’s default alerts for Prometheus. %define _lto_cflags %{nil} %if 0%{?rhel} == 7 -. /opt/rh/devtoolset-7/enable +. 
/opt/rh/devtoolset-8/enable %endif %if 0%{with cephfs_java} @@ -1116,7 +1146,7 @@ free -h echo "System limits:" ulimit -a if test -n "$CEPH_SMP_NCPUS" -a "$CEPH_SMP_NCPUS" -gt 1 ; then - mem_per_process=1800 + mem_per_process=2500 max_mem=$(LANG=C free -m | sed -n "s|^Mem: *\([0-9]*\).*$|\1|p") max_jobs="$(($max_mem / $mem_per_process))" test "$CEPH_SMP_NCPUS" -gt "$max_jobs" && CEPH_SMP_NCPUS="$max_jobs" && echo "Warning: Reducing build parallelism to -j$max_jobs because of memory limits" @@ -1261,7 +1291,10 @@ mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/bootstrap-rbd-mirror %py3_compile %{buildroot}%{python3_sitelib} # prometheus alerts install -m 644 -D monitoring/prometheus/alerts/ceph_default_alerts.yml %{buildroot}/etc/prometheus/SUSE/default_rules/ceph_default_alerts.yml +# hardlink duplicate files under /usr to save space +%fdupes %{buildroot}%{_prefix} %endif + %if 0%{?rhel} == 8 %py_byte_compile %{__python3} %{buildroot}%{python3_sitelib} %endif @@ -1644,6 +1677,19 @@ if [ $1 -eq 1 ] ; then /usr/bin/systemctl try-restart ceph-mgr.target >/dev/null 2>&1 || : fi +%files mgr-k8sevents +%{_datadir}/ceph/mgr/k8sevents + +%post mgr-k8sevents +if [ $1 -eq 1 ] ; then + /usr/bin/systemctl try-restart ceph-mgr.target >/dev/null 2>&1 || : +fi + +%postun mgr-k8sevents +if [ $1 -eq 1 ] ; then + /usr/bin/systemctl try-restart ceph-mgr.target >/dev/null 2>&1 || : +fi + %files mgr-ssh %{_datadir}/ceph/mgr/ssh @@ -1903,6 +1949,7 @@ fi %if %{with lttng} %{_libdir}/librados_tp.so.* %endif +%dir %{_sysconfdir}/ceph %post -n librados2 -p /sbin/ldconfig @@ -1979,8 +2026,8 @@ fi %{_libdir}/librgw.so.* %{_libdir}/librgw_admin_user.so.* %if %{with lttng} -%{_libdir}/librgw_op_tp.so* -%{_libdir}/librgw_rados_tp.so* +%{_libdir}/librgw_op_tp.so.* +%{_libdir}/librgw_rados_tp.so.* %endif %post -n librgw2 -p /sbin/ldconfig @@ -1994,6 +2041,10 @@ fi %{_includedir}/rados/rgw_file.h %{_libdir}/librgw.so %{_libdir}/librgw_admin_user.so +%if %{with lttng} +%{_libdir}/librgw_op_tp.so +%{_libdir}/librgw_rados_tp.so +%endif %if 0%{with python2} %files -n python-rgw @@ -2017,6 +2068,7 @@ fi %files -n libcephfs2 %{_libdir}/libcephfs.so.* +%dir %{_sysconfdir}/ceph %post -n libcephfs2 -p /sbin/ldconfig diff --git a/ceph/ceph.spec.in b/ceph/ceph.spec.in index 734542815..bcf706d18 100644 --- a/ceph/ceph.spec.in +++ b/ceph/ceph.spec.in @@ -22,6 +22,7 @@ # bcond syntax! ################################################################################# %bcond_with make_check +%bcond_without ceph_test_package %ifarch s390 s390x %bcond_with tcmalloc %else @@ -29,7 +30,6 @@ %endif %if 0%{?fedora} || 0%{?rhel} %bcond_without selinux -%bcond_without ceph_test_package %bcond_without cephfs_java %bcond_without lttng %bcond_without libradosstriper @@ -39,7 +39,6 @@ %endif %if 0%{?suse_version} %bcond_with selinux -%bcond_with ceph_test_package %bcond_with cephfs_java %bcond_with amqp_endpoint #Compat macro for new _fillupdir macro introduced in Nov 2017 @@ -86,6 +85,7 @@ %{!?_udevrulesdir: %global _udevrulesdir /lib/udev/rules.d} %{!?tmpfiles_create: %global tmpfiles_create systemd-tmpfiles --create} %{!?python3_pkgversion: %global python3_pkgversion 3} +%{!?python3_version_nodots: %global python3_version_nodots 3} %{!?python3_version: %global python3_version 3} # define _python_buildid macro which will expand to the empty string when # building with python2 @@ -149,7 +149,11 @@ BuildRequires: fuse-devel %if 0%{?rhel} == 7 # devtoolset offers newer make and valgrind-devel, but the old ones are good # enough. 
+%ifarch x86_64 +BuildRequires: devtoolset-8-gcc-c++ >= 8.2.1 +%else BuildRequires: devtoolset-7-gcc-c++ >= 7.3.1-5.13 +%endif %else BuildRequires: gcc-c++ %endif @@ -166,14 +170,16 @@ BuildRequires: leveldb-devel > 1.2 BuildRequires: libaio-devel BuildRequires: libblkid-devel >= 2.17 BuildRequires: libcurl-devel +BuildRequires: libcap-ng-devel BuildRequires: libudev-devel +BuildRequires: libnl3-devel BuildRequires: liboath-devel BuildRequires: libtool BuildRequires: libxml2-devel -BuildRequires: libuuid-devel BuildRequires: make BuildRequires: ncurses-devel BuildRequires: parted +BuildRequires: patch BuildRequires: perl BuildRequires: pkgconfig BuildRequires: procps @@ -194,6 +200,7 @@ BuildRequires: librabbitmq-devel %endif %if 0%{with make_check} BuildRequires: jq +BuildRequires: libuuid-devel BuildRequires: python%{_python_buildid}-bcrypt BuildRequires: python%{_python_buildid}-coverage BuildRequires: python%{_python_buildid}-nose @@ -228,6 +235,7 @@ BuildRequires: pkgconfig(systemd) BuildRequires: systemd-rpm-macros %{?systemd_requires} PreReq: %fillup_prereq +BuildRequires: fdupes BuildRequires: net-tools BuildRequires: libbz2-devel BuildRequires: mozilla-nss-devel @@ -264,7 +272,11 @@ BuildRequires: python2-Cython %endif BuildRequires: python%{python3_pkgversion}-devel BuildRequires: python%{python3_pkgversion}-setuptools +%if 0%{?rhel} +BuildRequires: python%{python3_version_nodots}-Cython +%else BuildRequires: python%{python3_pkgversion}-Cython +%endif BuildRequires: python%{_python_buildid}-prettytable BuildRequires: python%{_python_buildid}-sphinx BuildRequires: lz4-devel >= 1.7 @@ -443,6 +455,7 @@ Recommends: ceph-mgr-dashboard = %{_epoch_prefix}%{version}-%{release} Recommends: ceph-mgr-diskprediction-local = %{_epoch_prefix}%{version}-%{release} Recommends: ceph-mgr-diskprediction-cloud = %{_epoch_prefix}%{version}-%{release} Recommends: ceph-mgr-rook = %{_epoch_prefix}%{version}-%{release} +Recommends: ceph-mgr-k8sevents = %{_epoch_prefix}%{version}-%{release} Recommends: ceph-mgr-ssh = %{_epoch_prefix}%{version}-%{release} %endif %if 0%{?rhel} == 7 @@ -533,6 +546,18 @@ Requires: python%{_python_buildid}-kubernetes ceph-mgr-rook is a ceph-mgr plugin for orchestration functions using a Rook backend. 
+%package mgr-k8sevents +BuildArch: noarch +Summary: Ceph Manager plugin to orchestrate ceph-events to kubernetes' events API +%if 0%{?suse_version} +Group: System/Filesystems +%endif +Requires: ceph-mgr = %{_epoch_prefix}%{version}-%{release} +Requires: python%{_python_buildid}-kubernetes +%description mgr-k8sevents +ceph-mgr-k8sevents is a ceph-mgr plugin that sends every ceph-events +to kubernetes' events API + %package mgr-ssh Summary: ceph-mgr ssh module BuildArch: noarch @@ -705,6 +730,7 @@ Group: Development/Libraries/Python %endif Requires: librgw2 = %{_epoch_prefix}%{version}-%{release} Requires: python-rados = %{_epoch_prefix}%{version}-%{release} +%{?python_provide:%python_provide python-rgw} Obsoletes: python-ceph < %{_epoch_prefix}%{version}-%{release} %description -n python-rgw This package contains Python 2 libraries for interacting with Cephs RADOS @@ -718,7 +744,7 @@ Group: Development/Libraries/Python %endif Requires: librgw2 = %{_epoch_prefix}%{version}-%{release} Requires: python%{python3_pkgversion}-rados = %{_epoch_prefix}%{version}-%{release} -Provides: python3-rgw = %{_epoch_prefix}%{version}-%{release} +%{?python_provide:%python_provide python%{python3_pkgversion}-rgw} %if 0%{without python2} Provides: python-rgw = %{_epoch_prefix}%{version}-%{release} Obsoletes: python-rgw < %{_epoch_prefix}%{version}-%{release} @@ -734,6 +760,7 @@ Summary: Python 2 libraries for the RADOS object store Group: Development/Libraries/Python %endif Requires: librados2 = %{_epoch_prefix}%{version}-%{release} +%{?python_provide:%python_provide python-rados} Obsoletes: python-ceph < %{_epoch_prefix}%{version}-%{release} %description -n python-rados This package contains Python 2 libraries for interacting with Cephs RADOS @@ -747,7 +774,7 @@ Group: Development/Libraries/Python %endif Requires: python%{python3_pkgversion} Requires: librados2 = %{_epoch_prefix}%{version}-%{release} -Provides: python3-rados = %{_epoch_prefix}%{version}-%{release} +%{?python_provide:%python_provide python%{python3_pkgversion}-rados} %if 0%{without python2} Provides: python-rados = %{_epoch_prefix}%{version}-%{release} Obsoletes: python-rados < %{_epoch_prefix}%{version}-%{release} @@ -825,6 +852,7 @@ Group: Development/Libraries/Python %endif Requires: librbd1 = %{_epoch_prefix}%{version}-%{release} Requires: python-rados = %{_epoch_prefix}%{version}-%{release} +%{?python_provide:%python_provide python-rbd} Obsoletes: python-ceph < %{_epoch_prefix}%{version}-%{release} %description -n python-rbd This package contains Python 2 libraries for interacting with Cephs RADOS @@ -838,6 +866,7 @@ Group: Development/Libraries/Python %endif Requires: librbd1 = %{_epoch_prefix}%{version}-%{release} Requires: python%{python3_pkgversion}-rados = %{_epoch_prefix}%{version}-%{release} +%{?python_provide:%python_provide python%{python3_pkgversion}-rbd} Provides: python3-rbd = %{_epoch_prefix}%{version}-%{release} %if 0%{without python2} Provides: python-rbd = %{_epoch_prefix}%{version}-%{release} @@ -886,6 +915,7 @@ Group: Development/Libraries/Python Requires: libcephfs2 = %{_epoch_prefix}%{version}-%{release} Requires: python-rados = %{_epoch_prefix}%{version}-%{release} Requires: python-ceph-argparse = %{_epoch_prefix}%{version}-%{release} +%{?python_provide:%python_provide python-cephfs} Obsoletes: python-ceph < %{_epoch_prefix}%{version}-%{release} %description -n python-cephfs This package contains Python 2 libraries for interacting with Cephs distributed @@ -900,7 +930,7 @@ Group: Development/Libraries/Python Requires: 
libcephfs2 = %{_epoch_prefix}%{version}-%{release} Requires: python%{python3_pkgversion}-rados = %{_epoch_prefix}%{version}-%{release} Requires: python%{python3_pkgversion}-ceph-argparse = %{_epoch_prefix}%{version}-%{release} -Provides: python3-cephfs = %{_epoch_prefix}%{version}-%{release} +%{?python_provide:%python_provide python%{python3_pkgversion}-cephfs} %if 0%{without python2} Provides: python-cephfs = %{_epoch_prefix}%{version}-%{release} Obsoletes: python-cephfs < %{_epoch_prefix}%{version}-%{release} @@ -927,7 +957,7 @@ Summary: Python 3 utility libraries for Ceph CLI %if 0%{?suse_version} Group: Development/Libraries/Python %endif -Provides: python3-ceph-argparse = %{_epoch_prefix}%{version}-%{release} +%{?python_provide:%python_provide python%{python3_pkgversion}-ceph-argparse} %description -n python%{python3_pkgversion}-ceph-argparse This package contains types and routines for Python 3 used by the Ceph CLI as well as the RESTful interface. These have to do with querying the daemons for @@ -1083,7 +1113,7 @@ This package provides Ceph’s default alerts for Prometheus. %define _lto_cflags %{nil} %if 0%{?rhel} == 7 -. /opt/rh/devtoolset-7/enable +. /opt/rh/devtoolset-8/enable %endif %if 0%{with cephfs_java} @@ -1116,7 +1146,7 @@ free -h echo "System limits:" ulimit -a if test -n "$CEPH_SMP_NCPUS" -a "$CEPH_SMP_NCPUS" -gt 1 ; then - mem_per_process=1800 + mem_per_process=2500 max_mem=$(LANG=C free -m | sed -n "s|^Mem: *\([0-9]*\).*$|\1|p") max_jobs="$(($max_mem / $mem_per_process))" test "$CEPH_SMP_NCPUS" -gt "$max_jobs" && CEPH_SMP_NCPUS="$max_jobs" && echo "Warning: Reducing build parallelism to -j$max_jobs because of memory limits" @@ -1261,7 +1291,10 @@ mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/bootstrap-rbd-mirror %py3_compile %{buildroot}%{python3_sitelib} # prometheus alerts install -m 644 -D monitoring/prometheus/alerts/ceph_default_alerts.yml %{buildroot}/etc/prometheus/SUSE/default_rules/ceph_default_alerts.yml +# hardlink duplicate files under /usr to save space +%fdupes %{buildroot}%{_prefix} %endif + %if 0%{?rhel} == 8 %py_byte_compile %{__python3} %{buildroot}%{python3_sitelib} %endif @@ -1644,6 +1677,19 @@ if [ $1 -eq 1 ] ; then /usr/bin/systemctl try-restart ceph-mgr.target >/dev/null 2>&1 || : fi +%files mgr-k8sevents +%{_datadir}/ceph/mgr/k8sevents + +%post mgr-k8sevents +if [ $1 -eq 1 ] ; then + /usr/bin/systemctl try-restart ceph-mgr.target >/dev/null 2>&1 || : +fi + +%postun mgr-k8sevents +if [ $1 -eq 1 ] ; then + /usr/bin/systemctl try-restart ceph-mgr.target >/dev/null 2>&1 || : +fi + %files mgr-ssh %{_datadir}/ceph/mgr/ssh @@ -1903,6 +1949,7 @@ fi %if %{with lttng} %{_libdir}/librados_tp.so.* %endif +%dir %{_sysconfdir}/ceph %post -n librados2 -p /sbin/ldconfig @@ -1979,8 +2026,8 @@ fi %{_libdir}/librgw.so.* %{_libdir}/librgw_admin_user.so.* %if %{with lttng} -%{_libdir}/librgw_op_tp.so* -%{_libdir}/librgw_rados_tp.so* +%{_libdir}/librgw_op_tp.so.* +%{_libdir}/librgw_rados_tp.so.* %endif %post -n librgw2 -p /sbin/ldconfig @@ -1994,6 +2041,10 @@ fi %{_includedir}/rados/rgw_file.h %{_libdir}/librgw.so %{_libdir}/librgw_admin_user.so +%if %{with lttng} +%{_libdir}/librgw_op_tp.so +%{_libdir}/librgw_rados_tp.so +%endif %if 0%{with python2} %files -n python-rgw @@ -2017,6 +2068,7 @@ fi %files -n libcephfs2 %{_libdir}/libcephfs.so.* +%dir %{_sysconfdir}/ceph %post -n libcephfs2 -p /sbin/ldconfig diff --git a/ceph/changelog.upstream b/ceph/changelog.upstream index 2ef88be7c..bd3f51adf 100644 --- a/ceph/changelog.upstream +++ b/ceph/changelog.upstream @@ 
-1,8 +1,14 @@ -ceph (14.2.4-1xenial) xenial; urgency=medium +ceph (14.2.5-1xenial) xenial; urgency=medium * - -- Jenkins Build Slave User Fri, 13 Sep 2019 18:29:06 +0000 + -- Jenkins Build Slave User Fri, 06 Dec 2019 16:53:42 +0000 + +ceph (14.2.5-1) stable; urgency=medium + + * New upstream release + + -- Ceph Release Team Fri, 06 Dec 2019 16:42:32 +0000 ceph (14.2.4-1) stable; urgency=medium diff --git a/ceph/cmake/modules/BuildDPDK.cmake b/ceph/cmake/modules/BuildDPDK.cmake index 12a831a8b..fe4a0acf1 100644 --- a/ceph/cmake/modules/BuildDPDK.cmake +++ b/ceph/cmake/modules/BuildDPDK.cmake @@ -71,11 +71,13 @@ function(do_build_dpdk dpdk_dir) "\"${target}\" not listed in ${supported_targets}") endif() + set(EXTRA_CFLAGS "-Wno-unknown-warning-option -Wno-stringop-truncation -Wno-address-of-packed-member -fPIC") + include(ExternalProject) ExternalProject_Add(dpdk-ext SOURCE_DIR ${CMAKE_SOURCE_DIR}/src/spdk/dpdk CONFIGURE_COMMAND $(MAKE) config O=${dpdk_dir} T=${target} - BUILD_COMMAND env CC=${CMAKE_C_COMPILER} $(MAKE) O=${dpdk_dir} EXTRA_CFLAGS=-fPIC + BUILD_COMMAND env CC=${CMAKE_C_COMPILER} $(MAKE) O=${dpdk_dir} EXTRA_CFLAGS=${EXTRA_CFLAGS} BUILD_IN_SOURCE 1 INSTALL_COMMAND "true") ExternalProject_Add_Step(dpdk-ext patch-config diff --git a/ceph/cmake/modules/CheckCxxAtomic.cmake b/ceph/cmake/modules/CheckCxxAtomic.cmake index 074e4db89..68efa1531 100644 --- a/ceph/cmake/modules/CheckCxxAtomic.cmake +++ b/ceph/cmake/modules/CheckCxxAtomic.cmake @@ -15,7 +15,13 @@ int main() { std::atomic w2; std::atomic w4; std::atomic w8; - return w1 + w2 + w4 + w8; +#ifdef __s390x__ + // Boost needs 16-byte atomics for tagged pointers. + std::atomic w16; +#else + #define w16 0 +#endif + return w1 + w2 + w4 + w8 + w16; } " ${var}) endfunction(check_cxx_atomics) diff --git a/ceph/cmake/modules/FindBoost.cmake b/ceph/cmake/modules/FindBoost.cmake index dd08ba5e4..e2525b893 100644 --- a/ceph/cmake/modules/FindBoost.cmake +++ b/ceph/cmake/modules/FindBoost.cmake @@ -2134,7 +2134,7 @@ if(Boost_FOUND) endif() if(_Boost_${UPPERCOMPONENT}_COMPILER_FEATURES) set_target_properties(Boost::${COMPONENT} PROPERTIES - INTERFACE_COMPILE_FEATURES "${_Boost_${UPPERCOMPONENT}_COMPILER_FEATURES}") + CXX_STANDARD 17) endif() endif() endif() diff --git a/ceph/cmake/modules/Findgenl.cmake b/ceph/cmake/modules/Findgenl.cmake new file mode 100644 index 000000000..07c5f357b --- /dev/null +++ b/ceph/cmake/modules/Findgenl.cmake @@ -0,0 +1,23 @@ +# - Find libnl-genl3 +# Find the genl library and includes +# +# GENL_INCLUDE_DIR - where to find netlink.h, etc. +# GENL_LIBRARIES - List of libraries when using genl. +# GENL_FOUND - True if genl found. 
+ +find_path(GENL_INCLUDE_DIR NAMES netlink/netlink.h PATH_SUFFIXES libnl3) + +find_library(LIBNL_LIB nl-3) +find_library(LIBNL_GENL_LIB nl-genl-3) +set(GENL_LIBRARIES + ${LIBNL_LIB} + ${LIBNL_GENL_LIB} + ) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(nl-genl-3 + DEFAULT_MSG GENL_LIBRARIES GENL_INCLUDE_DIR) + +mark_as_advanced( + GENL_LIBRARIES + GENL_INCLUDE_DIR) diff --git a/ceph/debian/control b/ceph/debian/control index b073073ba..4cfa1c4c3 100644 --- a/ceph/debian/control +++ b/ceph/debian/control @@ -26,6 +26,7 @@ Build-Depends: cmake (>= 3.5), libbabeltrace-ctf-dev, libbabeltrace-dev, libblkid-dev (>= 2.17), + libcap-ng-dev, libcunit1-dev, libcurl4-openssl-dev, libexpat1-dev, @@ -45,10 +46,13 @@ Build-Depends: cmake (>= 3.5), libssl-dev, libtool, libudev-dev, + libnl-3-dev, + libnl-genl-3-dev, libxml2-dev, librabbitmq-dev, lsb-release, parted, + patch, pkg-config, python (>= 2.7), python-all-dev, @@ -69,7 +73,7 @@ Build-Depends: cmake (>= 3.5), python3-all-dev, python3-setuptools, # Make-Check socat, - uuid-dev, +# Make-Check uuid-dev, uuid-runtime, valgrind, virtualenv | python-virtualenv, @@ -192,6 +196,7 @@ Recommends: ceph-mgr-dashboard, ceph-mgr-diskprediction-local, ceph-mgr-diskprediction-cloud, ceph-mgr-rook, + ceph-mgr-k8sevents, ceph-mgr-ssh Suggests: python-influxdb Replaces: ceph (<< 0.93-417), @@ -275,6 +280,21 @@ Description: rook plugin for ceph-mgr functionality, to allow ceph-mgr to install and configure ceph using Rook. +Package: ceph-mgr-k8sevents +Architecture: all +Depends: ceph-mgr (= ${binary:Version}), + python-kubernetes, + ${misc:Depends}, + ${python:Depends}, +Description: kubernetes events plugin for ceph-mgr + Ceph is a massively scalable, open-source, distributed + storage system that runs on commodity hardware and delivers object, + block and file system storage. + . + This package contains the k8sevents plugin, to allow ceph-mgr to send + ceph related events to the kubernetes events API, and track all events + that occur within the rook-ceph namespace. + Package: ceph-mgr-ssh Architecture: all Depends: ceph-mgr (= ${binary:Version}), diff --git a/ceph/doc/cephfs/fs-volumes.rst b/ceph/doc/cephfs/fs-volumes.rst new file mode 100644 index 000000000..af0f1154a --- /dev/null +++ b/ceph/doc/cephfs/fs-volumes.rst @@ -0,0 +1,155 @@ +.. _fs-volumes-and-subvolumes: + +FS volumes and subvolumes +========================= + +A single source of truth for CephFS exports is implemented in the volumes +module of the :term:`Ceph Manager` daemon (ceph-mgr). The OpenStack shared +file system service (manila_), Ceph Containter Storage Interface (CSI_), +storage administrators among others can use the common CLI provided by the +ceph-mgr volumes module to manage the CephFS exports. 
+ +The ceph-mgr volumes module implements the following file system export +abstactions: + +* FS volumes, an abstraction for CephFS file systems + +* FS subvolumes, an abstraction for independent CephFS directory trees + +* FS subvolume groups, an abstraction for a directory level higher than FS + subvolumes to effect policies (e.g., :doc:`/cephfs/file-layouts`) across a + set of subvolumes + +Some possible use-cases for the export abstractions: + +* FS subvolumes used as manila shares or CSI volumes + +* FS subvolume groups used as manila share groups + +Requirements +------------ + +* Nautilus (14.2.x) or a later version of Ceph + +* Cephx client user (see :doc:`/rados/operations/user-management`) with + the following minimum capabilities:: + + mon 'allow r' + mgr 'allow rw' + + +FS Volumes +---------- + +Create a volume using:: + + $ ceph fs volume create + +This creates a CephFS file sytem and its data and metadata pools. It also tries +to create MDSes for the filesytem using the enabled ceph-mgr orchestrator +module (see :doc:`/mgr/orchestrator_cli`) , e.g., rook. + +Remove a volume using:: + + $ ceph fs volume rm [--yes-i-really-mean-it] + +This removes a file system and its data and metadata pools. It also tries to +remove MDSes using the enabled ceph-mgr orchestrator module. + +List volumes using:: + + $ ceph fs volume ls + +FS Subvolume groups +------------------- + +Create a subvolume group using:: + + $ ceph fs subvolumegroup create [--mode --pool_layout ] + +The command succeeds even if the subvolume group already exists. + +When creating a subvolume group you can specify its data pool layout (see +:doc:`/cephfs/file-layouts`), and file mode in octal numerals. By default, the +subvolume group is created with an octal file mode '755', and data pool layout +of its parent directory. + + +Remove a subvolume group using:: + + $ ceph fs subvolumegroup rm [--force] + +The removal of a subvolume group fails if it is not empty, e.g., has subvolumes +or snapshots, or is non-existent. Using the '--force' flag allows the command +to succeed even if the subvolume group is non-existent. + + +Fetch the absolute path of a subvolume group using:: + + $ ceph fs subvolumegroup getpath + +Create a snapshot (see :doc:`/cephfs/experimental-features`) of a +subvolume group using:: + + $ ceph fs subvolumegroup snapshot create + +This implicitly snapshots all the subvolumes under the subvolume group. + +Remove a snapshot of a subvolume group using:: + + $ ceph fs subvolumegroup snapshot rm [--force] + +Using the '--force' flag allows the command to succeed that would otherwise +fail if the snapshot did not exist. + + +FS Subvolumes +------------- + +Create a subvolume using:: + + $ ceph fs subvolume create [--group_name --mode --pool_layout --size ] + + +The command succeeds even if the subvolume already exists. + +When creating a subvolume you can specify its subvolume group, data pool layout, +file mode in octal numerals, and size in bytes. The size of the subvolume is +specified by setting a quota on it (see :doc:`/cephfs/quota`). By default a +subvolume is created within the default subvolume group, and with an octal file +mode '755', data pool layout of its parent directory and no size limit. + + +Remove a subvolume group using:: + + $ ceph fs subvolume rm [--group_name --force] + + +The command removes the subvolume and its contents. It does this in two steps. +First, it move the subvolume to a trash folder, and then asynchronously purges +its contents. 
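To tie the preceding commands together, a minimal end-to-end session could look like the sketch below. The names ``vol1``, ``grp1``, ``subvol1`` and ``snap1`` are arbitrary placeholders chosen for this illustration, and the session assumes a Nautilus cluster with the ceph-mgr volumes module available::

    # create a volume (a CephFS file system plus its pools), a group and a subvolume
    ceph fs volume create vol1
    ceph fs subvolumegroup create vol1 grp1 --mode 755
    ceph fs subvolume create vol1 subvol1 --group_name grp1 --size 1073741824

    # resolve the subvolume's path so it can be mounted or handed to manila/CSI
    ceph fs subvolume getpath vol1 subvol1 --group_name grp1

    # snapshot the subvolume, then tear everything down again
    ceph fs subvolume snapshot create vol1 subvol1 snap1 --group_name grp1
    ceph fs subvolume snapshot rm vol1 subvol1 snap1 --group_name grp1
    ceph fs subvolume rm vol1 subvol1 --group_name grp1
    ceph fs subvolumegroup rm vol1 grp1
    ceph fs volume rm vol1 --yes-i-really-mean-it

Note that, as described above, removing a subvolume only moves it to a trash folder and queues its contents for asynchronous purging; the command returns before the data is actually deleted.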
+ +The removal of a subvolume fails if it has snapshots, or is non-existent. +Using the '--force' flag allows the command to succeed even if the subvolume is +non-existent. + + +Fetch the absolute path of a subvolume using:: + + $ ceph fs subvolume getpath [--group_name ] + + +Create a snapshot of a subvolume using:: + + $ ceph fs subvolume snapshot create [--group_name ] + + +Remove a snapshot of a subvolume using:: + + $ ceph fs subvolume snapshot rm [--group_name --force] + +Using the '--force' flag allows the command to succeed that would otherwise +fail if the snapshot did not exist. + +.. _manila: https://github.com/openstack/manila +.. _CSI: https://github.com/ceph/ceph-csi diff --git a/ceph/doc/cephfs/fstab.rst b/ceph/doc/cephfs/fstab.rst index 785208d7f..344c80d9e 100644 --- a/ceph/doc/cephfs/fstab.rst +++ b/ceph/doc/cephfs/fstab.rst @@ -15,10 +15,11 @@ following to ``/etc/fstab``:: For example:: - 10.10.10.10:6789:/ /mnt/ceph ceph name=admin,secretfile=/etc/ceph/secret.key,noatime,_netdev 0 2 + 10.10.10.10:6789:/ /mnt/ceph ceph name=admin,noatime,_netdev 0 2 -.. important:: The ``name`` and ``secret`` or ``secretfile`` options are - mandatory when you have Ceph authentication running. +The default for the ``name=`` parameter is ``guest``. If the ``secret`` or +``secretfile`` options are not specified then the mount helper will attempt to +find a secret for the given ``name`` in one of the configured keyrings. See `User Management`_ for details. diff --git a/ceph/doc/cephfs/index.rst b/ceph/doc/cephfs/index.rst index d55701d13..925ca89a0 100644 --- a/ceph/doc/cephfs/index.rst +++ b/ceph/doc/cephfs/index.rst @@ -111,6 +111,7 @@ authentication keyring. Application best practices Scrub LazyIO + FS volume and subvolumes .. toctree:: :hidden: diff --git a/ceph/doc/cephfs/kernel.rst b/ceph/doc/cephfs/kernel.rst index 3b5a75086..89f481f9f 100644 --- a/ceph/doc/cephfs/kernel.rst +++ b/ceph/doc/cephfs/kernel.rst @@ -9,8 +9,12 @@ monitor host name(s) into IP address(es) for you. For example:: sudo mkdir /mnt/mycephfs sudo mount -t ceph 192.168.0.1:6789:/ /mnt/mycephfs -To mount the Ceph file system with ``cephx`` authentication enabled, you must -specify a user name and a secret. :: +To mount the Ceph file system with ``cephx`` authentication enabled, the kernel +must authenticate with the cluster. The default ``name=`` option is ``guest``. +The mount.ceph helper will automatically attempt to find a secret key in the +keyring. + +The secret can also be specified manually with the ``secret=`` option. :: sudo mount -t ceph 192.168.0.1:6789:/ /mnt/mycephfs -o name=admin,secret=AQATSKdNGBnwLhAAnNDKnH65FmVKpXZJVasUeQ== @@ -18,11 +22,11 @@ The foregoing usage leaves the secret in the Bash history. A more secure approach reads the secret from a file. For example:: sudo mount -t ceph 192.168.0.1:6789:/ /mnt/mycephfs -o name=admin,secretfile=/etc/ceph/admin.secret + +See `User Management`_ for details on cephx. -If you have more than one filesystem, specify which one to mount using +If you have more than one file system, specify which one to mount using the ``mds_namespace`` option, e.g. ``-o mds_namespace=myfs``. - -See `User Management`_ for details on cephx. To unmount the Ceph file system, you may use the ``umount`` command. For example:: diff --git a/ceph/doc/cephfs/posix.rst b/ceph/doc/cephfs/posix.rst index 3cd59ec57..34c2b44ad 100644 --- a/ceph/doc/cephfs/posix.rst +++ b/ceph/doc/cephfs/posix.rst @@ -80,3 +80,22 @@ than NFS when it comes to write atomicity. 
In other words, when it comes to POSIX, :: HDFS < NFS < CephFS < {XFS, ext4} + + +fsync() and error reporting +--------------------------- + +POSIX is somewhat vague about the state of an inode after fsync reports +an error. In general, CephFS uses the standard error-reporting +mechanisms in the client's kernel, and therefore follows the same +conventions as other filesystems. + +In modern Linux kernels (v4.17 or later), writeback errors are reported +once to every file description that is open at the time of the error. In +addition, unreported errors that occured before the file description was +opened will also be returned on fsync. + +See `PostgreSQL's summary of fsync() error reporting across operating systems +`_ and `Matthew Wilcox's +presentation on Linux IO error handling +`_ for more information. diff --git a/ceph/doc/dev/osd_internals/recovery_reservation.rst b/ceph/doc/dev/osd_internals/recovery_reservation.rst index 4ab03192f..a24ac1b15 100644 --- a/ceph/doc/dev/osd_internals/recovery_reservation.rst +++ b/ceph/doc/dev/osd_internals/recovery_reservation.rst @@ -48,6 +48,14 @@ necessary), the primary drops the local reservation and enters the Recovered state. Once all the PGs have reported they are clean, the primary enters the Clean state and marks itself active+clean. +----------------- +Dump Reservations +----------------- + +An OSD daemon command dumps total local and remote reservations:: + + ceph daemon osd. dump_recovery_reservations + -------------- Things to Note diff --git a/ceph/doc/dev/osd_internals/scrub.rst b/ceph/doc/dev/osd_internals/scrub.rst index 3343b3986..f20dc042e 100644 --- a/ceph/doc/dev/osd_internals/scrub.rst +++ b/ceph/doc/dev/osd_internals/scrub.rst @@ -1,6 +1,9 @@ +Scrub internals and diagnostics +=============================== + Scrubbing Behavior Table -======================== +------------------------ +-------------------------------------------------+----------+-----------+---------------+----------------------+ | Flags | none | noscrub | nodeep_scrub | noscrub/nodeep_scrub | @@ -28,3 +31,11 @@ State variables - Initiated scrub state is must_scrub && !must_deep_scrub && !time_for_deep - Initiated scrub after osd_deep_scrub_interval state is must scrub && !must_deep_scrub && time_for_deep - Initiated deep scrub state is must_scrub && must_deep_scrub + +Scrub Reservations +------------------ + +An OSD daemon command dumps total local and remote reservations:: + + ceph daemon osd. dump_scrub_reservations + diff --git a/ceph/doc/man/8/ceph-bluestore-tool.rst b/ceph/doc/man/8/ceph-bluestore-tool.rst index bc1058809..0ad5c20c3 100644 --- a/ceph/doc/man/8/ceph-bluestore-tool.rst +++ b/ceph/doc/man/8/ceph-bluestore-tool.rst @@ -22,6 +22,7 @@ Synopsis | **ceph-bluestore-tool** bluefs-bdev-new-wal --path *osd path* --dev-target *new-device* | **ceph-bluestore-tool** bluefs-bdev-new-db --path *osd path* --dev-target *new-device* | **ceph-bluestore-tool** bluefs-bdev-migrate --path *osd path* --dev-target *new-device* --devs-source *device1* [--devs-source *device2*] +| **ceph-bluestore-tool** free-dump|free-score --path *osd path* [ --allocator block/bluefs-wal/bluefs-db/bluefs-slow ] Description @@ -81,6 +82,15 @@ Commands Show device label(s). +:command:`free-dump` --path *osd path* [ --allocator block/bluefs-wal/bluefs-db/bluefs-slow ] + + Dump all free regions in allocator. + +:command:`free-score` --path *osd path* [ --allocator block/bluefs-wal/bluefs-db/bluefs-slow ] + + Give a [0-1] number that represents quality of fragmentation in allocator. 
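As a rough illustration of the new *free-score* and *free-dump* commands, fragmentation of a stopped OSD could be inspected as in the sketch below; the OSD id ``0`` and its data path are placeholders, and the OSD must not be running while ceph-bluestore-tool opens its store::

    # [0-1] fragmentation score for the main (block) allocator of osd.0
    ceph-bluestore-tool free-score --path /var/lib/ceph/osd/ceph-0 --allocator block

    # detailed dump of every free region known to that allocator
    ceph-bluestore-tool free-dump --path /var/lib/ceph/osd/ceph-0 --allocator block

For a running OSD the same information is available over its admin socket, e.g. ``ceph daemon osd.0 bluestore allocator score block``.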
+ 0 represents case when all free space is in one chunk. 1 represents worst possible fragmentation. + Options ======= @@ -117,6 +127,10 @@ Options deep scrub/repair (read and validate object data, not just metadata) +.. option:: --allocator *name* + + Useful for *free-dump* and *free-score* actions. Selects allocator(s). + Device labels ============= diff --git a/ceph/doc/man/8/ceph-kvstore-tool.rst b/ceph/doc/man/8/ceph-kvstore-tool.rst index d7b88f08a..1eb99c030 100644 --- a/ceph/doc/man/8/ceph-kvstore-tool.rst +++ b/ceph/doc/man/8/ceph-kvstore-tool.rst @@ -80,6 +80,11 @@ which are as follows: Note that in the case of rocksdb this may corrupt an otherwise uncorrupted database--use this only as a last resort! +:command:`stats` + Prints statistics from underlying key-value database. This is only for informative purposes. + Format and information content may vary between releases. For RocksDB information includes + compactions stats, performance counters, memory usage and internal RocksDB stats. + Availability ============ diff --git a/ceph/doc/man/8/mount.ceph.rst b/ceph/doc/man/8/mount.ceph.rst index 0627f5cca..4f22cd29f 100644 --- a/ceph/doc/man/8/mount.ceph.rst +++ b/ceph/doc/man/8/mount.ceph.rst @@ -9,14 +9,14 @@ Synopsis ======== -| **mount.ceph** *monaddr1*\ [,\ *monaddr2*\ ,...]:/[*subdir*] *dir* [ +| **mount.ceph** [*monaddr1*\ ,\ *monaddr2*\ ,...]:/[*subdir*] *dir* [ -o *options* ] Description =========== -**mount.ceph** is a simple helper for mounting the Ceph file system on +**mount.ceph** is a helper for mounting the Ceph file system on a Linux host. It serves to resolve monitor hostname(s) into IP addresses and read authentication keys from disk; the Linux kernel client component does most of the real work. In fact, it is possible @@ -34,6 +34,10 @@ learn about all monitors from any responsive monitor. However, it is a good idea to specify more than one in case one happens to be down at the time of mount. +If the host portion of the device is left blank, then **mount.ceph** will +attempt to determine monitor addresses using local configuration files +and/or DNS SRV records. + A subdirectory subdir may be specified if a subset of the file system is to be mounted. @@ -126,6 +130,16 @@ Options :command:`noasyncreaddir` no dcache readdir +:command:`conf` + Path to a ceph.conf file. This is used to initialize the ceph context + for autodiscovery of monitor addresses and auth secrets. The default is + to use the standard search path for ceph.conf files. + +Mount Secrets +============= +If the `secret` and `secretfile` options are not specified on the command-line +then the mount helper will spawn a child process that will use the standard +ceph library routines to find a keyring and fetch the secret from it. Examples ======== @@ -143,6 +157,10 @@ port:: mount.ceph monhost1:7000,monhost2:7000,monhost3:7000:/ /mnt/foo +To automatically determine the monitor addresses from local configuration:: + + mount.ceph :/ /mnt/foo + To mount only part of the namespace:: mount.ceph monhost1:/some/small/thing /mnt/thing diff --git a/ceph/doc/man/8/rados.rst b/ceph/doc/man/8/rados.rst index 63fdc23f9..36b6c0b3e 100644 --- a/ceph/doc/man/8/rados.rst +++ b/ceph/doc/man/8/rados.rst @@ -26,6 +26,16 @@ Options Interact with the given pool. Required by most commands. +.. option:: --pgid + + As an alternative to ``--pool``, ``--pgid`` also allow users to specify the + PG id to which the command will be directed. 
With this option, certain + commands like ``ls`` allow users to limit the scope of the command to the given PG. + +.. option:: -N namespace, --namespace namespace + + Specify the rados namespace to use for the object. + .. option:: -s snap, --snap snap Read from the given pool snapshot. Valid for all pool-specific read operations. @@ -104,7 +114,7 @@ Pool specific commands List the watchers of object name. :command:`ls` *outfile* - List objects in given pool and write to outfile. + List objects in the given pool and write to outfile. Instead of ``--pool`` if ``--pgid`` will be specified, ``ls`` will only list the objects in the given PG. :command:`lssnap` List snapshots for given pool. @@ -199,6 +209,10 @@ To get a list object in pool foo sent to stdout:: rados -p foo ls - +To get a list of objects in PG 0.6:: + + rados --pgid 0.6 ls + To write an object:: rados -p foo put myobject blah.txt diff --git a/ceph/doc/man/8/rbd.rst b/ceph/doc/man/8/rbd.rst index 22f78ad63..45c3315a8 100644 --- a/ceph/doc/man/8/rbd.rst +++ b/ceph/doc/man/8/rbd.rst @@ -537,13 +537,13 @@ Commands :command:`mv` *src-image-spec* *dest-image-spec* Rename an image. Note: rename across pools is not supported. -:command:`namespace create` *pool-name* *namespace-name* +:command:`namespace create` *pool-name*/*namespace-name* Create a new image namespace within the pool. :command:`namespace list` *pool-name* List image namespaces defined within the pool. -:command:`namespace remove` *pool-name* *namespace-name* +:command:`namespace remove` *pool-name*/*namespace-name* Remove an empty image namespace from the pool. :command:`object-map check` *image-spec* | *snap-spec* diff --git a/ceph/doc/mgr/crash.rst b/ceph/doc/mgr/crash.rst index 8a9881030..76e0ce94a 100644 --- a/ceph/doc/mgr/crash.rst +++ b/ceph/doc/mgr/crash.rst @@ -37,7 +37,13 @@ Remove a specific crash dump. ceph crash ls -List the timestamp/uuid crashids for all saved crash info. +List the timestamp/uuid crashids for all new and archived crash info. + +:: + + ceph crash ls-new + +List the timestamp/uuid crashids for all newcrash info. :: @@ -57,4 +63,21 @@ Show all details of a saved crash. Remove saved crashes older than 'keep' days. must be an integer. +:: + + ceph crash archive + +Archive a crash report so that it is no longer considered for the ``RECENT_CRASH`` health check and does not appear in the ``crash ls-new`` output (it will still appear in the ``crash ls`` output). + +:: + + ceph crash archive-all + +Archive all new crash reports. + + +Options +------- +* ``mgr/crash/warn_recent_interval`` [default: 2 weeks] controls what constitutes "recent" for the purposes of raising the ``RECENT_CRASH`` health warning. +* ``mgr/crash/retain_interval`` [default: 1 year] controls how long crash reports are retained by the cluster before they are automatically purged. diff --git a/ceph/doc/mgr/dashboard.rst b/ceph/doc/mgr/dashboard.rst index bcec83414..8ba2111d2 100644 --- a/ceph/doc/mgr/dashboard.rst +++ b/ceph/doc/mgr/dashboard.rst @@ -772,13 +772,6 @@ to allow direct connections to the manager nodes, you could set up a proxy that automatically forwards incoming requests to the currently active ceph-mgr instance. -.. note:: - Note that putting the dashboard behind a load-balancing proxy like `HAProxy - `_ currently has some limitations, particularly if - you require the traffic between the proxy and the dashboard to be encrypted - via SSL/TLS. See `BUG#24662 `_ for - details. 
- Configuring a URL Prefix ^^^^^^^^^^^^^^^^^^^^^^^^ @@ -793,6 +786,71 @@ to use hyperlinks that include your prefix, you can set the so you can access the dashboard at ``http://$IP:$PORT/$PREFIX/``. +Disable the redirection +^^^^^^^^^^^^^^^^^^^^^^^ + +If the dashboard is behind a load-balancing proxy like `HAProxy `_ +you might want to disable the redirection behaviour to prevent situations that +internal (unresolvable) URL's are published to the frontend client. Use the +following command to get the dashboard to respond with a HTTP error (500 by default) +instead of redirecting to the active dashboard:: + + $ ceph config set mgr mgr/dashboard/standby_behaviour "error" + +To reset the setting to the default redirection behaviour, use the following command:: + + $ ceph config set mgr mgr/dashboard/standby_behaviour "redirect" + +Configure the error status code +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When the redirection behaviour is disabled, then you want to customize the HTTP status +code of standby dashboards. To do so you need to run the command:: + + $ ceph config set mgr mgr/dashboard/standby_error_status_code 503 + +HAProxy example configuration +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Below you will find an example configuration for SSL/TLS pass through using +`HAProxy `_. + +Please note that the configuration works under the following conditions. +If the dashboard fails over, the front-end client might receive a HTTP redirect +(303) response and will be redirected to an unresolvable host. This happens when +the failover occurs during two HAProxy health checks. In this situation the +previously active dashboard node will now respond with a 303 which points to +the new active node. To prevent that situation you should consider to disable +the redirection behaviour on standby nodes. + +:: + + defaults + log global + option log-health-checks + timeout connect 5s + timeout client 50s + timeout server 450s + + frontend dashboard_front + mode http + bind *:80 + option httplog + redirect scheme https code 301 if !{ ssl_fc } + + frontend dashboard_front_ssl + mode tcp + bind *:443 + option tcplog + default_backend dashboard_back_ssl + + backend dashboard_back_ssl + mode tcp + option httpchk GET / + http-check expect status 200 + server x : check-ssl check verify none + server y : check-ssl check verify none + server z : check-ssl check verify none .. 
_dashboard-auditing: diff --git a/ceph/doc/mgr/orchestrator_cli.rst b/ceph/doc/mgr/orchestrator_cli.rst index da87aa710..6ee378457 100644 --- a/ceph/doc/mgr/orchestrator_cli.rst +++ b/ceph/doc/mgr/orchestrator_cli.rst @@ -49,7 +49,8 @@ The relation between the names is the following: Configuration ============= -You can select the orchestrator module to use with the ``set backend`` command:: +To enable the orchestrator, please select the orchestrator module to use +with the ``set backend`` command:: ceph orchestrator set backend @@ -62,6 +63,14 @@ You can then check backend is properly configured:: ceph orchestrator status +Disable the Orchestrator +~~~~~~~~~~~~~~~~~~~~~~~~ + +To disable the orchestrator again, use the empty string ``""``:: + + ceph orchestrator set backend ""`` + ceph mgr module disable rook + Usage ===== @@ -193,11 +202,11 @@ services of a particular type via optional --type parameter :: - ceph orchestrator service ls [--host host] [--svc_type type] [--refresh|--no-cache] + ceph orchestrator service ls [--host host] [--svc_type type] [--refresh] Discover the status of a particular service:: - ceph orchestrator service status  [--refresh] + ceph orchestrator service ls --svc_type type --svc_id [--refresh] Query the status of a particular service instance (mon, osd, mds, rgw). For OSDs @@ -256,7 +265,6 @@ This is an overview of the current implementation status of the orchestrators. device {ident,fault}-(on,off} ⚪ ⚪ ⚪ ⚪ device ls ✔️ ✔️ ✔️ ✔️ service ls ⚪ ✔️ ✔️ ⚪ - service status ⚪ ✔️ ✔️ ⚪ service-instance status ⚪ ⚪ ⚪ ⚪ iscsi {stop,start,reload} ⚪ ⚪ ⚪ ⚪ iscsi add ⚪ ⚪ ⚪ ⚪ diff --git a/ceph/doc/mgr/telemetry.rst b/ceph/doc/mgr/telemetry.rst index 2bd1edb09..9621a4a62 100644 --- a/ceph/doc/mgr/telemetry.rst +++ b/ceph/doc/mgr/telemetry.rst @@ -7,19 +7,36 @@ The telemetry module sends anonymous data about the cluster back to the Ceph developers to help understand how Ceph is used and what problems users may be experiencing. -Reported telemetry includes: +Channels +-------- + +The telemetry report is broken down into several "channels," each with +a different type of information. Assuming telemetry has been enabled, +individual channels can be turned on and off. (If telemetry is off, +the per-channel setting has no effect.) + +* **basic** (default: on): Basic information about the cluster - * capacity of the cluster - * number of monitors, managers, OSDs, MDSs, radosgws, or other daemons - * software version currently being used - * number and types of RADOS pools and CephFS file systems - * information about daemon crashes, including + - capacity of the cluster + - number of monitors, managers, OSDs, MDSs, radosgws, or other daemons + - software version currently being used + - number and types of RADOS pools and CephFS file systems + - names of configuration options that have been changed from their + default (but *not* their values) + +* **crash** (default: on): Information about daemon crashes, including - type of daemon - version of the daemon - operating system (OS distribution, kernel version) - stack trace identifying where in the Ceph code the crash occurred +* **ident** (default: on): User-provided identifying information about + the cluster + + - cluster description + - contact email address + The data being reported does *not* contain any sensitive data like pool names, object names, object contents, or hostnames. @@ -30,50 +47,61 @@ the way Ceph is used. Data is sent over HTTPS to *telemetry.ceph.com*. 
+Enabling the module +------------------- + +The module must first be enabled. Note that even if the module is +enabled, telemetry is still "off" by default, so simply enabling the +module will *NOT* result in any data being shared.:: + + ceph mgr module enable telemetry + Sample report ------------- You can look at what data is reported at any time with the command:: - ceph mgr module enable telemetry ceph telemetry show If you have any concerns about privacy with regard to the information included in this report, please contact the Ceph developers. -Enabling +Channels -------- -The *telemetry* module is enabled with:: +Individual channels can be enabled or disabled with:: + + ceph config set mgr mgr/telemetry/channel_ident false + ceph config set mgr mgr/telemetry/channel_basic false + ceph config set mgr mgr/telemetry/channel_crash false + ceph telemetry show + +Enabling Telemetry +------------------ + +To allow the *telemetry* module to start sharing data,:: - ceph mgr module enable telemetry ceph telemetry on -Telemetry can be disabled with:: +Telemetry can be disabled at any time with:: ceph telemetry off Interval -------- -The module compiles and sends a new report every 72 hours by default. +The module compiles and sends a new report every 24 hours by default. You can adjust this interval with:: - ceph config set mgr mgr/telemetry/interval 24 # report every day + ceph config set mgr mgr/telemetry/interval 72 # report every three days Contact and Description ----------------------- -A contact and description can be added to the report. This is completely optional.:: +A contact and description can be added to the report. This is +completely optional, and disabled by default.:: ceph config set mgr mgr/telemetry/contact 'John Doe ' ceph config set mgr mgr/telemetry/description 'My first Ceph cluster' + ceph config set mgr mgr/telemetry/channel_ident true -Show report ------------ - -The report is sent in JSON format, and can be printed:: - - ceph telemetry show - -So you can inspect the content if you have privacy concerns. diff --git a/ceph/doc/rados/command/list-inconsistent-obj.json b/ceph/doc/rados/command/list-inconsistent-obj.json index 637e3ed8f..2bdc5f74c 100644 --- a/ceph/doc/rados/command/list-inconsistent-obj.json +++ b/ceph/doc/rados/command/list-inconsistent-obj.json @@ -91,7 +91,8 @@ "attr_value_mismatch", "attr_name_mismatch", "snapset_inconsistency", - "hinfo_inconsistency" + "hinfo_inconsistency", + "size_too_large" ] }, "minItems": 0, diff --git a/ceph/doc/rados/configuration/bluestore-config-ref.rst b/ceph/doc/rados/configuration/bluestore-config-ref.rst index 8eb96bafa..7d1c50c91 100644 --- a/ceph/doc/rados/configuration/bluestore-config-ref.rst +++ b/ceph/doc/rados/configuration/bluestore-config-ref.rst @@ -231,12 +231,13 @@ The configured cache memory budget can be used in a few different ways: * BlueStore data (i.e., recently read or written object data) Cache memory usage is governed by the following options: -``bluestore_cache_meta_ratio``, ``bluestore_cache_kv_ratio``, and -``bluestore_cache_kv_max``. The fraction of the cache devoted to data -is 1.0 minus the meta and kv ratios. The memory devoted to kv -metadata (the RocksDB cache) is capped by ``bluestore_cache_kv_max`` -since our testing indicates there are diminishing returns beyond a -certain point. +``bluestore_cache_meta_ratio`` and ``bluestore_cache_kv_ratio``. 
+The fraction of the cache devoted to data +is governed by the effective bluestore cache size (depending on +``bluestore_cache_size[_ssd|_hdd]`` settings and the device class of the primary +device) as well as the meta and kv ratios. +The data fraction can be calculated by +`` * (1 - bluestore_cache_meta_ratio - bluestore_cache_kv_ratio)`` ``bluestore_cache_size`` @@ -264,14 +265,14 @@ certain point. :Description: The ratio of cache devoted to metadata. :Type: Floating point :Required: Yes -:Default: ``.01`` +:Default: ``.4`` ``bluestore_cache_kv_ratio`` :Description: The ratio of cache devoted to key/value data (rocksdb). :Type: Floating point :Required: Yes -:Default: ``.99`` +:Default: ``.4`` ``bluestore_cache_kv_max`` diff --git a/ceph/doc/rados/configuration/mon-config-ref.rst b/ceph/doc/rados/configuration/mon-config-ref.rst index fd10763f3..67318f490 100644 --- a/ceph/doc/rados/configuration/mon-config-ref.rst +++ b/ceph/doc/rados/configuration/mon-config-ref.rst @@ -393,6 +393,25 @@ by setting it in the ``[mon]`` section of the configuration file. :Default: True +``mon warn on slow ping ratio`` + +:Description: Issue a ``HEALTH_WARN`` in cluster log if any heartbeat + between OSDs exceeds ``mon warn on slow ping ratio`` + of ``osd heartbeat grace``. The default is 5%. +:Type: Float +:Default: ``0.05`` + + +``mon warn on slow ping time`` + +:Description: Override ``mon warn on slow ping ratio`` with a specific value. + Issue a ``HEALTH_WARN`` in cluster log if any heartbeat + between OSDs exceeds ``mon warn on slow ping time`` + milliseconds. The default is 0 (disabled). +:Type: Integer +:Default: ``0`` + + ``mon cache target full warn ratio`` :Description: Position between pool's ``cache_target_full`` and @@ -422,8 +441,8 @@ by setting it in the ``[mon]`` section of the configuration file. log (a non-positive number disables it). If current health summary is empty or identical to the last time, monitor will not send it to cluster log. -:Type: Integer -:Default: 3600 +:Type: Float +:Default: 60.000000 ``mon health to clog interval`` @@ -433,7 +452,7 @@ by setting it in the ``[mon]`` section of the configuration file. send the summary to cluster log no matter if the summary changes or not. :Type: Integer -:Default: 60 +:Default: 3600 @@ -1196,6 +1215,26 @@ Miscellaneous :Type: Integer :Default: 300 +``mon osd cache size min`` + +:Description: The minimum amount of bytes to be kept mapped in memory for osd + monitor caches. +:Type: 64-bit Integer +:Default: 134217728 + +``mon memory target`` + +:Description: The amount of bytes pertaining to osd monitor caches and kv cache + to be kept mapped in memory with cache auto-tuning enabled. +:Type: 64-bit Integer +:Default: 2147483648 + +``mon memory autotune`` + +:Description: Autotune the cache memory being used for osd monitors and kv + database. +:Type: Boolean +:Default: True .. _Paxos: https://en.wikipedia.org/wiki/Paxos_(computer_science) diff --git a/ceph/doc/rados/configuration/mon-osd-interaction.rst b/ceph/doc/rados/configuration/mon-osd-interaction.rst index e2c247714..a7324ebb0 100644 --- a/ceph/doc/rados/configuration/mon-osd-interaction.rst +++ b/ceph/doc/rados/configuration/mon-osd-interaction.rst @@ -24,10 +24,8 @@ monitoring the Ceph Storage Cluster. OSDs Check Heartbeats ===================== -Each Ceph OSD Daemon checks the heartbeat of other Ceph OSD Daemons every 6 -seconds. 
You can change the heartbeat interval by adding an ``osd heartbeat -interval`` setting under the ``[osd]`` section of your Ceph configuration file, -or by setting the value at runtime. If a neighboring Ceph OSD Daemon doesn't +Each Ceph OSD Daemon checks the heartbeat of other Ceph OSD Daemons at random +intervals less than every 6 seconds. If a neighboring Ceph OSD Daemon doesn't show a heartbeat within a 20 second grace period, the Ceph OSD Daemon may consider the neighboring Ceph OSD Daemon ``down`` and report it back to a Ceph Monitor, which will update the Ceph Cluster Map. You may change this grace @@ -379,6 +377,15 @@ OSD Settings :Default: ``30`` +``osd mon heartbeat stat stale`` + +:Description: Stop reporting on heartbeat ping times which haven't been updated for + this many seconds. Set to zero to disable this action. + +:Type: 32-bit Integer +:Default: ``3600`` + + ``osd mon report interval`` :Description: The number of seconds a Ceph OSD Daemon may wait diff --git a/ceph/doc/rados/operations/balancer.rst b/ceph/doc/rados/operations/balancer.rst index 930ef0d8c..530e0dc41 100644 --- a/ceph/doc/rados/operations/balancer.rst +++ b/ceph/doc/rados/operations/balancer.rst @@ -42,9 +42,9 @@ healed itself). When the cluster is healthy, the balancer will throttle its changes such that the percentage of PGs that are misplaced (i.e., that need to be moved) is below a threshold of (by default) 5%. The -``max_misplaced`` threshold can be adjusted with:: +``target_max_misplaced_ratio`` threshold can be adjusted with:: - ceph config set mgr mgr/balancer/max_misplaced .07 # 7% + ceph config set mgr target_max_misplaced_ratio .07 # 7% Modes diff --git a/ceph/doc/rados/operations/health-checks.rst b/ceph/doc/rados/operations/health-checks.rst index f35d7aaf0..2ed2da487 100644 --- a/ceph/doc/rados/operations/health-checks.rst +++ b/ceph/doc/rados/operations/health-checks.rst @@ -335,6 +335,59 @@ needs to be stopped and BlueFS informed of the device size change with:: ceph-bluestore-tool bluefs-bdev-expand --path /var/lib/ceph/osd/ceph-$ID +BLUEFS_AVAILABLE_SPACE +______________________ + +To check how much space is free for BlueFS do:: + + ceph daemon osd.123 bluestore bluefs available + +This will output up to 3 values: `BDEV_DB free`, `BDEV_SLOW free` and +`available_from_bluestore`. `BDEV_DB` and `BDEV_SLOW` report amount of space that +has been acquired by BlueFS and is considered free. Value `available_from_bluestore` +denotes ability of BlueStore to relinquish more space to BlueFS. +It is normal that this value is different from amount of BlueStore free space, as +BlueFS allocation unit is typically larger than BlueStore allocation unit. +This means that only part of BlueStore free space will be acceptable for BlueFS. + +BLUEFS_LOW_SPACE +_________________ + +If BlueFS is running low on available free space and there is little +`available_from_bluestore` one can consider reducing BlueFS allocation unit size. +To simulate available space when allocation unit is different do:: + + ceph daemon osd.123 bluestore bluefs available + +BLUESTORE_FRAGMENTATION +_______________________ + +As BlueStore works free space on underlying storage will get fragmented. +This is normal and unavoidable but excessive fragmentation will cause slowdown. +To inspect BlueStore fragmentation one can do:: + + ceph daemon osd.123 bluestore allocator score block + +Score is given in [0-1] range. +[0.0 .. 0.4] tiny fragmentation +[0.4 .. 0.7] small, acceptable fragmentation +[0.7 .. 
0.9] considerable, but safe fragmentation +[0.9 .. 1.0] severe fragmentation, may impact BlueFS ability to get space from BlueStore + +If detailed report of free fragments is required do:: + + ceph daemon osd.123 bluestore allocator dump block + +In case when handling OSD process that is not running fragmentation can be +inspected with `ceph-bluestore-tool`. +Get fragmentation score:: + + ceph-bluestore-tool --path /var/lib/ceph/osd/ceph-123 --allocator block free-score + +And dump detailed free chunks:: + + ceph-bluestore-tool --path /var/lib/ceph/osd/ceph-123 --allocator block free-dump + BLUESTORE_LEGACY_STATFS _______________________ @@ -489,16 +542,27 @@ The state of specific problematic PGs can be queried with:: ceph tell query -PG_DEGRADED_FULL +PG_RECOVERY_FULL +________________ + +Data redundancy may be reduced or at risk for some data due to a lack +of free space in the cluster. Specifically, one or more PGs has the +*recovery_toofull* flag set, meaning that the +cluster is unable to migrate or recover data because one or more OSDs +is above the *full* threshold. + +See the discussion for *OSD_FULL* above for steps to resolve this condition. + +PG_BACKFILL_FULL ________________ Data redundancy may be reduced or at risk for some data due to a lack of free space in the cluster. Specifically, one or more PGs has the -*backfill_toofull* or *recovery_toofull* flag set, meaning that the +*backfill_toofull* flag set, meaning that the cluster is unable to migrate or recover data because one or more OSDs is above the *backfillfull* threshold. -See the discussion for *OSD_BACKFILLFULL* or *OSD_FULL* above for +See the discussion for *OSD_BACKFILLFULL* above for steps to resolve this condition. PG_DAMAGED @@ -678,6 +742,12 @@ the pool is too large and should be reduced or set to zero with:: For more information, see :ref:`specifying_pool_target_size`. +TOO_FEW_OSDS +____________ + +The number of OSDs in the cluster is below the configurable +threshold of ``osd_pool_default_size``. + SMALLER_PGP_NUM _______________ @@ -840,3 +910,72 @@ happen if they are misplaced or degraded (see *PG_AVAILABILITY* and You can manually initiate a scrub of a clean PG with:: ceph pg deep-scrub + + +Miscellaneous +------------- + +RECENT_CRASH +____________ + +One or more Ceph daemons has crashed recently, and the crash has not +yet been archived (acknowledged) by the administrator. This may +indicate a software bug, a hardware problem (e.g., a failing disk), or +some other problem. + +New crashes can be listed with:: + + ceph crash ls-new + +Information about a specific crash can be examined with:: + + ceph crash info + +This warning can be silenced by "archiving" the crash (perhaps after +being examined by an administrator) so that it does not generate this +warning:: + + ceph crash archive + +Similarly, all new crashes can be archived with:: + + ceph crash archive-all + +Archived crashes will still be visible via ``ceph crash ls`` but not +``ceph crash ls-new``. + +The time period for what "recent" means is controlled by the option +``mgr/crash/warn_recent_interval`` (default: two weeks). + +These warnings can be disabled entirely with:: + + ceph config set mgr/crash/warn_recent_interval 0 + +TELEMETRY_CHANGED +_________________ + +Telemetry has been enabled, but the contents of the telemetry report +have changed since that time, so telemetry reports will not be sent. 
+ +The Ceph developers periodically revise the telemetry feature to +include new and useful information, or to remove information found to +be useless or sensitive. If any new information is included in the +report, Ceph will require the administrator to re-enable telemetry to +ensure they have an opportunity to (re)review what information will be +shared. + +To review the contents of the telemetry report,:: + + ceph telemetry show + +Note that the telemetry report consists of several optional channels +that may be independently enabled or disabled. For more information, see +:ref:`telemetry`. + +To re-enable telemetry (and make this warning go away),:: + + ceph telemetry on + +To disable telemetry (and make this warning go away),:: + + ceph telemetry off diff --git a/ceph/doc/rados/operations/monitoring-osd-pg.rst b/ceph/doc/rados/operations/monitoring-osd-pg.rst index c490e1c3e..630d268b4 100644 --- a/ceph/doc/rados/operations/monitoring-osd-pg.rst +++ b/ceph/doc/rados/operations/monitoring-osd-pg.rst @@ -385,6 +385,11 @@ and, ``backfill_toofull`` indicates that a backfill operation was requested, but couldn't be completed due to insufficient storage capacity. When a placement group cannot be backfilled, it may be considered ``incomplete``. +The ``backfill_toofull`` state may be transient. It is possible that as PGs +are moved around, space may become available. The ``backfill_toofull`` is +similar to ``backfill_wait`` in that as soon as conditions change +backfill can proceed. + Ceph provides a number of settings to manage the load spike associated with reassigning placement groups to an OSD (especially a new OSD). By default, ``osd_max_backfills`` sets the maximum number of concurrent backfills to and from diff --git a/ceph/doc/rados/operations/monitoring.rst b/ceph/doc/rados/operations/monitoring.rst index 728309119..294e922de 100644 --- a/ceph/doc/rados/operations/monitoring.rst +++ b/ceph/doc/rados/operations/monitoring.rst @@ -159,6 +159,114 @@ to a health state: 2017-07-25 10:11:13.535493 mon.a mon.0 172.21.9.34:6789/0 110 : cluster [INF] Health check cleared: PG_DEGRADED (was: Degraded data redundancy: 2 pgs unclean, 2 pgs degraded, 2 pgs undersized) 2017-07-25 10:11:13.535577 mon.a mon.0 172.21.9.34:6789/0 111 : cluster [INF] Cluster is now healthy +Network Performance Checks +-------------------------- + +Ceph OSDs send heartbeat ping messages amongst themselves to monitor daemon availability. We +also use the response times to monitor network performance. +While it is possible that a busy OSD could delay a ping response, we can assume +that if a network switch fails mutiple delays will be detected between distinct pairs of OSDs. + +By default we will warn about ping times which exceed 1 second (1000 milliseconds). + +:: + + HEALTH_WARN Long heartbeat ping times on back interface seen, longest is 1118.001 msec + +The health detail will add the combination of OSDs are seeing the delays and by how much. There is a limit of 10 +detail line items. + +:: + + [WRN] OSD_SLOW_PING_TIME_BACK: Long heartbeat ping times on back interface seen, longest is 1118.001 msec + Slow heartbeat ping on back interface from osd.0 to osd.1 1118.001 msec + Slow heartbeat ping on back interface from osd.0 to osd.2 1030.123 msec + Slow heartbeat ping on back interface from osd.2 to osd.1 1015.321 msec + Slow heartbeat ping on back interface from osd.1 to osd.0 1010.456 msec + +To see even more detail and a complete dump of network performance information the ``dump_osd_network`` command can be used. 
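The full command, together with a sample of its JSON output, is shown a little further
below. Purely as an illustration (this script is not part of the Ceph tooling; the admin
socket path, the 750 msec cut-off and the assumption that values are reported in
milliseconds are all placeholders), the same output can also be post-processed
programmatically::

    import json
    import subprocess

    # Assumed admin socket of the active mgr; adjust for the local cluster.
    SOCKET = "/var/run/ceph/ceph-mgr.x.asok"
    THRESHOLD_MS = 750.0  # example cut-off, not a Ceph default

    # A threshold argument of 0 asks for all gathered entries.
    raw = subprocess.check_output(
        ["ceph", "daemon", SOCKET, "dump_osd_network", "0"])
    report = json.loads(raw)

    # Report any OSD pair whose most recent ping exceeded the cut-off.
    for entry in report["entries"]:
        if entry["last"] > THRESHOLD_MS:
            print("osd.{} -> osd.{} ({}): last ping {} msec".format(
                entry["from osd"], entry["to osd"],
                entry["interface"], entry["last"]))
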
Typically, this would be +sent to a mgr, but it can be limited to a particular OSD's interactions by issuing it to any OSD. The current threshold which defaults to 1 second +(1000 milliseconds) can be overridden as an argument in milliseconds. + +The following command will show all gathered network performance data by specifying a threshold of 0 and sending to the mgr. + +:: + + $ ceph daemon /var/run/ceph/ceph-mgr.x.asok dump_osd_network 0 + { + "threshold": 0, + "entries": [ + { + "last update": "Wed Sep 4 17:04:49 2019", + "stale": false, + "from osd": 2, + "to osd": 0, + "interface": "front", + "average": { + "1min": 1.023, + "5min": 0.860, + "15min": 0.883 + }, + "min": { + "1min": 0.818, + "5min": 0.607, + "15min": 0.607 + }, + "max": { + "1min": 1.164, + "5min": 1.173, + "15min": 1.544 + }, + "last": 0.924 + }, + { + "last update": "Wed Sep 4 17:04:49 2019", + "stale": false, + "from osd": 2, + "to osd": 0, + "interface": "back", + "average": { + "1min": 0.968, + "5min": 0.897, + "15min": 0.830 + }, + "min": { + "1min": 0.860, + "5min": 0.563, + "15min": 0.502 + }, + "max": { + "1min": 1.171, + "5min": 1.216, + "15min": 1.456 + }, + "last": 0.845 + }, + { + "last update": "Wed Sep 4 17:04:48 2019", + "stale": false, + "from osd": 0, + "to osd": 1, + "interface": "front", + "average": { + "1min": 0.965, + "5min": 0.811, + "15min": 0.850 + }, + "min": { + "1min": 0.650, + "5min": 0.488, + "15min": 0.466 + }, + "max": { + "1min": 1.252, + "5min": 1.252, + "15min": 1.362 + }, + "last": 0.791 + }, + ... + Detecting configuration issues ============================== diff --git a/ceph/doc/rados/operations/pg-states.rst b/ceph/doc/rados/operations/pg-states.rst index 651d924d2..c38a683f0 100644 --- a/ceph/doc/rados/operations/pg-states.rst +++ b/ceph/doc/rados/operations/pg-states.rst @@ -69,8 +69,8 @@ map is ``active + clean``. The placement group is waiting in line to start backfill. *backfill_toofull* - A backfill operation is waiting because the destination OSD is over its - full ratio. + A backfill operation is waiting because the destination OSD is over + the backfillfull ratio. *backfill_unfound* Backfill stopped due to unfound objects. diff --git a/ceph/doc/rados/operations/placement-groups.rst b/ceph/doc/rados/operations/placement-groups.rst index 63048cdd7..a2c89e541 100644 --- a/ceph/doc/rados/operations/placement-groups.rst +++ b/ceph/doc/rados/operations/placement-groups.rst @@ -382,14 +382,15 @@ makes every effort to evenly spread OSDs among all existing Placement Groups. As long as there are one or two orders of magnitude more Placement -Groups than OSDs, the distribution should be even. For instance, 300 -placement groups for 3 OSDs, 1000 placement groups for 10 OSDs etc. +Groups than OSDs, the distribution should be even. For instance, 256 +placement groups for 3 OSDs, 512 or 1024 placement groups for 10 OSDs +etc. Uneven data distribution can be caused by factors other than the ratio between OSDs and placement groups. Since CRUSH does not take into account the size of the objects, a few very large objects may create an imbalance. Let say one million 4K objects totaling 4GB are evenly -spread among 1000 placement groups on 10 OSDs. They will use 4GB / 10 +spread among 1024 placement groups on 10 OSDs. They will use 4GB / 10 = 400MB on each OSD. 
If one 400MB object is added to the pool, the three OSDs supporting the placement group in which the object has been placed will be filled with 400MB + 400MB = 800MB while the seven @@ -433,9 +434,12 @@ You should then check if the result makes sense with the way you designed your Ceph cluster to maximize `data durability`_, `object distribution`_ and minimize `resource usage`_. -The result should be **rounded up to the nearest power of two.** -Rounding up is optional, but recommended for CRUSH to more evenly balance -the number of objects among placement groups. +The result should always be **rounded up to the nearest power of two**. + +Only a power of two will evenly balance the number of objects among +placement groups. Other values will result in an uneven distribution of +data across your OSDs. Their use should be limited to incrementally +stepping from one power of two to another. As an example, for a cluster with 200 OSDs and a pool size of 3 replicas, you would estimate your number of PGs as follows:: diff --git a/ceph/doc/radosgw/index.rst b/ceph/doc/radosgw/index.rst index 453b56ca7..0523caadb 100644 --- a/ceph/doc/radosgw/index.rst +++ b/ceph/doc/radosgw/index.rst @@ -61,6 +61,7 @@ you may write data with one API and retrieve it with the other. Dynamic bucket index resharding Multi factor authentication Sync Modules + Bucket Notifications Data Layout in RADOS STS Lite Role diff --git a/ceph/doc/radosgw/multisite.rst b/ceph/doc/radosgw/multisite.rst index 431c2da2b..17132371e 100644 --- a/ceph/doc/radosgw/multisite.rst +++ b/ceph/doc/radosgw/multisite.rst @@ -229,7 +229,7 @@ the default zone group first. # radosgw-admin zonegroup remove --rgw-zonegroup=default --rgw-zone=default # radosgw-admin period update --commit - # radosgw-admin zone delete --rgw-zone=default + # radosgw-admin zone rm --rgw-zone=default # radosgw-admin period update --commit # radosgw-admin zonegroup delete --rgw-zonegroup=default # radosgw-admin period update --commit @@ -404,7 +404,7 @@ Delete the default zone if needed. :: - # radosgw-admin zone delete --rgw-zone=default + # radosgw-admin zone rm --rgw-zone=default Finally, delete the default pools in your Ceph storage cluster if needed. @@ -1277,7 +1277,7 @@ Next, delete the zone. Execute the following: :: - # radosgw-admin zone delete --rgw-zone + # radosgw-admin zone rm --rgw-zone Finally, update the period: diff --git a/ceph/doc/radosgw/notifications.rst b/ceph/doc/radosgw/notifications.rst new file mode 100644 index 000000000..152dc03f8 --- /dev/null +++ b/ceph/doc/radosgw/notifications.rst @@ -0,0 +1,291 @@ +==================== +Bucket Notifications +==================== + +.. versionadded:: Nautilus + +.. contents:: + +Bucket notifications provide a mechanism for sending information out of the radosgw when certain events are happening on the bucket. +Currently, notifications could be sent to HTTP and AMQP0.9.1 endpoints. + +Note, that if the events should be stored in Ceph, in addition, or instead of being pushed to an endpoint, +the `PubSub Module`_ should be used instead of the bucket notification mechanism. + +A user can create different topics. A topic entity is defined by its user and its name. A +user can only manage its own topics, and can only associate them with buckets it owns. + +In order to send notifications for events for a specific bucket, a notification entity needs to be created. A +notification can be created on a subset of event types, or for all event types (default). 
+The notification may also filter out events based on preffix/suffix and/or regular expression matching of the keys. As well as, +on the metadata attributes attached to the object. +There can be multiple notifications for any specific topic, and the same topic could be used for multiple notifications. + +REST API has been defined to provide configuration and control interfaces for the bucket notification +mechanism. This API is similar to the one defined as S3-compatible API of the pubsub sync module. + +.. toctree:: + :maxdepth: 1 + + S3 Bucket Notification Compatibility + +Notificatios Performance Stats +------------------------------ +Same counters are shared between the pubsub sync module and the bucket notification mechanism. + +- ``pubsub_event_triggered``: running counter of events with at lease one topic associated with them +- ``pubsub_event_lost``: running counter of events that had topics associated with them but that were not pushed to any of the endpoints +- ``pubsub_push_ok``: running counter, for all notifications, of events successfully pushed to their endpoint +- ``pubsub_push_fail``: running counter, for all notifications, of events failed to be pushed to their endpoint +- ``pubsub_push_pending``: gauge value of events pushed to an endpoint but not acked or nacked yet + +.. note:: + + ``pubsub_event_triggered`` and ``pubsub_event_lost`` are incremented per event, while: + ``pubsub_push_ok``, ``pubsub_push_fail``, are incremented per push action on each notification. + +Bucket Notification REST API +---------------------------- + +Topics +~~~~~~ + +Create a Topic +`````````````` + +This will create a new topic. The topic should be provided with push endpoint parameters that would be used later +when a notification is created. +Upon successful request, the response will include the topic ARN that could be later used to reference this topic in the notification request. +To update a topic, use the same command used for topic creation, with the topic name of an existing topic and different endpoint values. + +.. tip:: Any notification already associated with the topic needs to be re-created for the topic update to take effect + +:: + + POST + Action=CreateTopic + &Name= + &push-endpoint= + [&Attributes.entry.1.key=amqp-exchange&Attributes.entry.1.value=] + [&Attributes.entry.2.key=amqp-sck-level&Attributes.entry.2.value=ack-level] + &Attributes.entry.3.key=verify-sll&Attributes.entry.3.value=true|false] + +Request parameters: + +- push-endpoint: URI of endpoint to send push notification to + + - URI schema is: ``http[s]|amqp://[:@][:][/]`` + - Same schema is used for HTTP and AMQP endpoints (except amqp-vhost which is specific to AMQP) + - Default values for HTTP/S: no user/password, port 80/443 + - Default values for AMQP: user/password=guest/guest, port 5672, amqp-vhost is "/" + +- verify-ssl: can be used with https endpoints (ignored for other endpoints), indicate whether the server certificate is validated or not ("true" by default) +- amqp-exchange: mandatory parameter for AMQP endpoint. The exchanges must exist and be able to route messages based on topics +- amqp-ack-level: No end2end acking is required, as messages may persist in the broker before delivered into their final destination. 2 ack methods exist: + + - "none" - message is considered "delivered" if sent to broker + - "broker" message is considered "delivered" if acked by broker + +.. 
note:: + + - The key/value of a specific parameter does not have to reside in the same line, or in any specific order, but must use the same index + - Attribute indexing does not need to be sequntial or start from any specific value + - `AWS Create Topic`_ has detailed explanation on endpoint attributes format. However, in our case different keys and values are used + +The response will have the following format: + +:: + + + + + + + + + + +The topic ARN in the response will have the following format: + +:: + + arn:aws:sns::: + +Get Topic Information +````````````````````` + +Returns information about specific topic. This includes push-endpoint information, if provided. + +:: + + POST + Action=GetTopic&TopicArn= + +Response will have the following format: + +:: + + + + + + + + + + + + + + + + + + + +- User: name of the user that created the topic +- Name: name of the topic +- EndPoinjtAddress: the push-endpoint URL +- EndPointArgs: the push-endpoint args +- EndpointTopic: the topic name that should be sent to the endpoint (mat be different than the above topic name) +- TopicArn: topic ARN + +Delete Topic +```````````` + +:: + + POST + Action=DeleteTopic&TopicArn= + +Delete the specified topic. Note that deleting a deleted topic should result with no-op and not a failure. + +The response will have the following format: + +:: + + + + + + + +List Topics +``````````` + +List all topics that user defined. + +:: + + POST + Action=ListTopics + +Response will have the following format: + +:: + + + + + + + + + + + + + + + + + + + + + +Notifications +~~~~~~~~~~~~~ + +Detailed under: `Bucket Operations`_. + +.. note:: + + - "Abort Multipart Upload" request does not emit a notification + - "Delete Multiple Objects" request does not emit a notification + - Both "Initiate Multipart Upload" and "POST Object" requests will emit an ``s3:ObjectCreated:Post`` notification + + +Events +~~~~~~ + +The events are in JSON format (regardless of the actual endpoint), and share the same structure as the S3-compatible events +pushed or pulled using the pubsub sync module. 
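Since HTTP/S is one of the supported push endpoints, the receiving side can be as small
as a web server that decodes the JSON body of each pushed request. The following sketch
is illustrative only (the port is arbitrary, and authentication, TLS and error handling
are omitted)::

    import json
    from http.server import BaseHTTPRequestHandler, HTTPServer

    class NotificationHandler(BaseHTTPRequestHandler):
        def do_POST(self):
            # Decode the pushed bucket-notification records.
            length = int(self.headers.get("Content-Length", 0))
            body = json.loads(self.rfile.read(length) or b"{}")
            for record in body.get("Records", []):
                print(record["eventName"], record["s3"]["object"]["key"])
            # A successful HTTP response is assumed to count as delivery.
            self.send_response(200)
            self.end_headers()

    # Arbitrary port; point the topic's push-endpoint at this address.
    HTTPServer(("", 8080), NotificationHandler).serve_forever()

Each pushed request carries a JSON body with the record structure shown next: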
+ +:: + + {"Records":[ + { + "eventVersion":"2.1" + "eventSource":"aws:s3", + "awsRegion":"", + "eventTime":"", + "eventName":"", + "userIdentity":{ + "principalId":"" + }, + "requestParameters":{ + "sourceIPAddress":"" + }, + "responseElements":{ + "x-amz-request-id":"", + "x-amz-id-2":"" + }, + "s3":{ + "s3SchemaVersion":"1.0", + "configurationId":"", + "bucket":{ + "name":"", + "ownerIdentity":{ + "principalId":"" + }, + "arn":"", + "id:"" + }, + "object":{ + "key":"", + "size":"", + "eTag":"", + "versionId":"", + "sequencer": "", + "metadata":"" + } + }, + "eventId":"", + } + ]} + +- awsRegion: zonegroup +- eventTime: timestamp indicating when the event was triggered +- eventName: for list of supported events see: `S3 Notification Compatibility`_ +- userIdentity.principalId: user that triggered the change +- requestParameters.sourceIPAddress: not supported +- responseElements.x-amz-request-id: request ID of the original change +- responseElements.x_amz_id_2: RGW on which the change was made +- s3.configurationId: notification ID that created the event +- s3.bucket.name: name of the bucket +- s3.bucket.ownerIdentity.principalId: owner of the bucket +- s3.bucket.arn: ARN of the bucket +- s3.bucket.id: Id of the bucket (an extension to the S3 notification API) +- s3.object.key: object key +- s3.object.size: object size +- s3.object.eTag: object etag +- s3.object.version: object version in case of versioned bucket +- s3.object.sequencer: monotonically increasing identifier of the change per object (hexadecimal format) +- s3.object.metadata: any metadata set on the object sent as: ``x-amz-meta-`` (an extension to the S3 notification API) +- s3.eventId: not supported (an extension to the S3 notification API) + +.. _PubSub Module : ../pubsub-module +.. _S3 Notification Compatibility: ../s3-notification-compatibility +.. _AWS Create Topic: https://docs.aws.amazon.com/sns/latest/api/API_CreateTopic.html +.. _Bucket Operations: ../s3/bucketops diff --git a/ceph/doc/radosgw/pubsub-module.rst b/ceph/doc/radosgw/pubsub-module.rst index 796b18d36..11dbd7e1e 100644 --- a/ceph/doc/radosgw/pubsub-module.rst +++ b/ceph/doc/radosgw/pubsub-module.rst @@ -1,49 +1,73 @@ -========================= +================== PubSub Sync Module -========================= +================== .. versionadded:: Nautilus +.. contents:: + This sync module provides a publish and subscribe mechanism for the object store modification -events. Events are published into defined topics. Topics can be subscribed to, and events +events. Events are published into predefined topics. Topics can be subscribed to, and events can be pulled from them. Events need to be acked. Also, events will expire and disappear -after a period of time. A push notification mechanism exists too, currently supporting HTTP and -AMQP0.9.1 endpoints. +after a period of time. + +A push notification mechanism exists too, currently supporting HTTP and +AMQP0.9.1 endpoints, on top of storing the events in Ceph. If events should only be pushed to an endpoint +and do not need to be stored in Ceph, the `Bucket Notification`_ mechanism should be used instead of pubsub sync module. A user can create different topics. A topic entity is defined by its user and its name. A user can only manage its own topics, and can only subscribe to events published by buckets it owns. -In order to publish events for specific bucket a notification needs to be created. A -notification can be created only on subset of event types, or for all event types (default). 
-There can be multiple notifications for any specific topic. +In order to publish events for specific bucket a notification entity needs to be created. A +notification can be created on a subset of event types, or for all event types (default). +There can be multiple notifications for any specific topic, and the same topic could be used for multiple notifications. A subscription to a topic can also be defined. There can be multiple subscriptions for any specific topic. -A new REST api has been defined to provide configuration and control interfaces for the pubsub -mechanisms. +REST API has been defined to provide configuration and control interfaces for the pubsub +mechanisms. This API has two flavors, one is S3-compatible and one is not. The two flavors can be used +together, although it is recommended to use the S3-compatible one. +The S3-compatible API is similar to the one used in the bucket notification mechanism. -Events are stored as rgw objects in a special bucket, under a special user. Events cannot -be accessed directly, but need to be pulled and acked using the new REST api. +Events are stored as RGW objects in a special bucket, under a special user. Events cannot +be accessed directly, but need to be pulled and acked using the new REST API. +.. toctree:: + :maxdepth: 1 + S3 Bucket Notification Compatibility + +PubSub Zone Configuration +------------------------- -PubSub Tier Type Configuration -------------------------------------- +The pubsub sync module requires the creation of a new zone in a `Multisite`_ environment. +First, a master zone must exist, then a secondary zone should be created. +In the creation of the secondary zone, its tier type must be set to ``pubsub``: :: - { - "tenant": , # default: - "uid": , # default: "pubsub" - "data_bucket_prefix": # default: "pubsub-" - "data_oid_prefix": # + # radosgw-admin zone create --rgw-zonegroup={zone-group-name} \ + --rgw-zone={zone-name} \ + --endpoints={http://fqdn}[,{http://fqdn}] \ + --sync-from-all=0 \ + --sync-from={master-zone-name} \ + --tier-type=pubsub - "events_retention_days": # default: 7 - } +PubSub Zone Configuration Parameters +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:: + { + "tenant": , # default: + "uid": , # default: "pubsub" + "data_bucket_prefix": # default: "pubsub-" + "data_oid_prefix": # + "events_retention_days": # default: 7 + } * ``tenant`` (string) @@ -65,82 +89,129 @@ The oid prefix for the stored events. How many days to keep events that weren't acked. -How to Configure -~~~~~~~~~~~~~~~~ - -See `Multisite Configuration`_ for how to multisite config instructions. The pubsub sync module requires a creation of a new zone. The zone -tier type needs to be defined as ``pubsub``: - -:: - - # radosgw-admin zone create --rgw-zonegroup={zone-group-name} \ - --rgw-zone={zone-name} \ - --endpoints={http://fqdn}[,{http://fqdn}] - --tier-type=pubsub - +Configuring Parameters via CLI +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The tier configuration can be then done using the following command +The tier configuration could be set using the following command: :: - # radosgw-admin zone modify --rgw-zonegroup={zone-group-name} \ + # radosgw-admin zone modify --rgw-zonegroup={zone-group-name} \ --rgw-zone={zone-name} \ --tier-config={key}={val}[,{key}={val}] -The ``key`` in the configuration specifies the config variable that needs to be updated, and -the ``val`` specifies its new value. Nested values can be accessed using period. 
For example: +Where the ``key`` in the configuration specifies the configuration variable that needs to be updated (from the list above), and +the ``val`` specifies its new value. For example, setting the pubsub control user ``uid`` to ``user_ps``: :: - # radosgw-admin zone modify --rgw-zonegroup={zone-group-name} \ + # radosgw-admin zone modify --rgw-zonegroup={zone-group-name} \ --rgw-zone={zone-name} \ --tier-config=uid=pubsub - A configuration field can be removed by using ``--tier-config-rm={key}``. PubSub Performance Stats ------------------------- -- **pubsub_event_triggered**: running counter of events with at lease one pubsub topic associated with them -- **pubsub_event_lost**: running counter of events that had pubsub topics and subscriptions associated with them but that were not stored or pushed to any of the subscriptions -- **pubsub_store_ok**: running counter, for all subscriptions, of stored pubsub events -- **pubsub_store_fail**: running counter, for all subscriptions, of pubsub events that needed to be stored but failed -- **pubsub_push_ok**: running counter, for all subscriptions, of pubsub events successfully pushed to their endpoint -- **pubsub_push_fail**: running counter, for all subscriptions, of pubsub events failed to be pushed to their endpoint -- **pubsub_push_pending**: gauge value of pubsub events pushed to a endpoined but not acked or nacked yet +Same counters are shared between the pubsub sync module and the notification mechanism. + +- ``pubsub_event_triggered``: running counter of events with at lease one topic associated with them +- ``pubsub_event_lost``: running counter of events that had topics and subscriptions associated with them but that were not stored or pushed to any of the subscriptions +- ``pubsub_store_ok``: running counter, for all subscriptions, of stored events +- ``pubsub_store_fail``: running counter, for all subscriptions, of events failed to be stored +- ``pubsub_push_ok``: running counter, for all subscriptions, of events successfully pushed to their endpoint +- ``pubsub_push_fail``: running counter, for all subscriptions, of events failed to be pushed to their endpoint +- ``pubsub_push_pending``: gauge value of events pushed to an endpoint but not acked or nacked yet -Note that **pubsub_event_triggered** and **pubsub_event_lost** are incremented per event, while: **pubsub_store_ok**, **pubsub_store_fail**, **pubsub_push_ok**, **pubsub_push_fail**, are incremented per store/push action on each subscriptions. +.. note:: + + ``pubsub_event_triggered`` and ``pubsub_event_lost`` are incremented per event, while: + ``pubsub_store_ok``, ``pubsub_store_fail``, ``pubsub_push_ok``, ``pubsub_push_fail``, are incremented per store/push action on each subscriptions. PubSub REST API -------------------------- +--------------- +.. tip:: PubSub REST calls, and only them, should be sent to an RGW which belong to a PubSub zone Topics ~~~~~~ - + Create a Topic -`````````````````````````` +`````````````` + +This will create a new topic. Topic creation is needed both for both flavors of the API. +Optionally the topic could be provided with push endpoint parameters that would be used later +when an S3-compatible notification is created. +Upon successful request, the response will include the topic ARN that could be later used to reference this topic in an S3-compatible notification request. +To update a topic, use the same command used for topic creation, with the topic name of an existing topic and different endpoint values. 
-This will create a new topic. +.. tip:: Any S3-compatible notification already associated with the topic needs to be re-created for the topic update to take effect :: - PUT /topics/ + PUT /topics/[?push-endpoint=[&amqp-exchange=][&amqp-ack-level=][&verify-ssl=true|false]] + +Request parameters: + +- push-endpoint: URI of endpoint to send push notification to + + - URI schema is: ``http[s]|amqp://[:@][:][/]`` + - Same schema is used for HTTP and AMQP endpoints (except amqp-vhost which is specific to AMQP) + - Default values for HTTP/S: no user/password, port 80/443 + - Default values for AMQP: user/password=guest/guest, port 5672, amqp-vhost is "/" + +- verify-ssl: can be used with https endpoints (ignored for other endpoints), indicate whether the server certificate is validated or not ("true" by default) +- amqp-exchange: mandatory parameter for AMQP endpoint. The exchanges must exist and be able to route messages based on topics +- amqp-ack-level: No end2end acking is required, as messages may persist in the broker before delivered into their final destination. 2 ack methods exist: + + - "none" - message is considered "delivered" if sent to broker + - "broker" message is considered "delivered" if acked by broker + +The topic ARN in the response will have the following format: +:: + + arn:aws:sns::: Get Topic Information -```````````````````````````````` +````````````````````` -Returns information about specific topic. This includes subscriptions to that topic. +Returns information about specific topic. This includes subscriptions to that topic, and push-endpoint information, if provided. :: GET /topics/ +Response will have the following format (JSON): + +:: + { + "topic":{ + "user":"", + "name":"", + "dest":{ + "bucket_name":"", + "oid_prefix":"", + "push_endpoint":"", + "push_endpoint_args":"" + }, + "arn":"" + }, + "subs":[] + } + +- topic.user: name of the user that created the topic +- name: name of the topic +- dest.bucket_name: not used +- dest.oid_prefix: not used +- dest.push_endpoint: in case of S3-compliant notifications, this value will be used as the push-endpoint URL +- dest.push_endpoint_args: in case of S3-compliant notifications, this value will be used as the push-endpoint args +- topic.arn: topic ARN +- subs: list of subscriptions associated with this topic Delete Topic -```````````````````````````````````` +```````````` :: @@ -149,21 +220,35 @@ Delete Topic Delete the specified topic. List Topics -```````````````````````````````````` +``````````` List all topics that user defined. :: GET /topics + +S3-Compliant Notifications +~~~~~~~~~~~~~~~~~~~~~~~~~~ +Detailed under: `Bucket Operations`_. +.. note:: + + - Notification creation will also create a subscription for pushing/pulling events + - The generated subscription's name will have the same as the notification Id, and could be used later to fetch and ack events with the subscription API. 
+ - Notification deletion will deletes all generated subscriptions + - In case that bucket deletion implicitly deletes the notification, + the associated subscription will not be deleted automatically (any events of the deleted bucket could still be access), + and will have to be deleted explicitly with the subscription deletion API + - Filtering based on metadata (which is an extension to S3) is not supported, and such rules will be ignored -Notifications -~~~~~~~~~~~~~ + +Non S3-Compliant Notifications +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Create a Notification -`````````````````````````` +````````````````````` This will create a publisher for a specific bucket into a topic. @@ -171,15 +256,13 @@ This will create a publisher for a specific bucket into a topic. PUT /notifications/bucket/?topic=[&events=[,]] +Request parameters: -Request Params: - - topic-name: name of topic - - event: event type (string), one of: OBJECT_CREATE, OBJECT_DELETE - - - +- topic-name: name of topic +- event: event type (string), one of: ``OBJECT_CREATE``, ``OBJECT_DELETE``, ``DELETE_MARKER_CREATE`` + Delete Notification Information -```````````````````````````````` +``````````````````````````````` Delete publisher from a specific bucket into a specific topic. @@ -187,13 +270,47 @@ Delete publisher from a specific bucket into a specific topic. DELETE /notifications/bucket/?topic= -Request Params: - - topic-name: name of topic +Request parameters: + +- topic-name: name of topic + +.. note:: When the bucket is deleted, any notification defined on it is also deleted + +List Notifications +`````````````````` + +List all topics with associated events defined on a bucket. + +:: + + GET /notifications/bucket/ + +Response will have the following format (JSON): + +:: + {"topics":[ + { + "topic":{ + "user":"", + "name":"", + "dest":{ + "bucket_name":"", + "oid_prefix":"", + "push_endpoint":"", + "push_endpoint_args":"" + } + "arn":"" + }, + "events":[] + } + ]} +Subscriptions +~~~~~~~~~~~~~ -Create Subscription -```````````````````````````````````` +Create a Subscription +````````````````````` Creates a new subscription. @@ -201,61 +318,192 @@ Creates a new subscription. PUT /subscriptions/?topic=[&push-endpoint=[&amqp-exchange=][&amqp-ack-level=][&verify-ssl=true|false]] -Request Params: +Request parameters: - - topic-name: name of topic - - push-endpoint: URI of endpoint to send push notification to +- topic-name: name of topic +- push-endpoint: URI of endpoint to send push notification to - - URI schema is: ``http|amqp://[:@][:][/]`` - - Same schema is used for HTTP and AMQP endpoints (except amqp-vhost which is specific to AMQP) - - Default values for HTTP: no user/password, port 80 - - Default values for AMQP: user/password=guest/guest, port 5672, amqp-vhost is "/" + - URI schema is: ``http[s]|amqp://[:@][:][/]`` + - Same schema is used for HTTP and AMQP endpoints (except amqp-vhost which is specific to AMQP) + - Default values for HTTP/S: no user/password, port 80/443 + - Default values for AMQP: user/password=guest/guest, port 5672, amqp-vhost is "/" - - verify-ssl: can be used with https endpoints (ignored for other endpoints), indicate whether the server certificate is validated or not ("true" by default) - - amqp-exchange: mandatory parameter for AMQP endpoint. The exchanges must exist and be able to route messages based on topics - - amqp-ack-level: 2 ack levels exist: "none" - message is considered "delivered" if sent to broker; - "broker" message is considered "delivered" if acked by broker. 
- No end2end acking is required, as messages may persist in the broker before delivered into their final destination +- verify-ssl: can be used with https endpoints (ignored for other endpoints), indicate whether the server certificate is validated or not ("true" by default) +- amqp-exchange: mandatory parameter for AMQP endpoint. The exchanges must exist and be able to route messages based on topics +- amqp-ack-level: No end2end acking is required, as messages may persist in the broker before delivered into their final destination. 2 ack methods exist: -Get Subscription Info -```````````````````````````````````` + - "none": message is considered "delivered" if sent to broker + - "broker": message is considered "delivered" if acked by broker -Returns info about specific subscription +Get Subscription Information +```````````````````````````` + +Returns information about specific subscription. :: GET /subscriptions/ +Response will have the following format (JSON): + +:: + + { + "user":"", + "name":"", + "topic":"", + "dest":{ + "bucket_name":"", + "oid_prefix":"", + "push_endpoint":"", + "push_endpoint_args":"" + } + "s3_id":"" + } + +- user: name of the user that created the subscription +- name: name of the subscription +- topic: name of the topic the subscription is associated with Delete Subscription -````````````````````````````````` +``````````````````` -Removes a subscription +Removes a subscription. :: DELETE /subscriptions/ - Events ~~~~~~ Pull Events -````````````````````````````````` +``````````` -Pull events sent to a specific subscription +Pull events sent to a specific subscription. :: GET /subscriptions/?events[&max-entries=][&marker=] -Request Params: - - marker: pagination marker for list of events, if not specified will start from the oldest - - max-entries: max number of events to return +Request parameters: + +- marker: pagination marker for list of events, if not specified will start from the oldest +- max-entries: max number of events to return + +The response will hold information on the current marker and whether there are more events not fetched: + +:: + + {"next_marker":"","is_truncated":"",...} + + +The actual content of the response is depended with how the subscription was created. 
+In case that the subscription was created via an S3-compatible notification, +the events will have an S3-compatible record format (JSON): + +:: + + {"Records":[ + { + "eventVersion":"2.1" + "eventSource":"aws:s3", + "awsRegion":"", + "eventTime":"", + "eventName":"", + "userIdentity":{ + "principalId":"" + }, + "requestParameters":{ + "sourceIPAddress":"" + }, + "responseElements":{ + "x-amz-request-id":"", + "x-amz-id-2":"" + }, + "s3":{ + "s3SchemaVersion":"1.0", + "configurationId":"", + "bucket":{ + "name":"", + "ownerIdentity":{ + "principalId":"" + }, + "arn":"", + "id":"" + }, + "object":{ + "key":"", + "size":"0", + "eTag":"", + "versionId":"", + "sequencer":"", + "metadata":"" + } + }, + "eventId":"", + } + ]} + +- awsRegion: zonegroup +- eventTime: timestamp indicating when the event was triggered +- eventName: either ``s3:ObjectCreated:``, or ``s3:ObjectRemoved:`` +- userIdentity: not supported +- requestParameters: not supported +- responseElements: not supported +- s3.configurationId: notification ID that created the subscription for the event +- s3.eventId: unique ID of the event, that could be used for acking (an extension to the S3 notification API) +- s3.bucket.name: name of the bucket +- s3.bucket.ownerIdentity.principalId: owner of the bucket +- s3.bucket.arn: ARN of the bucket +- s3.bucket.id: Id of the bucket (an extension to the S3 notification API) +- s3.object.key: object key +- s3.object.size: not supported +- s3.object.eTag: object etag +- s3.object.version: object version in case of versioned bucket +- s3.object.sequencer: monotonically increasing identifier of the change per object (hexadecimal format) +- s3.object.metadata: not supported (an extension to the S3 notification API) +- s3.eventId: unique ID of the event, that could be used for acking (an extension to the S3 notification API) + +In case that the subscription was not created via a non S3-compatible notification, +the events will have the following event format (JSON): + +:: + {"events":[ + { + "id":"", + "event":"", + "timestamp":"", + "info":{ + "attrs":{ + "mtime":"" + }, + "bucket":{ + "bucket_id":"", + "name":"", + "tenant":"" + }, + "key":{ + "instance":"", + "name":"" + } + } + } + ]} + +- id: unique ID of the event, that could be used for acking +- event: one of: ``OBJECT_CREATE``, ``OBJECT_DELETE``, ``DELETE_MARKER_CREATE`` +- timestamp: timestamp indicating when the event was sent +- info.attrs.mtime: timestamp indicating when the event was triggered +- info.bucket.bucket_id: id of the bucket +- info.bucket.name: name of the bucket +- info.bucket.tenant: tenant the bucket belongs to +- info.key.instance: object version in case of versioned bucket +- info.key.name: object key Ack Event -````````````````````````````````` +````````` Ack event so that it can be removed from the subscription history. @@ -263,8 +511,10 @@ Ack event so that it can be removed from the subscription history. POST /subscriptions/?ack&event-id= +Request parameters: -Request Params: - - event-id: id of event to be acked +- event-id: id of event to be acked -.. _Multisite Configuration: ./multisite.rst +.. _Multisite : ../multisite +.. _Bucket Notification : ../notifications +.. 
_Bucket Operations: ../s3/bucketops diff --git a/ceph/doc/radosgw/s3-notification-compatibility.rst b/ceph/doc/radosgw/s3-notification-compatibility.rst new file mode 100644 index 000000000..6cc6ac028 --- /dev/null +++ b/ceph/doc/radosgw/s3-notification-compatibility.rst @@ -0,0 +1,122 @@ +===================================== +S3 Bucket Notifications Compatibility +===================================== + +Ceph's `Bucket Notifications`_ and `PubSub Module`_ APIs follow `AWS S3 Bucket Notifications API`_. However, some differences exist, as listed below. + + +.. note:: + + Compatibility is different depending on which of the above mechanism is used + +Supported Destination +--------------------- + +AWS supports: **SNS**, **SQS** and **Lambda** as possible destinations (AWS internal destinations). +Currently, we support: **HTTP/S** and **AMQP**. And also support pulling and acking of events stored in Ceph (as an intenal destination). + +We are using the **SNS** ARNs to represent the **HTTP/S** and **AMQP** destinations. + +Notification Configuration XML +------------------------------ + +Following tags (and the tags inside them) are not supported: + ++-----------------------------------+----------------------------------------------+ +| Tag | Remaks | ++===================================+==============================================+ +| ```` | not needed, we treat all destinations as SNS | ++-----------------------------------+----------------------------------------------+ +| ```` | not needed, we treat all destinations as SNS | ++-----------------------------------+----------------------------------------------+ + +REST API Extension +------------------ + +Ceph's bucket notification API has the following extensions: + +- Deletion of a specific notification, or all notifications on a bucket, using the ``DELETE`` verb + + - In S3, all notifications are deleted when the bucket is deleted, or when an empty notification is set on the bucket + +- Getting the information on a specific notification (when more than one exists on a bucket) + + - In S3, it is only possible to fetch all notifications on a bucket + +- In addition to filtering based on prefix/suffix of object keys we support: + + - Filtering based on regular expression matching + + - Filtering based on metadata attributes attached to the object + +- Filtering overlapping is allowed, so that same event could be sent as different notification + + +Unsupported Fields in the Event Record +-------------------------------------- + +The records sent for bucket notification follow format described in: `Event Message Structure`_. 
+However, the following fields may be sent empty, under the different deployment options (Notification/PubSub): + ++----------------------------------------+--------------+---------------+------------------------------------------------------------+ +| Field | Notification | PubSub | Description | ++========================================+==============+===============+============================================================+ +| ``userIdentity.principalId`` | Supported | Not Supported | The identity of the user that triggered the event | ++----------------------------------------+--------------+---------------+------------------------------------------------------------+ +| ``requestParameters.sourceIPAddress`` | Not Supported | The IP address of the client that triggered the event | ++----------------------------------------+--------------+---------------+------------------------------------------------------------+ +| ``requestParameters.x-amz-request-id`` | Supported | Not Supported | The request id that triggered the event | ++----------------------------------------+--------------+---------------+------------------------------------------------------------+ +| ``requestParameters.x-amz-id-2`` | Supported | Not Supported | The IP address of the RGW on which the event was triggered | ++----------------------------------------+--------------+---------------+------------------------------------------------------------+ +| ``s3.object.size`` | Supported | Not Supported | The size of the object | ++----------------------------------------+--------------+---------------+------------------------------------------------------------+ + +Event Types +----------- + ++----------------------------------------------+-----------------+-------------------------------------------+ +| Event | Notification | PubSub | ++==============================================+=================+===========================================+ +| ``s3:ObjectCreated:*`` | Supported | ++----------------------------------------------+-----------------+-------------------------------------------+ +| ``s3:ObjectCreated:Put`` | Supported | Supported at ``s3:ObjectCreated:*`` level | ++----------------------------------------------+-----------------+-------------------------------------------+ +| ``s3:ObjectCreated:Post`` | Supported | Not Supported | ++----------------------------------------------+-----------------+-------------------------------------------+ +| ``s3:ObjectCreated:Copy`` | Supported | Supported at ``s3:ObjectCreated:*`` level | ++----------------------------------------------+-----------------+-------------------------------------------+ +| ``s3:ObjectCreated:CompleteMultipartUpload`` | Supported | Supported at ``s3:ObjectCreated:*`` level | ++----------------------------------------------+-----------------+-------------------------------------------+ +| ``s3:ObjectRemoved:*`` | Supported | Supported only the specific events below | ++----------------------------------------------+-----------------+-------------------------------------------+ +| ``s3:ObjectRemoved:Delete`` | Supported | ++----------------------------------------------+-----------------+-------------------------------------------+ +| ``s3:ObjectRemoved:DeleteMarkerCreated`` | Supported | ++----------------------------------------------+-----------------+-------------------------------------------+ +| ``s3:ObjectRestore:Post`` | Not applicable to Ceph | 
++----------------------------------------------+-----------------+-------------------------------------------+ +| ``s3:ObjectRestore:Complete`` | Not applicable to Ceph | ++----------------------------------------------+-----------------+-------------------------------------------+ +| ``s3:ReducedRedundancyLostObject`` | Not applicable to Ceph | ++----------------------------------------------+-----------------+-------------------------------------------+ + +Topic Configuration +------------------- +In the case of bucket notifications, the topics management API will be derived from `AWS Simple Notification Service API`_. +Note that most of the API is not applicable to Ceph, and only the following actions are implemented: + + - ``CreateTopic`` + - ``DeleteTopic`` + - ``ListTopics`` + +We also extend it by: + + - ``GetTopic`` - allowing for fetching a specific topic, instead of all user topics + - In ``CreateTopic`` we allow setting endpoint attributes + +.. _AWS Simple Notification Service API: https://docs.aws.amazon.com/sns/latest/api/API_Operations.html +.. _AWS S3 Bucket Notifications API: https://docs.aws.amazon.com/AmazonS3/latest/dev/NotificationHowTo.html +.. _Event Message Structure: https://docs.aws.amazon.com/AmazonS3/latest/dev/notification-content-structure.html +.. _`PubSub Module`: ../pubsub-module +.. _`Bucket Notifications`: ../notifications diff --git a/ceph/doc/radosgw/s3.rst b/ceph/doc/radosgw/s3.rst index cf6eaba7f..36cc898ac 100644 --- a/ceph/doc/radosgw/s3.rst +++ b/ceph/doc/radosgw/s3.rst @@ -48,7 +48,7 @@ The following table describes the support status for current Amazon S3 functiona +---------------------------------+-----------------+----------------------------------------+ | **Bucket Location** | Supported | | +---------------------------------+-----------------+----------------------------------------+ -| **Bucket Notification** | Not Supported | | +| **Bucket Notification** | Supported | See `S3 Notification Compatibility`_ | +---------------------------------+-----------------+----------------------------------------+ | **Bucket Object Versions** | Supported | | +---------------------------------+-----------------+----------------------------------------+ @@ -98,3 +98,4 @@ The following common request header fields are not supported: +----------------------------+------------+ .. _Amazon S3 API: http://docs.aws.amazon.com/AmazonS3/latest/API/APIRest.html +.. _S3 Notification Compatibility: ../s3-notification-compatibility diff --git a/ceph/doc/radosgw/s3/bucketops.rst b/ceph/doc/radosgw/s3/bucketops.rst index 01e5eb4bf..7c94a835d 100644 --- a/ceph/doc/radosgw/s3/bucketops.rst +++ b/ceph/doc/radosgw/s3/bucketops.rst @@ -29,11 +29,14 @@ Syntax Parameters ~~~~~~~~~~ + +---------------+----------------------+-----------------------------------------------------------------------------+------------+ | Name | Description | Valid Values | Required | +===============+======================+=============================================================================+============+ | ``x-amz-acl`` | Canned ACLs. | ``private``, ``public-read``, ``public-read-write``, ``authenticated-read`` | No | +---------------+----------------------+-----------------------------------------------------------------------------+------------+ +| ``x-amz-bucket-object-lock-enabled`` | Enable object lock on bucket. 
| ``true``, ``false`` | No | ++--------------------------------------+-------------------------------+---------------------------------------------+------------+ Request Entities ~~~~~~~~~~~~~~~~ @@ -386,3 +389,294 @@ REQUEST ENTITIES +-----------------------------+-----------+---------------------------------------------------------------------------+ | ``Status`` | String | Sets the versioning state of the bucket. Valid Values: Suspended/Enabled | +-----------------------------+-----------+---------------------------------------------------------------------------+ + +PUT BUCKET OBJECT LOCK +-------------------------------- + +Places an Object Lock configuration on the specified bucket. The rule specified in the Object Lock configuration will be +applied by default to every new object placed in the specified bucket. + +Syntax +~~~~~~ + +:: + + PUT /{bucket}?object-lock HTTP/1.1 + +Request Entities +~~~~~~~~~~~~~~~~ + ++-----------------------------+-------------+----------------------------------------------------------------------------------------+----------+ +| Name | Type | Description | Required | ++=============================+=============+========================================================================================+==========+ +| ``ObjectLockConfiguration`` | Container | A container for the request. | Yes | ++-----------------------------+-------------+----------------------------------------------------------------------------------------+----------+ +| ``ObjectLockEnabled`` | String | Indicates whether this bucket has an Object Lock configuration enabled. | Yes | ++-----------------------------+-------------+----------------------------------------------------------------------------------------+----------+ +| ``Rule`` | Container | The Object Lock rule in place for the specified bucket. | No | ++-----------------------------+-------------+----------------------------------------------------------------------------------------+----------+ +| ``DefaultRetention`` | Container | The default retention period applied to new objects placed in the specified bucket. | No | ++-----------------------------+-------------+----------------------------------------------------------------------------------------+----------+ +| ``Mode`` | String | The default Object Lock retention mode. Valid Values: GOVERNANCE/COMPLIANCE | Yes | ++-----------------------------+-------------+----------------------------------------------------------------------------------------+----------+ +| ``Days`` | Integer | The number of days specified for the default retention period. | No | ++-----------------------------+-------------+----------------------------------------------------------------------------------------+----------+ +| ``Years`` | Integer | The number of years specified for the default retention period. | No | ++-----------------------------+-------------+----------------------------------------------------------------------------------------+----------+ + +HTTP Response +~~~~~~~~~~~~~ + +If the bucket object lock is not enabled when creating the bucket, the operation will fail. 
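As a purely client-side illustration (any S3-compatible SDK can be used; the ``boto3``
calls, endpoint, credentials, bucket name and retention values below are placeholders
rather than part of this API definition), an Object Lock configuration might be applied
as follows, with a failed request surfacing one of the status codes in the table that
follows::

    import boto3

    # Placeholder endpoint and credentials for a radosgw instance.
    s3 = boto3.client(
        "s3",
        endpoint_url="http://rgw.example.com:8000",
        aws_access_key_id="ACCESS_KEY",
        aws_secret_access_key="SECRET_KEY",
    )

    # Object Lock can only be configured on a bucket created with it enabled.
    s3.create_bucket(Bucket="locked-bucket", ObjectLockEnabledForBucket=True)

    # Default GOVERNANCE retention of 30 days for new objects in the bucket.
    s3.put_object_lock_configuration(
        Bucket="locked-bucket",
        ObjectLockConfiguration={
            "ObjectLockEnabled": "Enabled",
            "Rule": {"DefaultRetention": {"Mode": "GOVERNANCE", "Days": 30}},
        },
    )
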
+ ++---------------+-----------------------+----------------------------------------------------------+ +| HTTP Status | Status Code | Description | ++===============+=======================+==========================================================+ +| ``400`` | MalformedXML | The XML is not well-formed | ++---------------+-----------------------+----------------------------------------------------------+ +| ``409`` | InvalidBucketState | The bucket object lock is not enabled | ++---------------+-----------------------+----------------------------------------------------------+ + +GET BUCKET OBJECT LOCK +-------------------------------- + +Gets the Object Lock configuration for a bucket. The rule specified in the Object Lock configuration will be applied by +default to every new object placed in the specified bucket. + +Syntax +~~~~~~ + +:: + + GET /{bucket}?object-lock HTTP/1.1 + + +Response Entities +~~~~~~~~~~~~~~~~~ + ++-----------------------------+-------------+----------------------------------------------------------------------------------------+----------+ +| Name | Type | Description | Required | ++=============================+=============+========================================================================================+==========+ +| ``ObjectLockConfiguration`` | Container | A container for the request. | Yes | ++-----------------------------+-------------+----------------------------------------------------------------------------------------+----------+ +| ``ObjectLockEnabled`` | String | Indicates whether this bucket has an Object Lock configuration enabled. | Yes | ++-----------------------------+-------------+----------------------------------------------------------------------------------------+----------+ +| ``Rule`` | Container | The Object Lock rule in place for the specified bucket. | No | ++-----------------------------+-------------+----------------------------------------------------------------------------------------+----------+ +| ``DefaultRetention`` | Container | The default retention period applied to new objects placed in the specified bucket. | No | ++-----------------------------+-------------+----------------------------------------------------------------------------------------+----------+ +| ``Mode`` | String | The default Object Lock retention mode. Valid Values: GOVERNANCE/COMPLIANCE | Yes | ++-----------------------------+-------------+----------------------------------------------------------------------------------------+----------+ +| ``Days`` | Integer | The number of days specified for the default retention period. | No | ++-----------------------------+-------------+----------------------------------------------------------------------------------------+----------+ +| ``Years`` | Integer | The number of years specified for the default retention period. | No | ++-----------------------------+-------------+----------------------------------------------------------------------------------------+----------+ + +Create Notification +------------------- + +Create a publisher for a specific bucket into a topic. 
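The raw request syntax and XML entities are specified in the subsections below. As an
illustrative client-side sketch only (the ``boto3`` client, endpoint, credentials,
bucket name and topic ARN are placeholders; any S3-compatible SDK could be used
instead), a notification could be created along these lines::

    import boto3

    # Placeholder endpoint and credentials for a radosgw instance.
    s3 = boto3.client(
        "s3",
        endpoint_url="http://rgw.example.com:8000",
        aws_access_key_id="ACCESS_KEY",
        aws_secret_access_key="SECRET_KEY",
    )

    # The topic must exist already; its ARN is returned by CreateTopic.
    s3.put_bucket_notification_configuration(
        Bucket="mybucket",
        NotificationConfiguration={
            "TopicConfigurations": [
                {
                    "Id": "mynotif",  # name of the notification
                    "TopicArn": "arn:aws:sns:default::mytopic",  # placeholder ARN
                    "Events": ["s3:ObjectCreated:*"],
                }
            ]
        },
    )
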
+
+Syntax
+~~~~~~
+
+::
+
+   PUT /bucket?notification HTTP/1.1
+
+
+Request Entities
+~~~~~~~~~~~~~~~~
+
+Parameters are XML encoded in the body of the request, in the following format:
+
+::
+
+   <NotificationConfiguration xmlns="http://s3.amazonaws.com/doc/2006-03-01/">
+       <TopicConfiguration>
+           <Id></Id>
+           <Topic></Topic>
+           <Event></Event>
+           <Filter>
+               <S3Key>
+                   <FilterRule>
+                       <Name></Name>
+                       <Value></Value>
+                   </FilterRule>
+               </S3Key>
+               <S3Metadata>
+                   <FilterRule>
+                       <Name></Name>
+                       <Value></Value>
+                   </FilterRule>
+               </S3Metadata>
+           </Filter>
+       </TopicConfiguration>
+   </NotificationConfiguration>
+
++-------------------------------+-----------+--------------------------------------------------------------------------------------+----------+
+| Name                          | Type      | Description                                                                          | Required |
++===============================+===========+======================================================================================+==========+
+| ``NotificationConfiguration`` | Container | Holding list of ``TopicConfiguration`` entities                                      | Yes      |
++-------------------------------+-----------+--------------------------------------------------------------------------------------+----------+
+| ``TopicConfiguration``        | Container | Holding ``Id``, ``Topic`` and list of ``Event`` entities                             | Yes      |
++-------------------------------+-----------+--------------------------------------------------------------------------------------+----------+
+| ``Id``                        | String    | Name of the notification                                                             | Yes      |
++-------------------------------+-----------+--------------------------------------------------------------------------------------+----------+
+| ``Topic``                     | String    | Topic ARN. Topic must be created beforehand                                          | Yes      |
++-------------------------------+-----------+--------------------------------------------------------------------------------------+----------+
+| ``Event``                     | String    | List of supported events, see: `S3 Notification Compatibility`_. Multiple ``Event``  | No       |
+|                               |           | entities can be used. If omitted, all events are handled                             |          |
++-------------------------------+-----------+--------------------------------------------------------------------------------------+----------+
+| ``Filter``                    | Container | Holding ``S3Key`` and ``S3Metadata`` entities                                        | No       |
++-------------------------------+-----------+--------------------------------------------------------------------------------------+----------+
+| ``S3Key``                     | Container | Holding a list of ``FilterRule`` entities, for filtering based on object key.        | No       |
+|                               |           | At most, 3 entities may be in the list, with ``Name`` being ``prefix``, ``suffix``   |          |
+|                               |           | or ``regex``. All filter rules in the list must match for the filter to match.       |          |
++-------------------------------+-----------+--------------------------------------------------------------------------------------+----------+
+| ``S3Metadata``                | Container | Holding a list of ``FilterRule`` entities, for filtering based on object metadata.   | No       |
+|                               |           | All filter rules in the list must match the ones defined on the object. The object   |          |
+|                               |           | may have other metadata entities not listed in the filter.                           |          |
++-------------------------------+-----------+--------------------------------------------------------------------------------------+----------+
+| ``S3Key.FilterRule``          | Container | Holding ``Name`` and ``Value`` entities. ``Name`` would be: ``prefix``, ``suffix``   | Yes      |
+|                               |           | or ``regex``. The ``Value`` would hold the key prefix, key suffix or a regular       |          |
+|                               |           | expression for matching the key, accordingly.                                        |          |
++-------------------------------+-----------+--------------------------------------------------------------------------------------+----------+
+| ``S3Metadata.FilterRule``     | Container | Holding ``Name`` and ``Value`` entities. ``Name`` would be the name of the metadata  | Yes      |
+|                               |           | attribute (e.g. ``x-amz-meta-xxx``). The ``Value`` would be the expected value for   |          |
+|                               |           | this attribute                                                                       |          |
++-------------------------------+-----------+--------------------------------------------------------------------------------------+----------+
+
+
+HTTP Response
+~~~~~~~~~~~~~
+
++---------------+-----------------------+----------------------------------------------------------+
+| HTTP Status   | Status Code           | Description                                              |
++===============+=======================+==========================================================+
+| ``400``       | MalformedXML          | The XML is not well-formed                               |
++---------------+-----------------------+----------------------------------------------------------+
+| ``400``       | InvalidArgument       | Missing Id; Missing/Invalid Topic ARN; Invalid Event     |
++---------------+-----------------------+----------------------------------------------------------+
+| ``404``       | NoSuchBucket          | The bucket does not exist                                |
++---------------+-----------------------+----------------------------------------------------------+
+| ``404``       | NoSuchKey             | The topic does not exist                                 |
++---------------+-----------------------+----------------------------------------------------------+
+
+
+Delete Notification
+-------------------
+
+Delete a specific, or all, notifications from a bucket.
+
+.. note::
+
+    - Notification deletion is an extension to the S3 notification API
+    - When the bucket is deleted, any notification defined on it is also deleted
+    - Deleting an unknown notification (e.g. double delete) is not considered an error
+
+Syntax
+~~~~~~
+
+::
+
+   DELETE /bucket?notification[=<notification-id>] HTTP/1.1
+
+
+Parameters
+~~~~~~~~~~
+
++------------------------+-----------+----------------------------------------------------------------------------------------+
+| Name                   | Type      | Description                                                                            |
++========================+===========+========================================================================================+
+| ``notification-id``    | String    | Name of the notification. If not provided, all notifications on the bucket are deleted |
++------------------------+-----------+----------------------------------------------------------------------------------------+
+
+HTTP Response
+~~~~~~~~~~~~~
+
++---------------+-----------------------+----------------------------------------------------------+
+| HTTP Status   | Status Code           | Description                                              |
++===============+=======================+==========================================================+
+| ``404``       | NoSuchBucket          | The bucket does not exist                                |
++---------------+-----------------------+----------------------------------------------------------+
+
+Get/List Notification
+---------------------
+
+Get a specific notification, or list all notifications configured on a bucket.
+
+Syntax
+~~~~~~
+
+::
+
+   GET /bucket?notification[=<notification-id>] HTTP/1.1
+
+
+Parameters
+~~~~~~~~~~
+
++------------------------+-----------+----------------------------------------------------------------------------------------+
+| Name                   | Type      | Description                                                                            |
++========================+===========+========================================================================================+
+| ``notification-id``    | String    | Name of the notification.
If not provided, all notifications on the bucket are listed | ++------------------------+-----------+----------------------------------------------------------------------------------------+ + +Response Entities +~~~~~~~~~~~~~~~~~ + +Response is XML encoded in the body of the request, in the following format: + +:: + + + + + + + + + + + + + + + + + + + + + + + ++-------------------------------+-----------+--------------------------------------------------------------------------------------+----------+ +| Name | Type | Description | Required | ++===============================+===========+======================================================================================+==========+ +| ``NotificationConfiguration`` | Container | Holding list of ``TopicConfiguration`` entities | Yes | ++-------------------------------+-----------+--------------------------------------------------------------------------------------+----------+ +| ``TopicConfiguration`` | Container | Holding ``Id``, ``Topic`` and list of ``Event`` entities | Yes | ++-------------------------------+-----------+--------------------------------------------------------------------------------------+----------+ +| ``Id`` | String | Name of the notification | Yes | ++-------------------------------+-----------+--------------------------------------------------------------------------------------+----------+ +| ``Topic`` | String | Topic ARN | Yes | ++-------------------------------+-----------+--------------------------------------------------------------------------------------+----------+ +| ``Event`` | String | Handled event. Multiple ``Event`` entities may exist | Yes | ++-------------------------------+-----------+--------------------------------------------------------------------------------------+----------+ +| ``Filter`` | Container | Holding the filters configured for this notification | No | ++-------------------------------+-----------+--------------------------------------------------------------------------------------+----------+ + +HTTP Response +~~~~~~~~~~~~~ + ++---------------+-----------------------+----------------------------------------------------------+ +| HTTP Status | Status Code | Description | ++===============+=======================+==========================================================+ +| ``404`` | NoSuchBucket | The bucket does not exist | ++---------------+-----------------------+----------------------------------------------------------+ +| ``404`` | NoSuchKey | The notification does not exist (if provided) | ++---------------+-----------------------+----------------------------------------------------------+ + +.. _S3 Notification Compatibility: ../s3-notification-compatibility diff --git a/ceph/doc/radosgw/s3/objectops.rst b/ceph/doc/radosgw/s3/objectops.rst index 37a65c130..2ac52607f 100644 --- a/ceph/doc/radosgw/s3/objectops.rst +++ b/ceph/doc/radosgw/s3/objectops.rst @@ -403,6 +403,7 @@ Syntax DELETE /{bucket}/{object}?uploadId= HTTP/1.1 + Append Object ------------- Append data to an object. You must have write permissions on the bucket to perform this operation. @@ -410,7 +411,7 @@ It is used to upload files in appending mode. The type of the objects created by operation is Appendable Object, and the type of the objects uploaded with the Put Object operation is Normal Object. 
**Append Object can't be used if bucket versioning is enabled or suspended.** **Synced object will become normal in multisite, but you can still append to the original object.** - +**Compression and encryption features are disabled for Appendable objects.** Syntax ~~~~~~ @@ -458,3 +459,100 @@ The following HTTP response may be returned: | **409** | InvalidBucketstate | Bucket versioning is enabled or suspended | +---------------+----------------------------+---------------------------------------------------+ + +Put Object Retention +-------------------- +Places an Object Retention configuration on an object. + +Syntax +~~~~~~ + +:: + + PUT /{bucket}/{object}?retention&versionId= HTTP/1.1 + +Request Entities +~~~~~~~~~~~~~~~~ + ++---------------------+-------------+-------------------------------------------------------------------------------+------------+ +| Name | Type | Description | Required | ++=====================+=============+===============================================================================+============+ +| ``Retention`` | Container | A container for the request. | Yes | ++---------------------+-------------+-------------------------------------------------------------------------------+------------+ +| ``Mode`` | String | Retention mode for the specified object. Valid Values: GOVERNANCE/COMPLIANCE | Yes | ++---------------------+-------------+--------------------------------------------------------------------------------------------+ +| ``RetainUntilDate`` | Timestamp | Retention date. Format: 2020-01-05T00:00:00.000Z | Yes | ++---------------------+-------------+--------------------------------------------------------------------------------------------+ + + +Get Object Retention +-------------------- +Gets an Object Retention configuration on an object. + + +Syntax +~~~~~~ + +:: + + GET /{bucket}/{object}?retention&versionId= HTTP/1.1 + +Response Entities +~~~~~~~~~~~~~~~~~ + ++---------------------+-------------+-------------------------------------------------------------------------------+------------+ +| Name | Type | Description | Required | ++=====================+=============+===============================================================================+============+ +| ``Retention`` | Container | A container for the request. | Yes | ++---------------------+-------------+-------------------------------------------------------------------------------+------------+ +| ``Mode`` | String | Retention mode for the specified object. Valid Values: GOVERNANCE/COMPLIANCE | Yes | ++---------------------+-------------+--------------------------------------------------------------------------------------------+ +| ``RetainUntilDate`` | Timestamp | Retention date. Format: 2020-01-05T00:00:00.000Z | Yes | ++---------------------+-------------+--------------------------------------------------------------------------------------------+ + + +Put Object Legal Hold +--------------------- +Applies a Legal Hold configuration to the specified object. + +Syntax +~~~~~~ + +:: + + PUT /{bucket}/{object}?legal-hold&versionId= HTTP/1.1 + +Request Entities +~~~~~~~~~~~~~~~~ + ++----------------+-------------+----------------------------------------------------------------------------------------+------------+ +| Name | Type | Description | Required | ++================+=============+========================================================================================+============+ +| ``LegalHold`` | Container | A container for the request. 
| Yes | ++----------------+-------------+----------------------------------------------------------------------------------------+------------+ +| ``Status`` | String | Indicates whether the specified object has a Legal Hold in place. Valid Values: ON/OFF | Yes | ++----------------+-------------+----------------------------------------------------------------------------------------+------------+ + + +Get Object Legal Hold +--------------------- +Gets an object's current Legal Hold status. + +Syntax +~~~~~~ + +:: + + GET /{bucket}/{object}?legal-hold&versionId= HTTP/1.1 + +Response Entities +~~~~~~~~~~~~~~~~~ + ++----------------+-------------+----------------------------------------------------------------------------------------+------------+ +| Name | Type | Description | Required | ++================+=============+========================================================================================+============+ +| ``LegalHold`` | Container | A container for the request. | Yes | ++----------------+-------------+----------------------------------------------------------------------------------------+------------+ +| ``Status`` | String | Indicates whether the specified object has a Legal Hold in place. Valid Values: ON/OFF | Yes | ++----------------+-------------+----------------------------------------------------------------------------------------+------------+ + diff --git a/ceph/doc/rbd/qemu-rbd.rst b/ceph/doc/rbd/qemu-rbd.rst index 80c5dcc41..80685cd60 100644 --- a/ceph/doc/rbd/qemu-rbd.rst +++ b/ceph/doc/rbd/qemu-rbd.rst @@ -171,7 +171,7 @@ edit`` to include the ``xmlns:qemu`` value. Then, add a ``qemu:commandline`` block as a child of that domain. The following example shows how to set two devices with ``qemu id=`` to different ``discard_granularity`` values. -.. code-block:: guess +.. code-block:: xml diff --git a/ceph/doc/rbd/rbd-mirroring.rst b/ceph/doc/rbd/rbd-mirroring.rst index 2ccddde93..8762b9b98 100644 --- a/ceph/doc/rbd/rbd-mirroring.rst +++ b/ceph/doc/rbd/rbd-mirroring.rst @@ -39,14 +39,14 @@ tasks to configure mirroring using the ``rbd`` command. Mirroring is configured on a per-pool basis within the Ceph clusters. The pool configuration steps should be performed on both peer clusters. These -procedures assume two clusters, named "local" and "remote", are accessible from +procedures assume two clusters, named "site-a" and "site-b", are accessible from a single host for clarity. See the `rbd`_ manpage for additional details of how to connect to different Ceph clusters. .. note:: The cluster name in the following examples corresponds to a Ceph - configuration file of the same name (e.g. /etc/ceph/remote.conf). See the + configuration file of the same name (e.g. /etc/ceph/site-b.conf). See the `ceph-conf`_ documentation for how to configure multiple clusters. Enable Mirroring @@ -66,8 +66,8 @@ The mirroring mode can either be ``pool`` or ``image``: For example:: - $ rbd --cluster local mirror pool enable image-pool pool - $ rbd --cluster remote mirror pool enable image-pool pool + $ rbd --cluster site-a mirror pool enable image-pool pool + $ rbd --cluster site-b mirror pool enable image-pool pool Disable Mirroring ----------------- @@ -83,23 +83,72 @@ explicitly. 
For example:: - $ rbd --cluster local mirror pool disable image-pool - $ rbd --cluster remote mirror pool disable image-pool + $ rbd --cluster site-a mirror pool disable image-pool + $ rbd --cluster site-b mirror pool disable image-pool -Add Cluster Peer ----------------- +Bootstrap Peers +--------------- In order for the ``rbd-mirror`` daemon to discover its peer cluster, the peer -needs to be registered to the pool. To add a mirroring peer Ceph cluster with -``rbd``, specify the ``mirror pool peer add`` command, the pool name, and a -cluster specification:: +needs to be registered to the pool and a user account needs to be created. +This process can be automated with ``rbd`` and the +``mirror pool peer bootstrap create`` and ``mirror pool peer bootstrap import`` +commands. + +To manually create a new bootstrap token with ``rbd``, specify the +``mirror pool peer bootstrap create`` command, a pool name, along with an +optional friendly site name to describe the local cluster:: + + rbd mirror pool peer bootstrap create [--site-name {local-site-name}] {pool-name} + +The output of ``mirror pool peer bootstrap create`` will be a token that should +be provided to the ``mirror pool peer bootstrap import`` command. For example, +on site-a:: + + $ rbd --cluster site-a mirror pool peer bootstrap create --site-name site-a image-pool + eyJmc2lkIjoiOWY1MjgyZGItYjg5OS00NTk2LTgwOTgtMzIwYzFmYzM5NmYzIiwiY2xpZW50X2lkIjoicmJkLW1pcnJvci1wZWVyIiwia2V5IjoiQVFBUnczOWQwdkhvQmhBQVlMM1I4RmR5dHNJQU50bkFTZ0lOTVE9PSIsIm1vbl9ob3N0IjoiW3YyOjE5Mi4xNjguMS4zOjY4MjAsdjE6MTkyLjE2OC4xLjM6NjgyMV0ifQ== + +To manually import the bootstrap token created by another cluster with ``rbd``, +specify the ``mirror pool peer bootstrap import`` command, the pool name, a file +path to the created token (or '-' to read from standard input), along with an +optional friendly site name to describe the local cluster and a mirroring +direction (defaults to rx-tx for bidirectional mirroring, but can also be set +to rx-only for unidirectional mirroring):: + + rbd mirror pool peer bootstrap import [--site-name {local-site-name}] [--direction {rx-only or rx-tx}] {pool-name} {token-path} + +For example, on site-b:: + + $ cat < token + eyJmc2lkIjoiOWY1MjgyZGItYjg5OS00NTk2LTgwOTgtMzIwYzFmYzM5NmYzIiwiY2xpZW50X2lkIjoicmJkLW1pcnJvci1wZWVyIiwia2V5IjoiQVFBUnczOWQwdkhvQmhBQVlMM1I4RmR5dHNJQU50bkFTZ0lOTVE9PSIsIm1vbl9ob3N0IjoiW3YyOjE5Mi4xNjguMS4zOjY4MjAsdjE6MTkyLjE2OC4xLjM6NjgyMV0ifQ== + EOF + $ rbd --cluster site-b mirror pool peer bootstrap import --site-name site-b image-pool token + +Add Cluster Peer Manually +------------------------- + +Cluster peers can be specified manually if desired or if the above bootstrap +commands are not available with the currently installed Ceph release. + +The remote ``rbd-mirror`` daemon will need access to the local cluster to +perform mirroring. A new local Ceph user should be created for the remote +daemon to use. To `create a Ceph user`_, with ``ceph`` specify the +``auth get-or-create`` command, user name, monitor caps, and OSD caps:: + + ceph auth get-or-create client.rbd-mirror-peer mon 'profile rbd' osd 'profile rbd' + +The resulting keyring should be copied to the other cluster's ``rbd-mirror`` +daemon hosts if not using the Ceph monitor ``config-key`` store described below. 
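+
+For example, the keyring could be exported on site-a and copied to the peer's ``rbd-mirror``
+daemon host as follows (the destination host and file name are illustrative)::
+
+   $ ceph --cluster site-a auth get client.rbd-mirror-peer -o site-a.client.rbd-mirror-peer.keyring
+   $ scp site-a.client.rbd-mirror-peer.keyring <site-b host>:/etc/ceph/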
+ +To manually add a mirroring peer Ceph cluster with ``rbd``, specify the +``mirror pool peer add`` command, the pool name, and a cluster specification:: rbd mirror pool peer add {pool-name} {client-name}@{cluster-name} For example:: - $ rbd --cluster local mirror pool peer add image-pool client.remote@remote - $ rbd --cluster remote mirror pool peer add image-pool client.local@local + $ rbd --cluster site-a mirror pool peer add image-pool client.rbd-mirror-peer@site-b + $ rbd --cluster site-b mirror pool peer add image-pool client.rbd-mirror-peer@site-a By default, the ``rbd-mirror`` daemon needs to have access to a Ceph configuration file located at ``/etc/ceph/{cluster-name}.conf`` that provides @@ -112,12 +161,15 @@ stored within the local Ceph monitor ``config-key`` store. To specify the peer cluster connection attributes when adding a mirroring peer, use the ``--remote-mon-host`` and ``--remote-key-file`` optionals. For example:: - $ rbd --cluster local mirror pool peer add image-pool client.remote@remote --remote-mon-host 192.168.1.1,192.168.1.2 --remote-key-file <(echo 'AQAeuZdbMMoBChAAcj++/XUxNOLFaWdtTREEsw==') - $ rbd --cluster local mirror pool info image-pool --all + $ cat < remote-key-file + AQAeuZdbMMoBChAAcj++/XUxNOLFaWdtTREEsw== + EOF + $ rbd --cluster site-a mirror pool peer add image-pool client.rbd-mirror-peer@site-b --remote-mon-host 192.168.1.1,192.168.1.2 --remote-key-file remote-key-file + $ rbd --cluster site-a mirror pool info image-pool --all Mode: pool Peers: - UUID NAME CLIENT MON_HOST KEY - 587b08db-3d33-4f32-8af8-421e77abb081 remote client.remote 192.168.1.1,192.168.1.2 AQAeuZdbMMoBChAAcj++/XUxNOLFaWdtTREEsw== + UUID NAME CLIENT MON_HOST KEY + 587b08db-3d33-4f32-8af8-421e77abb081 site-b client.rbd-mirror-peer 192.168.1.1,192.168.1.2 AQAeuZdbMMoBChAAcj++/XUxNOLFaWdtTREEsw== Remove Cluster Peer ------------------- @@ -130,8 +182,8 @@ To remove a mirroring peer Ceph cluster with ``rbd``, specify the For example:: - $ rbd --cluster local mirror pool peer remove image-pool 55672766-c02b-4729-8567-f13a66893445 - $ rbd --cluster remote mirror pool peer remove image-pool 60c0e299-b38f-4234-91f6-eed0a367be08 + $ rbd --cluster site-a mirror pool peer remove image-pool 55672766-c02b-4729-8567-f13a66893445 + $ rbd --cluster site-b mirror pool peer remove image-pool 60c0e299-b38f-4234-91f6-eed0a367be08 Data Pools ---------- @@ -177,7 +229,7 @@ the ``feature enable`` command, the pool and image name, and the feature name:: For example:: - $ rbd --cluster local feature enable image-pool/image-1 journaling + $ rbd --cluster site-a feature enable image-pool/image-1 journaling .. note:: The journaling feature is dependent on the exclusive-lock feature. 
If the exclusive-lock feature is not already enabled, it should be enabled prior @@ -198,7 +250,7 @@ To enable mirroring for a specific image with ``rbd``, specify the For example:: - $ rbd --cluster local mirror image enable image-pool/image-1 + $ rbd --cluster site-a mirror image enable image-pool/image-1 Disable Image Mirroring ----------------------- @@ -210,7 +262,7 @@ To disable mirroring for a specific image with ``rbd``, specify the For example:: - $ rbd --cluster local mirror image disable image-pool/image-1 + $ rbd --cluster site-a mirror image disable image-pool/image-1 Image Promotion and Demotion ---------------------------- @@ -232,7 +284,7 @@ To demote a specific image to non-primary with ``rbd``, specify the For example:: - $ rbd --cluster local mirror image demote image-pool/image-1 + $ rbd --cluster site-a mirror image demote image-pool/image-1 To demote all primary images within a pool to non-primary with ``rbd``, specify the ``mirror pool demote`` command along with the pool name:: @@ -241,7 +293,7 @@ the ``mirror pool demote`` command along with the pool name:: For example:: - $ rbd --cluster local mirror pool demote image-pool + $ rbd --cluster site-a mirror pool demote image-pool To promote a specific image to primary with ``rbd``, specify the ``mirror image promote`` command along with the pool and image name:: @@ -250,7 +302,7 @@ To promote a specific image to primary with ``rbd``, specify the For example:: - $ rbd --cluster remote mirror image promote image-pool/image-1 + $ rbd --cluster site-b mirror image promote image-pool/image-1 To promote all non-primary images within a pool to primary with ``rbd``, specify the ``mirror pool promote`` command along with the pool name:: @@ -259,7 +311,7 @@ the ``mirror pool promote`` command along with the pool name:: For example:: - $ rbd --cluster local mirror pool promote image-pool + $ rbd --cluster site-a mirror pool promote image-pool .. tip:: Since the primary / non-primary status is per-image, it is possible to have two clusters split the IO load and stage failover / failback. 
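+
+For example, combining the demotion and promotion commands above, a planned failover of a
+single image from site-a to site-b could be staged as follows::
+
+   $ rbd --cluster site-a mirror image demote image-pool/image-1
+   $ rbd --cluster site-b mirror image promote image-pool/image-1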
diff --git a/ceph/doc_deps.deb.txt b/ceph/doc_deps.deb.txt index 2b8041bb5..318be6e22 100644 --- a/ceph/doc_deps.deb.txt +++ b/ceph/doc_deps.deb.txt @@ -1,8 +1,8 @@ git gcc -python-dev -python-pip -python-virtualenv +python3-dev +python3-pip +python3-virtualenv doxygen ditaa libxml2-dev @@ -10,4 +10,4 @@ libxslt1-dev graphviz ant zlib1g-dev -cython +cython3 diff --git a/ceph/install-deps.sh b/ceph/install-deps.sh index 0a21dfcb6..8f671db14 100755 --- a/ceph/install-deps.sh +++ b/ceph/install-deps.sh @@ -70,7 +70,7 @@ function ensure_decent_gcc_on_ubuntu { local old=$(gcc -dumpfullversion -dumpversion) local new=$1 local codename=$2 - if dpkg --compare-versions $old ge 7.0; then + if dpkg --compare-versions $old ge ${new}.0; then return fi @@ -97,10 +97,10 @@ msyaQpNl/m/lNtOLhR64v5ZybofB2EWkMxUzX8D/FQ== -----END PGP PUBLIC KEY BLOCK----- ENDOFKEY $SUDO env DEBIAN_FRONTEND=noninteractive apt-get update -y || true - $SUDO env DEBIAN_FRONTEND=noninteractive apt-get install -y g++-7 + $SUDO env DEBIAN_FRONTEND=noninteractive apt-get install -y g++-${new} fi - case $codename in + case "$codename" in trusty) old=4.8;; xenial) @@ -120,8 +120,8 @@ ENDOFKEY $SUDO update-alternatives --auto gcc # cmake uses the latter by default - $SUDO ln -nsf /usr/bin/gcc /usr/bin/$(uname -m)-linux-gnu-gcc - $SUDO ln -nsf /usr/bin/g++ /usr/bin/$(uname -m)-linux-gnu-g++ + $SUDO ln -nsf /usr/bin/gcc /usr/bin/${ARCH}-linux-gnu-gcc + $SUDO ln -nsf /usr/bin/g++ /usr/bin/${ARCH}-linux-gnu-g++ } function install_pkg_on_ubuntu { @@ -183,11 +183,11 @@ function ensure_decent_gcc_on_rh { cat </dev/null $ blkdiscard -o 156672 -l 512 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 0400000 $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 131584 -l 64512 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 0400000 $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 131584 -l 65024 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 0400000 $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 131072 -l 65024 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 0400000 @@ -36,7 +36,7 @@ Zero, 1 block: $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 131072 -l 65536 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 0020000 0000 0000 0000 0000 0000 0000 0000 0000 @@ -47,7 +47,7 @@ Zero, 1 block: $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 131072 -l 66048 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 0020000 0000 0000 0000 0000 0000 0000 0000 0000 @@ -58,7 +58,7 @@ Zero, 1 block: $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 130560 -l 66048 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 0020000 0000 0000 0000 0000 0000 0000 0000 0000 @@ -69,7 +69,7 @@ Zero, 1 block: $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 130560 -l 66560 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 0020000 0000 0000 0000 0000 0000 0000 0000 0000 @@ -82,21 
+82,21 @@ Zero, < 2 blocks: $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 163840 -l 65536 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 0400000 $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 131584 -l 130048 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 0400000 $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 131584 -l 130560 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 0030000 0000 0000 0000 0000 0000 0000 0000 0000 @@ -107,7 +107,7 @@ Zero, < 2 blocks: $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 131072 -l 130560 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 0020000 0000 0000 0000 0000 0000 0000 0000 0000 @@ -120,7 +120,7 @@ Zero, 2 blocks: $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 131072 -l 131072 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 0020000 0000 0000 0000 0000 0000 0000 0000 0000 @@ -131,7 +131,7 @@ Zero, 2 blocks: $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 131072 -l 131584 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 0020000 0000 0000 0000 0000 0000 0000 0000 0000 @@ -142,7 +142,7 @@ Zero, 2 blocks: $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 130560 -l 131584 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 0020000 0000 0000 0000 0000 0000 0000 0000 0000 @@ -153,7 +153,7 @@ Zero, 2 blocks: $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 130560 -l 132096 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 0020000 0000 0000 0000 0000 0000 0000 0000 0000 @@ -166,7 +166,7 @@ Zero, 37 blocks: $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 589824 -l 2424832 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 0090000 0000 0000 0000 0000 0000 0000 0000 0000 @@ -177,7 +177,7 @@ Zero, 37 blocks: $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 589312 -l 2424832 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 0090000 0000 0000 0000 0000 0000 0000 0000 0000 @@ -188,7 +188,7 @@ Zero, 37 blocks: $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 590336 -l 2424832 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 00a0000 0000 0000 0000 0000 0000 0000 0000 0000 @@ -201,21 +201,21 @@ Truncate: $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 4193792 -l 512 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 0400000 $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 4129280 -l 65024 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 0400000 $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard 
-o 4128768 -l 65536 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 03f0000 0000 0000 0000 0000 0000 0000 0000 0000 @@ -224,7 +224,7 @@ Truncate: $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 4128256 -l 66048 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 03f0000 0000 0000 0000 0000 0000 0000 0000 0000 @@ -233,7 +233,7 @@ Truncate: $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 4063744 -l 130560 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 03f0000 0000 0000 0000 0000 0000 0000 0000 0000 @@ -242,7 +242,7 @@ Truncate: $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 4063232 -l 131072 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 03e0000 0000 0000 0000 0000 0000 0000 0000 0000 @@ -251,7 +251,7 @@ Truncate: $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 4062720 -l 131584 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 03e0000 0000 0000 0000 0000 0000 0000 0000 0000 @@ -260,7 +260,7 @@ Truncate: $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 512 -l 4193792 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 0010000 0000 0000 0000 0000 0000 0000 0000 0000 @@ -271,7 +271,7 @@ Delete: $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 0 -l 4194304 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 0000 0000 0000 0000 0000 0000 0000 0000 * 0400000 @@ -286,7 +286,7 @@ Empty clone: $ rbd clone img@snap cloneimg1 $ DEV=$(sudo rbd map cloneimg1) $ blkdiscard -o 720896 -l 2719744 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 abab abab abab abab abab abab abab abab * 0400000 @@ -295,7 +295,7 @@ Empty clone: $ rbd clone img@snap cloneimg2 $ DEV=$(sudo rbd map cloneimg2) $ blkdiscard -o 1474560 -l 2719744 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 abab abab abab abab abab abab abab abab * 0400000 @@ -304,7 +304,7 @@ Empty clone: $ rbd clone img@snap cloneimg3 $ DEV=$(sudo rbd map cloneimg3) $ blkdiscard -o 0 -l 4194304 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 abab abab abab abab abab abab abab abab * 0400000 @@ -317,7 +317,7 @@ Full clone: $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 720896 -l 2719744 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 00b0000 0000 0000 0000 0000 0000 0000 0000 0000 @@ -328,7 +328,7 @@ Full clone: $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 1474560 -l 2719744 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 0170000 0000 0000 0000 0000 0000 0000 0000 0000 @@ -337,7 +337,7 @@ Full clone: $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 0 -l 4194304 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 0000 0000 0000 0000 0000 0000 0000 0000 * 0400000 @@ -351,14 +351,14 @@ Multiple object requests: $ xfs_io -c 'pwrite -b 4M -w 0 50M' $DEV 
>/dev/null $ blkdiscard -o 0 -l 143360 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 3200000 $ xfs_io -c 'pwrite -b 4M -w 0 50M' $DEV >/dev/null $ blkdiscard -o 0 -l 286720 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 0000 0000 0000 0000 0000 0000 0000 0000 * 0008000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd @@ -379,7 +379,7 @@ Multiple object requests: $ xfs_io -c 'pwrite -b 4M -w 0 50M' $DEV >/dev/null $ blkdiscard -o 0 -l 573440 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 0000 0000 0000 0000 0000 0000 0000 0000 * 0050000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd diff --git a/ceph/qa/rbd/krbd_discard_4M.t b/ceph/qa/rbd/krbd_discard_4M.t index 395f0241d..6c3d7cc74 100644 --- a/ceph/qa/rbd/krbd_discard_4M.t +++ b/ceph/qa/rbd/krbd_discard_4M.t @@ -6,28 +6,28 @@ Zero, < 1 block: $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 156672 -l 512 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 0400000 $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 131584 -l 64512 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 0400000 $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 131584 -l 65024 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 0400000 $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 131072 -l 65024 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 0400000 @@ -36,28 +36,28 @@ Zero, 1 block: $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 131072 -l 65536 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 0400000 $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 131072 -l 66048 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 0400000 $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 130560 -l 66048 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 0400000 $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 130560 -l 66560 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 0400000 @@ -66,28 +66,28 @@ Zero, < 2 blocks: $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 163840 -l 65536 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 0400000 $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 131584 -l 130048 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 0400000 $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 131584 -l 130560 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 0400000 $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 131072 -l 130560 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd 
cdcd cdcd cdcd * 0400000 @@ -96,28 +96,28 @@ Zero, 2 blocks: $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 131072 -l 131072 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 0400000 $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 131072 -l 131584 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 0400000 $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 130560 -l 131584 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 0400000 $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 130560 -l 132096 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 0400000 @@ -126,21 +126,21 @@ Zero, 37 blocks: $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 589824 -l 2424832 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 0400000 $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 589312 -l 2424832 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 0400000 $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 590336 -l 2424832 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 0400000 @@ -149,7 +149,7 @@ Truncate: $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 4193792 -l 512 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 03ffe00 0000 0000 0000 0000 0000 0000 0000 0000 @@ -158,7 +158,7 @@ Truncate: $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 4129280 -l 65024 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 03f0200 0000 0000 0000 0000 0000 0000 0000 0000 @@ -167,7 +167,7 @@ Truncate: $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 4128768 -l 65536 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 03f0000 0000 0000 0000 0000 0000 0000 0000 0000 @@ -176,7 +176,7 @@ Truncate: $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 4128256 -l 66048 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 03efe00 0000 0000 0000 0000 0000 0000 0000 0000 @@ -185,7 +185,7 @@ Truncate: $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 4063744 -l 130560 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 03e0200 0000 0000 0000 0000 0000 0000 0000 0000 @@ -194,7 +194,7 @@ Truncate: $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 4063232 -l 131072 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 03e0000 0000 0000 0000 0000 0000 0000 0000 0000 @@ -203,7 +203,7 @@ Truncate: $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 4062720 -l 131584 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 03dfe00 0000 
0000 0000 0000 0000 0000 0000 0000 @@ -212,7 +212,7 @@ Truncate: $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 512 -l 4193792 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 0000200 0000 0000 0000 0000 0000 0000 0000 0000 @@ -223,7 +223,7 @@ Delete: $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 0 -l 4194304 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 0000 0000 0000 0000 0000 0000 0000 0000 * 0400000 @@ -238,7 +238,7 @@ Empty clone: $ rbd clone img@snap cloneimg1 $ DEV=$(sudo rbd map -o alloc_size=4194304 cloneimg1) $ blkdiscard -o 720896 -l 2719744 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 abab abab abab abab abab abab abab abab * 0400000 @@ -247,7 +247,7 @@ Empty clone: $ rbd clone img@snap cloneimg2 $ DEV=$(sudo rbd map -o alloc_size=4194304 cloneimg2) $ blkdiscard -o 1474560 -l 2719744 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 abab abab abab abab abab abab abab abab * 0400000 @@ -256,7 +256,7 @@ Empty clone: $ rbd clone img@snap cloneimg3 $ DEV=$(sudo rbd map -o alloc_size=4194304 cloneimg3) $ blkdiscard -o 0 -l 4194304 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 abab abab abab abab abab abab abab abab * 0400000 @@ -269,14 +269,14 @@ Full clone: $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 720896 -l 2719744 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 0400000 $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 1474560 -l 2719744 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 0168000 0000 0000 0000 0000 0000 0000 0000 0000 @@ -285,7 +285,7 @@ Full clone: $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null $ blkdiscard -o 0 -l 4194304 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 0000 0000 0000 0000 0000 0000 0000 0000 * 0400000 @@ -299,21 +299,21 @@ Multiple object requests: $ xfs_io -c 'pwrite -b 4M -w 0 50M' $DEV >/dev/null $ blkdiscard -o 0 -l 143360 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 3200000 $ xfs_io -c 'pwrite -b 4M -w 0 50M' $DEV >/dev/null $ blkdiscard -o 0 -l 286720 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 3200000 $ xfs_io -c 'pwrite -b 4M -w 0 50M' $DEV >/dev/null $ blkdiscard -o 0 -l 573440 $DEV - $ hexdump $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 3200000 diff --git a/ceph/qa/run-standalone.sh b/ceph/qa/run-standalone.sh index 4a7a00546..92cca83ce 100755 --- a/ceph/qa/run-standalone.sh +++ b/ceph/qa/run-standalone.sh @@ -135,7 +135,7 @@ do CEPH_ROOT=.. \ CEPH_LIB=lib \ LOCALRUN=yes \ - $cmd ; then + time -f "Elapsed %E (%e seconds)" $cmd ; then echo "$f .............. 
FAILED" errors=$(expr $errors + 1) fi diff --git a/ceph/qa/standalone/ceph-helpers.sh b/ceph/qa/standalone/ceph-helpers.sh index 83ca1e251..52b0eee69 100755 --- a/ceph/qa/standalone/ceph-helpers.sh +++ b/ceph/qa/standalone/ceph-helpers.sh @@ -673,11 +673,15 @@ EOF echo start osd.$id ceph-osd -i $id $ceph_args & + # If noup is set, then can't wait for this osd + if ceph osd dump --format=json | jq '.flags_set[]' | grep -q '"noup"' ; then + return 0 + fi wait_for_osd up $id || return 1 } -function run_osd_bluestore() { +function run_osd_filestore() { local dir=$1 shift local id=$1 @@ -710,7 +714,7 @@ function run_osd_bluestore() { echo "{\"cephx_secret\": \"$OSD_SECRET\"}" > $osd_data/new.json ceph osd new $uuid -i $osd_data/new.json rm $osd_data/new.json - ceph-osd -i $id $ceph_args --mkfs --key $OSD_SECRET --osd-uuid $uuid --osd-objectstore=bluestore + ceph-osd -i $id $ceph_args --mkfs --key $OSD_SECRET --osd-uuid $uuid --osd-objectstore=filestore local key_fn=$osd_data/keyring cat > $key_fn< +# +# Author: David Zafman +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7102" # git grep '\<7102\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + $func $dir || return 1 + done +} + +TEST_POOL1=test1 +TEST_POOL2=test2 + +function TEST_balancer() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + create_pool $TEST_POOL1 8 + create_pool $TEST_POOL2 8 + + wait_for_clean || return 1 + + ceph pg dump pgs + ceph osd set-require-min-compat-client luminous + ceph balancer status || return 1 + eval MODE=$(ceph balancer status | jq '.mode') + test $MODE = "none" || return 1 + ACTIVE=$(ceph balancer status | jq '.active') + test $ACTIVE = "false" || return 1 + + ceph balancer ls || return 1 + PLANS=$(ceph balancer ls) + test "$PLANS" = "[]" || return 1 + ceph balancer eval || return 1 + EVAL="$(ceph balancer eval)" + test "$EVAL" = "current cluster score 0.000000 (lower is better)" + ceph balancer eval-verbose || return 1 + + ceph balancer pool add $TEST_POOL1 || return 1 + ceph balancer pool add $TEST_POOL2 || return 1 + ceph balancer pool ls || return 1 + eval POOL=$(ceph balancer pool ls | jq '.[0]') + test "$POOL" = "$TEST_POOL1" || return 1 + eval POOL=$(ceph balancer pool ls | jq '.[1]') + test "$POOL" = "$TEST_POOL2" || return 1 + ceph balancer pool rm $TEST_POOL1 || return 1 + ceph balancer pool rm $TEST_POOL2 || return 1 + ceph balancer pool ls || return 1 + ceph balancer pool add $TEST_POOL1 || return 1 + + ceph balancer mode crush-compat || return 1 + ceph balancer status || return 1 + eval MODE=$(ceph balancer status | jq '.mode') + test $MODE = "crush-compat" || return 1 + ! 
ceph balancer optimize plan_crush $TEST_POOL1 || return 1 + ceph balancer status || return 1 + eval RESULT=$(ceph balancer status | jq '.optimize_result') + test "$RESULT" = "Distribution is already perfect" || return 1 + + ceph balancer on || return 1 + ACTIVE=$(ceph balancer status | jq '.active') + test $ACTIVE = "true" || return 1 + sleep 2 + ceph balancer status || return 1 + ceph balancer off || return 1 + ACTIVE=$(ceph balancer status | jq '.active') + test $ACTIVE = "false" || return 1 + sleep 2 + + ceph balancer reset || return 1 + + ceph balancer mode upmap || return 1 + ceph balancer status || return 1 + eval MODE=$(ceph balancer status | jq '.mode') + test $MODE = "upmap" || return 1 + ! ceph balancer optimize plan_upmap $TEST_POOL || return 1 + ceph balancer status || return 1 + eval RESULT=$(ceph balancer status | jq '.optimize_result') + test "$RESULT" = "Unable to find further optimization, or pool(s)' pg_num is decreasing, or distribution is already perfect" || return 1 + + ceph balancer on || return 1 + ACTIVE=$(ceph balancer status | jq '.active') + test $ACTIVE = "true" || return 1 + sleep 2 + ceph balancer status || return 1 + ceph balancer off || return 1 + ACTIVE=$(ceph balancer status | jq '.active') + test $ACTIVE = "false" || return 1 + + teardown $dir || return 1 +} + +main balancer "$@" + +# Local Variables: +# compile-command: "make -j4 && ../qa/run-standalone.sh balancer.sh" +# End: diff --git a/ceph/qa/standalone/misc/network-ping.sh b/ceph/qa/standalone/misc/network-ping.sh new file mode 100755 index 000000000..b2b299d63 --- /dev/null +++ b/ceph/qa/standalone/misc/network-ping.sh @@ -0,0 +1,145 @@ +#!/usr/bin/env bash + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7146" # git grep '\<7146\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + CEPH_ARGS+="--debug_disable_randomized_ping=true " + CEPH_ARGS+="--debug_heartbeat_testing_span=5 " + CEPH_ARGS+="--osd_heartbeat_interval=1 " + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_network_ping_test1() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + + sleep 5 + + create_pool foo 16 + + # write some objects + timeout 20 rados bench -p foo 10 write -b 4096 --no-cleanup || return 1 + + # Get 1 cycle worth of ping data "1 minute" + sleep 10 + flush_pg_stats + + CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) dump_osd_network | tee $dir/json + test "$(cat $dir/json | jq '.entries | length')" = "0" || return 1 + test "$(cat $dir/json | jq '.threshold')" = "1000" || return 1 + + CEPH_ARGS='' ceph daemon $(get_asok_path mgr.x) dump_osd_network | tee $dir/json + test "$(cat $dir/json | jq '.entries | length')" = "0" || return 1 + test "$(cat $dir/json | jq '.threshold')" = "1000" || return 1 + + CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) dump_osd_network 0 | tee $dir/json + test "$(cat $dir/json | jq '.entries | length')" = "4" || return 1 + test "$(cat $dir/json | jq '.threshold')" = "0" || return 1 + + CEPH_ARGS='' ceph daemon $(get_asok_path mgr.x) dump_osd_network 0 | tee $dir/json + test "$(cat $dir/json | jq '.entries | length')" = "12" || return 1 + test "$(cat $dir/json | jq 
'.threshold')" = "0" || return 1 + + # Wait another 4 cycles to get "5 minute interval" + sleep 20 + flush_pg_stats + CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) dump_osd_network | tee $dir/json + test "$(cat $dir/json | jq '.entries | length')" = "0" || return 1 + test "$(cat $dir/json | jq '.threshold')" = "1000" || return 1 + + CEPH_ARGS='' ceph daemon $(get_asok_path mgr.x) dump_osd_network | tee $dir/json + test "$(cat $dir/json | jq '.entries | length')" = "0" || return 1 + test "$(cat $dir/json | jq '.threshold')" = "1000" || return 1 + + CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) dump_osd_network 0 | tee $dir/json + test "$(cat $dir/json | jq '.entries | length')" = "4" || return 1 + test "$(cat $dir/json | jq '.threshold')" = "0" || return 1 + + CEPH_ARGS='' ceph daemon $(get_asok_path mgr.x) dump_osd_network 0 | tee $dir/json + test "$(cat $dir/json | jq '.entries | length')" = "12" || return 1 + test "$(cat $dir/json | jq '.threshold')" = "0" || return 1 + + + # Wait another 10 cycles to get "15 minute interval" + sleep 50 + flush_pg_stats + CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) dump_osd_network | tee $dir/json + test "$(cat $dir/json | jq '.entries | length')" = "0" || return 1 + test "$(cat $dir/json | jq '.threshold')" = "1000" || return 1 + + CEPH_ARGS='' ceph daemon $(get_asok_path mgr.x) dump_osd_network | tee $dir/json + test "$(cat $dir/json | jq '.entries | length')" = "0" || return 1 + test "$(cat $dir/json | jq '.threshold')" = "1000" || return 1 + + CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) dump_osd_network 0 | tee $dir/json + test "$(cat $dir/json | jq '.entries | length')" = "4" || return 1 + test "$(cat $dir/json | jq '.threshold')" = "0" || return 1 + + CEPH_ARGS='' ceph daemon $(get_asok_path mgr.x) dump_osd_network 0 | tee $dir/json + test "$(cat $dir/json | jq '.entries | length')" = "12" || return 1 + test "$(cat $dir/json | jq '.threshold')" = "0" || return 1 + + # Just check the threshold output matches the input + CEPH_ARGS='' ceph daemon $(get_asok_path mgr.x) dump_osd_network 99 | tee $dir/json + test "$(cat $dir/json | jq '.threshold')" = "99" || return 1 + CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) dump_osd_network 98 | tee $dir/json + test "$(cat $dir/json | jq '.threshold')" = "98" || return 1 + + rm -f $dir/json +} + +# Test setting of mon_warn_on_slow_ping_time very low to +# get health warning +function TEST_network_ping_test2() { + local dir=$1 + + export CEPH_ARGS + export EXTRA_OPTS=" --mon_warn_on_slow_ping_time=0.001" + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + + sleep 5 + + create_pool foo 16 + + # write some objects + timeout 20 rados bench -p foo 10 write -b 4096 --no-cleanup || return 1 + + # Get at least 1 cycle of ping data (this test runs with 5 second cycles of 1 second pings) + sleep 10 + flush_pg_stats + + ceph health | tee $dir/health + grep -q "Long heartbeat" $dir/health || return 1 + + ceph health detail | tee $dir/health + grep -q "OSD_SLOW_PING_TIME_BACK" $dir/health || return 1 + grep -q "OSD_SLOW_PING_TIME_FRONT" $dir/health || return 1 + rm -f $dir/health +} + +main network-ping "$@" + +# Local Variables: +# compile-command: "cd ../.. 
; make -j4 && ../qa/run-standalone.sh network-ping.sh" +# End: diff --git a/ceph/qa/standalone/mon/mon-osdmap-prune.sh b/ceph/qa/standalone/mon/mon-osdmap-prune.sh index e3928087a..f8f7876bb 100755 --- a/ceph/qa/standalone/mon/mon-osdmap-prune.sh +++ b/ceph/qa/standalone/mon/mon-osdmap-prune.sh @@ -4,11 +4,6 @@ source $CEPH_ROOT/qa/standalone/ceph-helpers.sh base_test=$CEPH_ROOT/qa/workunits/mon/test_mon_osdmap_prune.sh -# We are going to open and close a lot of files, and generate a lot of maps -# that the osds will need to process. If we don't increase the fd ulimit, we -# risk having the osds asserting when handling filestore transactions. -ulimit -n 4096 - function run() { local dir=$1 diff --git a/ceph/qa/standalone/mon/osd-pool-create.sh b/ceph/qa/standalone/mon/osd-pool-create.sh index f404cdda9..ecb94cb3a 100755 --- a/ceph/qa/standalone/mon/osd-pool-create.sh +++ b/ceph/qa/standalone/mon/osd-pool-create.sh @@ -212,12 +212,11 @@ function TEST_pool_create_rep_expected_num_objects() { local dir=$1 setup $dir || return 1 - # disable pg dir merge - CEPH_ARGS+="--osd-objectstore=filestore" export CEPH_ARGS run_mon $dir a || return 1 run_mgr $dir x || return 1 - run_osd $dir 0 || return 1 + # disable pg dir merge + run_osd_filestore $dir 0 || return 1 ceph osd pool create rep_expected_num_objects 64 64 replicated replicated_rule 100000 || return 1 # wait for pg dir creating diff --git a/ceph/qa/standalone/osd/divergent-priors.sh b/ceph/qa/standalone/osd/divergent-priors.sh new file mode 100755 index 000000000..dec0e7ad4 --- /dev/null +++ b/ceph/qa/standalone/osd/divergent-priors.sh @@ -0,0 +1,840 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2019 Red Hat +# +# Author: David Zafman +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + # This should multiple of 6 + export loglen=12 + export divisor=3 + export trim=$(expr $loglen / 2) + export DIVERGENT_WRITE=$(expr $trim / $divisor) + export DIVERGENT_REMOVE=$(expr $trim / $divisor) + export DIVERGENT_CREATE=$(expr $trim / $divisor) + export poolname=test + export testobjects=100 + # Fix port???? 
+ export CEPH_MON="127.0.0.1:7115" # git grep '\<7115\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + # so we will not force auth_log_shard to be acting_primary + CEPH_ARGS+="--osd_force_auth_primary_missing_objects=1000000 " + CEPH_ARGS+="--osd_debug_pg_log_writeout=true " + CEPH_ARGS+="--osd_min_pg_log_entries=$loglen --osd_max_pg_log_entries=$loglen --osd_pg_log_trim_min=$trim " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + + +# Special case divergence test +# Test handling of divergent entries with prior_version +# prior to log_tail +# based on qa/tasks/divergent_prior.py +function TEST_divergent() { + local dir=$1 + + # something that is always there + local dummyfile='/etc/fstab' + local dummyfile2='/etc/resolv.conf' + + local num_osds=3 + local osds="$(seq 0 $(expr $num_osds - 1))" + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + for i in $osds + do + run_osd $dir $i || return 1 + done + + ceph osd set noout + ceph osd set noin + ceph osd set nodown + create_pool $poolname 1 1 + ceph osd pool set $poolname size 3 + ceph osd pool set $poolname min_size 2 + + flush_pg_stats || return 1 + wait_for_clean || return 1 + + # determine primary + local divergent="$(ceph pg dump pgs --format=json | jq '.pg_stats[0].up_primary')" + echo "primary and soon to be divergent is $divergent" + ceph pg dump pgs + local non_divergent="" + for i in $osds + do + if [ "$i" = "$divergent" ]; then + continue + fi + non_divergent="$non_divergent $i" + done + + echo "writing initial objects" + # write a bunch of objects + for i in $(seq 1 $testobjects) + do + rados -p $poolname put existing_$i $dummyfile + done + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + local pgid=$(get_pg $poolname existing_1) + + # blackhole non_divergent + echo "blackholing osds $non_divergent" + ceph pg dump pgs + for i in $non_divergent + do + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) config set objectstore_blackhole 1 + done + + local case5=$testobjects + local case3=$(expr $testobjects - 1) + # Write some soon to be divergent + echo 'writing divergent object' + rados -p $poolname put existing_$case5 $dummyfile & + echo 'create missing divergent object' + inject_eio rep data $poolname existing_$case3 $dir 0 || return 1 + rados -p $poolname get existing_$case3 $dir/existing & + sleep 10 + killall -9 rados + + # kill all the osds but leave divergent in + echo 'killing all the osds' + ceph pg dump pgs + kill_daemons $dir KILL osd || return 1 + for i in $osds + do + ceph osd down osd.$i + done + for i in $non_divergent + do + ceph osd out osd.$i + done + + # bring up non-divergent + echo "bringing up non_divergent $non_divergent" + ceph pg dump pgs + for i in $non_divergent + do + activate_osd $dir $i || return 1 + done + for i in $non_divergent + do + ceph osd in osd.$i + done + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + # write 1 non-divergent object (ensure that old divergent one is divergent) + objname="existing_$(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE)" + echo "writing non-divergent object $objname" + ceph pg dump pgs + rados -p $poolname put $objname $dummyfile2 + + # ensure no recovery of up osds first + echo 'delay recovery' + ceph pg dump pgs + for i in $non_divergent + do + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) set_recovery_delay 100000 
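+        # [Editor's note, not part of the upstream patch] set_recovery_delay is
+        # the test-only admin socket hook that raises osd_recovery_delay_start,
+        # keeping recovery queued until the "debug kick_recovery_wq 0" calls
+        # later in this test drop it back to 0.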
+ done + + # bring in our divergent friend + echo "revive divergent $divergent" + ceph pg dump pgs + ceph osd set noup + activate_osd $dir $divergent + sleep 5 + + echo 'delay recovery divergent' + ceph pg dump pgs + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${divergent}) set_recovery_delay 100000 + + ceph osd unset noup + + wait_for_osd up 0 + wait_for_osd up 1 + wait_for_osd up 2 + + ceph pg dump pgs + echo 'wait for peering' + ceph pg dump pgs + rados -p $poolname put foo $dummyfile + + echo "killing divergent $divergent" + ceph pg dump pgs + kill_daemons $dir KILL osd.$divergent + #_objectstore_tool_nodown $dir $divergent --op log --pgid $pgid + echo "reviving divergent $divergent" + ceph pg dump pgs + activate_osd $dir $divergent + + sleep 20 + + echo "allowing recovery" + ceph pg dump pgs + # Set osd_recovery_delay_start back to 0 and kick the queue + for i in $osds + do + ceph tell osd.$i debug kick_recovery_wq 0 + done + + echo 'reading divergent objects' + ceph pg dump pgs + for i in $(seq 1 $(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE)) + do + rados -p $poolname get existing_$i $dir/existing || return 1 + done + rm -f $dir/existing + + grep _merge_object_divergent_entries $(find $dir -name '*osd*log') + # Check for _merge_object_divergent_entries for case #5 + if ! grep -q "_merge_object_divergent_entries.*cannot roll back, removing and adding to missing" $(find $dir -name '*osd*log') + then + echo failure + return 1 + fi + echo "success" + + delete_pool $poolname + kill_daemons $dir || return 1 +} + +function TEST_divergent_ec() { + local dir=$1 + + # something that is always there + local dummyfile='/etc/fstab' + local dummyfile2='/etc/resolv.conf' + + local num_osds=3 + local osds="$(seq 0 $(expr $num_osds - 1))" + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + for i in $osds + do + run_osd $dir $i || return 1 + done + + ceph osd set noout + ceph osd set noin + ceph osd set nodown + create_ec_pool $poolname true k=2 m=1 || return 1 + + flush_pg_stats || return 1 + wait_for_clean || return 1 + + # determine primary + local divergent="$(ceph pg dump pgs --format=json | jq '.pg_stats[0].up_primary')" + echo "primary and soon to be divergent is $divergent" + ceph pg dump pgs + local non_divergent="" + for i in $osds + do + if [ "$i" = "$divergent" ]; then + continue + fi + non_divergent="$non_divergent $i" + done + + echo "writing initial objects" + # write a bunch of objects + for i in $(seq 1 $testobjects) + do + rados -p $poolname put existing_$i $dummyfile + done + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + local pgid=$(get_pg $poolname existing_1) + + # blackhole non_divergent + echo "blackholing osds $non_divergent" + ceph pg dump pgs + for i in $non_divergent + do + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) config set objectstore_blackhole 1 + done + + # Write some soon to be divergent + echo 'writing divergent object' + rados -p $poolname put existing_$testobjects $dummyfile2 & + sleep 1 + rados -p $poolname put existing_$testobjects $dummyfile & + rados -p $poolname mksnap snap1 + rados -p $poolname put existing_$(expr $testobjects - 1) $dummyfile & + sleep 10 + killall -9 rados + + # kill all the osds but leave divergent in + echo 'killing all the osds' + ceph pg dump pgs + kill_daemons $dir KILL osd || return 1 + for i in $osds + do + ceph osd down osd.$i + done + for i in $non_divergent + do + ceph osd out osd.$i + done + + # bring up non-divergent + echo "bringing up non_divergent $non_divergent" + ceph pg dump pgs + for i in 
$non_divergent + do + activate_osd $dir $i || return 1 + done + for i in $non_divergent + do + ceph osd in osd.$i + done + + sleep 5 + #WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + # write 1 non-divergent object (ensure that old divergent one is divergent) + objname="existing_$(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE)" + echo "writing non-divergent object $objname" + ceph pg dump pgs + rados -p $poolname put $objname $dummyfile2 + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + # Dump logs + for i in $non_divergent + do + kill_daemons $dir KILL osd.$i || return 1 + _objectstore_tool_nodown $dir $i --op log --pgid $pgid + activate_osd $dir $i || return 1 + done + _objectstore_tool_nodown $dir $divergent --op log --pgid $pgid + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + # ensure no recovery of up osds first + echo 'delay recovery' + ceph pg dump pgs + for i in $non_divergent + do + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) set_recovery_delay 100000 + done + + # bring in our divergent friend + echo "revive divergent $divergent" + ceph pg dump pgs + ceph osd set noup + activate_osd $dir $divergent + sleep 5 + + echo 'delay recovery divergent' + ceph pg dump pgs + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${divergent}) set_recovery_delay 100000 + + ceph osd unset noup + + wait_for_osd up 0 + wait_for_osd up 1 + wait_for_osd up 2 + + ceph pg dump pgs + echo 'wait for peering' + ceph pg dump pgs + rados -p $poolname put foo $dummyfile + + echo "killing divergent $divergent" + ceph pg dump pgs + kill_daemons $dir KILL osd.$divergent + #_objectstore_tool_nodown $dir $divergent --op log --pgid $pgid + echo "reviving divergent $divergent" + ceph pg dump pgs + activate_osd $dir $divergent + + sleep 20 + + echo "allowing recovery" + ceph pg dump pgs + # Set osd_recovery_delay_start back to 0 and kick the queue + for i in $osds + do + ceph tell osd.$i debug kick_recovery_wq 0 + done + + echo 'reading divergent objects' + ceph pg dump pgs + for i in $(seq 1 $(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE)) + do + rados -p $poolname get existing_$i $dir/existing || return 1 + done + rm -f $dir/existing + + grep _merge_object_divergent_entries $(find $dir -name '*osd*log') + # Check for _merge_object_divergent_entries for case #3 + # XXX: Not reproducing this case +# if ! grep -q "_merge_object_divergent_entries.* missing, .* adjusting" $(find $dir -name '*osd*log') +# then +# echo failure +# return 1 +# fi + # Check for _merge_object_divergent_entries for case #4 + if ! 
grep -q "_merge_object_divergent_entries.*rolled back" $(find $dir -name '*osd*log') + then + echo failure + return 1 + fi + echo "success" + + delete_pool $poolname + kill_daemons $dir || return 1 +} + +# Special case divergence test with ceph-objectstore-tool export/remove/import +# Test handling of divergent entries with prior_version +# prior to log_tail and a ceph-objectstore-tool export/import +# based on qa/tasks/divergent_prior2.py +function TEST_divergent_2() { + local dir=$1 + + # something that is always there + local dummyfile='/etc/fstab' + local dummyfile2='/etc/resolv.conf' + + local num_osds=3 + local osds="$(seq 0 $(expr $num_osds - 1))" + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + for i in $osds + do + run_osd $dir $i || return 1 + done + + ceph osd set noout + ceph osd set noin + ceph osd set nodown + create_pool $poolname 1 1 + ceph osd pool set $poolname size 3 + ceph osd pool set $poolname min_size 2 + + flush_pg_stats || return 1 + wait_for_clean || return 1 + + # determine primary + local divergent="$(ceph pg dump pgs --format=json | jq '.pg_stats[0].up_primary')" + echo "primary and soon to be divergent is $divergent" + ceph pg dump pgs + local non_divergent="" + for i in $osds + do + if [ "$i" = "$divergent" ]; then + continue + fi + non_divergent="$non_divergent $i" + done + + echo "writing initial objects" + # write a bunch of objects + for i in $(seq 1 $testobjects) + do + rados -p $poolname put existing_$i $dummyfile + done + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + local pgid=$(get_pg $poolname existing_1) + + # blackhole non_divergent + echo "blackholing osds $non_divergent" + ceph pg dump pgs + for i in $non_divergent + do + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) config set objectstore_blackhole 1 + done + + # Do some creates to hit case 2 + echo 'create new divergent objects' + for i in $(seq 1 $DIVERGENT_CREATE) + do + rados -p $poolname create newobject_$i & + done + # Write some soon to be divergent + echo 'writing divergent objects' + for i in $(seq 1 $DIVERGENT_WRITE) + do + rados -p $poolname put existing_$i $dummyfile2 & + done + # Remove some soon to be divergent + echo 'remove divergent objects' + for i in $(seq 1 $DIVERGENT_REMOVE) + do + rmi=$(expr $i + $DIVERGENT_WRITE) + rados -p $poolname rm existing_$rmi & + done + sleep 10 + killall -9 rados + + # kill all the osds but leave divergent in + echo 'killing all the osds' + ceph pg dump pgs + kill_daemons $dir KILL osd || return 1 + for i in $osds + do + ceph osd down osd.$i + done + for i in $non_divergent + do + ceph osd out osd.$i + done + + # bring up non-divergent + echo "bringing up non_divergent $non_divergent" + ceph pg dump pgs + for i in $non_divergent + do + activate_osd $dir $i || return 1 + done + for i in $non_divergent + do + ceph osd in osd.$i + done + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + # write 1 non-divergent object (ensure that old divergent one is divergent) + objname="existing_$(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE)" + echo "writing non-divergent object $objname" + ceph pg dump pgs + rados -p $poolname put $objname $dummyfile2 + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + # ensure no recovery of up osds first + echo 'delay recovery' + ceph pg dump pgs + for i in $non_divergent + do + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) set_recovery_delay 100000 + done + + # bring in our divergent friend + echo "revive divergent $divergent" + ceph pg dump pgs + ceph osd set noup + activate_osd $dir $divergent + 
sleep 5 + + echo 'delay recovery divergent' + ceph pg dump pgs + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${divergent}) set_recovery_delay 100000 + + ceph osd unset noup + + wait_for_osd up 0 + wait_for_osd up 1 + wait_for_osd up 2 + + ceph pg dump pgs + echo 'wait for peering' + ceph pg dump pgs + rados -p $poolname put foo $dummyfile + + # At this point the divergent_priors should have been detected + + echo "killing divergent $divergent" + ceph pg dump pgs + kill_daemons $dir KILL osd.$divergent + + # export a pg + expfile=$dir/exp.$$.out + _objectstore_tool_nodown $dir $divergent --op export-remove --pgid $pgid --file $expfile + _objectstore_tool_nodown $dir $divergent --op import --file $expfile + + echo "reviving divergent $divergent" + ceph pg dump pgs + activate_osd $dir $divergent + wait_for_osd up $divergent + + sleep 20 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${divergent}) dump_ops_in_flight + + echo "allowing recovery" + ceph pg dump pgs + # Set osd_recovery_delay_start back to 0 and kick the queue + for i in $osds + do + ceph tell osd.$i debug kick_recovery_wq 0 + done + + echo 'reading divergent objects' + ceph pg dump pgs + for i in $(seq 1 $(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE)) + do + rados -p $poolname get existing_$i $dir/existing || return 1 + done + for i in $(seq 1 $DIVERGENT_CREATE) + do + rados -p $poolname get newobject_$i $dir/existing + done + rm -f $dir/existing + + grep _merge_object_divergent_entries $(find $dir -name '*osd*log') + # Check for _merge_object_divergent_entries for case #1 + if ! grep -q "_merge_object_divergent_entries: more recent entry found:" $(find $dir -name '*osd*log') + then + echo failure + return 1 + fi + # Check for _merge_object_divergent_entries for case #2 + if ! 
grep -q "_merge_object_divergent_entries.*prior_version or op type indicates creation" $(find $dir -name '*osd*log') + then + echo failure + return 1 + fi + echo "success" + + rm $dir/$expfile + + delete_pool $poolname + kill_daemons $dir || return 1 +} + +# this is the same as case _2 above, except we enable pg autoscaling in order +# to reproduce https://tracker.ceph.com/issues/41816 +function TEST_divergent_3() { + local dir=$1 + + # something that is always there + local dummyfile='/etc/fstab' + local dummyfile2='/etc/resolv.conf' + + local num_osds=3 + local osds="$(seq 0 $(expr $num_osds - 1))" + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + for i in $osds + do + run_osd $dir $i || return 1 + done + + ceph osd set noout + ceph osd set noin + ceph osd set nodown + create_pool $poolname 1 1 + ceph osd pool set $poolname size 3 + ceph osd pool set $poolname min_size 2 + + # reproduce https://tracker.ceph.com/issues/41816 + ceph osd pool set $poolname pg_autoscale_mode on + + flush_pg_stats || return 1 + wait_for_clean || return 1 + + # determine primary + local divergent="$(ceph pg dump pgs --format=json | jq '.pg_stats[0].up_primary')" + echo "primary and soon to be divergent is $divergent" + ceph pg dump pgs + local non_divergent="" + for i in $osds + do + if [ "$i" = "$divergent" ]; then + continue + fi + non_divergent="$non_divergent $i" + done + + echo "writing initial objects" + # write a bunch of objects + for i in $(seq 1 $testobjects) + do + rados -p $poolname put existing_$i $dummyfile + done + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + local pgid=$(get_pg $poolname existing_1) + + # blackhole non_divergent + echo "blackholing osds $non_divergent" + ceph pg dump pgs + for i in $non_divergent + do + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) config set objectstore_blackhole 1 + done + + # Do some creates to hit case 2 + echo 'create new divergent objects' + for i in $(seq 1 $DIVERGENT_CREATE) + do + rados -p $poolname create newobject_$i & + done + # Write some soon to be divergent + echo 'writing divergent objects' + for i in $(seq 1 $DIVERGENT_WRITE) + do + rados -p $poolname put existing_$i $dummyfile2 & + done + # Remove some soon to be divergent + echo 'remove divergent objects' + for i in $(seq 1 $DIVERGENT_REMOVE) + do + rmi=$(expr $i + $DIVERGENT_WRITE) + rados -p $poolname rm existing_$rmi & + done + sleep 10 + killall -9 rados + + # kill all the osds but leave divergent in + echo 'killing all the osds' + ceph pg dump pgs + kill_daemons $dir KILL osd || return 1 + for i in $osds + do + ceph osd down osd.$i + done + for i in $non_divergent + do + ceph osd out osd.$i + done + + # bring up non-divergent + echo "bringing up non_divergent $non_divergent" + ceph pg dump pgs + for i in $non_divergent + do + activate_osd $dir $i || return 1 + done + for i in $non_divergent + do + ceph osd in osd.$i + done + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + # write 1 non-divergent object (ensure that old divergent one is divergent) + objname="existing_$(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE)" + echo "writing non-divergent object $objname" + ceph pg dump pgs + rados -p $poolname put $objname $dummyfile2 + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + # ensure no recovery of up osds first + echo 'delay recovery' + ceph pg dump pgs + for i in $non_divergent + do + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) set_recovery_delay 100000 + done + + # bring in our divergent friend + echo "revive divergent $divergent" + ceph pg dump pgs + 
ceph osd set noup + activate_osd $dir $divergent + sleep 5 + + echo 'delay recovery divergent' + ceph pg dump pgs + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${divergent}) set_recovery_delay 100000 + + ceph osd unset noup + + wait_for_osd up 0 + wait_for_osd up 1 + wait_for_osd up 2 + + ceph pg dump pgs + echo 'wait for peering' + ceph pg dump pgs + rados -p $poolname put foo $dummyfile + + # At this point the divergent_priors should have been detected + + echo "killing divergent $divergent" + ceph pg dump pgs + kill_daemons $dir KILL osd.$divergent + + # export a pg + expfile=$dir/exp.$$.out + _objectstore_tool_nodown $dir $divergent --op export-remove --pgid $pgid --file $expfile + _objectstore_tool_nodown $dir $divergent --op import --file $expfile + + echo "reviving divergent $divergent" + ceph pg dump pgs + activate_osd $dir $divergent + wait_for_osd up $divergent + + sleep 20 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${divergent}) dump_ops_in_flight + + echo "allowing recovery" + ceph pg dump pgs + # Set osd_recovery_delay_start back to 0 and kick the queue + for i in $osds + do + ceph tell osd.$i debug kick_recovery_wq 0 + done + + echo 'reading divergent objects' + ceph pg dump pgs + for i in $(seq 1 $(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE)) + do + rados -p $poolname get existing_$i $dir/existing || return 1 + done + for i in $(seq 1 $DIVERGENT_CREATE) + do + rados -p $poolname get newobject_$i $dir/existing + done + rm -f $dir/existing + + grep _merge_object_divergent_entries $(find $dir -name '*osd*log') + # Check for _merge_object_divergent_entries for case #1 + if ! grep -q "_merge_object_divergent_entries: more recent entry found:" $(find $dir -name '*osd*log') + then + echo failure + return 1 + fi + # Check for _merge_object_divergent_entries for case #2 + if ! grep -q "_merge_object_divergent_entries.*prior_version or op type indicates creation" $(find $dir -name '*osd*log') + then + echo failure + return 1 + fi + echo "success" + + rm $dir/$expfile + + delete_pool $poolname + kill_daemons $dir || return 1 +} + + +main divergent-priors "$@" + +# Local Variables: +# compile-command: "make -j4 && ../qa/run-standalone.sh divergent-priors.sh" +# End: diff --git a/ceph/qa/standalone/osd/ec-error-rollforward.sh b/ceph/qa/standalone/osd/ec-error-rollforward.sh index cfbf28719..05188ad50 100755 --- a/ceph/qa/standalone/osd/ec-error-rollforward.sh +++ b/ceph/qa/standalone/osd/ec-error-rollforward.sh @@ -10,7 +10,7 @@ function run() { export CEPH_MON="127.0.0.1:7132" # git grep '\<7132\>' : there must be only one export CEPH_ARGS CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " - CEPH_ARGS+="--mon-host=$CEPH_MON --osd-objectstore filestore" + CEPH_ARGS+="--mon-host=$CEPH_MON " export margin=10 export objects=200 export poolname=test diff --git a/ceph/qa/standalone/osd/osd-backfill-prio.sh b/ceph/qa/standalone/osd/osd-backfill-prio.sh index 2a69ba12d..a089696bb 100755 --- a/ceph/qa/standalone/osd/osd-backfill-prio.sh +++ b/ceph/qa/standalone/osd/osd-backfill-prio.sh @@ -157,7 +157,7 @@ function TEST_backfill_priority() { ceph osd pool set $pool3 size 2 sleep 2 - CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_reservations || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1 # 3. 
Item is in progress, adjust priority with no higher priority waiting for i in $(seq 1 $max_tries) @@ -172,18 +172,18 @@ function TEST_backfill_priority() { sleep 2 done flush_pg_stats || return 1 - CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_reservations || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1 ceph osd out osd.$chk_osd1_2 sleep 2 flush_pg_stats || return 1 - CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_reservations || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1 ceph pg dump pgs ceph osd pool set $pool2 size 2 sleep 2 flush_pg_stats || return 1 - CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_reservations > $dir/out || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1 cat $dir/out ceph pg dump pgs @@ -222,7 +222,7 @@ function TEST_backfill_priority() { sleep 2 done sleep 2 - CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_reservations > $dir/out || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1 cat $dir/out PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG2}\")).prio") if [ "$PRIO" != "$FORCE_PRIO" ]; @@ -235,7 +235,7 @@ function TEST_backfill_priority() { # 4. Item is in progress, if higher priority items waiting prempt item ceph pg cancel-force-backfill $PG3 || return 1 sleep 2 - CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_reservations > $dir/out || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1 cat $dir/out PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG3}\")).prio") if [ "$PRIO" != "$degraded_prio" ]; @@ -260,14 +260,14 @@ function TEST_backfill_priority() { ceph pg cancel-force-backfill $PG2 || return 1 sleep 5 - CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_reservations || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1 # 2. 
Item is queued, re-queue and preempt because new priority higher than an in progress item flush_pg_stats || return 1 ceph pg force-backfill $PG3 || return 1 sleep 2 - CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_reservations > $dir/out || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1 cat $dir/out PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG2}\")).prio") if [ "$PRIO" != "$degraded_prio" ]; @@ -293,7 +293,7 @@ function TEST_backfill_priority() { ceph osd unset noout ceph osd unset nobackfill - wait_for_clean "CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_reservations" || return 1 + wait_for_clean "CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations" || return 1 ceph pg dump pgs @@ -321,7 +321,7 @@ function TEST_backfill_priority() { # pool 2 with recovery_priority 2 # # Start backfill by changing the pool sizes from 1 to 2 -# Use dump_reservations to verify priorities +# Use dump_recovery_reservations to verify priorities function TEST_backfill_pool_priority() { local dir=$1 local pools=3 # Don't assume the first 2 pools are exact what we want @@ -430,10 +430,10 @@ function TEST_backfill_pool_priority() { ceph osd pool set $pool1 size 2 ceph osd pool set $pool2 size 2 sleep 5 - CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_reservations > $dir/dump.${chk_osd1_1}.out + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/dump.${chk_osd1_1}.out echo osd.${chk_osd1_1} cat $dir/dump.${chk_osd1_1}.out - CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_2}) dump_reservations > $dir/dump.${chk_osd1_2}.out + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_2}) dump_recovery_reservations > $dir/dump.${chk_osd1_2}.out echo osd.${chk_osd1_2} cat $dir/dump.${chk_osd1_2}.out diff --git a/ceph/qa/standalone/osd/osd-backfill-space.sh b/ceph/qa/standalone/osd/osd-backfill-space.sh index 936043250..636601ec7 100755 --- a/ceph/qa/standalone/osd/osd-backfill-space.sh +++ b/ceph/qa/standalone/osd/osd-backfill-space.sh @@ -247,6 +247,21 @@ function TEST_backfill_test_multi() { fi ceph pg dump pgs + ceph status + + ceph status --format=json-pretty > $dir/stat.json + + eval SEV=$(jq '.health.checks.PG_BACKFILL_FULL.severity' $dir/stat.json) + if [ "$SEV" != "HEALTH_WARN" ]; then + echo "PG_BACKFILL_FULL severity $SEV not HEALTH_WARN" + ERRORS="$(expr $ERRORS + 1)" + fi + eval MSG=$(jq '.health.checks.PG_BACKFILL_FULL.summary.message' $dir/stat.json) + if [ "$MSG" != "Low space hindering backfill (add storage if this doesn't resolve itself): 4 pgs backfill_toofull" ]; then + echo "PG_BACKFILL_FULL message '$MSG' mismatched" + ERRORS="$(expr $ERRORS + 1)" + fi + rm -f $dir/stat.json if [ $ERRORS != "0" ]; then diff --git a/ceph/qa/standalone/osd/osd-bluefs-volume-ops.sh b/ceph/qa/standalone/osd/osd-bluefs-volume-ops.sh index 550708963..5258cfc6d 100755 --- a/ceph/qa/standalone/osd/osd-bluefs-volume-ops.sh +++ b/ceph/qa/standalone/osd/osd-bluefs-volume-ops.sh @@ -38,13 +38,13 @@ function TEST_bluestore() { run_mon $dir a || return 1 run_mgr $dir x || return 1 - run_osd_bluestore $dir 0 || return 1 + run_osd $dir 0 || return 1 osd_pid0=$(cat $dir/osd.0.pid) - run_osd_bluestore $dir 1 || return 1 + run_osd $dir 1 || return 1 osd_pid1=$(cat $dir/osd.1.pid) - run_osd_bluestore $dir 2 || return 1 + 
run_osd $dir 2 || return 1 osd_pid2=$(cat $dir/osd.2.pid) - run_osd_bluestore $dir 3 || return 1 + run_osd $dir 3 || return 1 osd_pid3=$(cat $dir/osd.3.pid) sleep 5 @@ -140,13 +140,13 @@ function TEST_bluestore() { ceph-bluestore-tool --path $dir/3 fsck || return 1 - run_osd_bluestore $dir 0 || return 1 + run_osd $dir 0 || return 1 osd_pid0=$(cat $dir/osd.0.pid) - run_osd_bluestore $dir 1 || return 1 + run_osd $dir 1 || return 1 osd_pid1=$(cat $dir/osd.1.pid) - run_osd_bluestore $dir 2 || return 1 + run_osd $dir 2 || return 1 osd_pid2=$(cat $dir/osd.2.pid) - run_osd_bluestore $dir 3 || return 1 + run_osd $dir 3 || return 1 osd_pid3=$(cat $dir/osd.3.pid) wait_for_clean || return 1 @@ -218,13 +218,13 @@ function TEST_bluestore() { ceph-bluestore-tool --path $dir/3 fsck || return 1 - run_osd_bluestore $dir 0 || return 1 + run_osd $dir 0 || return 1 osd_pid0=$(cat $dir/osd.0.pid) - run_osd_bluestore $dir 1 || return 1 + run_osd $dir 1 || return 1 osd_pid1=$(cat $dir/osd.1.pid) - run_osd_bluestore $dir 2 || return 1 + run_osd $dir 2 || return 1 osd_pid2=$(cat $dir/osd.2.pid) - run_osd_bluestore $dir 3 || return 1 + run_osd $dir 3 || return 1 osd_pid3=$(cat $dir/osd.3.pid) # write some objects @@ -324,13 +324,13 @@ function TEST_bluestore() { ceph-bluestore-tool --path $dir/3 fsck || return 1 - run_osd_bluestore $dir 0 || return 1 + run_osd $dir 0 || return 1 osd_pid0=$(cat $dir/osd.0.pid) - run_osd_bluestore $dir 1 || return 1 + run_osd $dir 1 || return 1 osd_pid1=$(cat $dir/osd.1.pid) - run_osd_bluestore $dir 2 || return 1 + run_osd $dir 2 || return 1 osd_pid2=$(cat $dir/osd.2.pid) - run_osd_bluestore $dir 3 || return 1 + run_osd $dir 3 || return 1 osd_pid3=$(cat $dir/osd.3.pid) # write some objects diff --git a/ceph/qa/standalone/osd/osd-dup.sh b/ceph/qa/standalone/osd/osd-dup.sh index 324840b04..26f583b99 100755 --- a/ceph/qa/standalone/osd/osd-dup.sh +++ b/ceph/qa/standalone/osd/osd-dup.sh @@ -33,10 +33,10 @@ function TEST_filestore_to_bluestore() { run_mon $dir a || return 1 run_mgr $dir x || return 1 - run_osd $dir 0 || return 1 + run_osd_filestore $dir 0 || return 1 osd_pid=$(cat $dir/osd.0.pid) - run_osd $dir 1 || return 1 - run_osd $dir 2 || return 1 + run_osd_filestore $dir 1 || return 1 + run_osd_filestore $dir 2 || return 1 sleep 5 @@ -61,7 +61,7 @@ function TEST_filestore_to_bluestore() { --op dup || return 1 CEPH_ARGS=$O - run_osd_bluestore $dir 0 || return 1 + run_osd $dir 0 || return 1 while ! ceph osd stat | grep '3 up' ; do sleep 1 ; done ceph osd metadata 0 | grep bluestore || return 1 diff --git a/ceph/qa/standalone/osd/osd-recovery-prio.sh b/ceph/qa/standalone/osd/osd-recovery-prio.sh index d246dda66..fb386e265 100755 --- a/ceph/qa/standalone/osd/osd-recovery-prio.sh +++ b/ceph/qa/standalone/osd/osd-recovery-prio.sh @@ -152,7 +152,7 @@ function TEST_recovery_priority() { # to be preempted. ceph osd pool set $pool3 size 2 sleep 2 - CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_reservations || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1 # 3. 
Item is in progress, adjust priority with no higher priority waiting for i in $(seq 1 $max_tries) @@ -167,18 +167,18 @@ function TEST_recovery_priority() { sleep 2 done flush_pg_stats || return 1 - CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_reservations || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1 ceph osd out osd.$chk_osd1_2 sleep 2 flush_pg_stats || return 1 - CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_reservations || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1 ceph pg dump pgs ceph osd pool set $pool2 size 2 sleep 2 flush_pg_stats || return 1 - CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_reservations > $dir/out || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1 cat $dir/out ceph pg dump pgs @@ -217,7 +217,7 @@ function TEST_recovery_priority() { sleep 2 done sleep 2 - CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_reservations > $dir/out || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1 cat $dir/out PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG2}\")).prio") if [ "$PRIO" != "$FORCE_PRIO" ]; @@ -232,7 +232,7 @@ function TEST_recovery_priority() { ceph pg cancel-force-recovery $PG3 || return 1 sleep 2 #ceph osd set norecover - CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_reservations > $dir/out || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1 cat $dir/out PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG3}\")).prio") if [ "$PRIO" != "$NORMAL_PRIO" ]; @@ -257,14 +257,14 @@ function TEST_recovery_priority() { ceph pg cancel-force-recovery $PG2 || return 1 sleep 5 - CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_reservations || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1 # 2. 
Item is queued, re-queue and preempt because new priority higher than an in progress item flush_pg_stats || return 1 ceph pg force-recovery $PG3 || return 1 sleep 2 - CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_reservations > $dir/out || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1 cat $dir/out PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG2}\")).prio") if [ "$PRIO" != "$NORMAL_PRIO" ]; @@ -290,7 +290,7 @@ function TEST_recovery_priority() { ceph osd unset noout ceph osd unset norecover - wait_for_clean "CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_reservations" || return 1 + wait_for_clean "CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations" || return 1 ceph pg dump pgs @@ -318,7 +318,7 @@ function TEST_recovery_priority() { # pool 2 with recovery_priority 2 # # Start recovery by changing the pool sizes from 1 to 2 -# Use dump_reservations to verify priorities +# Use dump_recovery_reservations to verify priorities function TEST_recovery_pool_priority() { local dir=$1 local pools=3 # Don't assume the first 2 pools are exact what we want @@ -426,10 +426,10 @@ function TEST_recovery_pool_priority() { ceph osd pool set $pool1 size 2 ceph osd pool set $pool2 size 2 sleep 10 - CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_reservations > $dir/dump.${chk_osd1_1}.out + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/dump.${chk_osd1_1}.out echo osd.${chk_osd1_1} cat $dir/dump.${chk_osd1_1}.out - CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_2}) dump_reservations > $dir/dump.${chk_osd1_2}.out + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_2}) dump_recovery_reservations > $dir/dump.${chk_osd1_2}.out echo osd.${chk_osd1_2} cat $dir/dump.${chk_osd1_2}.out diff --git a/ceph/qa/standalone/osd/osd-recovery-space.sh b/ceph/qa/standalone/osd/osd-recovery-space.sh new file mode 100755 index 000000000..d12494a90 --- /dev/null +++ b/ceph/qa/standalone/osd/osd-recovery-space.sh @@ -0,0 +1,179 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2018 Red Hat +# +# Author: David Zafman +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. 
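+# [Editor's note, not part of the upstream patch] Rough arithmetic behind
+# TEST_recovery_test_simple below, assuming the defaults set in this file are
+# unchanged:
+#   data written    : 600 objects x 5 KiB                     = 3000 KiB
+#   injected statfs : --fake_statfs_for_testing 3686400 bytes = 3600 KiB
+#   utilisation     : 3000 / 3600                             ~ 83%
+# That is well above the 0.50 full-ratio the test sets, so once the pool size
+# is raised to 2 the PG is expected to sit in recovery_toofull and raise the
+# PG_RECOVERY_FULL (HEALTH_ERR) health check.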
+# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7221" # git grep '\<7221\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + CEPH_ARGS+="--osd_max_backfills=10 " + export objects=600 + export poolprefix=test + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + + +function get_num_in_state() { + local state=$1 + local expression + expression+="select(contains(\"${state}\"))" + ceph --format json pg dump pgs 2>/dev/null | \ + jq ".pg_stats | [.[] | .state | $expression] | length" +} + + +function wait_for_state() { + local state=$1 + local num_in_state=-1 + local cur_in_state + local -a delays=($(get_timeout_delays $2 5)) + local -i loop=0 + + flush_pg_stats || return 1 + while test $(get_num_pgs) == 0 ; do + sleep 1 + done + + while true ; do + cur_in_state=$(get_num_in_state ${state}) + test $cur_in_state = "0" && break + if test $cur_in_state != $num_in_state ; then + loop=0 + num_in_state=$cur_in_state + elif (( $loop >= ${#delays[*]} )) ; then + ceph pg dump pgs + return 1 + fi + sleep ${delays[$loop]} + loop+=1 + done + return 0 +} + + +function wait_for_recovery_toofull() { + local timeout=$1 + wait_for_state recovery_toofull $timeout +} + + +# Create 1 pools with size 1 +# set ful-ratio to 50% +# Write data 600 5K (3000K) +# Inject fake_statfs_for_testing to 3600K (83% full) +# Incresase the pool size to 2 +# The pool shouldn't have room to recovery +function TEST_recovery_test_simple() { + local dir=$1 + local pools=1 + local OSDS=2 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + ceph osd set-nearfull-ratio .40 + ceph osd set-backfillfull-ratio .45 + ceph osd set-full-ratio .50 + + for p in $(seq 1 $pools) + do + create_pool "${poolprefix}$p" 1 1 + ceph osd pool set "${poolprefix}$p" size 1 + done + + wait_for_clean || return 1 + + dd if=/dev/urandom of=$dir/datafile bs=1024 count=5 + for o in $(seq 1 $objects) + do + rados -p "${poolprefix}$p" put obj$o $dir/datafile + done + + for o in $(seq 0 $(expr $OSDS - 1)) + do + ceph tell osd.$o injectargs '--fake_statfs_for_testing 3686400' || return 1 + done + sleep 5 + + ceph pg dump pgs + + for p in $(seq 1 $pools) + do + ceph osd pool set "${poolprefix}$p" size 2 + done + + # If this times out, we'll detected errors below + wait_for_recovery_toofull 30 + + ERRORS=0 + if [ "$(ceph pg dump pgs | grep +recovery_toofull | wc -l)" != "1" ]; + then + echo "One pool should have been in recovery_toofull" + ERRORS="$(expr $ERRORS + 1)" + fi + + ceph pg dump pgs + ceph status + ceph status --format=json-pretty > $dir/stat.json + + eval SEV=$(jq '.health.checks.PG_RECOVERY_FULL.severity' $dir/stat.json) + if [ "$SEV" != "HEALTH_ERR" ]; then + echo "PG_RECOVERY_FULL severity $SEV not HEALTH_ERR" + ERRORS="$(expr $ERRORS + 1)" + fi + eval MSG=$(jq '.health.checks.PG_RECOVERY_FULL.summary.message' $dir/stat.json) + if [ "$MSG" != "Full OSDs blocking recovery: 1 pg recovery_toofull" ]; then + echo "PG_RECOVERY_FULL message '$MSG' mismatched" + ERRORS="$(expr $ERRORS + 1)" + fi + rm -f $dir/stat.json + + if [ $ERRORS != "0" ]; + then + return 1 + fi + + for i in $(seq 1 $pools) + do + delete_pool "${poolprefix}$i" + done + 
kill_daemons $dir || return 1 +} + + +main osd-recovery-space "$@" + +# Local Variables: +# compile-command: "make -j4 && ../qa/run-standalone.sh osd-recovery-space.sh" +# End: diff --git a/ceph/qa/standalone/osd/osd-rep-recov-eio.sh b/ceph/qa/standalone/osd/osd-rep-recov-eio.sh index adf6fc796..6e9eeac39 100755 --- a/ceph/qa/standalone/osd/osd-rep-recov-eio.sh +++ b/ceph/qa/standalone/osd/osd-rep-recov-eio.sh @@ -27,7 +27,7 @@ function run() { export CEPH_ARGS CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " CEPH_ARGS+="--mon-host=$CEPH_MON " - CEPH_ARGS+="--osd-objectstore=filestore " + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} for func in $funcs ; do @@ -44,9 +44,10 @@ function run() { function setup_osds() { local count=$1 shift + local type=$1 for id in $(seq 0 $(expr $count - 1)) ; do - run_osd $dir $id || return 1 + run_osd${type} $dir $id || return 1 done wait_for_clean || return 1 } @@ -331,7 +332,7 @@ function TEST_rep_read_unfound() { local dir=$1 local objname=myobject - setup_osds 3 || return 1 + setup_osds 3 _filestore || return 1 ceph osd pool delete foo foo --yes-i-really-really-mean-it || return 1 local poolname=test-pool diff --git a/ceph/qa/standalone/scrub/osd-scrub-dump.sh b/ceph/qa/standalone/scrub/osd-scrub-dump.sh new file mode 100755 index 000000000..e218834c6 --- /dev/null +++ b/ceph/qa/standalone/scrub/osd-scrub-dump.sh @@ -0,0 +1,173 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2019 Red Hat +# +# Author: David Zafman +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. 
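+# [Editor's note, not part of the upstream patch] Sketch of what the test in
+# this file verifies: every PG is told to scrub while osd_max_scrubs=4, and
+# each OSD's dump_scrub_reservations output is polled so that
+# scrubs_local + scrubs_remote never exceeds osd_max_scrubs, while the total
+# number of reservations observed over the run must reach at least
+# PGS * POOLS * POOL_SIZE.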
+# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +MAX_SCRUBS=4 +SCRUB_SLEEP=2 +POOL_SIZE=3 + +function run() { + local dir=$1 + shift + local SLEEP=0 + local CHUNK_MAX=5 + + export CEPH_MON="127.0.0.1:7184" # git grep '\<7184\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + CEPH_ARGS+="--osd_max_scrubs=$MAX_SCRUBS " + CEPH_ARGS+="--osd_scrub_sleep=$SLEEP " + CEPH_ARGS+="--osd_scrub_chunk_max=$CHUNK_MAX " + CEPH_ARGS+="--osd_scrub_sleep=$SCRUB_SLEEP " + CEPH_ARGS+="--osd_pool_default_size=$POOL_SIZE " + + export -n CEPH_CLI_TEST_DUP_COMMAND + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_recover_unexpected() { + local dir=$1 + shift + local OSDS=6 + local PGS=16 + local POOLS=3 + local OBJS=1000 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + for o in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $o + done + + for i in $(seq 1 $POOLS) + do + create_pool test$i $PGS $PGS + done + + wait_for_clean || return 1 + + dd if=/dev/urandom of=datafile bs=4k count=2 + for i in $(seq 1 $POOLS) + do + for j in $(seq 1 $OBJS) + do + rados -p test$i put obj$j datafile + done + done + rm datafile + + ceph osd set noscrub + ceph osd set nodeep-scrub + + for qpg in $(ceph pg dump pgs --format=json-pretty | jq '.pg_stats[].pgid') + do + primary=$(ceph pg dump pgs --format=json | jq ".pg_stats[] | select(.pgid == $qpg) | .acting_primary") + eval pg=$qpg # strip quotes around qpg + CEPH_ARGS='' ceph daemon $(get_asok_path osd.$primary) trigger_scrub $pg + done + + ceph pg dump pgs + + max=$(CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) dump_scrub_reservations | jq '.osd_max_scrubs') + if [ $max != $MAX_SCRUBS]; + then + echo "ERROR: Incorrect osd_max_scrubs from dump_scrub_reservations" + return 1 + fi + + ceph osd unset noscrub + + ok=false + for i in $(seq 0 300) + do + ceph pg dump pgs + if ceph pg dump pgs | grep scrubbing; then + ok=true + break + fi + sleep 1 + done + if test $ok = "false"; then + echo "ERROR: Test set-up failed no scrubbing" + return 1 + fi + + local total=0 + local zerocount=0 + local maxzerocount=3 + while(true) + do + pass=0 + for o in $(seq 0 $(expr $OSDS - 1)) + do + CEPH_ARGS='' ceph daemon $(get_asok_path osd.$o) dump_scrub_reservations + scrubs=$(CEPH_ARGS='' ceph daemon $(get_asok_path osd.$o) dump_scrub_reservations | jq '.scrubs_local + .scrubs_remote') + if [ $scrubs -gt $MAX_SCRUBS ]; then + echo "ERROR: More than $MAX_SCRUBS currently reserved" + return 1 + fi + pass=$(expr $pass + $scrubs) + done + if [ $pass = "0" ]; then + zerocount=$(expr $zerocount + 1) + fi + if [ $zerocount -gt $maxzerocount ]; then + break + fi + total=$(expr $total + $pass) + sleep $(expr $SCRUB_SLEEP \* 2) + done + + # Check that there are no more scrubs + for i in $(seq 0 5) + do + if ceph pg dump pgs | grep scrubbing; then + echo "ERROR: Extra scrubs after test completion...not expected" + return 1 + fi + sleep $SCRUB_SLEEP + done + + echo $total total reservations seen + + # Sort of arbitraty number based on PGS * POOLS * POOL_SIZE as the number of total scrub + # reservations that must occur. However, the loop above might see the same reservation more + # than once. 
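+    # [Editor's sketch, not part of the upstream patch] With the values used by
+    # this test (PGS=16, POOLS=3, POOL_SIZE=3) the bound works out to
+    #   16 * 3 * 3 = 144
+    # reservations; re-sampling the same reservation in the loop above only
+    # pushes $total higher, which is harmless because the check below is a
+    # lower bound only.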
+ actual_reservations=$(expr $PGS \* $POOLS \* $POOL_SIZE) + if [ $total -lt $actual_reservations ]; then + echo "ERROR: Unexpectedly low amount of scrub reservations seen during test" + return 1 + fi + + return 0 +} + + +main osd-scrub-dump "$@" + +# Local Variables: +# compile-command: "cd build ; make check && \ +# ../qa/run-standalone.sh osd-scrub-dump.sh" +# End: diff --git a/ceph/qa/standalone/scrub/osd-scrub-repair.sh b/ceph/qa/standalone/scrub/osd-scrub-repair.sh index b62e2c086..3acc0d8b4 100755 --- a/ceph/qa/standalone/scrub/osd-scrub-repair.sh +++ b/ceph/qa/standalone/scrub/osd-scrub-repair.sh @@ -56,7 +56,6 @@ function run() { CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " CEPH_ARGS+="--mon-host=$CEPH_MON " CEPH_ARGS+="--osd-skip-data-digest=false " - CEPH_ARGS+="--osd-objectstore=filestore " export -n CEPH_CLI_TEST_DUP_COMMAND local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} @@ -108,6 +107,86 @@ function TEST_corrupt_and_repair_replicated() { teardown $dir || return 1 } +# +# Allow repair to be scheduled when some recovering is still undergoing on the same OSD +# +function TEST_allow_repair_during_recovery() { + local dir=$1 + local poolname=rbd + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=2 || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 --osd_scrub_during_recovery=false \ + --osd_repair_during_recovery=true \ + --osd_debug_pretend_recovery_active=true || return 1 + run_osd $dir 1 --osd_scrub_during_recovery=false \ + --osd_repair_during_recovery=true \ + --osd_debug_pretend_recovery_active=true || return 1 + create_rbd_pool || return 1 + wait_for_clean || return 1 + + add_something $dir $poolname || return 1 + corrupt_and_repair_one $dir $poolname $(get_not_primary $poolname SOMETHING) || return 1 + + teardown $dir || return 1 +} + +# +# Skip non-repair scrub correctly during recovery +# +function TEST_skip_non_repair_during_recovery() { + local dir=$1 + local poolname=rbd + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=2 || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 --osd_scrub_during_recovery=false \ + --osd_repair_during_recovery=true \ + --osd_debug_pretend_recovery_active=true || return 1 + run_osd $dir 1 --osd_scrub_during_recovery=false \ + --osd_repair_during_recovery=true \ + --osd_debug_pretend_recovery_active=true || return 1 + create_rbd_pool || return 1 + wait_for_clean || return 1 + + add_something $dir $poolname || return 1 + scrub_and_not_schedule $dir $poolname $(get_not_primary $poolname SOMETHING) || return 1 + + teardown $dir || return 1 +} + +function scrub_and_not_schedule() { + local dir=$1 + local poolname=$2 + local osd=$3 + + # + # 1) start a non-repair scrub + # + local pg=$(get_pg $poolname SOMETHING) + local last_scrub=$(get_last_scrub_stamp $pg) + ceph pg scrub $pg + + # + # 2) Assure the scrub is not scheduled + # + for ((i=0; i < 3; i++)); do + if test "$(get_last_scrub_stamp $pg)" '>' "$last_scrub" ; then + return 1 + fi + sleep 1 + done + + # + # 3) Access to the file must OK + # + objectstore_tool $dir $osd SOMETHING list-attrs || return 1 + rados --pool $poolname get SOMETHING $dir/COPY || return 1 + diff $dir/ORIGINAL $dir/COPY || return 1 +} + function corrupt_and_repair_two() { local dir=$1 local poolname=$2 @@ -191,24 +270,6 @@ function corrupt_and_repair_erasure_coded() { } -function create_ec_pool() { - local pool_name=$1 - shift - local allow_overwrites=$1 - shift - - ceph osd erasure-code-profile set myprofile crush-failure-domain=osd "$@" 
|| return 1 - - create_pool "$poolname" 1 1 erasure myprofile || return 1 - - if [ "$allow_overwrites" = "true" ]; then - ceph osd pool set "$poolname" allow_ec_overwrites true || return 1 - fi - - wait_for_clean || return 1 - return 0 -} - function auto_repair_erasure_coded() { local dir=$1 local allow_overwrites=$2 @@ -225,9 +286,9 @@ function auto_repair_erasure_coded() { --osd-scrub-interval-randomize-ratio=0" for id in $(seq 0 2) ; do if [ "$allow_overwrites" = "true" ]; then - run_osd_bluestore $dir $id $ceph_osd_args || return 1 - else run_osd $dir $id $ceph_osd_args || return 1 + else + run_osd_filestore $dir $id $ceph_osd_args || return 1 fi done create_rbd_pool || return 1 @@ -280,7 +341,7 @@ function TEST_auto_repair_bluestore_basic() { --osd_deep_scrub_randomize_ratio=0 \ --osd-scrub-interval-randomize-ratio=0" for id in $(seq 0 2) ; do - run_osd_bluestore $dir $id $ceph_osd_args || return 1 + run_osd $dir $id $ceph_osd_args || return 1 done create_pool $poolname 1 1 || return 1 @@ -329,7 +390,7 @@ function TEST_auto_repair_bluestore_scrub() { --osd_deep_scrub_randomize_ratio=0 \ --osd-scrub-interval-randomize-ratio=0" for id in $(seq 0 2) ; do - run_osd_bluestore $dir $id $ceph_osd_args || return 1 + run_osd $dir $id $ceph_osd_args || return 1 done create_pool $poolname 1 1 || return 1 @@ -384,7 +445,7 @@ function TEST_auto_repair_bluestore_failed() { --osd_deep_scrub_randomize_ratio=0 \ --osd-scrub-interval-randomize-ratio=0" for id in $(seq 0 2) ; do - run_osd_bluestore $dir $id $ceph_osd_args || return 1 + run_osd $dir $id $ceph_osd_args || return 1 done create_pool $poolname 1 1 || return 1 @@ -453,7 +514,7 @@ function TEST_auto_repair_bluestore_failed_norecov() { --osd_deep_scrub_randomize_ratio=0 \ --osd-scrub-interval-randomize-ratio=0" for id in $(seq 0 2) ; do - run_osd_bluestore $dir $id $ceph_osd_args || return 1 + run_osd $dir $id $ceph_osd_args || return 1 done create_pool $poolname 1 1 || return 1 @@ -510,7 +571,7 @@ function TEST_repair_stats() { local ceph_osd_args="--osd_deep_scrub_randomize_ratio=0 \ --osd-scrub-interval-randomize-ratio=0" for id in $(seq 0 $(expr $OSDS - 1)) ; do - run_osd_bluestore $dir $id $ceph_osd_args || return 1 + run_osd $dir $id $ceph_osd_args || return 1 done create_pool $poolname 1 1 || return 1 @@ -539,8 +600,8 @@ function TEST_repair_stats() { OSD=$(expr $i % 2) _objectstore_tool_nodown $dir $OSD obj$i remove || return 1 done - run_osd_bluestore $dir $primary $ceph_osd_args || return 1 - run_osd_bluestore $dir $other $ceph_osd_args || return 1 + run_osd $dir $primary $ceph_osd_args || return 1 + run_osd $dir $other $ceph_osd_args || return 1 wait_for_clean || return 1 repair $pgid @@ -584,7 +645,7 @@ function TEST_repair_stats_ec() { local ceph_osd_args="--osd_deep_scrub_randomize_ratio=0 \ --osd-scrub-interval-randomize-ratio=0" for id in $(seq 0 $(expr $OSDS - 1)) ; do - run_osd_bluestore $dir $id $ceph_osd_args || return 1 + run_osd $dir $id $ceph_osd_args || return 1 done # Create an EC pool @@ -612,8 +673,8 @@ function TEST_repair_stats_ec() { OSD=$(expr $i % 2) _objectstore_tool_nodown $dir $OSD obj$i remove || return 1 done - run_osd_bluestore $dir $primary $ceph_osd_args || return 1 - run_osd_bluestore $dir $other $ceph_osd_args || return 1 + run_osd $dir $primary $ceph_osd_args || return 1 + run_osd $dir $other $ceph_osd_args || return 1 wait_for_clean || return 1 repair $pgid @@ -655,9 +716,9 @@ function corrupt_and_repair_jerasure() { run_mgr $dir x || return 1 for id in $(seq 0 3) ; do if [ "$allow_overwrites" = 
"true" ]; then - run_osd_bluestore $dir $id || return 1 - else run_osd $dir $id || return 1 + else + run_osd_filestore $dir $id || return 1 fi done create_rbd_pool || return 1 @@ -689,9 +750,9 @@ function corrupt_and_repair_lrc() { run_mgr $dir x || return 1 for id in $(seq 0 9) ; do if [ "$allow_overwrites" = "true" ]; then - run_osd_bluestore $dir $id || return 1 - else run_osd $dir $id || return 1 + else + run_osd_filestore $dir $id || return 1 fi done create_rbd_pool || return 1 @@ -724,9 +785,9 @@ function unfound_erasure_coded() { run_mgr $dir x || return 1 for id in $(seq 0 3) ; do if [ "$allow_overwrites" = "true" ]; then - run_osd_bluestore $dir $id || return 1 - else run_osd $dir $id || return 1 + else + run_osd_filestore $dir $id || return 1 fi done @@ -794,9 +855,9 @@ function list_missing_erasure_coded() { run_mgr $dir x || return 1 for id in $(seq 0 2) ; do if [ "$allow_overwrites" = "true" ]; then - run_osd_bluestore $dir $id || return 1 - else run_osd $dir $id || return 1 + else + run_osd_filestore $dir $id || return 1 fi done create_rbd_pool || return 1 @@ -869,7 +930,7 @@ function TEST_list_missing_erasure_coded_overwrites() { function TEST_corrupt_scrub_replicated() { local dir=$1 local poolname=csr_pool - local total_objs=18 + local total_objs=19 setup $dir || return 1 run_mon $dir a --osd_pool_default_size=2 || return 1 @@ -891,6 +952,11 @@ function TEST_corrupt_scrub_replicated() { rados --pool $poolname setomapval $objname key-$objname val-$objname || return 1 done + # Increase file 1 MB + 1KB + dd if=/dev/zero of=$dir/new.ROBJ19 bs=1024 count=1025 + rados --pool $poolname put $objname $dir/new.ROBJ19 || return 1 + rm -f $dir/new.ROBJ19 + local pg=$(get_pg $poolname ROBJ0) local primary=$(get_primary $poolname ROBJ0) @@ -1010,12 +1076,18 @@ function TEST_corrupt_scrub_replicated() { objectstore_tool $dir 1 $objname set-bytes $dir/new.ROBJ18 || return 1 # Make one replica have a different object info, so a full repair must happen too objectstore_tool $dir $osd $objname corrupt-info || return 1 + ;; + + 19) + # Set osd-max-object-size smaller than this object's size esac done local pg=$(get_pg $poolname ROBJ0) + ceph tell osd.\* injectargs -- --osd-max-object-size=1048576 + inject_eio rep data $poolname ROBJ11 $dir 0 || return 1 # shard 0 of [1, 0], osd.1 inject_eio rep mdata $poolname ROBJ12 $dir 1 || return 1 # shard 1 of [1, 0], osd.0 inject_eio rep mdata $poolname ROBJ13 $dir 1 || return 1 # shard 1 of [1, 0], osd.0 @@ -1043,9 +1115,10 @@ function TEST_corrupt_scrub_replicated() { err_strings[15]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:ffdb2004:::ROBJ9:head : object info inconsistent " err_strings[16]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 3:c0c86b1d:::ROBJ14:head : no '_' attr" err_strings[17]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 3:5c7b2c47:::ROBJ16:head : can't decode 'snapset' attr buffer::malformed_input: .* no longer understand old encoding version 3 < 97" - err_strings[18]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub : stat mismatch, got 18/18 objects, 0/0 clones, 17/18 dirty, 17/18 omap, 0/0 pinned, 0/0 hit_set_archive, 0/0 whiteouts, 113/120 bytes, 0/0 manifest objects, 0/0 hit_set_archive bytes." 
- err_strings[19]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub 1 missing, 7 inconsistent objects" - err_strings[20]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub 17 errors" + err_strings[18]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub : stat mismatch, got 19/19 objects, 0/0 clones, 18/19 dirty, 18/19 omap, 0/0 pinned, 0/0 hit_set_archive, 0/0 whiteouts, 1049713/1049720 bytes, 0/0 manifest objects, 0/0 hit_set_archive bytes." + err_strings[19]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub 1 missing, 8 inconsistent objects" + err_strings[20]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub 18 errors" + err_strings[21]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:123a5f55:::ROBJ19:head : size 1049600 > 1048576 is too large" for err_string in "${err_strings[@]}" do @@ -1588,6 +1661,69 @@ function TEST_corrupt_scrub_replicated() { ], "union_shard_errors": [] }, + { + "object": { + "name": "ROBJ19", + "nspace": "", + "locator": "", + "snap": "head", + "version": 58 + }, + "errors": [ + "size_too_large" + ], + "union_shard_errors": [], + "selected_object_info": { + "oid": { + "oid": "ROBJ19", + "key": "", + "snapid": -2, + "hash": 2868534344, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "63'59", + "prior_version": "63'58", + "last_reqid": "osd.1.0:58", + "user_version": 58, + "size": 1049600, + "mtime": "2019-08-09T23:33:58.340709+0000", + "local_mtime": "2019-08-09T23:33:58.345676+0000", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x3dde0ef3", + "omap_digest": "0xbffddd28", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "shards": [ + { + "osd": 0, + "primary": false, + "errors": [], + "size": 1049600 + }, + { + "osd": 1, + "primary": true, + "errors": [], + "size": 1049600 + } + ] + }, { "shards": [ { @@ -1704,7 +1840,7 @@ function TEST_corrupt_scrub_replicated() { "version": "79'66", "prior_version": "79'65", "last_reqid": "client.4554.0:1", - "user_version": 74, + "user_version": 79, "size": 7, "mtime": "", "local_mtime": "", @@ -1756,7 +1892,7 @@ function TEST_corrupt_scrub_replicated() { "version": "95'67", "prior_version": "51'64", "last_reqid": "client.4649.0:1", - "user_version": 75, + "user_version": 80, "size": 1, "mtime": "", "local_mtime": "", @@ -1842,7 +1978,7 @@ function TEST_corrupt_scrub_replicated() { "version": "95'67", "prior_version": "51'64", "last_reqid": "client.4649.0:1", - "user_version": 75, + "user_version": 80, "size": 1, "mtime": "", "local_mtime": "", @@ -1915,6 +2051,10 @@ EOF inject_eio rep mdata $poolname ROBJ12 $dir 1 || return 1 # shard 1 of [1, 0], osd.0 inject_eio rep mdata $poolname ROBJ13 $dir 1 || return 1 # shard 1 of [1, 0], osd.0 inject_eio rep data $poolname ROBJ13 $dir 0 || return 1 # shard 0 of [1, 0], osd.1 + + # ROBJ19 won't error this time + ceph tell osd.\* injectargs -- --osd-max-object-size=134217728 + pg_deep_scrub $pg err_strings=() @@ -1941,7 +2081,7 @@ EOF err_strings[20]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:c0c86b1d:::ROBJ14:head : candidate had a corrupt info" err_strings[21]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:c0c86b1d:::ROBJ14:head : failed to pick suitable object info" err_strings[22]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:ce3f1d6a:::ROBJ1:head : candidate size 9 info 
size 7 mismatch" - err_strings[23]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:ce3f1d6a:::ROBJ1:head : data_digest 0x2d4a11c2 != data_digest 0x2ddbf8f5 from shard 0, data_digest 0x2d4a11c2 != data_digest 0x2ddbf8f5 from auth oi 3:ce3f1d6a:::ROBJ1:head[(][0-9]*'[0-9]* osd.1.0:65 dirty|omap|data_digest|omap_digest s 7 uv 3 dd 2ddbf8f5 od f5fba2c6 alloc_hint [[]0 0 0[]][)], size 9 != size 7 from auth oi 3:ce3f1d6a:::ROBJ1:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 3 dd 2ddbf8f5 od f5fba2c6 alloc_hint [[]0 0 0[]][)], size 9 != size 7 from shard 0" + err_strings[23]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:ce3f1d6a:::ROBJ1:head : data_digest 0x2d4a11c2 != data_digest 0x2ddbf8f5 from shard 0, data_digest 0x2d4a11c2 != data_digest 0x2ddbf8f5 from auth oi 3:ce3f1d6a:::ROBJ1:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 3 dd 2ddbf8f5 od f5fba2c6 alloc_hint [[]0 0 0[]][)], size 9 != size 7 from auth oi 3:ce3f1d6a:::ROBJ1:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 3 dd 2ddbf8f5 od f5fba2c6 alloc_hint [[]0 0 0[]][)], size 9 != size 7 from shard 0" err_strings[24]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:d60617f9:::ROBJ13:head : candidate had a read error" err_strings[25]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:d60617f9:::ROBJ13:head : candidate had a stat error" err_strings[26]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:d60617f9:::ROBJ13:head : failed to pick suitable object info" @@ -1954,7 +2094,7 @@ EOF err_strings[33]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:ffdb2004:::ROBJ9:head : object info inconsistent " err_strings[34]="log_channel[(]cluster[)] log [[]ERR[]] : deep-scrub [0-9]*[.]0 3:c0c86b1d:::ROBJ14:head : no '_' attr" err_strings[35]="log_channel[(]cluster[)] log [[]ERR[]] : deep-scrub [0-9]*[.]0 3:5c7b2c47:::ROBJ16:head : can't decode 'snapset' attr buffer::malformed_input: .* no longer understand old encoding version 3 < 97" - err_strings[36]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 deep-scrub : stat mismatch, got 18/18 objects, 0/0 clones, 17/18 dirty, 17/18 omap, 0/0 pinned, 0/0 hit_set_archive, 0/0 whiteouts, 115/116 bytes, 0/0 manifest objects, 0/0 hit_set_archive bytes." + err_strings[36]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 deep-scrub : stat mismatch, got 19/19 objects, 0/0 clones, 18/19 dirty, 18/19 omap, 0/0 pinned, 0/0 hit_set_archive, 0/0 whiteouts, 1049715/1049716 bytes, 0/0 manifest objects, 0/0 hit_set_archive bytes." 
err_strings[37]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 deep-scrub 1 missing, 11 inconsistent objects" err_strings[38]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 deep-scrub 35 errors" @@ -3172,7 +3312,7 @@ EOF "version": "79'66", "prior_version": "79'65", "last_reqid": "client.4554.0:1", - "user_version": 74, + "user_version": 79, "size": 7, "mtime": "2018-04-05 14:34:05.598688", "local_mtime": "2018-04-05 14:34:05.599698", @@ -3270,7 +3410,7 @@ EOF "version": "119'68", "prior_version": "51'64", "last_reqid": "client.4834.0:1", - "user_version": 76, + "user_version": 81, "size": 3, "mtime": "2018-04-05 14:35:01.500659", "local_mtime": "2018-04-05 14:35:01.502117", @@ -3314,7 +3454,7 @@ EOF "version": "119'68", "prior_version": "51'64", "last_reqid": "client.4834.0:1", - "user_version": 76, + "user_version": 81, "size": 3, "mtime": "2018-04-05 14:35:01.500659", "local_mtime": "2018-04-05 14:35:01.502117", @@ -3405,9 +3545,9 @@ function corrupt_scrub_erasure() { run_mgr $dir x || return 1 for id in $(seq 0 2) ; do if [ "$allow_overwrites" = "true" ]; then - run_osd_bluestore $dir $id || return 1 - else run_osd $dir $id || return 1 + else + run_osd_filestore $dir $id || return 1 fi done create_rbd_pool || return 1 diff --git a/ceph/qa/standalone/scrub/osd-scrub-snaps.sh b/ceph/qa/standalone/scrub/osd-scrub-snaps.sh index 8fd02e6f5..5f3bb6549 100755 --- a/ceph/qa/standalone/scrub/osd-scrub-snaps.sh +++ b/ceph/qa/standalone/scrub/osd-scrub-snaps.sh @@ -30,7 +30,7 @@ function run() { export CEPH_MON="127.0.0.1:7121" # git grep '\<7121\>' : there must be only one export CEPH_ARGS CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " - CEPH_ARGS+="--mon-host=$CEPH_MON --osd-objectstore=filestore" + CEPH_ARGS+="--mon-host=$CEPH_MON " export -n CEPH_CLI_TEST_DUP_COMMAND local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} @@ -100,12 +100,12 @@ function create_scenario() { JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --op list obj5 | grep \"snapid\":1)" OBJ5SAVE="$JSON" # Starts with a snapmap - ceph-osdomap-tool --no-mon-config --omap-path $dir/${osd}/current/omap --command dump-raw-keys > $dir/drk.log - grep "_USER_[0-9]*_USER_,MAP_.*[.]1[.]obj5[.][.]" $dir/drk.log || return 1 + ceph-kvstore-tool bluestore-kv $dir/${osd} list 2> /dev/null > $dir/drk.log + grep "^M.*MAP_.*[.]1[.]obj5[.][.]$" $dir/drk.log || return 1 ceph-objectstore-tool --data-path $dir/${osd} --rmtype nosnapmap "$JSON" remove || return 1 # Check that snapmap is stil there - ceph-osdomap-tool --no-mon-config --omap-path $dir/${osd}/current/omap --command dump-raw-keys > $dir/drk.log - grep "_USER_[0-9]*_USER_,MAP_.*[.]1[.]obj5[.][.]" $dir/drk.log || return 1 + ceph-kvstore-tool bluestore-kv $dir/${osd} list 2> /dev/null > $dir/drk.log + grep "^M.*MAP_.*[.]1[.]obj5[.][.]$" $dir/drk.log || return 1 rm -f $dir/drk.log JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --op list obj5 | grep \"snapid\":4)" @@ -120,13 +120,13 @@ function create_scenario() { ceph-objectstore-tool --data-path $dir/${osd} "$JSON" remove || return 1 # Starts with a snapmap - ceph-osdomap-tool --no-mon-config --omap-path $dir/${osd}/current/omap --command dump-raw-keys > $dir/drk.log - grep "_USER_[0-9]*_USER_,MAP_.*[.]7[.]obj16[.][.]" $dir/drk.log || return 1 + ceph-kvstore-tool bluestore-kv $dir/${osd} list 2> /dev/null > $dir/drk.log + grep "^M.*MAP_.*[.]7[.]obj16[.][.]$" $dir/drk.log || return 1 JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --op list obj16 | grep \"snapid\":7)" 
ceph-objectstore-tool --data-path $dir/${osd} --rmtype snapmap "$JSON" remove || return 1 # Check that snapmap is now removed - ceph-osdomap-tool --no-mon-config --omap-path $dir/${osd}/current/omap --command dump-raw-keys > $dir/drk.log - ! grep "_USER_[0-9]*_USER_,MAP_.*[.]7[.]obj16[.][.]" $dir/drk.log || return 1 + ceph-kvstore-tool bluestore-kv $dir/${osd} list 2> /dev/null > $dir/drk.log + ! grep "^M.*MAP_.*[.]7[.]obj16[.][.]$" $dir/drk.log || return 1 rm -f $dir/drk.log JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj2)" diff --git a/ceph/qa/standalone/special/ceph_objectstore_tool.py b/ceph/qa/standalone/special/ceph_objectstore_tool.py index 15ad54e61..b058c247c 100755 --- a/ceph/qa/standalone/special/ceph_objectstore_tool.py +++ b/ceph/qa/standalone/special/ceph_objectstore_tool.py @@ -604,6 +604,7 @@ def test_removeall(CFSD_PREFIX, db, OBJREPPGS, REP_POOL, CEPH_BIN, OSDDIR, REP_N errors=0 print("Test removeall") kill_daemons() + test_force_remove = 0 for nspace in db.keys(): for basename in db[nspace].keys(): JSON = db[nspace][basename]['json'] @@ -619,6 +620,25 @@ def test_removeall(CFSD_PREFIX, db, OBJREPPGS, REP_POOL, CEPH_BIN, OSDDIR, REP_N if int(basename.split(REP_NAME)[1]) <= int(NUM_CLONED_REP_OBJECTS): cmd = (CFSD_PREFIX + "'{json}' remove").format(osd=osd, json=JSON) errors += test_failure(cmd, "Snapshots are present, use removeall to delete everything") + if not test_force_remove: + + cmd = (CFSD_PREFIX + " '{json}' set-attr snapset /dev/null").format(osd=osd, json=JSON) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + if ret != 0: + logging.error("Test set-up to corrupt snapset failed for {json}".format(json=JSON)) + errors += 1 + # Do the removeall since this test failed to set-up + else: + test_force_remove = 1 + + cmd = (CFSD_PREFIX + " '{json}' --force remove").format(osd=osd, json=JSON) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + if ret != 0: + logging.error("forced remove with corrupt snapset failed for {json}".format(json=JSON)) + errors += 1 + continue cmd = (CFSD_PREFIX + " --force --dry-run '{json}' remove").format(osd=osd, json=JSON) logging.debug(cmd) diff --git a/ceph/qa/suites/fs/basic_functional/tasks/client-recovery.yaml b/ceph/qa/suites/fs/basic_functional/tasks/client-recovery.yaml index a2f56299b..d1cef8025 100644 --- a/ceph/qa/suites/fs/basic_functional/tasks/client-recovery.yaml +++ b/ceph/qa/suites/fs/basic_functional/tasks/client-recovery.yaml @@ -7,6 +7,8 @@ overrides: - evicting unresponsive client - but it is still running - slow request + - MDS_CLIENT_LATE_RELEASE + - t responding to mclientcaps tasks: - cephfs_test_runner: diff --git a/ceph/qa/suites/fs/bugs/conf b/ceph/qa/suites/fs/bugs/client_trim_caps/conf similarity index 100% rename from ceph/qa/suites/fs/bugs/conf rename to ceph/qa/suites/fs/bugs/client_trim_caps/conf diff --git a/ceph/qa/suites/fs/thrash/msgr-failures/osd-mds-delay.yaml b/ceph/qa/suites/fs/thrash/msgr-failures/osd-mds-delay.yaml index 4dc0086e6..688029619 100644 --- a/ceph/qa/suites/fs/thrash/msgr-failures/osd-mds-delay.yaml +++ b/ceph/qa/suites/fs/thrash/msgr-failures/osd-mds-delay.yaml @@ -6,3 +6,5 @@ overrides: ms inject delay type: osd mds ms inject delay probability: .005 ms inject delay max: 1 + log-whitelist: + - \(OSD_SLOW_PING_TIME diff --git a/ceph/qa/suites/fs/upgrade/featureful_client/old_client/tasks/2-upgrade.yaml b/ceph/qa/suites/fs/upgrade/featureful_client/old_client/tasks/2-upgrade.yaml index 
a02c48ecd..77836536e 100644 --- a/ceph/qa/suites/fs/upgrade/featureful_client/old_client/tasks/2-upgrade.yaml +++ b/ceph/qa/suites/fs/upgrade/featureful_client/old_client/tasks/2-upgrade.yaml @@ -1,8 +1,20 @@ overrides: ceph: + mon_bind_msgr2: false + mon_bind_addrvec: false + log-whitelist: + - scrub mismatch + - ScrubResult + - wrongly marked + - \(POOL_APP_NOT_ENABLED\) + - \(SLOW_OPS\) + - overall HEALTH_ + - \(MON_MSGR2_NOT_ENABLED\) conf: global: bluestore warn on legacy statfs: false + mon: + mon warn on osd down out interval zero: false tasks: - mds_pre_upgrade: @@ -11,8 +23,32 @@ tasks: mon.a: mon.b: - print: "**** done install.upgrade both hosts" -- ceph.stop: [mds.*] - ceph.restart: - daemons: [mon.*, mgr.*, osd.*, mds.*] + daemons: [mon.*, mgr.*] mon-health-to-clog: false + wait-for-healthy: false +- exec: + mon.a: + - ceph config set global mon_warn_on_msgr2_not_enabled false +- ceph.healthy: +- ceph.restart: + daemons: [osd.*] + wait-for-healthy: false + wait-for-osds-up: true +- ceph.stop: [mds.*] +- ceph.restart: + daemons: [mds.*] + wait-for-healthy: false + wait-for-osds-up: true +- exec: + mon.a: + - ceph mon enable-msgr2 + - ceph config rm global mon_warn_on_msgr2_not_enabled +- exec: + mon.a: + - ceph osd dump -f json-pretty + - ceph versions + - ceph osd require-osd-release nautilus + #- ceph osd set-require-min-compat-client nautilus +- ceph.healthy: - print: "**** done ceph.restart" diff --git a/ceph/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/2-upgrade.yaml b/ceph/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/2-upgrade.yaml index a02c48ecd..77836536e 100644 --- a/ceph/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/2-upgrade.yaml +++ b/ceph/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/2-upgrade.yaml @@ -1,8 +1,20 @@ overrides: ceph: + mon_bind_msgr2: false + mon_bind_addrvec: false + log-whitelist: + - scrub mismatch + - ScrubResult + - wrongly marked + - \(POOL_APP_NOT_ENABLED\) + - \(SLOW_OPS\) + - overall HEALTH_ + - \(MON_MSGR2_NOT_ENABLED\) conf: global: bluestore warn on legacy statfs: false + mon: + mon warn on osd down out interval zero: false tasks: - mds_pre_upgrade: @@ -11,8 +23,32 @@ tasks: mon.a: mon.b: - print: "**** done install.upgrade both hosts" -- ceph.stop: [mds.*] - ceph.restart: - daemons: [mon.*, mgr.*, osd.*, mds.*] + daemons: [mon.*, mgr.*] mon-health-to-clog: false + wait-for-healthy: false +- exec: + mon.a: + - ceph config set global mon_warn_on_msgr2_not_enabled false +- ceph.healthy: +- ceph.restart: + daemons: [osd.*] + wait-for-healthy: false + wait-for-osds-up: true +- ceph.stop: [mds.*] +- ceph.restart: + daemons: [mds.*] + wait-for-healthy: false + wait-for-osds-up: true +- exec: + mon.a: + - ceph mon enable-msgr2 + - ceph config rm global mon_warn_on_msgr2_not_enabled +- exec: + mon.a: + - ceph osd dump -f json-pretty + - ceph versions + - ceph osd require-osd-release nautilus + #- ceph osd set-require-min-compat-client nautilus +- ceph.healthy: - print: "**** done ceph.restart" diff --git a/ceph/qa/suites/krbd/fsx/striping/default/msgr-failures/few.yaml b/ceph/qa/suites/krbd/fsx/striping/default/msgr-failures/few.yaml index 0de320d46..4326fe23e 100644 --- a/ceph/qa/suites/krbd/fsx/striping/default/msgr-failures/few.yaml +++ b/ceph/qa/suites/krbd/fsx/striping/default/msgr-failures/few.yaml @@ -3,3 +3,5 @@ overrides: conf: global: ms inject socket failures: 5000 + log-whitelist: + - \(OSD_SLOW_PING_TIME diff --git 
a/ceph/qa/suites/krbd/fsx/striping/default/msgr-failures/many.yaml b/ceph/qa/suites/krbd/fsx/striping/default/msgr-failures/many.yaml index 86f8dde8a..4caedaebd 100644 --- a/ceph/qa/suites/krbd/fsx/striping/default/msgr-failures/many.yaml +++ b/ceph/qa/suites/krbd/fsx/striping/default/msgr-failures/many.yaml @@ -3,3 +3,5 @@ overrides: conf: global: ms inject socket failures: 500 + log-whitelist: + - \(OSD_SLOW_PING_TIME diff --git a/ceph/qa/suites/krbd/fsx/striping/fancy/msgr-failures/few.yaml b/ceph/qa/suites/krbd/fsx/striping/fancy/msgr-failures/few.yaml index 0de320d46..4326fe23e 100644 --- a/ceph/qa/suites/krbd/fsx/striping/fancy/msgr-failures/few.yaml +++ b/ceph/qa/suites/krbd/fsx/striping/fancy/msgr-failures/few.yaml @@ -3,3 +3,5 @@ overrides: conf: global: ms inject socket failures: 5000 + log-whitelist: + - \(OSD_SLOW_PING_TIME diff --git a/ceph/qa/suites/krbd/rbd-nomount/msgr-failures/few.yaml b/ceph/qa/suites/krbd/rbd-nomount/msgr-failures/few.yaml index 0de320d46..4326fe23e 100644 --- a/ceph/qa/suites/krbd/rbd-nomount/msgr-failures/few.yaml +++ b/ceph/qa/suites/krbd/rbd-nomount/msgr-failures/few.yaml @@ -3,3 +3,5 @@ overrides: conf: global: ms inject socket failures: 5000 + log-whitelist: + - \(OSD_SLOW_PING_TIME diff --git a/ceph/qa/suites/krbd/rbd-nomount/msgr-failures/many.yaml b/ceph/qa/suites/krbd/rbd-nomount/msgr-failures/many.yaml index 86f8dde8a..4caedaebd 100644 --- a/ceph/qa/suites/krbd/rbd-nomount/msgr-failures/many.yaml +++ b/ceph/qa/suites/krbd/rbd-nomount/msgr-failures/many.yaml @@ -3,3 +3,5 @@ overrides: conf: global: ms inject socket failures: 500 + log-whitelist: + - \(OSD_SLOW_PING_TIME diff --git a/ceph/qa/suites/krbd/rbd-nomount/tasks/krbd_udev_enumerate.yaml b/ceph/qa/suites/krbd/rbd-nomount/tasks/krbd_udev_enumerate.yaml new file mode 100644 index 000000000..c326507ac --- /dev/null +++ b/ceph/qa/suites/krbd/rbd-nomount/tasks/krbd_udev_enumerate.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - rbd/krbd_udev_enumerate.sh diff --git a/ceph/qa/suites/krbd/rbd-nomount/tasks/krbd_udev_netlink_enobufs.yaml b/ceph/qa/suites/krbd/rbd-nomount/tasks/krbd_udev_netlink_enobufs.yaml new file mode 100644 index 000000000..b0530d52c --- /dev/null +++ b/ceph/qa/suites/krbd/rbd-nomount/tasks/krbd_udev_netlink_enobufs.yaml @@ -0,0 +1,10 @@ +overrides: + ceph: + log-whitelist: + - pauserd,pausewr flag\(s\) set + +tasks: +- workunit: + clients: + all: + - rbd/krbd_udev_netlink_enobufs.sh diff --git a/ceph/qa/suites/krbd/rbd/msgr-failures/few.yaml b/ceph/qa/suites/krbd/rbd/msgr-failures/few.yaml index 0de320d46..4326fe23e 100644 --- a/ceph/qa/suites/krbd/rbd/msgr-failures/few.yaml +++ b/ceph/qa/suites/krbd/rbd/msgr-failures/few.yaml @@ -3,3 +3,5 @@ overrides: conf: global: ms inject socket failures: 5000 + log-whitelist: + - \(OSD_SLOW_PING_TIME diff --git a/ceph/qa/suites/krbd/rbd/msgr-failures/many.yaml b/ceph/qa/suites/krbd/rbd/msgr-failures/many.yaml index 86f8dde8a..4caedaebd 100644 --- a/ceph/qa/suites/krbd/rbd/msgr-failures/many.yaml +++ b/ceph/qa/suites/krbd/rbd/msgr-failures/many.yaml @@ -3,3 +3,5 @@ overrides: conf: global: ms inject socket failures: 500 + log-whitelist: + - \(OSD_SLOW_PING_TIME diff --git a/ceph/qa/suites/krbd/singleton/msgr-failures/few.yaml b/ceph/qa/suites/krbd/singleton/msgr-failures/few.yaml index 0de320d46..4326fe23e 100644 --- a/ceph/qa/suites/krbd/singleton/msgr-failures/few.yaml +++ b/ceph/qa/suites/krbd/singleton/msgr-failures/few.yaml @@ -3,3 +3,5 @@ overrides: conf: global: ms inject socket failures: 5000 + log-whitelist: + - 
\(OSD_SLOW_PING_TIME diff --git a/ceph/qa/suites/krbd/singleton/msgr-failures/many.yaml b/ceph/qa/suites/krbd/singleton/msgr-failures/many.yaml index 86f8dde8a..4caedaebd 100644 --- a/ceph/qa/suites/krbd/singleton/msgr-failures/many.yaml +++ b/ceph/qa/suites/krbd/singleton/msgr-failures/many.yaml @@ -3,3 +3,5 @@ overrides: conf: global: ms inject socket failures: 500 + log-whitelist: + - \(OSD_SLOW_PING_TIME diff --git a/ceph/qa/suites/krbd/unmap/ceph/ceph.yaml b/ceph/qa/suites/krbd/unmap/ceph/ceph.yaml index c58aaca48..aee5779fa 100644 --- a/ceph/qa/suites/krbd/unmap/ceph/ceph.yaml +++ b/ceph/qa/suites/krbd/unmap/ceph/ceph.yaml @@ -1,6 +1,11 @@ overrides: ceph: crush_tunables: bobtail + mon_bind_addrvec: false + mon_bind_msgr2: false + conf: + global: + ms bind msgr2: false tasks: - install: - ceph: diff --git a/ceph/qa/suites/krbd/wac/wac/verify/many-resets.yaml b/ceph/qa/suites/krbd/wac/wac/verify/many-resets.yaml index 526897e9c..d69f65031 100644 --- a/ceph/qa/suites/krbd/wac/wac/verify/many-resets.yaml +++ b/ceph/qa/suites/krbd/wac/wac/verify/many-resets.yaml @@ -3,6 +3,8 @@ overrides: conf: global: ms inject socket failures: 500 + log-whitelist: + - \(OSD_SLOW_PING_TIME tasks: - exec: client.0: diff --git a/ceph/qa/suites/rados/basic/msgr-failures/few.yaml b/ceph/qa/suites/rados/basic/msgr-failures/few.yaml index 0de320d46..4326fe23e 100644 --- a/ceph/qa/suites/rados/basic/msgr-failures/few.yaml +++ b/ceph/qa/suites/rados/basic/msgr-failures/few.yaml @@ -3,3 +3,5 @@ overrides: conf: global: ms inject socket failures: 5000 + log-whitelist: + - \(OSD_SLOW_PING_TIME diff --git a/ceph/qa/suites/rados/basic/msgr-failures/many.yaml b/ceph/qa/suites/rados/basic/msgr-failures/many.yaml index 038c3a799..f4bb065b5 100644 --- a/ceph/qa/suites/rados/basic/msgr-failures/many.yaml +++ b/ceph/qa/suites/rados/basic/msgr-failures/many.yaml @@ -3,3 +3,5 @@ overrides: conf: global: ms inject socket failures: 1500 + log-whitelist: + - \(OSD_SLOW_PING_TIME diff --git a/ceph/qa/suites/rados/dashboard/% b/ceph/qa/suites/rados/dashboard/% new file mode 100644 index 000000000..e69de29bb diff --git a/ceph/qa/suites/rados/dashboard/.qa b/ceph/qa/suites/rados/dashboard/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/ceph/qa/suites/rados/dashboard/.qa @@ -0,0 +1 @@ +../.qa/ \ No newline at end of file diff --git a/ceph/qa/suites/rados/dashboard/clusters/+ b/ceph/qa/suites/rados/dashboard/clusters/+ new file mode 100644 index 000000000..e69de29bb diff --git a/ceph/qa/suites/rados/dashboard/clusters/.qa b/ceph/qa/suites/rados/dashboard/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/ceph/qa/suites/rados/dashboard/clusters/.qa @@ -0,0 +1 @@ +../.qa/ \ No newline at end of file diff --git a/ceph/qa/suites/rados/dashboard/clusters/2-node-mgr.yaml b/ceph/qa/suites/rados/dashboard/clusters/2-node-mgr.yaml new file mode 120000 index 000000000..8a0b9123b --- /dev/null +++ b/ceph/qa/suites/rados/dashboard/clusters/2-node-mgr.yaml @@ -0,0 +1 @@ +.qa/clusters/2-node-mgr.yaml \ No newline at end of file diff --git a/ceph/qa/suites/rados/dashboard/debug/.qa b/ceph/qa/suites/rados/dashboard/debug/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/ceph/qa/suites/rados/dashboard/debug/.qa @@ -0,0 +1 @@ +../.qa/ \ No newline at end of file diff --git a/ceph/qa/suites/rados/dashboard/debug/mgr.yaml b/ceph/qa/suites/rados/dashboard/debug/mgr.yaml new file mode 120000 index 000000000..651e5f8a8 --- /dev/null +++ 
b/ceph/qa/suites/rados/dashboard/debug/mgr.yaml @@ -0,0 +1 @@ +.qa/debug/mgr.yaml \ No newline at end of file diff --git a/ceph/qa/suites/rados/dashboard/objectstore b/ceph/qa/suites/rados/dashboard/objectstore new file mode 120000 index 000000000..c40bd3261 --- /dev/null +++ b/ceph/qa/suites/rados/dashboard/objectstore @@ -0,0 +1 @@ +.qa/objectstore \ No newline at end of file diff --git a/ceph/qa/suites/rados/dashboard/supported-random-distro$ b/ceph/qa/suites/rados/dashboard/supported-random-distro$ new file mode 120000 index 000000000..7cef21eef --- /dev/null +++ b/ceph/qa/suites/rados/dashboard/supported-random-distro$ @@ -0,0 +1 @@ +../basic/supported-random-distro$ \ No newline at end of file diff --git a/ceph/qa/suites/rados/dashboard/tasks/.qa b/ceph/qa/suites/rados/dashboard/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/ceph/qa/suites/rados/dashboard/tasks/.qa @@ -0,0 +1 @@ +../.qa/ \ No newline at end of file diff --git a/ceph/qa/suites/rados/mgr/tasks/dashboard.yaml b/ceph/qa/suites/rados/dashboard/tasks/dashboard.yaml similarity index 97% rename from ceph/qa/suites/rados/mgr/tasks/dashboard.yaml rename to ceph/qa/suites/rados/dashboard/tasks/dashboard.yaml index 76ce83d18..ad6adc7c8 100644 --- a/ceph/qa/suites/rados/mgr/tasks/dashboard.yaml +++ b/ceph/qa/suites/rados/dashboard/tasks/dashboard.yaml @@ -23,6 +23,7 @@ tasks: - \(POOL_APP_NOT_ENABLED\) - pauserd,pausewr flag\(s\) set - Monitor daemon marked osd\.[[:digit:]]+ down, but it is still running + - evicting unresponsive client .+ - rgw: [client.0] - cephfs_test_runner: fail_on_skip: false diff --git a/ceph/qa/suites/rados/mgr/clusters/2-node-mgr.yaml b/ceph/qa/suites/rados/mgr/clusters/2-node-mgr.yaml deleted file mode 100644 index 906c59707..000000000 --- a/ceph/qa/suites/rados/mgr/clusters/2-node-mgr.yaml +++ /dev/null @@ -1,6 +0,0 @@ -roles: -- [mgr.x, mon.a, mon.c, mds.a, mds.c, osd.0, client.0] -- [mgr.y, mgr.z, mon.b, mds.b, osd.1, osd.2, osd.3, client.1] -log-rotate: - ceph-mds: 10G - ceph-osd: 10G diff --git a/ceph/qa/suites/rados/mgr/clusters/2-node-mgr.yaml b/ceph/qa/suites/rados/mgr/clusters/2-node-mgr.yaml new file mode 120000 index 000000000..8a0b9123b --- /dev/null +++ b/ceph/qa/suites/rados/mgr/clusters/2-node-mgr.yaml @@ -0,0 +1 @@ +.qa/clusters/2-node-mgr.yaml \ No newline at end of file diff --git a/ceph/qa/suites/rados/mgr/clusters/openstack.yaml b/ceph/qa/suites/rados/mgr/clusters/openstack.yaml deleted file mode 100644 index 21eca2bbd..000000000 --- a/ceph/qa/suites/rados/mgr/clusters/openstack.yaml +++ /dev/null @@ -1,4 +0,0 @@ -openstack: - - volumes: # attached to each instance - count: 2 - size: 30 # GB diff --git a/ceph/qa/suites/rados/mgr/debug/mgr.yaml b/ceph/qa/suites/rados/mgr/debug/mgr.yaml deleted file mode 100644 index 068021eb6..000000000 --- a/ceph/qa/suites/rados/mgr/debug/mgr.yaml +++ /dev/null @@ -1,16 +0,0 @@ -overrides: - ceph: - conf: - mon: - debug mon: 20 - mgr: - debug mgr: 20 - debug ms: 1 - client: - debug client: 20 - debug mgrc: 20 - debug ms: 1 - osd: - debug mgrc: 20 - mds: - debug mgrc: 20 diff --git a/ceph/qa/suites/rados/mgr/debug/mgr.yaml b/ceph/qa/suites/rados/mgr/debug/mgr.yaml new file mode 120000 index 000000000..651e5f8a8 --- /dev/null +++ b/ceph/qa/suites/rados/mgr/debug/mgr.yaml @@ -0,0 +1 @@ +.qa/debug/mgr.yaml \ No newline at end of file diff --git a/ceph/qa/suites/rados/mgr/tasks/crash.yaml b/ceph/qa/suites/rados/mgr/tasks/crash.yaml index 77183c74f..7b4c44460 100644 --- a/ceph/qa/suites/rados/mgr/tasks/crash.yaml +++ 
b/ceph/qa/suites/rados/mgr/tasks/crash.yaml @@ -9,6 +9,7 @@ tasks: - overall HEALTH_ - \(MGR_DOWN\) - \(PG_ + - \(RECENT_CRASH\) - replacing it with standby - No standby daemons available - cephfs_test_runner: diff --git a/ceph/qa/suites/rados/mgr/tasks/insights.yaml b/ceph/qa/suites/rados/mgr/tasks/insights.yaml index 2d5ccb54b..521606656 100644 --- a/ceph/qa/suites/rados/mgr/tasks/insights.yaml +++ b/ceph/qa/suites/rados/mgr/tasks/insights.yaml @@ -11,6 +11,7 @@ tasks: - \(MGR_INSIGHTS_WARNING\) - \(insights_health_check - \(PG_ + - \(RECENT_CRASH\) - replacing it with standby - No standby daemons available - cephfs_test_runner: diff --git a/ceph/qa/suites/rados/mgr/tasks/module_selftest.yaml b/ceph/qa/suites/rados/mgr/tasks/module_selftest.yaml index 17fa6b395..bdfb6abf6 100644 --- a/ceph/qa/suites/rados/mgr/tasks/module_selftest.yaml +++ b/ceph/qa/suites/rados/mgr/tasks/module_selftest.yaml @@ -18,6 +18,7 @@ tasks: - influxdb python module not found - \(MGR_ZABBIX_ - foo bar + - evicting unresponsive client - cephfs_test_runner: modules: - tasks.mgr.test_module_selftest diff --git a/ceph/qa/suites/rados/monthrash/msgr-failures/few.yaml b/ceph/qa/suites/rados/monthrash/msgr-failures/few.yaml index 0de320d46..4326fe23e 100644 --- a/ceph/qa/suites/rados/monthrash/msgr-failures/few.yaml +++ b/ceph/qa/suites/rados/monthrash/msgr-failures/few.yaml @@ -3,3 +3,5 @@ overrides: conf: global: ms inject socket failures: 5000 + log-whitelist: + - \(OSD_SLOW_PING_TIME diff --git a/ceph/qa/suites/rados/monthrash/msgr-failures/mon-delay.yaml b/ceph/qa/suites/rados/monthrash/msgr-failures/mon-delay.yaml index da25b7a09..fcd8ca7c2 100644 --- a/ceph/qa/suites/rados/monthrash/msgr-failures/mon-delay.yaml +++ b/ceph/qa/suites/rados/monthrash/msgr-failures/mon-delay.yaml @@ -9,3 +9,5 @@ overrides: ms inject internal delays: .002 mgr: debug monc: 10 + log-whitelist: + - \(OSD_SLOW_PING_TIME diff --git a/ceph/qa/suites/rados/multimon/msgr-failures/few.yaml b/ceph/qa/suites/rados/multimon/msgr-failures/few.yaml index 0de320d46..4326fe23e 100644 --- a/ceph/qa/suites/rados/multimon/msgr-failures/few.yaml +++ b/ceph/qa/suites/rados/multimon/msgr-failures/few.yaml @@ -3,3 +3,5 @@ overrides: conf: global: ms inject socket failures: 5000 + log-whitelist: + - \(OSD_SLOW_PING_TIME diff --git a/ceph/qa/suites/rados/multimon/msgr-failures/many.yaml b/ceph/qa/suites/rados/multimon/msgr-failures/many.yaml index 54e6d7f2b..ffeb5f685 100644 --- a/ceph/qa/suites/rados/multimon/msgr-failures/many.yaml +++ b/ceph/qa/suites/rados/multimon/msgr-failures/many.yaml @@ -4,3 +4,5 @@ overrides: global: ms inject socket failures: 1000 mon mgr beacon grace: 90 + log-whitelist: + - \(OSD_SLOW_PING_TIME diff --git a/ceph/qa/suites/rados/singleton-bluestore/msgr-failures/few.yaml b/ceph/qa/suites/rados/singleton-bluestore/msgr-failures/few.yaml index 0de320d46..4326fe23e 100644 --- a/ceph/qa/suites/rados/singleton-bluestore/msgr-failures/few.yaml +++ b/ceph/qa/suites/rados/singleton-bluestore/msgr-failures/few.yaml @@ -3,3 +3,5 @@ overrides: conf: global: ms inject socket failures: 5000 + log-whitelist: + - \(OSD_SLOW_PING_TIME diff --git a/ceph/qa/suites/rados/singleton-bluestore/msgr-failures/many.yaml b/ceph/qa/suites/rados/singleton-bluestore/msgr-failures/many.yaml index 5f67fcea9..59ca5c0f0 100644 --- a/ceph/qa/suites/rados/singleton-bluestore/msgr-failures/many.yaml +++ b/ceph/qa/suites/rados/singleton-bluestore/msgr-failures/many.yaml @@ -3,3 +3,5 @@ overrides: conf: global: ms inject socket failures: 1000 + log-whitelist: + - 
\(OSD_SLOW_PING_TIME diff --git a/ceph/qa/suites/rados/singleton-nomsgr/all/balancer.yaml b/ceph/qa/suites/rados/singleton-nomsgr/all/balancer.yaml new file mode 100644 index 000000000..754105082 --- /dev/null +++ b/ceph/qa/suites/rados/singleton-nomsgr/all/balancer.yaml @@ -0,0 +1,10 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, osd.2, client.0] +tasks: +- install: +- ceph: + fs: xfs +- cram: + clients: + client.0: + - src/test/cli-integration/balancer/misplaced.t diff --git a/ceph/qa/suites/rados/singleton/all/mon-memory-target-compliance.yaml.disabled b/ceph/qa/suites/rados/singleton/all/mon-memory-target-compliance.yaml.disabled new file mode 100644 index 000000000..56de322eb --- /dev/null +++ b/ceph/qa/suites/rados/singleton/all/mon-memory-target-compliance.yaml.disabled @@ -0,0 +1,152 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - osd.3 + - osd.4 + - osd.5 + - osd.6 + - osd.7 + - osd.8 + - osd.9 + - osd.10 + - osd.11 + - osd.12 + - osd.13 + - osd.14 + - client.0 +openstack: + - volumes: # attached to each instance + count: 4 + size: 1 # GB +overrides: + ceph: + conf: + mon: + mon memory target: 134217728 # reduced to 128_M + rocksdb cache size: 67108864 # reduced to 64_M + mon osd cache size: 100000 + mon osd cache size min: 134217728 + osd: + osd memory target: 1610612736 # reduced to 1.5_G + osd objectstore: bluestore + debug bluestore: 20 + osd scrub min interval: 60 + osd scrub max interval: 120 + osd max backfills: 9 + +tasks: +- install: + branch: wip-sseshasa2-testing-2019-07-30-1825 # change as appropriate +- ceph: + create_rbd_pool: false + log-whitelist: + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(POOL_ + - \(CACHE_POOL_ + - \(OBJECT_ + - \(SLOW_OPS\) + - \(REQUEST_SLOW\) + - \(TOO_FEW_PGS\) + - slow requests +- interactive: +- parallel: + - log-mon-rss + - stress-tasks + - benchload +- exec: + client.0: + - "ceph_test_mon_memory_target 134217728" # mon memory target + - "ceph_test_mon_rss_usage 134217728" +log-mon-rss: +- background_exec: + client.0: + - while true + - do /usr/bin/ceph_test_log_rss_usage ceph-mon >> /var/log/ceph/ceph-mon-rss-usage.log + - sleep 300 # log rss usage every 5 mins. 
May be modified accordingly + - done +- exec: + client.0: + - sleep 37860 # sum total of the radosbench test times below plus 60 secs +benchload: # The total radosbench test below translates to 10.5 hrs +- full_sequential: + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 +stress-tasks: +- thrashosds: + op_delay: 1 + bdev_inject_crash: 1 + bdev_inject_crash_probability: .8 + chance_down: 80 + chance_pgnum_grow: 3 + chance_pgpnum_fix: 1 + chance_thrash_cluster_full: 0 + chance_thrash_pg_upmap: 3 + chance_thrash_pg_upmap_items: 3 + min_in: 2 diff --git a/ceph/qa/suites/rados/singleton/all/test-crash.yaml b/ceph/qa/suites/rados/singleton/all/test-crash.yaml index 6dbffb48c..8002deaa5 100644 --- a/ceph/qa/suites/rados/singleton/all/test-crash.yaml +++ b/ceph/qa/suites/rados/singleton/all/test-crash.yaml @@ -7,6 +7,7 @@ tasks: log-whitelist: - Reduced data availability - OSD_.*DOWN + - \(RECENT_CRASH\) - workunit: clients: client.0: diff --git a/ceph/qa/suites/rados/singleton/msgr-failures/few.yaml b/ceph/qa/suites/rados/singleton/msgr-failures/few.yaml index 0de320d46..4326fe23e 100644 --- a/ceph/qa/suites/rados/singleton/msgr-failures/few.yaml +++ b/ceph/qa/suites/rados/singleton/msgr-failures/few.yaml @@ -3,3 +3,5 @@ overrides: conf: global: ms inject socket failures: 5000 + log-whitelist: + - \(OSD_SLOW_PING_TIME diff --git a/ceph/qa/suites/rados/singleton/msgr-failures/many.yaml b/ceph/qa/suites/rados/singleton/msgr-failures/many.yaml index cd193532d..20aeb4df2 100644 --- a/ceph/qa/suites/rados/singleton/msgr-failures/many.yaml +++ b/ceph/qa/suites/rados/singleton/msgr-failures/many.yaml @@ -7,3 +7,5 @@ overrides: mon client hunt interval max multiple: 2 mgr: debug monc: 10 + log-whitelist: + - \(OSD_SLOW_PING_TIME diff --git a/ceph/qa/suites/rados/standalone/workloads/mgr.yaml b/ceph/qa/suites/rados/standalone/workloads/mgr.yaml new file mode 100644 index 000000000..997fae865 --- /dev/null +++ b/ceph/qa/suites/rados/standalone/workloads/mgr.yaml @@ -0,0 +1,18 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- workunit: + basedir: qa/standalone + clients: + all: + - mgr diff --git a/ceph/qa/suites/rados/thrash-old-clients/msgr-failures/fastclose.yaml b/ceph/qa/suites/rados/thrash-old-clients/msgr-failures/fastclose.yaml index 77fd730af..02121726e 100644 --- a/ceph/qa/suites/rados/thrash-old-clients/msgr-failures/fastclose.yaml +++ 
b/ceph/qa/suites/rados/thrash-old-clients/msgr-failures/fastclose.yaml @@ -4,3 +4,5 @@ overrides: global: ms inject socket failures: 2500 ms tcp read timeout: 5 + log-whitelist: + - \(OSD_SLOW_PING_TIME diff --git a/ceph/qa/suites/rados/thrash-old-clients/msgr-failures/few.yaml b/ceph/qa/suites/rados/thrash-old-clients/msgr-failures/few.yaml index 477bffe61..527eadb45 100644 --- a/ceph/qa/suites/rados/thrash-old-clients/msgr-failures/few.yaml +++ b/ceph/qa/suites/rados/thrash-old-clients/msgr-failures/few.yaml @@ -5,3 +5,5 @@ overrides: ms inject socket failures: 5000 osd: osd heartbeat use min delay socket: true + log-whitelist: + - \(OSD_SLOW_PING_TIME diff --git a/ceph/qa/suites/rados/thrash-old-clients/msgr-failures/osd-delay.yaml b/ceph/qa/suites/rados/thrash-old-clients/msgr-failures/osd-delay.yaml index a33ba89e1..91c147256 100644 --- a/ceph/qa/suites/rados/thrash-old-clients/msgr-failures/osd-delay.yaml +++ b/ceph/qa/suites/rados/thrash-old-clients/msgr-failures/osd-delay.yaml @@ -7,3 +7,5 @@ overrides: ms inject delay probability: .005 ms inject delay max: 1 ms inject internal delays: .002 + log-whitelist: + - \(OSD_SLOW_PING_TIME diff --git a/ceph/qa/suites/rados/thrash/msgr-failures/fastclose.yaml b/ceph/qa/suites/rados/thrash/msgr-failures/fastclose.yaml index 77fd730af..02121726e 100644 --- a/ceph/qa/suites/rados/thrash/msgr-failures/fastclose.yaml +++ b/ceph/qa/suites/rados/thrash/msgr-failures/fastclose.yaml @@ -4,3 +4,5 @@ overrides: global: ms inject socket failures: 2500 ms tcp read timeout: 5 + log-whitelist: + - \(OSD_SLOW_PING_TIME diff --git a/ceph/qa/suites/rados/thrash/msgr-failures/few.yaml b/ceph/qa/suites/rados/thrash/msgr-failures/few.yaml index 477bffe61..527eadb45 100644 --- a/ceph/qa/suites/rados/thrash/msgr-failures/few.yaml +++ b/ceph/qa/suites/rados/thrash/msgr-failures/few.yaml @@ -5,3 +5,5 @@ overrides: ms inject socket failures: 5000 osd: osd heartbeat use min delay socket: true + log-whitelist: + - \(OSD_SLOW_PING_TIME diff --git a/ceph/qa/suites/rados/thrash/msgr-failures/osd-delay.yaml b/ceph/qa/suites/rados/thrash/msgr-failures/osd-delay.yaml index a33ba89e1..91c147256 100644 --- a/ceph/qa/suites/rados/thrash/msgr-failures/osd-delay.yaml +++ b/ceph/qa/suites/rados/thrash/msgr-failures/osd-delay.yaml @@ -7,3 +7,5 @@ overrides: ms inject delay probability: .005 ms inject delay max: 1 ms inject internal delays: .002 + log-whitelist: + - \(OSD_SLOW_PING_TIME diff --git a/ceph/qa/suites/rados/verify/msgr-failures/few.yaml b/ceph/qa/suites/rados/verify/msgr-failures/few.yaml index 0de320d46..4326fe23e 100644 --- a/ceph/qa/suites/rados/verify/msgr-failures/few.yaml +++ b/ceph/qa/suites/rados/verify/msgr-failures/few.yaml @@ -3,3 +3,5 @@ overrides: conf: global: ms inject socket failures: 5000 + log-whitelist: + - \(OSD_SLOW_PING_TIME diff --git a/ceph/qa/suites/rbd/basic/msgr-failures/few.yaml b/ceph/qa/suites/rbd/basic/msgr-failures/few.yaml index 0de320d46..4326fe23e 100644 --- a/ceph/qa/suites/rbd/basic/msgr-failures/few.yaml +++ b/ceph/qa/suites/rbd/basic/msgr-failures/few.yaml @@ -3,3 +3,5 @@ overrides: conf: global: ms inject socket failures: 5000 + log-whitelist: + - \(OSD_SLOW_PING_TIME diff --git a/ceph/qa/suites/rbd/cli/msgr-failures/few.yaml b/ceph/qa/suites/rbd/cli/msgr-failures/few.yaml index 0de320d46..4326fe23e 100644 --- a/ceph/qa/suites/rbd/cli/msgr-failures/few.yaml +++ b/ceph/qa/suites/rbd/cli/msgr-failures/few.yaml @@ -3,3 +3,5 @@ overrides: conf: global: ms inject socket failures: 5000 + log-whitelist: + - \(OSD_SLOW_PING_TIME diff 
--git a/ceph/qa/suites/rbd/cli_v1/msgr-failures/few.yaml b/ceph/qa/suites/rbd/cli_v1/msgr-failures/few.yaml index 0de320d46..4326fe23e 100644 --- a/ceph/qa/suites/rbd/cli_v1/msgr-failures/few.yaml +++ b/ceph/qa/suites/rbd/cli_v1/msgr-failures/few.yaml @@ -3,3 +3,5 @@ overrides: conf: global: ms inject socket failures: 5000 + log-whitelist: + - \(OSD_SLOW_PING_TIME diff --git a/ceph/qa/suites/rbd/mirror/workloads/rbd-mirror-bootstrap-workunit.yaml b/ceph/qa/suites/rbd/mirror/workloads/rbd-mirror-bootstrap-workunit.yaml new file mode 100644 index 000000000..585f58291 --- /dev/null +++ b/ceph/qa/suites/rbd/mirror/workloads/rbd-mirror-bootstrap-workunit.yaml @@ -0,0 +1,11 @@ +meta: +- desc: run the rbd_mirror_bootstrap.sh workunit to test the rbd-mirror daemon +tasks: +- workunit: + clients: + cluster1.client.mirror: [rbd/rbd_mirror_bootstrap.sh] + env: + # override workunit setting of CEPH_ARGS='--cluster' + CEPH_ARGS: '' + RBD_MIRROR_INSTANCES: '1' + RBD_MIRROR_USE_EXISTING_CLUSTER: '1' diff --git a/ceph/qa/suites/rbd/qemu/msgr-failures/few.yaml b/ceph/qa/suites/rbd/qemu/msgr-failures/few.yaml index 55b6df536..9349b4f9a 100644 --- a/ceph/qa/suites/rbd/qemu/msgr-failures/few.yaml +++ b/ceph/qa/suites/rbd/qemu/msgr-failures/few.yaml @@ -5,3 +5,4 @@ overrides: ms inject socket failures: 5000 log-whitelist: - but it is still running + - \(OSD_SLOW_PING_TIME diff --git a/ceph/qa/suites/rbd/thrash/msgr-failures/few.yaml b/ceph/qa/suites/rbd/thrash/msgr-failures/few.yaml index 0de320d46..4326fe23e 100644 --- a/ceph/qa/suites/rbd/thrash/msgr-failures/few.yaml +++ b/ceph/qa/suites/rbd/thrash/msgr-failures/few.yaml @@ -3,3 +3,5 @@ overrides: conf: global: ms inject socket failures: 5000 + log-whitelist: + - \(OSD_SLOW_PING_TIME diff --git a/ceph/qa/suites/rgw/multisite/realms/three-zone-plus-pubsub.yaml b/ceph/qa/suites/rgw/multisite/realms/three-zone-plus-pubsub.yaml new file mode 100644 index 000000000..e77e5ade4 --- /dev/null +++ b/ceph/qa/suites/rgw/multisite/realms/three-zone-plus-pubsub.yaml @@ -0,0 +1,23 @@ +overrides: + rgw-multisite: + realm: + name: test-realm + is default: true + zonegroups: + - name: test-zonegroup + is_master: true + is_default: true + endpoints: [c1.client.0] + zones: + - name: test-zone1 + is_master: true + is_default: true + endpoints: [c1.client.0] + - name: test-zone2 + is_default: true + endpoints: [c2.client.0] + - name: test-zone3 + endpoints: [c1.client.1] + - name: test-zone4 + endpoints: [c2.client.1] + is_pubsub: true diff --git a/ceph/qa/suites/rgw/verify/msgr-failures/few.yaml b/ceph/qa/suites/rgw/verify/msgr-failures/few.yaml index 0de320d46..4326fe23e 100644 --- a/ceph/qa/suites/rgw/verify/msgr-failures/few.yaml +++ b/ceph/qa/suites/rgw/verify/msgr-failures/few.yaml @@ -3,3 +3,5 @@ overrides: conf: global: ms inject socket failures: 5000 + log-whitelist: + - \(OSD_SLOW_PING_TIME diff --git a/ceph/qa/suites/smoke/basic/tasks/mon_thrash.yaml b/ceph/qa/suites/smoke/basic/tasks/mon_thrash.yaml index 595ef667d..2144afd2f 100644 --- a/ceph/qa/suites/smoke/basic/tasks/mon_thrash.yaml +++ b/ceph/qa/suites/smoke/basic/tasks/mon_thrash.yaml @@ -13,6 +13,7 @@ overrides: - \(OBJECT_ - \(SLOW_OPS\) - \(TOO_FEW_PGS\) + - \(OSD_SLOW_PING_TIME conf: global: ms inject delay max: 1 diff --git a/ceph/qa/suites/smoke/basic/tasks/rados_bench.yaml b/ceph/qa/suites/smoke/basic/tasks/rados_bench.yaml index 84331efa2..493858d37 100644 --- a/ceph/qa/suites/smoke/basic/tasks/rados_bench.yaml +++ b/ceph/qa/suites/smoke/basic/tasks/rados_bench.yaml @@ -22,6 +22,7 @@ tasks: - \(OBJECT_ 
- \(SLOW_OPS\) - \(TOO_FEW_PGS\) + - \(OSD_SLOW_PING_TIME - thrashosds: chance_pgnum_grow: 2 chance_pgnum_shrink: 2 diff --git a/ceph/qa/suites/smoke/basic/tasks/rbd_fsx.yaml b/ceph/qa/suites/smoke/basic/tasks/rbd_fsx.yaml index 8440c80c0..3297ef5b8 100644 --- a/ceph/qa/suites/smoke/basic/tasks/rbd_fsx.yaml +++ b/ceph/qa/suites/smoke/basic/tasks/rbd_fsx.yaml @@ -11,6 +11,7 @@ overrides: - \(OBJECT_ - \(SLOW_OPS\) - \(TOO_FEW_PGS\) + - \(OSD_SLOW_PING_TIME conf: client: rbd cache: true diff --git a/ceph/qa/suites/tgt/basic/msgr-failures/few.yaml b/ceph/qa/suites/tgt/basic/msgr-failures/few.yaml index 0de320d46..4326fe23e 100644 --- a/ceph/qa/suites/tgt/basic/msgr-failures/few.yaml +++ b/ceph/qa/suites/tgt/basic/msgr-failures/few.yaml @@ -3,3 +3,5 @@ overrides: conf: global: ms inject socket failures: 5000 + log-whitelist: + - \(OSD_SLOW_PING_TIME diff --git a/ceph/qa/suites/tgt/basic/msgr-failures/many.yaml b/ceph/qa/suites/tgt/basic/msgr-failures/many.yaml index 86f8dde8a..4caedaebd 100644 --- a/ceph/qa/suites/tgt/basic/msgr-failures/many.yaml +++ b/ceph/qa/suites/tgt/basic/msgr-failures/many.yaml @@ -3,3 +3,5 @@ overrides: conf: global: ms inject socket failures: 500 + log-whitelist: + - \(OSD_SLOW_PING_TIME diff --git a/ceph/qa/tasks/cbt.py b/ceph/qa/tasks/cbt.py index 9f8520f8c..b774cb566 100644 --- a/ceph/qa/tasks/cbt.py +++ b/ceph/qa/tasks/cbt.py @@ -240,6 +240,21 @@ class CBT(Task): cosbench_version = 'cosbench-0.4.2.c3.1' else: cosbench_version = '0.4.2.c3' + # note: stop-all requires 'nc' + self.first_mon.run( + args=[ + 'cd', testdir, run.Raw('&&'), + 'cd', 'cos', run.Raw('&&'), + 'sh', 'stop-all.sh', + run.Raw('||'), 'true' + ] + ) + self.first_mon.run( + args=[ + 'sudo', 'killall', '-9', 'java', + run.Raw('||'), 'true' + ] + ) self.first_mon.run( args=[ 'rm', '--one-file-system', '-rf', '--', diff --git a/ceph/qa/tasks/ceph.conf.template b/ceph/qa/tasks/ceph.conf.template index 2566000ad..b246169e7 100644 --- a/ceph/qa/tasks/ceph.conf.template +++ b/ceph/qa/tasks/ceph.conf.template @@ -26,6 +26,7 @@ mon warn on crush straw calc version zero = false mon warn on no sortbitwise = false mon warn on osd down out interval zero = false + mon warn on too few osds = false osd pool default erasure code profile = "plugin=jerasure technique=reed_sol_van k=2 m=1 ruleset-failure-domain=osd crush-failure-domain=osd" @@ -36,6 +37,9 @@ mon cluster log file level = debug debug asserts on shutdown = true + # we see this fail in qa on *nautilus*; bump up retries + mon_client_directed_command_retry = 4 + [osd] osd journal size = 100 diff --git a/ceph/qa/tasks/ceph.py b/ceph/qa/tasks/ceph.py index 7f0976536..e07d331e9 100644 --- a/ceph/qa/tasks/ceph.py +++ b/ceph/qa/tasks/ceph.py @@ -1625,14 +1625,10 @@ def restart(ctx, config): ctx.daemons.get_daemon(type_, id_, cluster).restart() clusters.add(cluster) - for cluster in clusters: - manager = ctx.managers[cluster] - for dmon in daemons: - if '.' in dmon: - dm_parts = dmon.split('.') - if dm_parts[1].isdigit(): - if dm_parts[0] == 'osd': - manager.mark_down_osd(int(dm_parts[1])) + for role in daemons: + cluster, type_, id_ = teuthology.split_role(role) + if type_ == 'osd': + ctx.managers[cluster].mark_down_osd(id_) if config.get('wait-for-healthy', True): for cluster in clusters: @@ -1938,11 +1934,13 @@ def task(ctx, config): # a bunch of scary messages unrelated to our actual run. 
firstmon = teuthology.get_first_mon(ctx, config, config['cluster']) (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys() + # try this several times, since tell to mons is lossy. mon0_remote.run( args=[ 'sudo', 'ceph', '--cluster', config['cluster'], + '--mon-client-directed-command-retry', '5', 'tell', 'mon.*', 'injectargs', diff --git a/ceph/qa/tasks/cephfs/fuse_mount.py b/ceph/qa/tasks/cephfs/fuse_mount.py index 0c0c84b75..bbd56b3c5 100644 --- a/ceph/qa/tasks/cephfs/fuse_mount.py +++ b/ceph/qa/tasks/cephfs/fuse_mount.py @@ -166,6 +166,7 @@ class FuseMount(CephFSMount): def gather_mount_info(self): status = self.admin_socket(['status']) self.id = status['id'] + self.client_pid = status['metadata']['pid'] try: self.inst = status['inst_str'] self.addr = status['addr_str'] diff --git a/ceph/qa/tasks/cephfs/kernel_mount.py b/ceph/qa/tasks/cephfs/kernel_mount.py index 6b128f572..3c33cc83c 100644 --- a/ceph/qa/tasks/cephfs/kernel_mount.py +++ b/ceph/qa/tasks/cephfs/kernel_mount.py @@ -1,6 +1,7 @@ from StringIO import StringIO import json import logging +import time from textwrap import dedent from teuthology.orchestra.run import CommandFailedError from teuthology import misc @@ -17,46 +18,21 @@ UMOUNT_TIMEOUT = 300 class KernelMount(CephFSMount): - def __init__(self, ctx, mons, test_dir, client_id, client_remote, + def __init__(self, ctx, test_dir, client_id, client_remote, ipmi_user, ipmi_password, ipmi_domain): super(KernelMount, self).__init__(ctx, test_dir, client_id, client_remote) - self.mons = mons self.mounted = False self.ipmi_user = ipmi_user self.ipmi_password = ipmi_password self.ipmi_domain = ipmi_domain - def write_secret_file(self, remote, role, keyring, filename): - """ - Stash the keyring in the filename specified. - """ - remote.run( - args=[ - 'adjust-ulimits', - 'ceph-coverage', - '{tdir}/archive/coverage'.format(tdir=self.test_dir), - 'ceph-authtool', - '--name={role}'.format(role=role), - '--print-key', - keyring, - run.Raw('>'), - filename, - ], - timeout=(5*60), - ) - def mount(self, mount_path=None, mount_fs_name=None): self.setupfs(name=mount_fs_name) log.info('Mounting kclient client.{id} at {remote} {mnt}...'.format( id=self.client_id, remote=self.client_remote, mnt=self.mountpoint)) - keyring = self.get_keyring_path() - secret = '{tdir}/ceph.data/client.{id}.secret'.format(tdir=self.test_dir, id=self.client_id) - self.write_secret_file(self.client_remote, 'client.{id}'.format(id=self.client_id), - keyring, secret) - self.client_remote.run( args=[ 'mkdir', @@ -69,8 +45,8 @@ class KernelMount(CephFSMount): if mount_path is None: mount_path = "/" - opts = 'name={id},secretfile={secret},norequire_active_mds'.format(id=self.client_id, - secret=secret) + opts = 'name={id},norequire_active_mds,conf={conf}'.format(id=self.client_id, + conf=self.config_path) if mount_fs_name is not None: opts += ",mds_namespace={0}".format(mount_fs_name) @@ -81,8 +57,10 @@ class KernelMount(CephFSMount): 'adjust-ulimits', 'ceph-coverage', '{tdir}/archive/coverage'.format(tdir=self.test_dir), - '/sbin/mount.ceph', - '{mons}:{mount_path}'.format(mons=','.join(self.mons), mount_path=mount_path), + '/bin/mount', + '-t', + 'ceph', + ':{mount_path}'.format(mount_path=mount_path), self.mountpoint, '-v', '-o', @@ -176,21 +154,31 @@ class KernelMount(CephFSMount): self.ipmi_user, self.ipmi_password, self.ipmi_domain) - con.power_off() + con.hard_reset(wait_for_login=False) self.mounted = False def kill_cleanup(self): assert not self.mounted - con = 
orchestra_remote.getRemoteConsole(self.client_remote.hostname, - self.ipmi_user, - self.ipmi_password, - self.ipmi_domain) - con.power_on() + # We need to do a sleep here because we don't know how long it will + # take for a hard_reset to be effected. + time.sleep(30) - # Wait for node to come back up after reboot - misc.reconnect(None, 300, [self.client_remote]) + try: + # Wait for node to come back up after reboot + misc.reconnect(None, 300, [self.client_remote]) + except: + # attempt to get some useful debug output: + con = orchestra_remote.getRemoteConsole(self.client_remote.hostname, + self.ipmi_user, + self.ipmi_password, + self.ipmi_domain) + con.check_status(timeout=60) + raise + + # Remove mount directory + self.client_remote.run(args=['uptime'], timeout=10) # Remove mount directory self.client_remote.run( diff --git a/ceph/qa/tasks/cephfs/mount.py b/ceph/qa/tasks/cephfs/mount.py index 4bbad2542..8d07323d8 100644 --- a/ceph/qa/tasks/cephfs/mount.py +++ b/ceph/qa/tasks/cephfs/mount.py @@ -502,6 +502,14 @@ class CephFSMount(object): self._kill_background(p) self.background_procs.remove(p) + def send_signal(self, signal): + signal = signal.lower() + if signal.lower() not in ['sigstop', 'sigcont', 'sigterm', 'sigkill']: + raise NotImplementedError + + self.client_remote.run(args=['sudo', 'kill', '-{0}'.format(signal), + self.client_pid], omit_sudo=False) + def get_global_id(self): raise NotImplementedError() diff --git a/ceph/qa/tasks/cephfs/test_client_recovery.py b/ceph/qa/tasks/cephfs/test_client_recovery.py index 496b2faa1..24a3b4a69 100644 --- a/ceph/qa/tasks/cephfs/test_client_recovery.py +++ b/ceph/qa/tasks/cephfs/test_client_recovery.py @@ -238,6 +238,9 @@ class TestClientRecovery(CephFSTestCase): # Simulate client death self.mount_a.kill() + # wait for it to die so it doesn't voluntarily release buffer cap + time.sleep(5) + try: # Now, after session_timeout seconds, the waiter should # complete their operation when the MDS marks the holder's @@ -299,6 +302,9 @@ class TestClientRecovery(CephFSTestCase): # Simulate client death self.mount_a.kill() + # wait for it to die so it doesn't voluntarily release buffer cap + time.sleep(5) + try: # The waiter should get stuck waiting for the capability # held on the MDS by the now-dead client A @@ -569,3 +575,37 @@ class TestClientRecovery(CephFSTestCase): self.assert_session_state(gid, "open") time.sleep(session_timeout * 1.5) # Long enough for MDS to consider session stale self.assert_session_state(gid, "stale") + + def test_dont_mark_unresponsive_client_stale(self): + """ + Test that an unresponsive client holding caps is not marked stale or + evicted unless another clients wants its caps. + """ + if not isinstance(self.mount_a, FuseMount): + self.skipTest("Require FUSE client to handle signal STOP/CONT") + + # XXX: To conduct this test we need at least two clients since a + # single client is never evcited by MDS. + SESSION_TIMEOUT = 30 + SESSION_AUTOCLOSE = 50 + time_at_beg = time.time() + mount_a_gid = self.mount_a.get_global_id() + mount_a_pid = self.mount_a.client_pid + self.fs.set_var('session_timeout', SESSION_TIMEOUT) + self.fs.set_var('session_autoclose', SESSION_AUTOCLOSE) + self.assert_session_count(2, self.fs.mds_asok(['session', 'ls'])) + + # test that client holding cap not required by any other client is not + # marked stale when it becomes unresponsive. 
+ self.mount_a.run_shell(['mkdir', 'dir']) + self.mount_a.send_signal('sigstop') + time.sleep(SESSION_TIMEOUT + 2) + self.assert_session_state(mount_a_gid, "open") + + # test that other clients have to wait to get the caps from + # unresponsive client until session_autoclose. + self.mount_b.run_shell(['stat', 'dir']) + self.assert_session_count(1, self.fs.mds_asok(['session', 'ls'])) + self.assertLess(time.time(), time_at_beg + SESSION_AUTOCLOSE) + + self.mount_a.send_signal('sigcont') diff --git a/ceph/qa/tasks/cephfs/test_failover.py b/ceph/qa/tasks/cephfs/test_failover.py index f1bb0f22a..07702435a 100644 --- a/ceph/qa/tasks/cephfs/test_failover.py +++ b/ceph/qa/tasks/cephfs/test_failover.py @@ -365,7 +365,7 @@ class TestStandbyReplay(CephFSTestCase): self.assertEqual(0, len(list(self.fs.get_replays(status=status)))) return status - def _confirm_single_replay(self, full=True, status=None): + def _confirm_single_replay(self, full=True, status=None, retries=3): status = self.fs.wait_for_daemons(status=status) ranks = sorted(self.fs.get_mds_map(status=status)['in']) replays = list(self.fs.get_replays(status=status)) @@ -378,7 +378,11 @@ class TestStandbyReplay(CephFSTestCase): has_replay = True checked_replays.add(replay['gid']) if full and not has_replay: - raise RuntimeError("rank "+str(rank)+" has no standby-replay follower") + if retries <= 0: + raise RuntimeError("rank "+str(rank)+" has no standby-replay follower") + else: + retries = retries-1 + time.sleep(2) self.assertEqual(checked_replays, set(info['gid'] for info in replays)) return status diff --git a/ceph/qa/tasks/cephfs/test_volume_client.py b/ceph/qa/tasks/cephfs/test_volume_client.py index 8ab632b92..fcf308544 100644 --- a/ceph/qa/tasks/cephfs/test_volume_client.py +++ b/ceph/qa/tasks/cephfs/test_volume_client.py @@ -6,6 +6,7 @@ from textwrap import dedent from tasks.cephfs.cephfs_test_case import CephFSTestCase from tasks.cephfs.fuse_mount import FuseMount from teuthology.exceptions import CommandFailedError +from teuthology.misc import sudo_write_file log = logging.getLogger(__name__) @@ -14,11 +15,12 @@ class TestVolumeClient(CephFSTestCase): # One for looking at the global filesystem, one for being # the VolumeClient, two for mounting the created shares CLIENTS_REQUIRED = 4 - py_version = 'python' + default_py_version = 'python3' def setUp(self): CephFSTestCase.setUp(self) - self.py_version = self.ctx.config.get('overrides', {}).get('python', 'python') + self.py_version = self.ctx.config.get('overrides', {}).\ + get('python', TestVolumeClient.default_py_version) log.info("using python version: {python_version}".format( python_version=self.py_version )) @@ -33,6 +35,8 @@ class TestVolumeClient(CephFSTestCase): return client.run_python(""" from __future__ import print_function from ceph_volume_client import CephFSVolumeClient, VolumePath +from sys import version_info as sys_version_info +from rados import OSError as rados_OSError import logging log = logging.getLogger("ceph_volume_client") log.addHandler(logging.StreamHandler()) @@ -45,27 +49,6 @@ vc.disconnect() vol_prefix=vol_prefix, ns_prefix=ns_prefix), self.py_version) - def _sudo_write_file(self, remote, path, data): - """ - Write data to a remote file as super user - - :param remote: Remote site. - :param path: Path on the remote being written to. - :param data: Data to be written. - - Both perms and owner are passed directly to chmod. 
- """ - remote.run( - args=[ - 'sudo', - 'python', - '-c', - 'import shutil, sys; shutil.copyfileobj(sys.stdin, file(sys.argv[1], "wb"))', - path, - ], - stdin=data, - ) - def _configure_vc_auth(self, mount, id_name): """ Set up auth credentials for the VolumeClient user @@ -77,7 +60,7 @@ vc.disconnect() "mon", "allow *" ) mount.client_id = id_name - self._sudo_write_file(mount.client_remote, mount.get_keyring_path(), out) + sudo_write_file(mount.client_remote, mount.get_keyring_path(), out) self.set_conf("client.{name}".format(name=id_name), "keyring", mount.get_keyring_path()) def _configure_guest_auth(self, volumeclient_mount, guest_mount, @@ -140,9 +123,8 @@ vc.disconnect() key=key )) guest_mount.client_id = guest_entity - self._sudo_write_file(guest_mount.client_remote, - guest_mount.get_keyring_path(), - keyring_txt) + sudo_write_file(guest_mount.client_remote, + guest_mount.get_keyring_path(), keyring_txt) # Add a guest client section to the ceph config file. self.set_conf("client.{0}".format(guest_entity), "client quota", "True") @@ -990,6 +972,29 @@ vc.disconnect() vc_mount.umount_wait() self._configure_vc_auth(vc_mount, "manila") + obj_data = 'test_data' + obj_name = 'test_vc_obj' + pool_name = self.fs.get_data_pool_names()[0] + self.fs.rados(['put', obj_name, '-'], pool=pool_name, stdin_data=obj_data) + + self._volume_client_python(vc_mount, dedent(""" + data, version_before = vc.get_object_and_version("{pool_name}", "{obj_name}") + + if sys_version_info.major < 3: + data = data + 'modification1' + elif sys_version_info.major > 3: + data = str.encode(data.decode() + 'modification1') + + vc.put_object_versioned("{pool_name}", "{obj_name}", data, version_before) + data, version_after = vc.get_object_and_version("{pool_name}", "{obj_name}") + assert version_after == version_before + 1 + """).format(pool_name=pool_name, obj_name=obj_name)) + + def test_version_check_for_put_object_versioned(self): + vc_mount = self.mounts[1] + vc_mount.umount_wait() + self._configure_vc_auth(vc_mount, "manila") + obj_data = 'test_data' obj_name = 'test_vc_ob_2' pool_name = self.fs.get_data_pool_names()[0] @@ -997,14 +1002,30 @@ vc.disconnect() # Test if put_object_versioned() crosschecks the version of the # given object. Being a negative test, an exception is expected. 
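[Editor's illustration, not part of the patch] The reworked `test_version_check_for_put_object_versioned` exercises `put_object_versioned()`, which only writes when the caller's version still matches the stored object, i.e. an optimistic compare-and-swap. A minimal sketch of that behaviour against an in-memory stand-in follows; the `VersionedStore` class and the plain `OSError` are assumptions for illustration, not the CephFSVolumeClient API.

```python
# Hypothetical in-memory stand-in for a versioned object store, showing the
# compare-and-swap semantics the test expects from put_object_versioned().
class VersionedStore:
    def __init__(self):
        self._objects = {}  # name -> (data, version)

    def get_object_and_version(self, name):
        return self._objects.get(name, (b"", 0))

    def put_object_versioned(self, name, data, expected_version):
        _, current = self._objects.get(name, (b"", 0))
        if current != expected_version:
            raise OSError("version mismatch: expected %d, found %d"
                          % (expected_version, current))
        self._objects[name] = (data, current + 1)

store = VersionedStore()
store.put_object_versioned("test_vc_obj", b"test_data", 0)

data, version = store.get_object_and_version("test_vc_obj")
store.put_object_versioned("test_vc_obj", data + b"m1", version)      # succeeds
try:
    store.put_object_versioned("test_vc_obj", data + b"m2", version)  # stale version
except OSError as e:
    print("rejected as expected:", e)
```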
- with self.assertRaises(CommandFailedError): - self._volume_client_python(vc_mount, dedent(""" - data, version = vc.get_object_and_version("{pool_name}", "{obj_name}") - data += 'm1' - vc.put_object("{pool_name}", "{obj_name}", data) - data += 'm2' + expected_exception = 'rados_OSError' + output = self._volume_client_python(vc_mount, dedent(""" + data, version = vc.get_object_and_version("{pool_name}", "{obj_name}") + + if sys_version_info.major < 3: + data = data + 'm1' + elif sys_version_info.major > 3: + data = str.encode(data.decode('utf-8') + 'm1') + + vc.put_object("{pool_name}", "{obj_name}", data) + + if sys_version_info.major < 3: + data = data + 'm2' + elif sys_version_info.major > 3: + data = str.encode(data.decode('utf-8') + 'm2') + + try: vc.put_object_versioned("{pool_name}", "{obj_name}", data, version) - """).format(pool_name=pool_name, obj_name=obj_name)) + except {expected_exception}: + print('{expected_exception} raised') + """).format(pool_name=pool_name, obj_name=obj_name, + expected_exception=expected_exception)) + self.assertEqual(expected_exception + ' raised', output) + def test_delete_object(self): vc_mount = self.mounts[1] diff --git a/ceph/qa/tasks/cephfs/test_volumes.py b/ceph/qa/tasks/cephfs/test_volumes.py index eb94251ca..f7f20f005 100644 --- a/ceph/qa/tasks/cephfs/test_volumes.py +++ b/ceph/qa/tasks/cephfs/test_volumes.py @@ -3,6 +3,7 @@ import json import errno import random import logging +import collections from tasks.cephfs.cephfs_test_case import CephFSTestCase from teuthology.exceptions import CommandFailedError @@ -63,7 +64,7 @@ class TestVolumes(CephFSTestCase): return path[1:].rstrip() def _delete_test_volume(self): - self._fs_cmd("volume", "rm", self.volname) + self._fs_cmd("volume", "rm", self.volname, "--yes-i-really-mean-it") def _do_subvolume_io(self, subvolume, number_of_files=DEFAULT_NUMBER_OF_FILES, file_size=DEFAULT_FILE_SIZE): @@ -95,6 +96,23 @@ class TestVolumes(CephFSTestCase): self._delete_test_volume() super(TestVolumes, self).tearDown() + def test_volume_rm(self): + try: + self._fs_cmd("volume", "rm", self.volname) + except CommandFailedError as ce: + if ce.exitstatus != errno.EPERM: + raise RuntimeError("expected the 'fs volume rm' command to fail with EPERM, " + "but it failed with {0}".format(ce.exitstatus)) + else: + self._fs_cmd("volume", "rm", self.volname, "--yes-i-really-mean-it") + + #check if it's gone + volumes = json.loads(self.mgr_cluster.mon_manager.raw_cluster_cmd('fs', 'volume', 'ls', '--format=json-pretty')) + if (self.volname in [volume['name'] for volume in volumes]): + raise RuntimeError("Expected the 'fs volume rm' command to succeed. 
The volume {0} not removed.".format(self.volname)) + else: + raise RuntimeError("expected the 'fs volume rm' command to fail.") + ### basic subvolume operations def test_subvolume_create_and_rm(self): @@ -132,6 +150,47 @@ class TestVolumes(CephFSTestCase): # verify trash dir is clean self._wait_for_trash_empty() + def test_subvolume_create_with_invalid_data_pool_layout(self): + subvolume = self._generate_random_subvolume_name() + data_pool = "invalid_pool" + # create subvolume with invalid data pool layout + try: + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--pool_layout", data_pool) + except CommandFailedError as ce: + if ce.exitstatus != errno.EINVAL: + raise + else: + raise + # clean up + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--force") + + def test_subvolume_create_with_auto_cleanup_on_fail(self): + subvolume = self._generate_random_subvolume_name() + data_pool = "invalid_pool" + # create subvolume with invalid data pool layout fails + with self.assertRaises(CommandFailedError): + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--pool_layout", data_pool) + + # check whether subvol path is cleaned up + try: + self._fs_cmd("subvolume", "getpath", self.volname, subvolume) + except CommandFailedError as ce: + if ce.exitstatus != errno.ENOENT: + raise + else: + raise + + def test_subvolume_create_with_invalid_size(self): + # create subvolume with an invalid size -1 + subvolume = self._generate_random_subvolume_name() + try: + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--size", "-1") + except CommandFailedError as ce: + if ce.exitstatus != errno.EINVAL: + raise + else: + raise RuntimeError("expected the 'fs subvolume create' command to fail") + def test_nonexistent_subvolume_rm(self): # remove non-existing subvolume subvolume = "non_existent_subvolume" @@ -174,6 +233,36 @@ class TestVolumes(CephFSTestCase): # remove subvolume self._fs_cmd("subvolume", "rm", self.volname, subvolume) + def test_subvolume_ls(self): + # tests the 'fs subvolume ls' command + + subvolumes = [] + + # create subvolumes + for i in range(3): + svname = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, svname) + subvolumes.append(svname) + + # list subvolumes + subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname)) + if len(subvolumels) == 0: + raise RuntimeError("Expected the 'fs subvolume ls' command to list the created subvolumes.") + else: + subvolnames = [subvolume['name'] for subvolume in subvolumels] + if collections.Counter(subvolnames) != collections.Counter(subvolumes): + raise RuntimeError("Error creating or listing subvolumes") + + def test_subvolume_ls_for_notexistent_default_group(self): + # tests the 'fs subvolume ls' command when the default group '_nogroup' doesn't exist + # prerequisite: we expect that the volume is created and the default group _nogroup is + # NOT created (i.e. 
a subvolume without group is not created) + + # list subvolumes + subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname)) + if len(subvolumels) > 0: + raise RuntimeError("Expected the 'fs subvolume ls' command to output an empty list.") + ### subvolume group operations def test_subvolume_create_and_rm_in_group(self): @@ -221,6 +310,36 @@ class TestVolumes(CephFSTestCase): self._fs_cmd("subvolumegroup", "rm", self.volname, group1) self._fs_cmd("subvolumegroup", "rm", self.volname, group2) + def test_subvolume_group_create_with_invalid_data_pool_layout(self): + group = self._generate_random_group_name() + data_pool = "invalid_pool" + # create group with invalid data pool layout + try: + self._fs_cmd("subvolumegroup", "create", self.volname, group, "--pool_layout", data_pool) + except CommandFailedError as ce: + if ce.exitstatus != errno.EINVAL: + raise + else: + raise + # clean up + self._fs_cmd("subvolumegroup", "rm", self.volname, group, "--force") + + def test_subvolume_group_create_with_auto_cleanup_on_fail(self): + group = self._generate_random_group_name() + data_pool = "invalid_pool" + # create group with invalid data pool layout + with self.assertRaises(CommandFailedError): + self._fs_cmd("subvolumegroup", "create", self.volname, group, "--pool_layout", data_pool) + + # check whether group path is cleaned up + try: + self._fs_cmd("subvolumegroup", "getpath", self.volname, group) + except CommandFailedError as ce: + if ce.exitstatus != errno.ENOENT: + raise + else: + raise + def test_subvolume_create_with_desired_data_pool_layout_in_group(self): subvol1 = self._generate_random_subvolume_name() subvol2 = self._generate_random_subvolume_name() @@ -343,6 +462,34 @@ class TestVolumes(CephFSTestCase): # remove group self._fs_cmd("subvolumegroup", "rm", self.volname, group) + def test_subvolume_group_ls(self): + # tests the 'fs subvolumegroup ls' command + + subvolumegroups = [] + + #create subvolumegroups + for i in range(3): + groupname = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, groupname) + subvolumegroups.append(groupname) + + subvolumegroupls = json.loads(self._fs_cmd('subvolumegroup', 'ls', self.volname)) + if len(subvolumegroupls) == 0: + raise RuntimeError("Expected the 'fs subvolumegroup ls' command to list the created subvolume groups") + else: + subvolgroupnames = [subvolumegroup['name'] for subvolumegroup in subvolumegroupls] + if collections.Counter(subvolgroupnames) != collections.Counter(subvolumegroups): + raise RuntimeError("Error creating or listing subvolume groups") + + def test_subvolume_group_ls_for_nonexistent_volume(self): + # tests the 'fs subvolumegroup ls' command when /volume doesn't exist + # prerequisite: we expect that the test volume is created and a subvolumegroup is NOT created + + # list subvolume groups + subvolumegroupls = json.loads(self._fs_cmd('subvolumegroup', 'ls', self.volname)) + if len(subvolumegroupls) > 0: + raise RuntimeError("Expected the 'fs subvolumegroup ls' command to output an empty list") + ### snapshot operations def test_subvolume_snapshot_create_and_rm(self): @@ -441,6 +588,29 @@ class TestVolumes(CephFSTestCase): # remove group self._fs_cmd("subvolumegroup", "rm", self.volname, group) + def test_subvolume_snapshot_ls(self): + # tests the 'fs subvolume snapshot ls' command + + snapshots = [] + + # create subvolume + subvolume = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # create subvolume snapshots + for i 
in range(3): + sname = self._generate_random_snapshot_name() + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, sname) + snapshots.append(sname) + + subvolsnapshotls = json.loads(self._fs_cmd('subvolume', 'snapshot', 'ls', self.volname, subvolume)) + if len(subvolsnapshotls) == 0: + raise RuntimeError("Expected the 'fs subvolume snapshot ls' command to list the created subvolume snapshots") + else: + snapshotnames = [snapshot['name'] for snapshot in subvolsnapshotls] + if collections.Counter(snapshotnames) != collections.Counter(snapshots): + raise RuntimeError("Error creating or listing subvolume snapshots") + def test_subvolume_group_snapshot_create_and_rm(self): subvolume = self._generate_random_subvolume_name() group = self._generate_random_group_name() @@ -529,6 +699,29 @@ class TestVolumes(CephFSTestCase): # remove group self._fs_cmd("subvolumegroup", "rm", self.volname, group) + def test_subvolume_group_snapshot_ls(self): + # tests the 'fs subvolumegroup snapshot ls' command + + snapshots = [] + + # create group + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolumegroup snapshots + for i in range(3): + sname = self._generate_random_snapshot_name() + self._fs_cmd("subvolumegroup", "snapshot", "create", self.volname, group, sname) + snapshots.append(sname) + + subvolgrpsnapshotls = json.loads(self._fs_cmd('subvolumegroup', 'snapshot', 'ls', self.volname, group)) + if len(subvolgrpsnapshotls) == 0: + raise RuntimeError("Expected the 'fs subvolumegroup snapshot ls' command to list the created subvolume group snapshots") + else: + snapshotnames = [snapshot['name'] for snapshot in subvolgrpsnapshotls] + if collections.Counter(snapshotnames) != collections.Counter(snapshots): + raise RuntimeError("Error creating or listing subvolume group snapshots") + def test_async_subvolume_rm(self): subvolume = self._generate_random_subvolume_name() diff --git a/ceph/qa/tasks/kclient.py b/ceph/qa/tasks/kclient.py index 5e217b323..88f27366a 100644 --- a/ceph/qa/tasks/kclient.py +++ b/ceph/qa/tasks/kclient.py @@ -72,13 +72,6 @@ def task(ctx, config): test_dir = misc.get_testdir(ctx) - # Assemble mon addresses - remotes_and_roles = ctx.cluster.remotes.items() - roles = [roles for (remote_, roles) in remotes_and_roles] - ips = [remote_.ssh.get_transport().getpeername()[0] - for (remote_, _) in remotes_and_roles] - mons = misc.get_mons(roles, ips).values() - mounts = {} for id_, remote in clients: client_config = config.get("client.%s" % id_) @@ -90,7 +83,6 @@ def task(ctx, config): kernel_mount = KernelMount( ctx, - mons, test_dir, id_, remote, diff --git a/ceph/qa/tasks/mgr/dashboard/test_mgr_module.py b/ceph/qa/tasks/mgr/dashboard/test_mgr_module.py index af0036904..154700e0b 100644 --- a/ceph/qa/tasks/mgr/dashboard/test_mgr_module.py +++ b/ceph/qa/tasks/mgr/dashboard/test_mgr_module.py @@ -12,11 +12,6 @@ logger = logging.getLogger(__name__) class MgrModuleTestCase(DashboardTestCase): MGRS_REQUIRED = 1 - @classmethod - def tearDownClass(cls): - cls._ceph_cmd(['mgr', 'module', 'disable', 'telemetry']) - super(MgrModuleTestCase, cls).tearDownClass() - def wait_until_rest_api_accessible(self): """ Wait until the REST API is accessible. 
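The subvolume, subvolume group and snapshot 'ls' tests added above all verify listings the same way: collect the created names, parse the JSON listing, and compare the two with collections.Counter, which ignores ordering but still catches missing or duplicated entries. A standalone illustration of that comparison (names here are hypothetical):

    import collections

    created = ['sv_a', 'sv_b', 'sv_c']       # names handed to 'subvolume create'
    listed = ['sv_c', 'sv_a', 'sv_b']        # e.g. names parsed from 'fs subvolume ls'

    # Counter comparison: order does not matter, multiplicity does.
    assert collections.Counter(listed) == collections.Counter(created)

    # A plain set() comparison would silently accept a duplicated entry:
    assert set(['sv_a', 'sv_a', 'sv_b']) == set(['sv_a', 'sv_b'])
    assert collections.Counter(['sv_a', 'sv_a', 'sv_b']) != collections.Counter(['sv_a', 'sv_b'])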
@@ -37,7 +32,7 @@ class MgrModuleTestCase(DashboardTestCase): class MgrModuleTest(MgrModuleTestCase): def test_list_disabled_module(self): - self._ceph_cmd(['mgr', 'module', 'disable', 'telemetry']) + self._ceph_cmd(['mgr', 'module', 'disable', 'iostat']) self.wait_until_rest_api_accessible() data = self._get('/api/mgr/module') self.assertStatus(200) @@ -65,12 +60,12 @@ class MgrModuleTest(MgrModuleTestCase): 'tags': JList(str) })) }))) - module_info = self.find_object_in_list('name', 'telemetry', data) + module_info = self.find_object_in_list('name', 'iostat', data) self.assertIsNotNone(module_info) self.assertFalse(module_info['enabled']) def test_list_enabled_module(self): - self._ceph_cmd(['mgr', 'module', 'enable', 'telemetry']) + self._ceph_cmd(['mgr', 'module', 'enable', 'iostat']) self.wait_until_rest_api_accessible() data = self._get('/api/mgr/module') self.assertStatus(200) @@ -98,7 +93,7 @@ class MgrModuleTest(MgrModuleTestCase): 'tags': JList(str) })) }))) - module_info = self.find_object_in_list('name', 'telemetry', data) + module_info = self.find_object_in_list('name', 'iostat', data) self.assertIsNotNone(module_info) self.assertTrue(module_info['enabled']) @@ -110,15 +105,21 @@ class MgrModuleTelemetryTest(MgrModuleTestCase): self.assertSchema( data, JObj( + allow_unknown=True, sub_elems={ - 'contact': JLeaf(str), - 'description': JLeaf(str), - 'enabled': JLeaf(bool), - 'interval': JLeaf(int), - 'leaderboard': JLeaf(bool), - 'organization': JLeaf(str), - 'proxy': JLeaf(str), - 'url': JLeaf(str) + 'channel_basic': bool, + 'channel_ident': bool, + 'channel_crash': bool, + 'channel_device': bool, + 'contact': str, + 'description': str, + 'enabled': bool, + 'interval': int, + 'last_opt_revision': int, + 'leaderboard': bool, + 'organization': str, + 'proxy': str, + 'url': str })) def test_put(self): @@ -155,37 +156,3 @@ class MgrModuleTelemetryTest(MgrModuleTestCase): self.assertEqual(data['organization'], 'SUSE Linux') self.assertEqual(data['proxy'], 'foo') self.assertEqual(data['url'], 'https://foo.bar/report') - - def test_enable(self): - self._ceph_cmd(['mgr', 'module', 'disable', 'telemetry']) - self.wait_until_rest_api_accessible() - try: - # Note, an exception is thrown because the Ceph Mgr - # modules are reloaded. - self._post('/api/mgr/module/telemetry/enable') - except requests.ConnectionError: - pass - self.wait_until_rest_api_accessible() - data = self._get('/api/mgr/module') - self.assertStatus(200) - module_info = self.find_object_in_list('name', 'telemetry', data) - self.assertIsNotNone(module_info) - self.assertTrue(module_info['enabled']) - - def test_disable(self): - # Enable the 'telemetry' module (all CephMgr modules are restarted) - # and wait until the Dashboard REST API is accessible. - self._ceph_cmd(['mgr', 'module', 'enable', 'telemetry']) - self.wait_until_rest_api_accessible() - try: - # Note, an exception is thrown because the Ceph Mgr - # modules are reloaded. 
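The telemetry config schema asserted above only pins down the keys the dashboard cares about and passes allow_unknown, so newly added module options (further channel flags, for example) do not break the test. A hedged sketch of the same tolerant validation with plain dict and type checks instead of the dashboard's JObj/JLeaf helpers:

    EXPECTED_TYPES = {                       # the keys asserted by the schema above
        'channel_basic': bool, 'channel_ident': bool,
        'channel_crash': bool, 'channel_device': bool,
        'contact': str, 'description': str, 'enabled': bool,
        'interval': int, 'last_opt_revision': int, 'leaderboard': bool,
        'organization': str, 'proxy': str, 'url': str,
    }

    def validate_tolerant(config):
        """Check the known keys and their types; ignore anything unknown."""
        for key, expected in EXPECTED_TYPES.items():
            if key not in config:
                raise AssertionError('missing key: %s' % key)
            if not isinstance(config[key], expected):
                raise AssertionError('%s should be a %s' % (key, expected.__name__))
        # extra keys in `config` are tolerated, mirroring allow_unknown=True

    validate_tolerant({
        'channel_basic': True, 'channel_ident': False, 'channel_crash': True,
        'channel_device': True, 'contact': '', 'description': '', 'enabled': False,
        'interval': 24, 'last_opt_revision': 1, 'leaderboard': False,
        'organization': '', 'proxy': '', 'url': '',
        'some_future_option': 42,            # unknown key, accepted
    })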
- self._post('/api/mgr/module/telemetry/disable') - except requests.ConnectionError: - pass - self.wait_until_rest_api_accessible() - data = self._get('/api/mgr/module') - self.assertStatus(200) - module_info = self.find_object_in_list('name', 'telemetry', data) - self.assertIsNotNone(module_info) - self.assertFalse(module_info['enabled']) diff --git a/ceph/qa/tasks/mgr/dashboard/test_pool.py b/ceph/qa/tasks/mgr/dashboard/test_pool.py index 82acd606e..12306801d 100644 --- a/ceph/qa/tasks/mgr/dashboard/test_pool.py +++ b/ceph/qa/tasks/mgr/dashboard/test_pool.py @@ -235,7 +235,10 @@ class PoolTest(DashboardTestCase): # they can't recover from the resulting warning state. # Feel free to test it locally. # { - # 'pg_num': '8', + # 'pg_num': '2', # Decrease PGs + # }, + # { + # 'pg_num': '8', # Increase PGs # }, { 'application_metadata': ['rgw'], diff --git a/ceph/qa/tasks/mgr/dashboard/test_rbd.py b/ceph/qa/tasks/mgr/dashboard/test_rbd.py index b1c64bbdd..039c8cc22 100644 --- a/ceph/qa/tasks/mgr/dashboard/test_rbd.py +++ b/ceph/qa/tasks/mgr/dashboard/test_rbd.py @@ -114,8 +114,8 @@ class RbdTest(DashboardTestCase): @classmethod def setUpClass(cls): super(RbdTest, cls).setUpClass() - cls.create_pool('rbd', 10, 'replicated') - cls.create_pool('rbd_iscsi', 10, 'replicated') + cls.create_pool('rbd', 2**3, 'replicated') + cls.create_pool('rbd_iscsi', 2**3, 'replicated') cls.create_image('rbd', 'img1', 2**30) cls.create_image('rbd', 'img2', 2*2**30) @@ -324,7 +324,7 @@ class RbdTest(DashboardTestCase): if not self.bluestore_support: self.skipTest('requires bluestore cluster') - self.create_pool('data_pool', 12, 'erasure') + self.create_pool('data_pool', 2**4, 'erasure') rbd_name = 'test_rbd_in_data_pool' self.create_image('rbd', rbd_name, 10240, data_pool='data_pool') diff --git a/ceph/qa/tasks/mgr/dashboard/test_rgw.py b/ceph/qa/tasks/mgr/dashboard/test_rgw.py index b4c0676b0..34f337f8d 100644 --- a/ceph/qa/tasks/mgr/dashboard/test_rgw.py +++ b/ceph/qa/tasks/mgr/dashboard/test_rgw.py @@ -114,16 +114,22 @@ class RgwBucketTest(RgwTestCase): def setUpClass(cls): cls.create_test_user = True super(RgwBucketTest, cls).setUpClass() - # Create a tenanted user. + # Create tenanted users. cls._radosgw_admin_cmd([ 'user', 'create', '--tenant', 'testx', '--uid', 'teuth-test-user', '--display-name', 'tenanted teuth-test-user' ]) + cls._radosgw_admin_cmd([ + 'user', 'create', '--tenant', 'testx', '--uid', 'teuth-test-user2', + '--display-name', 'tenanted teuth-test-user 2' + ]) @classmethod def tearDownClass(cls): cls._radosgw_admin_cmd( ['user', 'rm', '--tenant', 'testx', '--uid=teuth-test-user']) + cls._radosgw_admin_cmd( + ['user', 'rm', '--tenant', 'testx', '--uid=teuth-test-user2']) super(RgwBucketTest, cls).tearDownClass() def test_all(self): @@ -232,7 +238,22 @@ class RgwBucketTest(RgwTestCase): self.assertEqual(data['tenant'], 'testx') self.assertEqual(data['bid'], 'testx/teuth-test-bucket') - # Update the bucket. + # Update bucket: different user from same tenant. + self._put( + '/api/rgw/bucket/{}'.format( + urllib.quote_plus('testx/teuth-test-bucket')), + params={ + 'bucket_id': data['id'], + 'uid': 'testx$teuth-test-user2' + }) + self.assertStatus(200) + data = self._get('/api/rgw/bucket/{}'.format( + urllib.quote_plus('testx/teuth-test-bucket'))) + self.assertStatus(200) + self.assertIn('owner', data) + self.assertEqual(data['owner'], 'testx$teuth-test-user2') + + # Update bucket: different user from empty tenant. 
self._put( '/api/rgw/bucket/{}'.format( urllib.quote_plus('testx/teuth-test-bucket')), diff --git a/ceph/qa/tasks/mgr/test_dashboard.py b/ceph/qa/tasks/mgr/test_dashboard.py index 3b778520d..b0cf200d6 100644 --- a/ceph/qa/tasks/mgr/test_dashboard.py +++ b/ceph/qa/tasks/mgr/test_dashboard.py @@ -20,6 +20,14 @@ class TestDashboard(MgrTestCase): self.mgr_cluster.mon_manager.raw_cluster_cmd("dashboard", "create-self-signed-cert") + def tearDown(self): + self.mgr_cluster.mon_manager.raw_cluster_cmd("config", "set", "mgr", + "mgr/dashboard/standby_behaviour", + "redirect") + self.mgr_cluster.mon_manager.raw_cluster_cmd("config", "set", "mgr", + "mgr/dashboard/standby_error_status_code", + "500") + def test_standby(self): original_active_id = self.mgr_cluster.get_active_id() original_uri = self._get_uri("dashboard") @@ -46,6 +54,42 @@ class TestDashboard(MgrTestCase): self.assertEqual(r.status_code, 303) self.assertEqual(r.headers['Location'], failed_over_uri) + def test_standby_disable_redirect(self): + self.mgr_cluster.mon_manager.raw_cluster_cmd("config", "set", "mgr", + "mgr/dashboard/standby_behaviour", + "error") + + original_active_id = self.mgr_cluster.get_active_id() + original_uri = self._get_uri("dashboard") + log.info("Originally running manager '{}' at {}".format( + original_active_id, original_uri)) + + # Force a failover and wait until the previously active manager + # is listed as standby. + self.mgr_cluster.mgr_fail(original_active_id) + self.wait_until_true( + lambda: original_active_id in self.mgr_cluster.get_standby_ids(), + timeout=30) + + failed_active_id = self.mgr_cluster.get_active_id() + failed_over_uri = self._get_uri("dashboard") + log.info("After failover running manager '{}' at {}".format( + failed_active_id, failed_over_uri)) + + self.assertNotEqual(original_uri, failed_over_uri) + + # Redirection should be disabled now, instead a 500 must be returned. + r = requests.get(original_uri, allow_redirects=False, verify=False) + self.assertEqual(r.status_code, 500) + + self.mgr_cluster.mon_manager.raw_cluster_cmd("config", "set", "mgr", + "mgr/dashboard/standby_error_status_code", + "503") + + # The customized HTTP status code (503) must be returned. + r = requests.get(original_uri, allow_redirects=False, verify=False) + self.assertEqual(r.status_code, 503) + def test_urls(self): base_uri = self._get_uri("dashboard") diff --git a/ceph/qa/tasks/mgr/test_insights.py b/ceph/qa/tasks/mgr/test_insights.py index 37fe0a89c..8f0f41ceb 100644 --- a/ceph/qa/tasks/mgr/test_insights.py +++ b/ceph/qa/tasks/mgr/test_insights.py @@ -163,23 +163,6 @@ class TestInsights(MgrTestCase): report = self._insights() self.assertFalse(report["health"]["history"]["checks"]) - def test_insights_health(self): - """The insights module reports health checks""" - self._add_crash(1, True) # add invalid crash data - timeout = 10 - while timeout > 0: - time.sleep(1) - timeout -= 1 - # should observe a health check because it can't read the invalid - # crash data created at the beginning of this test - report = self._insights() - if "MGR_INSIGHTS_WARNING" in report["health"]["current"]["checks"]: - self._clear_crashes() - return - self._clear_crashes() - self.fail("Insights module did not set health check") - pass - def test_schema(self): """TODO: assert conformance to a full schema specification?""" report = self._insights() @@ -214,10 +197,4 @@ class TestInsights(MgrTestCase): self.assertFalse(report["errors"]) log.warning("{}".format(json.dumps(report["crashes"], indent=2))) - # handling of comm. 
error with crash module - self._add_crash(1, True) - report = self._insights() - self.assertFalse(report["crashes"]["summary"]) - self.assertTrue(report["errors"]) - self._clear_crashes() diff --git a/ceph/qa/tasks/mgr/test_module_selftest.py b/ceph/qa/tasks/mgr/test_module_selftest.py index 7e6a5110c..bec2786c3 100644 --- a/ceph/qa/tasks/mgr/test_module_selftest.py +++ b/ceph/qa/tasks/mgr/test_module_selftest.py @@ -78,6 +78,10 @@ class TestModuleSelftest(MgrTestCase): def test_crash(self): self._selftest_plugin("crash") + def test_orchestrator_cli(self): + self._selftest_plugin("orchestrator_cli") + + def test_selftest_config_update(self): """ That configuration updates are seen by running mgr modules diff --git a/ceph/qa/tasks/rgw_multisite.py b/ceph/qa/tasks/rgw_multisite.py index a41238daa..9dea39312 100644 --- a/ceph/qa/tasks/rgw_multisite.py +++ b/ceph/qa/tasks/rgw_multisite.py @@ -11,6 +11,7 @@ from util.rgw import rgwadmin, wait_for_radosgw from util.rados import create_ec_pool, create_replicated_pool from rgw_multi import multisite from rgw_multi.zone_rados import RadosZone as RadosZone +from rgw_multi.zone_ps import PSZone as PSZone from teuthology.orchestra import run from teuthology import misc @@ -33,6 +34,7 @@ class RGWMultisite(Task): * 'is_master' is passed on the command line as --master * 'is_default' is passed on the command line as --default + * 'is_pubsub' is used to create a zone with tier-type=pubsub * 'endpoints' given as client names are replaced with actual endpoints zonegroups: @@ -78,6 +80,9 @@ class RGWMultisite(Task): - name: test-zone2 is_default: true endpoints: [c2.client.0] + - name: test-zone3 + is_pubsub: true + endpoints: [c1.client.1] """ def __init__(self, ctx, config): @@ -369,7 +374,10 @@ def create_zonegroup(cluster, gateways, period, config): def create_zone(ctx, cluster, gateways, creds, zonegroup, config): """ create a zone with the given configuration """ zone = multisite.Zone(config['name'], zonegroup, cluster) - zone = RadosZone(config['name'], zonegroup, cluster) + if config.pop('is_pubsub', False): + zone = PSZone(config['name'], zonegroup, cluster) + else: + zone = RadosZone(config['name'], zonegroup, cluster) # collect Gateways for the zone's endpoints endpoints = config.get('endpoints') diff --git a/ceph/qa/tasks/rgw_multisite_tests.py b/ceph/qa/tasks/rgw_multisite_tests.py index 4e6e2b3df..dade6e474 100644 --- a/ceph/qa/tasks/rgw_multisite_tests.py +++ b/ceph/qa/tasks/rgw_multisite_tests.py @@ -10,10 +10,11 @@ from teuthology.exceptions import ConfigError from teuthology.task import Task from teuthology import misc -from rgw_multi import multisite, tests +from rgw_multi import multisite, tests, tests_ps log = logging.getLogger(__name__) + class RGWMultisiteTests(Task): """ Runs the rgw_multi tests against a multisite configuration created by the @@ -63,9 +64,16 @@ class RGWMultisiteTests(Task): # run nose tests in the rgw_multi.tests module conf = nose.config.Config(stream=get_log_stream(), verbosity=2) + error_msg = '' result = nose.run(defaultTest=tests.__name__, argv=argv, config=conf) if not result: - raise RuntimeError('rgw multisite test failures') + error_msg += 'rgw multisite, ' + result = nose.run(defaultTest=tests_ps.__name__, argv=argv, config=conf) + if not result: + error_msg += 'rgw multisite pubsub, ' + if error_msg: + raise RuntimeError(error_msg + 'test failures') + def get_log_stream(): """ return a log stream for nose output """ @@ -88,4 +96,5 @@ def get_log_stream(): return LogStream() + task = RGWMultisiteTests 
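Because a failure in the first nose run above no longer raises immediately, both the plain multisite suite and the pubsub suite always execute, and a single combined error is raised at the end. The same accumulate-then-raise structure, sketched with the standard unittest runner instead of nose:

    import unittest

    def run_suites(module_names):
        """Run each module's tests, then report every failing suite at once."""
        failed = []
        loader = unittest.TestLoader()
        for name in module_names:
            suite = loader.loadTestsFromName(name)
            result = unittest.TextTestRunner(verbosity=2).run(suite)
            if not result.wasSuccessful():
                failed.append(name)
        if failed:
            raise RuntimeError(', '.join(failed) + ' test failures')

    # e.g. run_suites(['rgw_multi.tests', 'rgw_multi.tests_ps']) in a checkout
    # that provides those modules; the task above drives them through nose.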
diff --git a/ceph/qa/tasks/vstart_runner.py b/ceph/qa/tasks/vstart_runner.py index bdfc04fc3..74b26d23d 100644 --- a/ceph/qa/tasks/vstart_runner.py +++ b/ceph/qa/tasks/vstart_runner.py @@ -109,7 +109,7 @@ if os.path.exists("./CMakeCache.txt") and os.path.exists("./bin"): python_paths.append(g_exp) ld_path = os.path.join(os.getcwd(), "lib/") - print "Using guessed paths {0} {1}".format(ld_path, python_paths) + print("Using guessed paths {0} {1}".format(ld_path, python_paths)) respawn_in_path(ld_path, python_paths) @@ -884,6 +884,7 @@ def exec_test(): interactive_on_error = False create_cluster = False create_cluster_only = False + ignore_missing_binaries = False args = sys.argv[1:] flags = [a for a in args if a.startswith("-")] @@ -895,6 +896,8 @@ def exec_test(): create_cluster = True elif f == "--create-cluster-only": create_cluster_only = True + elif f == "--ignore-missing-binaries": + ignore_missing_binaries = True else: log.error("Unknown option '{0}'".format(f)) sys.exit(-1) @@ -904,7 +907,7 @@ def exec_test(): require_binaries = ["ceph-dencoder", "cephfs-journal-tool", "cephfs-data-scan", "cephfs-table-tool", "ceph-fuse", "rados"] missing_binaries = [b for b in require_binaries if not os.path.exists(os.path.join(BIN_PREFIX, b))] - if missing_binaries: + if missing_binaries and not ignore_missing_binaries: log.error("Some ceph binaries missing, please build them: {0}".format(" ".join(missing_binaries))) sys.exit(-1) diff --git a/ceph/qa/valgrind.supp b/ceph/qa/valgrind.supp index e533d3fdf..56112b1f4 100644 --- a/ceph/qa/valgrind.supp +++ b/ceph/qa/valgrind.supp @@ -588,7 +588,7 @@ # while using aes-128-gcm with AES-NI enabled. Not observed while running # with `OPENSSL_ia32cap="~0x200000200000000"`. { - + uninitialised gcm.Xi in aes-128-gcm with AES-NI for msgr, part 1 Memcheck:Cond ... fun:EVP_DecryptFinal_ex @@ -598,24 +598,16 @@ ... fun:_ZN15AsyncConnection7processEv fun:_ZN11EventCenter14process_eventsEjPNSt6chrono8durationImSt5ratioILl1ELl1000000000EEEE - fun:operator() - fun:_ZNSt17_Function_handlerIFvvEZN12NetworkStack10add_threadEjEUlvE_E9_M_invokeERKSt9_Any_data - fun:execute_native_thread_routine - fun:start_thread - fun:clone + ... } { - + uninitialised gcm.Xi in aes-128-gcm with AES-NI for msgr, part 2 Memcheck:Cond fun:_ZN4ceph6crypto6onwire25AES128GCM_OnWireRxHandler34authenticated_decrypt_update_finalEONS_6buffer7v14_2_04listEj fun:_ZN10ProtocolV231handle_read_frame_epilogue_mainEOSt10unique_ptrIN4ceph6buffer7v14_2_08ptr_nodeENS4_8disposerEEi fun:_ZN10ProtocolV216run_continuationER2CtIS_E ... fun:_ZN11EventCenter14process_eventsEjPNSt6chrono8durationImSt5ratioILl1ELl1000000000EEEE - fun:operator() - fun:_ZNSt17_Function_handlerIFvvEZN12NetworkStack10add_threadEjEUlvE_E9_M_invokeERKSt9_Any_data - fun:execute_native_thread_routine - fun:start_thread - fun:clone + ... } diff --git a/ceph/qa/workunits/ceph-helpers-root.sh b/ceph/qa/workunits/ceph-helpers-root.sh index cd716a57e..65c2fc3b9 100755 --- a/ceph/qa/workunits/ceph-helpers-root.sh +++ b/ceph/qa/workunits/ceph-helpers-root.sh @@ -22,6 +22,11 @@ function distro_id() { echo $ID } +function distro_version() { + source /etc/os-release + echo $VERSION +} + function install() { for package in "$@" ; do install_one $package @@ -45,6 +50,52 @@ function install_one() { esac } +function install_cmake3_on_centos7 { + source /etc/os-release + local MAJOR_VERSION="$(echo $VERSION_ID | cut -d. 
-f1)" + sudo yum-config-manager --add-repo https://dl.fedoraproject.org/pub/epel/$MAJOR_VERSION/x86_64/ + sudo yum install --nogpgcheck -y epel-release + sudo rpm --import /etc/pki/rpm-gpg/RPM-GPG-KEY-EPEL-$MAJOR_VERSION + sudo yum install -y cmake3 +} + +function install_cmake3_on_xenial { + install_pkg_on_ubuntu \ + ceph-cmake \ + d278b9d28de0f6b88f56dfe1e8bf684a41577210 \ + xenial \ + force \ + cmake +} + +function install_pkg_on_ubuntu { + local project=$1 + shift + local sha1=$1 + shift + local codename=$1 + shift + local force=$1 + shift + local pkgs=$@ + local missing_pkgs + if [ $force = "force" ]; then + missing_pkgs="$@" + else + for pkg in $pkgs; do + if ! dpkg -s $pkg &> /dev/null; then + missing_pkgs+=" $pkg" + fi + done + fi + if test -n "$missing_pkgs"; then + local shaman_url="https://shaman.ceph.com/api/repos/${project}/master/${sha1}/ubuntu/${codename}/repo" + sudo curl --silent --location $shaman_url --output /etc/apt/sources.list.d/$project.list + sudo env DEBIAN_FRONTEND=noninteractive apt-get update -y -o Acquire::Languages=none -o Acquire::Translation=none || true + sudo env DEBIAN_FRONTEND=noninteractive apt-get install --allow-unauthenticated -y $missing_pkgs + fi +} + ####################################################################### function control_osd() { diff --git a/ceph/qa/workunits/cephtool/test.sh b/ceph/qa/workunits/cephtool/test.sh index f469acdc8..a4bd3d415 100755 --- a/ceph/qa/workunits/cephtool/test.sh +++ b/ceph/qa/workunits/cephtool/test.sh @@ -49,6 +49,11 @@ function expect_false() if "$@"; then return 1; else return 0; fi } +function expect_true() +{ + set -x + if ! "$@"; then return 1; else return 0; fi +} TEMP_DIR=$(mktemp -d ${TMPDIR-/tmp}/cephtool.XXX) trap "rm -fr $TEMP_DIR" 0 @@ -2700,6 +2705,55 @@ function test_mgr_tell() ceph tell mgr osd status } +function test_mgr_devices() +{ + ceph device ls + expect_false ceph device info doesnotexist + expect_false ceph device get-health-metrics doesnotexist +} + +function test_per_pool_scrub_status() +{ + ceph osd pool create noscrub_pool 12 + ceph osd pool create noscrub_pool2 12 + ceph -s | expect_false grep -q "Some pool(s) have the.*scrub.* flag(s) set" + ceph -s --format json | \ + jq .health.checks.POOL_SCRUB_FLAGS.summary.message | \ + expect_false grep -q "Some pool(s) have the.*scrub.* flag(s) set" + ceph report | jq .health.checks.POOL_SCRUB_FLAGS.detail | + expect_false grep -q "Pool .* has .*scrub.* flag" + ceph health detail | jq .health.checks.POOL_SCRUB_FLAGS.detail | \ + expect_false grep -q "Pool .* has .*scrub.* flag" + + ceph osd pool set noscrub_pool noscrub 1 + ceph -s | expect_true grep -q "Some pool(s) have the noscrub flag(s) set" + ceph -s --format json | \ + jq .health.checks.POOL_SCRUB_FLAGS.summary.message | \ + expect_true grep -q "Some pool(s) have the noscrub flag(s) set" + ceph report | jq .health.checks.POOL_SCRUB_FLAGS.detail | \ + expect_true grep -q "Pool noscrub_pool has noscrub flag" + ceph health detail | expect_true grep -q "Pool noscrub_pool has noscrub flag" + + ceph osd pool set noscrub_pool nodeep-scrub 1 + ceph osd pool set noscrub_pool2 nodeep-scrub 1 + ceph -s | expect_true grep -q "Some pool(s) have the noscrub, nodeep-scrub flag(s) set" + ceph -s --format json | \ + jq .health.checks.POOL_SCRUB_FLAGS.summary.message | \ + expect_true grep -q "Some pool(s) have the noscrub, nodeep-scrub flag(s) set" + ceph report | jq .health.checks.POOL_SCRUB_FLAGS.detail | \ + expect_true grep -q "Pool noscrub_pool has noscrub flag" + ceph report | jq 
.health.checks.POOL_SCRUB_FLAGS.detail | \ + expect_true grep -q "Pool noscrub_pool has nodeep-scrub flag" + ceph report | jq .health.checks.POOL_SCRUB_FLAGS.detail | \ + expect_true grep -q "Pool noscrub_pool2 has nodeep-scrub flag" + ceph health detail | expect_true grep -q "Pool noscrub_pool has noscrub flag" + ceph health detail | expect_true grep -q "Pool noscrub_pool has nodeep-scrub flag" + ceph health detail | expect_true grep -q "Pool noscrub_pool2 has nodeep-scrub flag" + + ceph osd pool rm noscrub_pool noscrub_pool --yes-i-really-really-mean-it + ceph osd pool rm noscrub_pool2 noscrub_pool2 --yes-i-really-really-mean-it +} + # # New tests should be added to the TESTS array below # @@ -2752,6 +2806,7 @@ OSD_TESTS+=" tiering_agent" OSD_TESTS+=" admin_heap_profiler" OSD_TESTS+=" osd_tell_help_command" OSD_TESTS+=" osd_compact" +OSD_TESTS+=" per_pool_scrub_status" MDS_TESTS+=" mds_tell" MDS_TESTS+=" mon_mds" @@ -2759,6 +2814,7 @@ MDS_TESTS+=" mon_mds_metadata" MDS_TESTS+=" mds_tell_help_command" MGR_TESTS+=" mgr_tell" +MGR_TESTS+=" mgr_devices" TESTS+=$MON_TESTS TESTS+=$OSD_TESTS diff --git a/ceph/qa/workunits/mon/pool_ops.sh b/ceph/qa/workunits/mon/pool_ops.sh index a336fd544..4098795b9 100755 --- a/ceph/qa/workunits/mon/pool_ops.sh +++ b/ceph/qa/workunits/mon/pool_ops.sh @@ -25,6 +25,16 @@ ceph osd pool set foo size 10 expect_false ceph osd pool set foo size 0 expect_false ceph osd pool set foo size 20 +ceph osd pool set foo size 3 +ceph osd getcrushmap -o crush +crushtool -d crush -o crush.txt +sed -i 's/max_size 10/max_size 3/' crush.txt +crushtool -c crush.txt -o crush.new +ceph osd setcrushmap -i crush.new +expect_false ceph osd pool set foo size 4 +ceph osd setcrushmap -i crush +rm -f crush crush.txt crush.new + # should fail due to safety interlock expect_false ceph osd pool delete foo expect_false ceph osd pool delete foo foo diff --git a/ceph/qa/workunits/rados/test_crash.sh b/ceph/qa/workunits/rados/test_crash.sh index 6e7aaaaba..6608d7872 100755 --- a/ceph/qa/workunits/rados/test_crash.sh +++ b/ceph/qa/workunits/rados/test_crash.sh @@ -29,5 +29,11 @@ sudo systemctl restart ceph-crash sleep 30 # must be 3 crashdumps registered and moved to crash/posted -[ $(ceph crash ls | wc -l) = 3 ] || exit 1 +[ $(ceph crash ls | wc -l) = 4 ] || exit 1 # 4 here bc of the table header [ $(sudo find /var/lib/ceph/crash/posted/ -name meta | wc -l) = 3 ] || exit 1 + +# there should be a health warning +ceph health detail | grep RECENT_CRASH || exit 1 +ceph crash archive-all +sleep 30 +ceph health detail | grep -c RECENT_CRASH | grep 0 # should be gone! 
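The `wc -l` check above counts the table header emitted by `ceph crash ls`, which is why three posted crash dumps show up as four lines. When the count is needed programmatically it is usually cleaner to skip the header; a rough sketch, assuming the first column of the table is the crash id:

    import subprocess

    out = subprocess.check_output(['ceph', 'crash', 'ls']).decode()
    # drop the header row, keep one id per remaining non-empty line
    crash_ids = [line.split()[0] for line in out.splitlines()[1:] if line.strip()]
    print('%d crash dumps registered' % len(crash_ids))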
diff --git a/ceph/qa/workunits/rados/test_envlibrados_for_rocksdb.sh b/ceph/qa/workunits/rados/test_envlibrados_for_rocksdb.sh index d5c09529f..1360f6dfe 100755 --- a/ceph/qa/workunits/rados/test_envlibrados_for_rocksdb.sh +++ b/ceph/qa/workunits/rados/test_envlibrados_for_rocksdb.sh @@ -10,7 +10,6 @@ source $(dirname $0)/../ceph-helpers-root.sh # Install required tools ############################################ echo "Install required tools" -install git cmake CURRENT_PATH=`pwd` @@ -21,13 +20,26 @@ CURRENT_PATH=`pwd` # for rocksdb case $(distro_id) in ubuntu|debian|devuan) - install g++ libsnappy-dev zlib1g-dev libbz2-dev libradospp-dev + install git g++ libsnappy-dev zlib1g-dev libbz2-dev libradospp-dev + case $(distro_version) in + *Xenial*) + install_cmake3_on_xenial + ;; + *) + install cmake + ;; + esac ;; centos|fedora|rhel) - install gcc-c++.x86_64 snappy-devel zlib zlib-devel bzip2 bzip2-devel libradospp-devel.x86_64 + install git gcc-c++.x86_64 snappy-devel zlib zlib-devel bzip2 bzip2-devel libradospp-devel.x86_64 + if [ $(distro_id) = "fedora" ]; then + install cmake + else + install_cmake3_on_centos7 + fi ;; opensuse*|suse|sles) - install gcc-c++ snappy-devel zlib-devel libbz2-devel libradospp-devel + install git gcc-c++ snappy-devel zlib-devel libbz2-devel libradospp-devel ;; *) echo "$(distro_id) is unknown, $@ will have to be installed manually." @@ -55,7 +67,12 @@ git clone https://github.com/facebook/rocksdb.git --depth 1 # compile code cd rocksdb -mkdir build && cd build && cmake -DWITH_LIBRADOS=ON -DWITH_SNAPPY=ON -DWITH_GFLAGS=OFF -DFAIL_ON_WARNINGS=OFF .. +if type cmake3 > /dev/null 2>&1 ; then + CMAKE=cmake3 +else + CMAKE=cmake +fi +mkdir build && cd build && ${CMAKE} -DWITH_LIBRADOS=ON -DWITH_SNAPPY=ON -DWITH_GFLAGS=OFF -DFAIL_ON_WARNINGS=OFF .. 
make rocksdb_env_librados_test -j8 echo "Copy ceph.conf" diff --git a/ceph/qa/workunits/rados/test_librados_build.sh b/ceph/qa/workunits/rados/test_librados_build.sh index 41500151c..0bca50507 100755 --- a/ceph/qa/workunits/rados/test_librados_build.sh +++ b/ceph/qa/workunits/rados/test_librados_build.sh @@ -22,8 +22,8 @@ hello_world_cpp " BINARIES="${BINARIES_TO_RUN}hello_radosstriper_cpp " -DL_PREFIX="http://git.ceph.com/?p=ceph.git;a=blob_plain;hb=master;f=examples/librados/" -#DL_PREFIX="https://raw.githubusercontent.com/ceph/ceph/master/examples/librados/" +DL_PREFIX="http://git.ceph.com/?p=ceph.git;a=blob_plain;hb=nautilus;f=examples/librados/" +#DL_PREFIX="https://raw.githubusercontent.com/ceph/ceph/nautilus/examples/librados/" DESTDIR=$(pwd) function cleanup () { diff --git a/ceph/qa/workunits/rbd/cli_generic.sh b/ceph/qa/workunits/rbd/cli_generic.sh index 1a46df10c..7f44d932d 100755 --- a/ceph/qa/workunits/rbd/cli_generic.sh +++ b/ceph/qa/workunits/rbd/cli_generic.sh @@ -665,6 +665,16 @@ test_namespace() { rbd rm rbd/test2/image2 rbd rm rbd/image2 + # v1 clones are supported within the same namespace + rbd create $RBD_CREATE_ARGS --size 1G rbd/test1/image3 + rbd snap create rbd/test1/image3@1 + rbd snap protect rbd/test1/image3@1 + rbd clone --rbd-default-clone-format 1 rbd/test1/image3@1 rbd/test1/image4 + rbd rm rbd/test1/image4 + rbd snap unprotect rbd/test1/image3@1 + rbd snap rm rbd/test1/image3@1 + rbd rm rbd/test1/image3 + rbd create $RBD_CREATE_ARGS --size 1G --namespace test1 image2 expect_fail rbd namespace remove rbd/test1 diff --git a/ceph/qa/workunits/rbd/krbd_udev_enumerate.sh b/ceph/qa/workunits/rbd/krbd_udev_enumerate.sh new file mode 100755 index 000000000..494f958f8 --- /dev/null +++ b/ceph/qa/workunits/rbd/krbd_udev_enumerate.sh @@ -0,0 +1,66 @@ +#!/usr/bin/env bash + +# This is a test for https://tracker.ceph.com/issues/41036, but it also +# triggers https://tracker.ceph.com/issues/41404 in some environments. + +set -ex + +function assert_exit_codes() { + declare -a pids=($@) + + for pid in ${pids[@]}; do + wait $pid + done +} + +function run_map() { + declare -a pids + + for i in {1..300}; do + sudo rbd map img$i & + pids+=($!) + done + + assert_exit_codes ${pids[@]} + [[ $(rbd showmapped | wc -l) -eq 301 ]] +} + +function run_unmap_by_dev() { + declare -a pids + + run_map + for i in {0..299}; do + sudo rbd unmap /dev/rbd$i & + pids+=($!) + done + + assert_exit_codes ${pids[@]} + [[ $(rbd showmapped | wc -l) -eq 0 ]] +} + +function run_unmap_by_spec() { + declare -a pids + + run_map + for i in {1..300}; do + sudo rbd unmap img$i & + pids+=($!) + done + + assert_exit_codes ${pids[@]} + [[ $(rbd showmapped | wc -l) -eq 0 ]] +} + +# Can't test with exclusive-lock, don't bother enabling deep-flatten. +# See https://tracker.ceph.com/issues/42492. +for i in {1..300}; do + rbd create --size 1 --image-feature '' img$i +done + +for i in {1..30}; do + echo Iteration $i + run_unmap_by_dev + run_unmap_by_spec +done + +echo OK diff --git a/ceph/qa/workunits/rbd/krbd_udev_netlink_enobufs.sh b/ceph/qa/workunits/rbd/krbd_udev_netlink_enobufs.sh new file mode 100755 index 000000000..7c9c53a2f --- /dev/null +++ b/ceph/qa/workunits/rbd/krbd_udev_netlink_enobufs.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +# This is a test for https://tracker.ceph.com/issues/41404, verifying that udev +# events are properly reaped while the image is being (un)mapped in the kernel. 
+# UDEV_BUF_SIZE is 1M (giving us a 2M socket receive buffer), but modprobe + +# modprobe -r generate ~28M worth of "block" events. + +set -ex + +rbd create --size 1 img + +ceph osd pause +sudo rbd map img & +PID=$! +sudo modprobe scsi_debug max_luns=16 add_host=16 num_parts=1 num_tgts=16 +sudo udevadm settle +sudo modprobe -r scsi_debug +[[ $(rbd showmapped | wc -l) -eq 0 ]] +ceph osd unpause +wait $PID +[[ $(rbd showmapped | wc -l) -eq 2 ]] +sudo rbd unmap img + +echo OK diff --git a/ceph/qa/workunits/rbd/rbd_mirror.sh b/ceph/qa/workunits/rbd/rbd_mirror.sh index 80d7b5dea..e534f537b 100755 --- a/ceph/qa/workunits/rbd/rbd_mirror.sh +++ b/ceph/qa/workunits/rbd/rbd_mirror.sh @@ -305,6 +305,35 @@ for i in ${image2} ${image4}; do compare_images ${POOL} ${i} done +testlog "TEST: remove mirroring pool" +pool=pool_to_remove +for cluster in ${CLUSTER1} ${CLUSTER2}; do + CEPH_ARGS='' ceph --cluster ${cluster} osd pool create ${pool} 16 16 + CEPH_ARGS='' rbd --cluster ${cluster} pool init ${pool} + rbd --cluster ${cluster} mirror pool enable ${pool} pool +done +rbd --cluster ${CLUSTER1} mirror pool peer add ${pool} ${CLUSTER2} +rbd --cluster ${CLUSTER2} mirror pool peer add ${pool} ${CLUSTER1} +rdp_image=test_remove_data_pool +create_image ${CLUSTER2} ${pool} ${image} 128 +create_image ${CLUSTER2} ${POOL} ${rdp_image} 128 --data-pool ${pool} +write_image ${CLUSTER2} ${pool} ${image} 100 +write_image ${CLUSTER2} ${POOL} ${rdp_image} 100 +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${pool} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${pool} ${image} 'up+replaying' 'master_position' +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${rdp_image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${rdp_image} 'up+replaying' 'master_position' +for cluster in ${CLUSTER1} ${CLUSTER2}; do + CEPH_ARGS='' ceph --cluster ${cluster} osd pool rm ${pool} ${pool} --yes-i-really-really-mean-it +done +remove_image_retry ${CLUSTER2} ${POOL} ${rdp_image} +wait_for_image_present ${CLUSTER1} ${POOL} ${rdp_image} 'deleted' +for i in 0 1 2 4 8 8 8 8 16 16; do + sleep $i + admin_daemons "${CLUSTER2}" rbd mirror status ${pool}/${image} || break +done +admin_daemons "${CLUSTER2}" rbd mirror status ${pool}/${image} && false + testlog "TEST: snapshot rename" snap_name='snap_rename' create_snapshot ${CLUSTER2} ${POOL} ${image2} "${snap_name}_0" diff --git a/ceph/qa/workunits/rbd/rbd_mirror_bootstrap.sh b/ceph/qa/workunits/rbd/rbd_mirror_bootstrap.sh new file mode 100755 index 000000000..05352f36b --- /dev/null +++ b/ceph/qa/workunits/rbd/rbd_mirror_bootstrap.sh @@ -0,0 +1,49 @@ +#!/bin/sh -ex +# +# rbd_mirror_bootstrap.sh - test peer bootstrap create/import +# + +RBD_MIRROR_MANUAL_PEERS=1 +RBD_MIRROR_INSTANCES=${RBD_MIRROR_INSTANCES:-1} +. 
$(dirname $0)/rbd_mirror_helpers.sh + +setup + +testlog "TEST: bootstrap cluster2 from cluster1" +# create token on cluster1 and import to cluster2 +TOKEN=${TEMPDIR}/peer-token +TOKEN_2=${TEMPDIR}/peer-token-2 +rbd --cluster ${CLUSTER1} mirror pool peer bootstrap create ${POOL} > ${TOKEN} +rbd --cluster ${CLUSTER1} mirror pool peer bootstrap create ${PARENT_POOL} > ${TOKEN_2} +cmp ${TOKEN} ${TOKEN_2} + +rbd --cluster ${CLUSTER2} --pool ${POOL} mirror pool peer bootstrap import ${TOKEN} --direction rx-only +rbd --cluster ${CLUSTER2} --pool ${PARENT_POOL} mirror pool peer bootstrap import ${TOKEN} --direction rx-tx + +start_mirrors ${CLUSTER1} +start_mirrors ${CLUSTER2} + +testlog "TEST: verify rx-only direction" +[ "$(rbd --cluster ${CLUSTER1} --pool ${POOL} mirror pool info --format xml | + ${XMLSTARLET} sel -t -v '//mirror/peers/peer[1]/uuid')" = "" ] + +create_image ${CLUSTER1} ${POOL} image1 + +wait_for_image_replay_started ${CLUSTER2} ${POOL} image1 +write_image ${CLUSTER1} ${POOL} image1 100 +wait_for_replay_complete ${CLUSTER2} ${CLUSTER1} ${POOL} image1 + +testlog "TEST: verify rx-tx direction" +create_image ${CLUSTER1} ${PARENT_POOL} image1 +create_image ${CLUSTER2} ${PARENT_POOL} image2 + +enable_mirror ${CLUSTER1} ${PARENT_POOL} image1 +enable_mirror ${CLUSTER2} ${PARENT_POOL} image2 + +wait_for_image_replay_started ${CLUSTER2} ${PARENT_POOL} image1 +write_image ${CLUSTER1} ${PARENT_POOL} image1 100 +wait_for_replay_complete ${CLUSTER2} ${CLUSTER1} ${PARENT_POOL} image1 + +wait_for_image_replay_started ${CLUSTER1} ${PARENT_POOL} image2 +write_image ${CLUSTER2} ${PARENT_POOL} image2 100 +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${PARENT_POOL} image2 diff --git a/ceph/qa/workunits/rbd/rbd_mirror_helpers.sh b/ceph/qa/workunits/rbd/rbd_mirror_helpers.sh index 9d5c3c8ff..fcd5b38b2 100755 --- a/ceph/qa/workunits/rbd/rbd_mirror_helpers.sh +++ b/ceph/qa/workunits/rbd/rbd_mirror_helpers.sh @@ -257,26 +257,28 @@ setup_pools() rbd --cluster ${cluster} mirror pool enable ${POOL} pool rbd --cluster ${cluster} mirror pool enable ${PARENT_POOL} image - if [ -z ${RBD_MIRROR_CONFIG_KEY} ]; then - rbd --cluster ${cluster} mirror pool peer add ${POOL} ${remote_cluster} - rbd --cluster ${cluster} mirror pool peer add ${PARENT_POOL} ${remote_cluster} - else - mon_map_file=${TEMPDIR}/${remote_cluster}.monmap - ceph --cluster ${remote_cluster} mon getmap > ${mon_map_file} - mon_addr=$(monmaptool --print ${mon_map_file} | grep -E 'mon\.' 
| - head -n 1 | sed -E 's/^[0-9]+: ([^ ]+).+$/\1/' | sed -E 's/\/[0-9]+//g') - - admin_key_file=${TEMPDIR}/${remote_cluster}.client.${CEPH_ID}.key - CEPH_ARGS='' ceph --cluster ${remote_cluster} auth get-key client.${CEPH_ID} > ${admin_key_file} - - rbd --cluster ${cluster} mirror pool peer add ${POOL} client.${CEPH_ID}@${remote_cluster}-DNE \ - --remote-mon-host "${mon_addr}" --remote-key-file ${admin_key_file} - - uuid=$(rbd --cluster ${cluster} mirror pool peer add ${PARENT_POOL} client.${CEPH_ID}@${remote_cluster}-DNE) - rbd --cluster ${cluster} mirror pool peer set ${PARENT_POOL} ${uuid} mon-host ${mon_addr} - rbd --cluster ${cluster} mirror pool peer set ${PARENT_POOL} ${uuid} key-file ${admin_key_file} - - PEER_CLUSTER_SUFFIX=-DNE + if [ -z ${RBD_MIRROR_MANUAL_PEERS} ]; then + if [ -z ${RBD_MIRROR_CONFIG_KEY} ]; then + rbd --cluster ${cluster} mirror pool peer add ${POOL} ${remote_cluster} + rbd --cluster ${cluster} mirror pool peer add ${PARENT_POOL} ${remote_cluster} + else + mon_map_file=${TEMPDIR}/${remote_cluster}.monmap + ceph --cluster ${remote_cluster} mon getmap > ${mon_map_file} + mon_addr=$(monmaptool --print ${mon_map_file} | grep -E 'mon\.' | + head -n 1 | sed -E 's/^[0-9]+: ([^ ]+).+$/\1/' | sed -E 's/\/[0-9]+//g') + + admin_key_file=${TEMPDIR}/${remote_cluster}.client.${CEPH_ID}.key + CEPH_ARGS='' ceph --cluster ${remote_cluster} auth get-key client.${CEPH_ID} > ${admin_key_file} + + rbd --cluster ${cluster} mirror pool peer add ${POOL} client.${CEPH_ID}@${remote_cluster}-DNE \ + --remote-mon-host "${mon_addr}" --remote-key-file ${admin_key_file} + + uuid=$(rbd --cluster ${cluster} mirror pool peer add ${PARENT_POOL} client.${CEPH_ID}@${remote_cluster}-DNE) + rbd --cluster ${cluster} mirror pool peer set ${PARENT_POOL} ${uuid} mon-host ${mon_addr} + rbd --cluster ${cluster} mirror pool peer set ${PARENT_POOL} ${uuid} key-file ${admin_key_file} + + PEER_CLUSTER_SUFFIX=-DNE + fi fi } @@ -680,7 +682,7 @@ test_status_in_pool_dir() local description_pattern="$5" local service_pattern="$6" - local status_log=${TEMPDIR}/${cluster}-${image}.mirror_status + local status_log=${TEMPDIR}/${cluster}-${pool}-${image}.mirror_status rbd --cluster ${cluster} -p ${pool} mirror image status ${image} | tee ${status_log} >&2 grep "state: .*${state_pattern}" ${status_log} || return 1 @@ -694,7 +696,38 @@ test_status_in_pool_dir() grep "service: " ${status_log} && return 1 fi + # recheck using `mirror pool status` command to stress test it. 
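The recheck mentioned above is implemented by the helper that follows, which pulls the per-image state and last_update fields out of `rbd mirror pool status --verbose --format xml` with xmlstarlet. The same extraction in Python, assuming the images/image/name|state|last_update layout used by those XPath expressions, might look like:

    import subprocess
    import xml.etree.ElementTree as ET

    def mirror_image_status(cluster, pool, image):
        """Return (state, last_update) for one image, or (None, None) if absent."""
        out = subprocess.check_output(
            ['rbd', '--cluster', cluster, 'mirror', 'pool', 'status',
             pool, '--verbose', '--format', 'xml'])
        root = ET.fromstring(out)
        for node in root.iter('image'):
            if node.findtext('name') == image:
                return node.findtext('state'), node.findtext('last_update')
        return None, None

    # e.g. state, last_update = mirror_image_status('cluster1', 'mirror', 'image1')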
+ + local last_update="$(sed -nEe 's/^ *last_update: *(.*) *$/\1/p' ${status_log})" + test_mirror_pool_status_verbose \ + ${cluster} ${pool} ${image} "${state_pattern}" "${last_update}" && return 0 + + echo "'mirror pool status' test failed" >&2 + exit 1 +} + +test_mirror_pool_status_verbose() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local state_pattern="$4" + local prev_last_update="$5" + + local status_log=${TEMPDIR}/${cluster}-${pool}.mirror_status + + rbd --cluster ${cluster} mirror pool status ${pool} --verbose --format xml \ + > ${status_log} + + local last_update state + last_update=$($XMLSTARLET sel -t -v \ + "//images/image[name='${image}']/last_update" < ${status_log}) + state=$($XMLSTARLET sel -t -v \ + "//images/image[name='${image}']/state" < ${status_log}) + + echo "${state}" | grep "${state_pattern}" || + test "${last_update}" '>' "${prev_last_update}" } wait_for_status_in_pool_dir() diff --git a/ceph/src/.git_version b/ceph/src/.git_version index ef3a08108..aa97a7083 100644 --- a/ceph/src/.git_version +++ b/ceph/src/.git_version @@ -1,2 +1,2 @@ -75f4de193b3ea58512f204623e6c5a16e6c1e1ba -v14.2.4 +ad5bd132e1492173c85fda2cc863152730b16a92 +v14.2.5 diff --git a/ceph/src/CMakeLists.txt b/ceph/src/CMakeLists.txt index 4895e84df..28ec9835f 100644 --- a/ceph/src/CMakeLists.txt +++ b/ceph/src/CMakeLists.txt @@ -625,6 +625,8 @@ add_subdirectory(bash_completion) add_subdirectory(client) if(WITH_LIBCEPHFS) + find_package(PkgConfig QUIET REQUIRED) + pkg_check_modules(CAPNG REQUIRED libcap-ng) set(libcephfs_srcs libcephfs.cc) add_library(cephfs ${CEPH_SHARED} ${libcephfs_srcs}) target_link_libraries(cephfs PRIVATE client ceph-common @@ -651,7 +653,9 @@ if(WITH_LIBCEPHFS) add_executable(ceph-syn ${ceph_syn_srcs}) target_link_libraries(ceph-syn client global-static ceph-common) install(TARGETS ceph-syn DESTINATION bin) - add_subdirectory(mount) + if(LINUX) + add_subdirectory(mount) + endif() endif(WITH_LIBCEPHFS) if(WITH_FUSE) diff --git a/ceph/src/auth/cephx/CephxClientHandler.cc b/ceph/src/auth/cephx/CephxClientHandler.cc index 94a9b7a0e..abdb2f2cc 100644 --- a/ceph/src/auth/cephx/CephxClientHandler.cc +++ b/ceph/src/auth/cephx/CephxClientHandler.cc @@ -128,7 +128,13 @@ int CephxClientHandler::handle_response( if (starting) { CephXServerChallenge ch; - decode(ch, indata); + try { + decode(ch, indata); + } catch (buffer::error& e) { + ldout(cct, 1) << __func__ << " failed to decode CephXServerChallenge: " + << e.what() << dendl; + return -EPERM; + } server_challenge = ch.server_challenge; ldout(cct, 10) << " got initial server challenge " << std::hex << server_challenge << std::dec << dendl; @@ -139,7 +145,13 @@ int CephxClientHandler::handle_response( } struct CephXResponseHeader header; - decode(header, indata); + try { + decode(header, indata); + } catch (buffer::error& e) { + ldout(cct, 1) << __func__ << " failed to decode CephXResponseHeader: " + << e.what() << dendl; + return -EPERM; + } switch (header.request_type) { case CEPHX_GET_AUTH_SESSION_KEY: @@ -159,8 +171,14 @@ int CephxClientHandler::handle_response( ldout(cct, 10) << " want=" << want << " need=" << need << " have=" << have << dendl; if (!indata.end()) { bufferlist cbl, extra_tickets; - decode(cbl, indata); - decode(extra_tickets, indata); + try { + decode(cbl, indata); + decode(extra_tickets, indata); + } catch (buffer::error& e) { + ldout(cct, 1) << __func__ << " failed to decode tickets: " + << e.what() << dendl; + return -EPERM; + } ldout(cct, 10) << " got connection bl " << cbl.length() << " and extra 
tickets " << extra_tickets.length() << dendl; diff --git a/ceph/src/auth/cephx/CephxProtocol.cc b/ceph/src/auth/cephx/CephxProtocol.cc index 6254d8f3c..5d44d847d 100644 --- a/ceph/src/auth/cephx/CephxProtocol.cc +++ b/ceph/src/auth/cephx/CephxProtocol.cc @@ -37,9 +37,9 @@ void cephx_calc_client_server_challenge(CephContext *cct, CryptoKey& secret, uin return; uint64_t k = 0; - const uint64_t *p = (const uint64_t *)enc.c_str(); + const ceph_le64 *p = (const ceph_le64 *)enc.c_str(); for (int pos = 0; pos + sizeof(k) <= enc.length(); pos+=sizeof(k), p++) - k ^= mswab(*p); + k ^= *p; *key = k; } @@ -149,50 +149,59 @@ bool CephXTicketHandler::verify_service_ticket_reply( CryptoKey& secret, bufferlist::const_iterator& indata) { - __u8 service_ticket_v; - decode(service_ticket_v, indata); - - CephXServiceTicket msg_a; - std::string error; - if (decode_decrypt(cct, msg_a, secret, indata, error)) { - ldout(cct, 0) << "verify_service_ticket_reply: failed decode_decrypt, error is: " << error << dendl; - return false; - } - - __u8 ticket_enc; - decode(ticket_enc, indata); + try { + __u8 service_ticket_v; + decode(service_ticket_v, indata); - bufferlist service_ticket_bl; - if (ticket_enc) { - ldout(cct, 10) << " got encrypted ticket" << dendl; + CephXServiceTicket msg_a; std::string error; - if (decode_decrypt(cct, service_ticket_bl, session_key, indata, error)) { - ldout(cct, 10) << "verify_service_ticket_reply: decode_decrypt failed " - << "with " << error << dendl; + if (decode_decrypt(cct, msg_a, secret, indata, error)) { + ldout(cct, 0) << __func__ << " failed decode_decrypt, error is: " << error + << dendl; return false; } - } else { - decode(service_ticket_bl, indata); - } - auto iter = service_ticket_bl.cbegin(); - decode(ticket, iter); - ldout(cct, 10) << " ticket.secret_id=" << ticket.secret_id << dendl; - - ldout(cct, 10) << "verify_service_ticket_reply service " << ceph_entity_type_name(service_id) - << " secret_id " << ticket.secret_id - << " session_key " << msg_a.session_key - << " validity=" << msg_a.validity << dendl; - session_key = msg_a.session_key; - if (!msg_a.validity.is_zero()) { - expires = ceph_clock_now(); - expires += msg_a.validity; - renew_after = expires; - renew_after -= ((double)msg_a.validity.sec() / 4); - ldout(cct, 10) << "ticket expires=" << expires << " renew_after=" << renew_after << dendl; - } - have_key_flag = true; - return true; + __u8 ticket_enc; + decode(ticket_enc, indata); + + bufferlist service_ticket_bl; + if (ticket_enc) { + ldout(cct, 10) << __func__ << " got encrypted ticket" << dendl; + std::string error; + if (decode_decrypt(cct, service_ticket_bl, session_key, indata, error)) { + ldout(cct, 10) << __func__ << " decode_decrypt failed " + << "with " << error << dendl; + return false; + } + } else { + decode(service_ticket_bl, indata); + } + auto iter = service_ticket_bl.cbegin(); + decode(ticket, iter); + ldout(cct, 10) << __func__ << " ticket.secret_id=" << ticket.secret_id + << dendl; + + ldout(cct, 10) << __func__ << " service " + << ceph_entity_type_name(service_id) + << " secret_id " << ticket.secret_id + << " session_key " << msg_a.session_key + << " validity=" << msg_a.validity << dendl; + session_key = msg_a.session_key; + if (!msg_a.validity.is_zero()) { + expires = ceph_clock_now(); + expires += msg_a.validity; + renew_after = expires; + renew_after -= ((double)msg_a.validity.sec() / 4); + ldout(cct, 10) << __func__ << " ticket expires=" << expires + << " renew_after=" << renew_after << dendl; + } + + have_key_flag = true; + return true; + 
} catch (buffer::error& e) { + ldout(cct, 1) << __func__ << " decode error: " << e.what() << dendl; + return false; + } } bool CephXTicketHandler::have_key() @@ -269,15 +278,24 @@ bool CephXTicketManager::verify_service_ticket_reply(CryptoKey& secret, bufferlist::const_iterator& indata) { __u8 service_ticket_reply_v; - decode(service_ticket_reply_v, indata); - uint32_t num; - decode(num, indata); + try { + decode(service_ticket_reply_v, indata); + decode(num, indata); + } catch (buffer::error& e) { + ldout(cct, 10) << __func__ << " failed to decode ticket v or count: " + << e.what() << dendl; + } ldout(cct, 10) << "verify_service_ticket_reply got " << num << " keys" << dendl; for (int i=0; i<(int)num; i++) { uint32_t type; - decode(type, indata); + try { + decode(type, indata); + } catch (buffer::error& e) { + ldout(cct, 10) << __func__ << " failed to decode ticket type: " << e.what() + << dendl; + } ldout(cct, 10) << "got key for service_id " << ceph_entity_type_name(type) << dendl; CephXTicketHandler& handler = get_handler(type); if (!handler.verify_service_ticket_reply(secret, indata)) { diff --git a/ceph/src/auth/cephx/CephxServiceHandler.cc b/ceph/src/auth/cephx/CephxServiceHandler.cc index 12bb8348f..2ab0602e2 100644 --- a/ceph/src/auth/cephx/CephxServiceHandler.cc +++ b/ceph/src/auth/cephx/CephxServiceHandler.cc @@ -61,7 +61,13 @@ int CephxServiceHandler::handle_request( int ret = 0; struct CephXRequestHeader cephx_header; - decode(cephx_header, indata); + try { + decode(cephx_header, indata); + } catch (buffer::error& e) { + ldout(cct, 0) << __func__ << " failed to decode CephXRequestHeader: " + << e.what() << dendl; + return -EPERM; + } switch (cephx_header.request_type) { case CEPHX_GET_AUTH_SESSION_KEY: @@ -70,7 +76,14 @@ int CephxServiceHandler::handle_request( << entity_name << dendl; CephXAuthenticate req; - decode(req, indata); + try { + decode(req, indata); + } catch (buffer::error& e) { + ldout(cct, 0) << __func__ << " failed to decode CephXAuthenticate: " + << e.what() << dendl; + ret = -EPERM; + break; + } CryptoKey secret; if (!key_server->get_secret(entity_name, secret)) { @@ -234,7 +247,15 @@ int CephxServiceHandler::handle_request( } CephXServiceTicketRequest ticket_req; - decode(ticket_req, indata); + try { + decode(ticket_req, indata); + } catch (buffer::error& e) { + ldout(cct, 0) << __func__ + << " failed to decode CephXServiceTicketRequest: " + << e.what() << dendl; + ret = -EPERM; + break; + } ldout(cct, 10) << " ticket_req.keys = " << ticket_req.keys << dendl; ret = 0; diff --git a/ceph/src/auth/cephx/CephxSessionHandler.cc b/ceph/src/auth/cephx/CephxSessionHandler.cc index 16c125184..83069f285 100644 --- a/ceph/src/auth/cephx/CephxSessionHandler.cc +++ b/ceph/src/auth/cephx/CephxSessionHandler.cc @@ -37,16 +37,16 @@ int CephxSessionHandler::_calc_signature(Message *m, uint64_t *psig) // - skip the leading 4 byte wrapper from encode_encrypt struct { __u8 v; - __le64 magic; - __le32 len; - __le32 header_crc; - __le32 front_crc; - __le32 middle_crc; - __le32 data_crc; + ceph_le64 magic; + ceph_le32 len; + ceph_le32 header_crc; + ceph_le32 front_crc; + ceph_le32 middle_crc; + ceph_le32 data_crc; } __attribute__ ((packed)) sigblock = { - 1, mswab(AUTH_ENC_MAGIC), mswab(4*4), - mswab(header.crc), mswab(footer.front_crc), - mswab(footer.middle_crc), mswab(footer.data_crc) + 1, init_le64(AUTH_ENC_MAGIC), init_le32(4*4), + init_le32(header.crc), init_le32(footer.front_crc), + init_le32(footer.middle_crc), init_le32(footer.data_crc) }; char 
exp_buf[CryptoKey::get_max_outbuf_size(sizeof(sigblock))]; @@ -66,27 +66,27 @@ int CephxSessionHandler::_calc_signature(Message *m, uint64_t *psig) return -1; } - *psig = *reinterpret_cast<__le64*>(exp_buf); + *psig = *reinterpret_cast(exp_buf); } else { // newer mimic+ signatures struct { - __le32 header_crc; - __le32 front_crc; - __le32 front_len; - __le32 middle_crc; - __le32 middle_len; - __le32 data_crc; - __le32 data_len; - __le32 seq_lower_word; + ceph_le32 header_crc; + ceph_le32 front_crc; + ceph_le32 front_len; + ceph_le32 middle_crc; + ceph_le32 middle_len; + ceph_le32 data_crc; + ceph_le32 data_len; + ceph_le32 seq_lower_word; } __attribute__ ((packed)) sigblock = { - mswab(header.crc), - mswab(footer.front_crc), - mswab(header.front_len), - mswab(footer.middle_crc), - mswab(header.middle_len), - mswab(footer.data_crc), - mswab(header.data_len), - mswab(header.seq) + init_le32(header.crc), + init_le32(footer.front_crc), + init_le32(header.front_len), + init_le32(footer.middle_crc), + init_le32(header.middle_len), + init_le32(footer.data_crc), + init_le32(header.data_len), + init_le32(header.seq) }; char exp_buf[CryptoKey::get_max_outbuf_size(sizeof(sigblock))]; @@ -107,7 +107,7 @@ int CephxSessionHandler::_calc_signature(Message *m, uint64_t *psig) } struct enc { - __le64 a, b, c, d; + ceph_le64 a, b, c, d; } *penc = reinterpret_cast(exp_buf); *psig = penc->a ^ penc->b ^ penc->c ^ penc->d; } diff --git a/ceph/src/ceph-crash.in b/ceph/src/ceph-crash.in index 5bfb50c47..e5f08acb7 100755 --- a/ceph/src/ceph-crash.in +++ b/ceph/src/ceph-crash.in @@ -5,6 +5,7 @@ import argparse import logging import os +import socket import subprocess import sys import time @@ -12,6 +13,9 @@ import time logging.basicConfig(level=logging.INFO) log = logging.getLogger(__name__) +auth_names = ['client.crash.%s' % socket.gethostname(), + 'client.crash', + 'client.admin'] def parse_args(): parser = argparse.ArgumentParser() @@ -22,22 +26,31 @@ def parse_args(): '-d', '--delay', default=10.0, type=float, help='minutes to delay between scans (0 to exit after one)', ) + parser.add_argument( + '--name', '-n', + help='ceph name to authenticate as (default: try client.crash, client.admin)') return parser.parse_args() def post_crash(path): - pr = subprocess.Popen( - args=['timeout', '30', 'ceph', 'crash', 'post', '-i', '-'], - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - ) - f = open(os.path.join(path, 'meta'), 'r') - stdout, stderr = pr.communicate(input=f.read()) - rc = pr.wait() - f.close() - if rc != 0: - log.warning('post %s failed: %s' % (path, stderr)) + rc = 0 + for n in auth_names: + pr = subprocess.Popen( + args=['timeout', '30', 'ceph', + '-n', n, + 'crash', 'post', '-i', '-'], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + f = open(os.path.join(path, 'meta'), 'rb') + stdout, stderr = pr.communicate(input=f.read()) + rc = pr.wait() + f.close() + if rc != 0: + log.warning('post %s as %s failed: %s' % (path, n, stderr)) + if rc == 0: + break return rc @@ -66,6 +79,8 @@ def scrape_path(path): def main(): args = parse_args() postdir = os.path.join(args.path, 'posted') + if args.name: + auth_names = [args.name] while not os.path.isdir(postdir): log.error("directory %s does not exist; please create" % postdir) diff --git a/ceph/src/ceph-volume/ceph_volume/api/lvm.py b/ceph/src/ceph-volume/ceph_volume/api/lvm.py index cce8a302c..b6661522e 100644 --- a/ceph/src/ceph-volume/ceph_volume/api/lvm.py +++ 
b/ceph/src/ceph-volume/ceph_volume/api/lvm.py @@ -267,61 +267,11 @@ def dmsetup_splitname(dev): return _splitname_parser(out) -def is_lv(dev, lvs=None): - """ - Boolean to detect if a device is an LV or not. - """ - splitname = dmsetup_splitname(dev) - # Allowing to optionally pass `lvs` can help reduce repetitive checks for - # multiple devices at once. - lvs = lvs if lvs is not None else Volumes() - if splitname.get('LV_NAME'): - lvs.filter(lv_name=splitname['LV_NAME'], vg_name=splitname['VG_NAME']) - return len(lvs) > 0 - return False - - -def get_api_vgs(): - """ - Return the list of group volumes available in the system using flags to - include common metadata associated with them - - Command and sample delimited output should look like:: - - $ vgs --noheadings --units=g --readonly --separator=';' \ - -o vg_name,pv_count,lv_count,snap_count,vg_attr,vg_size,vg_free - ubuntubox-vg;1;2;0;wz--n-;299.52g;12.00m - osd_vg;3;1;0;wz--n-;29.21g;9.21g - - To normalize sizing, the units are forced in 'g' which is equivalent to - gigabytes, which uses multiples of 1024 (as opposed to 1000) - """ - fields = 'vg_name,pv_count,lv_count,snap_count,vg_attr,vg_size,vg_free,vg_free_count' - stdout, stderr, returncode = process.call( - ['vgs', '--noheadings', '--readonly', '--units=g', '--separator=";"', '-o', fields], - verbose_on_failure=False - ) - return _output_parser(stdout, fields) - - -def get_api_lvs(): - """ - Return the list of logical volumes available in the system using flags to include common - metadata associated with them - - Command and delimited output should look like:: - - $ lvs --noheadings --readonly --separator=';' -a -o lv_tags,lv_path,lv_name,vg_name - ;/dev/ubuntubox-vg/root;root;ubuntubox-vg - ;/dev/ubuntubox-vg/swap_1;swap_1;ubuntubox-vg - - """ - fields = 'lv_tags,lv_path,lv_name,vg_name,lv_uuid,lv_size' - stdout, stderr, returncode = process.call( - ['lvs', '--noheadings', '--readonly', '--separator=";"', '-a', '-o', fields], - verbose_on_failure=False - ) - return _output_parser(stdout, fields) +#################################### +# +# Code for LVM Physical Volumes +# +################################ def get_api_pvs(): @@ -348,51 +298,161 @@ def get_api_pvs(): return _output_parser(stdout, fields) -def get_lv_from_argument(argument): +class PVolume(object): """ - Helper proxy function that consumes a possible logical volume passed in from the CLI - in the form of `vg/lv`, but with some validation so that an argument that is a full - path to a device can be ignored + Represents a Physical Volume from LVM, with some top-level attributes like + ``pv_name`` and parsed tags as a dictionary of key/value pairs. """ - if argument.startswith('/'): - lv = get_lv(lv_path=argument) - return lv - try: - vg_name, lv_name = argument.split('/') - except (ValueError, AttributeError): - return None - return get_lv(lv_name=lv_name, vg_name=vg_name) + def __init__(self, **kw): + for k, v in kw.items(): + setattr(self, k, v) + self.pv_api = kw + self.name = kw['pv_name'] + self.tags = parse_tags(kw['pv_tags']) -def get_lv(lv_name=None, vg_name=None, lv_path=None, lv_uuid=None, lv_tags=None): - """ - Return a matching lv for the current system, requiring ``lv_name``, - ``vg_name``, ``lv_path`` or ``tags``. Raises an error if more than one lv - is found. 
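Both the ``PVolume`` class introduced here and the existing ``Volume`` class lean on ``parse_tags`` (defined earlier in this module, not shown in this hunk) to turn LVM's comma-separated tag string into a dictionary. As a rough, illustrative stand-in for that conversion (not the module's exact code)::

    def parse_tags_sketch(tag_string):
        # "ceph.osd_id=0,ceph.type=data" -> {'ceph.osd_id': '0', 'ceph.type': 'data'}
        if not tag_string:
            return {}
        tags = {}
        for pair in tag_string.split(','):
            key, _, value = pair.partition('=')
            tags[key] = value
        return tags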
+ def __str__(self): + return '<%s>' % self.pv_api['pv_name'] - It is useful to use ``tags`` when trying to find a specific logical volume, - but it can also lead to multiple lvs being found, since a lot of metadata - is shared between lvs of a distinct OSD. - """ - if not any([lv_name, vg_name, lv_path, lv_uuid, lv_tags]): - return None - lvs = Volumes() - return lvs.get( - lv_name=lv_name, vg_name=vg_name, lv_path=lv_path, lv_uuid=lv_uuid, - lv_tags=lv_tags - ) + def __repr__(self): + return self.__str__() + + def set_tags(self, tags): + """ + :param tags: A dictionary of tag names and values, like:: + + { + "ceph.osd_fsid": "aaa-fff-bbbb", + "ceph.osd_id": "0" + } + + At the end of all modifications, the tags are refreshed to reflect + LVM's most current view. + """ + for k, v in tags.items(): + self.set_tag(k, v) + # after setting all the tags, refresh them for the current object, use the + # pv_* identifiers to filter because those shouldn't change + pv_object = get_pv(pv_name=self.pv_name, pv_uuid=self.pv_uuid) + self.tags = pv_object.tags + + def set_tag(self, key, value): + """ + Set the key/value pair as an LVM tag. Does not "refresh" the values of + the current object for its tags. Meant to be a "fire and forget" type + of modification. + + **warning**: Altering tags on a PV has to be done ensuring that the + device is actually the one intended. ``pv_name`` is *not* a persistent + value, only ``pv_uuid`` is. Using ``pv_uuid`` is the best way to make + sure the device getting changed is the one needed. + """ + # remove it first if it exists + if self.tags.get(key): + current_value = self.tags[key] + tag = "%s=%s" % (key, current_value) + process.call(['pvchange', '--deltag', tag, self.pv_name]) + + process.call( + [ + 'pvchange', + '--addtag', '%s=%s' % (key, value), self.pv_name + ] + ) -def get_pv(pv_name=None, pv_uuid=None, pv_tags=None): +class PVolumes(list): """ - Return a matching pv (physical volume) for the current system, requiring - ``pv_name``, ``pv_uuid``, or ``pv_tags``. Raises an error if more than one - pv is found. + A list of all known (physical) volumes for the current system, with the ability + to filter them via keyword arguments. """ - if not any([pv_name, pv_uuid, pv_tags]): - return None - pvs = PVolumes() - return pvs.get(pv_name=pv_name, pv_uuid=pv_uuid, pv_tags=pv_tags) + + def __init__(self, populate=True): + if populate: + self._populate() + + def _populate(self): + # get all the pvs in the current system + for pv_item in get_api_pvs(): + self.append(PVolume(**pv_item)) + + def _purge(self): + """ + Deplete all the items in the list, used internally only so that we can + dynamically allocate the items when filtering without the concern of + messing up the contents + """ + self[:] = [] + + def _filter(self, pv_name=None, pv_uuid=None, pv_tags=None): + """ + The actual method that filters using a new list. Useful so that other + methods that do not want to alter the contents of the list (e.g. + ``self.find``) can operate safely. 
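``PVolume.set_tag`` shells out to ``pvchange`` twice when the key is already present: once with ``--deltag`` for the old key/value pair, then with ``--addtag`` for the new one, and the docstring's warning about ``pv_name`` not being persistent is why lookups should go through ``pv_uuid``. A usage sketch (the UUID and the resulting commands are illustrative only)::

    pv = get_pv(pv_uuid='0000')       # prefer the stable pv_uuid over pv_name
    pv.set_tag('ceph.osd_id', '3')
    # roughly what runs underneath when the tag already exists:
    #   pvchange --deltag ceph.osd_id=<old value> <pv_name>
    #   pvchange --addtag ceph.osd_id=3 <pv_name>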
+ """ + filtered = [i for i in self] + if pv_name: + filtered = [i for i in filtered if i.pv_name == pv_name] + + if pv_uuid: + filtered = [i for i in filtered if i.pv_uuid == pv_uuid] + + # at this point, `filtered` has either all the physical volumes in self + # or is an actual filtered list if any filters were applied + if pv_tags: + tag_filtered = [] + for pvolume in filtered: + matches = all(pvolume.tags.get(k) == str(v) for k, v in pv_tags.items()) + if matches: + tag_filtered.append(pvolume) + # return the tag_filtered pvolumes here, the `filtered` list is no + # longer usable + return tag_filtered + + return filtered + + def filter(self, pv_name=None, pv_uuid=None, pv_tags=None): + """ + Filter out volumes on top level attributes like ``pv_name`` or by + ``pv_tags`` where a dict is required. For example, to find a physical + volume that has an OSD ID of 0, the filter would look like:: + + pv_tags={'ceph.osd_id': '0'} + + """ + if not any([pv_name, pv_uuid, pv_tags]): + raise TypeError('.filter() requires pv_name, pv_uuid, or pv_tags' + '(none given)') + + filtered_pvs = PVolumes(populate=False) + filtered_pvs.extend(self._filter(pv_name, pv_uuid, pv_tags)) + return filtered_pvs + + def get(self, pv_name=None, pv_uuid=None, pv_tags=None): + """ + This is a bit expensive, since it will try to filter out all the + matching items in the list, filter them out applying anything that was + added and return the matching item. + + This method does *not* alter the list, and it will raise an error if + multiple pvs are matched + + It is useful to use ``tags`` when trying to find a specific logical volume, + but it can also lead to multiple pvs being found, since a lot of metadata + is shared between pvs of a distinct OSD. + """ + if not any([pv_name, pv_uuid, pv_tags]): + return None + pvs = self._filter( + pv_name=pv_name, + pv_uuid=pv_uuid, + pv_tags=pv_tags + ) + if not pvs: + return None + if len(pvs) > 1 and pv_tags: + raise MultiplePVsError(pv_name) + return pvs[0] def create_pv(device): @@ -409,306 +469,195 @@ def create_pv(device): ]) -def create_vg(devices, name=None, name_prefix=None): +def remove_pv(pv_name): """ - Create a Volume Group. Command looks like:: + Removes a physical volume using a double `-f` to prevent prompts and fully + remove anything related to LVM. This is tremendously destructive, but so is all other actions + when zapping a device. - vgcreate --force --yes group_name device + In the case where multiple PVs are found, it will ignore that fact and + continue with the removal, specifically in the case of messages like:: - Once created the volume group is returned as a ``VolumeGroup`` object + WARNING: PV $UUID /dev/DEV-1 was already found on /dev/DEV-2 - :param devices: A list of devices to create a VG. Optionally, a single - device (as a string) can be used. - :param name: Optionally set the name of the VG, defaults to 'ceph-{uuid}' - :param name_prefix: Optionally prefix the name of the VG, which will get combined - with a UUID string + These situations can be avoided with custom filtering rules, which this API + cannot handle while accommodating custom user filters. 
""" - if isinstance(devices, set): - devices = list(devices) - if not isinstance(devices, list): - devices = [devices] - if name_prefix: - name = "%s-%s" % (name_prefix, str(uuid.uuid4())) - elif name is None: - name = "ceph-%s" % str(uuid.uuid4()) - process.run([ - 'vgcreate', - '-s', - '1G', - '--force', - '--yes', - name] + devices - ) - - vg = get_vg(vg_name=name) - return vg + fail_msg = "Unable to remove vg %s" % pv_name + process.run( + [ + 'pvremove', + '-v', # verbose + '-f', # force it + '-f', # force it + pv_name + ], + fail_msg=fail_msg, + ) -def extend_vg(vg, devices): +def get_pv(pv_name=None, pv_uuid=None, pv_tags=None, pvs=None): """ - Extend a Volume Group. Command looks like:: - - vgextend --force --yes group_name [device, ...] + Return a matching pv (physical volume) for the current system, requiring + ``pv_name``, ``pv_uuid``, or ``pv_tags``. Raises an error if more than one + pv is found. + """ + if not any([pv_name, pv_uuid, pv_tags]): + return None + if pvs is None or len(pvs) == 0: + pvs = PVolumes() - Once created the volume group is extended and returned as a ``VolumeGroup`` object + return pvs.get(pv_name=pv_name, pv_uuid=pv_uuid, pv_tags=pv_tags) - :param vg: A VolumeGroup object - :param devices: A list of devices to extend the VG. Optionally, a single - device (as a string) can be used. - """ - if not isinstance(devices, list): - devices = [devices] - process.run([ - 'vgextend', - '--force', - '--yes', - vg.name] + devices - ) - vg = get_vg(vg_name=vg.name) - return vg +################################ +# +# Code for LVM Volume Groups +# +############################# -def reduce_vg(vg, devices): +def get_api_vgs(): """ - Reduce a Volume Group. Command looks like:: + Return the list of group volumes available in the system using flags to + include common metadata associated with them - vgreduce --force --yes group_name [device, ...] + Command and sample delimited output should look like:: - :param vg: A VolumeGroup object - :param devices: A list of devices to remove from the VG. Optionally, a - single device (as a string) can be used. + $ vgs --noheadings --units=g --readonly --separator=';' \ + -o vg_name,pv_count,lv_count,snap_count,vg_attr,vg_size,vg_free + ubuntubox-vg;1;2;0;wz--n-;299.52g;12.00m + osd_vg;3;1;0;wz--n-;29.21g;9.21g + + To normalize sizing, the units are forced in 'g' which is equivalent to + gigabytes, which uses multiples of 1024 (as opposed to 1000) """ - if not isinstance(devices, list): - devices = [devices] - process.run([ - 'vgreduce', - '--force', - '--yes', - vg.name] + devices + fields = 'vg_name,pv_count,lv_count,snap_count,vg_attr,vg_size,vg_free,vg_free_count' + stdout, stderr, returncode = process.call( + ['vgs', '--noheadings', '--readonly', '--units=g', '--separator=";"', '-o', fields], + verbose_on_failure=False ) - - vg = get_vg(vg_name=vg.name) - return vg + return _output_parser(stdout, fields) -def remove_vg(vg_name): +class VolumeGroup(object): """ - Removes a volume group. 
+ Represents an LVM group, with some top-level attributes like ``vg_name`` """ - if not vg_name: - logger.warning('Skipping removal of invalid VG name: "%s"', vg_name) - return - fail_msg = "Unable to remove vg %s" % vg_name - process.run( - [ - 'vgremove', - '-v', # verbose - '-f', # force it - vg_name - ], - fail_msg=fail_msg, - ) + def __init__(self, **kw): + for k, v in kw.items(): + setattr(self, k, v) + self.name = kw['vg_name'] + self.tags = parse_tags(kw.get('vg_tags', '')) -def remove_pv(pv_name): - """ - Removes a physical volume using a double `-f` to prevent prompts and fully - remove anything related to LVM. This is tremendously destructive, but so is all other actions - when zapping a device. + def __str__(self): + return '<%s>' % self.name - In the case where multiple PVs are found, it will ignore that fact and - continue with the removal, specifically in the case of messages like:: + def __repr__(self): + return self.__str__() - WARNING: PV $UUID /dev/DEV-1 was already found on /dev/DEV-2 + def _parse_size(self, size): + error_msg = "Unable to convert vg size to integer: '%s'" % str(size) + try: + integer, _ = size.split('g') + except ValueError: + logger.exception(error_msg) + raise RuntimeError(error_msg) - These situations can be avoided with custom filtering rules, which this API - cannot handle while accommodating custom user filters. - """ - fail_msg = "Unable to remove vg %s" % pv_name - process.run( - [ - 'pvremove', - '-v', # verbose - '-f', # force it - '-f', # force it - pv_name - ], - fail_msg=fail_msg, - ) + return util.str_to_int(integer) + @property + def free(self): + """ + Parse the available size in gigabytes from the ``vg_free`` attribute, that + will be a string with a character ('g') to indicate gigabytes in size. + Returns a rounded down integer to ease internal operations:: -def remove_lv(lv): - """ - Removes a logical volume given it's absolute path. + >>> data_vg.vg_free + '0.01g' + >>> data_vg.size + 0 + """ + return self._parse_size(self.vg_free) - Will return True if the lv is successfully removed or - raises a RuntimeError if the removal fails. + @property + def size(self): + """ + Parse the size in gigabytes from the ``vg_size`` attribute, that + will be a string with a character ('g') to indicate gigabytes in size. + Returns a rounded down integer to ease internal operations:: - :param lv: A ``Volume`` object or the path for an LV - """ - if isinstance(lv, Volume): - path = lv.lv_path - else: - path = lv + >>> data_vg.vg_size + '1024.9g' + >>> data_vg.size + 1024 + """ + return self._parse_size(self.vg_size) - stdout, stderr, returncode = process.call( - [ - 'lvremove', - '-v', # verbose - '-f', # force it - path - ], - show_command=True, - terminal_verbose=True, - ) - if returncode != 0: - raise RuntimeError("Unable to remove %s" % path) - return True + def sizing(self, parts=None, size=None): + """ + Calculate proper sizing to fully utilize the volume group in the most + efficient way possible. To prevent situations where LVM might accept + a percentage that is beyond the vg's capabilities, it will refuse with + an error when requesting a larger-than-possible parameter, in addition + to rounding down calculations. + A dictionary with different sizing parameters is returned, to make it + easier for others to choose what they need in order to create logical + volumes:: -def create_lv(name, group, extents=None, size=None, tags=None, uuid_name=False, pv=None): - """ - Create a Logical Volume in a Volume Group. 
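``VolumeGroup.sizing()`` above converts a requested size (or a number of parts) into LVM extents by scaling ``vg_free_count`` with the ratio of requested gigabytes to free gigabytes; the percentages come from the module-level ``sizing()`` helper. A worked sketch of the extent arithmetic, with invented numbers::

    # say vg_free rounds down to 1024 (GB) and vg_free_count is 262144 extents
    free_gb, free_extents = 1024, 262144
    size = 512                                    # ask for 512 GB slices
    extents = int(size * free_extents / free_gb)  # 131072 extents per LV
    parts = free_gb // size                       # 2 LVs, 50% of the VG each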
Command looks like:: + >>> data_vg.free + 1024 + >>> data_vg.sizing(parts=4) + {'parts': 4, 'sizes': 256, 'percentages': 25} + >>> data_vg.sizing(size=512) + {'parts': 2, 'sizes': 512, 'percentages': 50} - lvcreate -L 50G -n gfslv vg0 - ``name``, ``group``, are required. If ``size`` is provided it must follow - lvm's size notation (like 1G, or 20M). Tags are an optional dictionary and is expected to - conform to the convention of prefixing them with "ceph." like:: + :param parts: Number of parts to create LVs from + :param size: Size in gigabytes to divide the VG into - {"ceph.block_device": "/dev/ceph/osd-1"} + :raises SizeAllocationError: When requested size cannot be allocated with + :raises ValueError: If both ``parts`` and ``size`` are given + """ + if parts is not None and size is not None: + raise ValueError( + "Cannot process sizing with both parts (%s) and size (%s)" % (parts, size) + ) - :param uuid_name: Optionally combine the ``name`` with UUID to ensure uniqueness + # if size is given we need to map that to extents so that we avoid + # issues when trying to get this right with a size in gigabytes find + # the percentage first, cheating, because these values are thrown out + vg_free_count = util.str_to_int(self.vg_free_count) + + if size: + extents = int(size * vg_free_count / self.free) + disk_sizing = sizing(self.free, size=size, parts=parts) + else: + if parts is not None: + # Prevent parts being 0, falling back to 1 (100% usage) + parts = parts or 1 + size = int(self.free / parts) + extents = size * vg_free_count / self.free + disk_sizing = sizing(self.free, parts=parts) + + extent_sizing = sizing(vg_free_count, size=extents) + + disk_sizing['extents'] = int(extents) + disk_sizing['percentages'] = extent_sizing['percentages'] + return disk_sizing + + +class VolumeGroups(list): + """ + A list of all known volume groups for the current system, with the ability + to filter them via keyword arguments. """ - if uuid_name: - name = '%s-%s' % (name, uuid.uuid4()) - if tags is None: - tags = { - "ceph.osd_id": "null", - "ceph.type": "null", - "ceph.cluster_fsid": "null", - "ceph.osd_fsid": "null", - } - # XXX add CEPH_VOLUME_LVM_DEBUG to enable -vvvv on lv operations - type_path_tag = { - 'journal': 'ceph.journal_device', - 'data': 'ceph.data_device', - 'block': 'ceph.block_device', - 'wal': 'ceph.wal_device', - 'db': 'ceph.db_device', - 'lockbox': 'ceph.lockbox_device', # XXX might not ever need this lockbox sorcery - } - if size: - command = [ - 'lvcreate', - '--yes', - '-L', - '%s' % size, - '-n', name, group - ] - elif extents: - command = [ - 'lvcreate', - '--yes', - '-l', - '%s' % extents, - '-n', name, group - ] - # create the lv with all the space available, this is needed because the - # system call is different for LVM - else: - command = [ - 'lvcreate', - '--yes', - '-l', - '100%FREE', - '-n', name, group - ] - if pv: - command.append(pv) - process.run(command) - - lv = get_lv(lv_name=name, vg_name=group) - lv.set_tags(tags) - - # when creating a distinct type, the caller doesn't know what the path will - # be so this function will set it after creation using the mapping - path_tag = type_path_tag.get(tags.get('ceph.type')) - if path_tag: - lv.set_tags( - {path_tag: lv.lv_path} - ) - return lv - - -def create_lvs(volume_group, parts=None, size=None, name_prefix='ceph-lv'): - """ - Create multiple Logical Volumes from a Volume Group by calculating the - proper extents from ``parts`` or ``size``. 
A custom prefix can be used - (defaults to ``ceph-lv``), these names are always suffixed with a uuid. - - LV creation in ceph-volume will require tags, this is expected to be - pre-computed by callers who know Ceph metadata like OSD IDs and FSIDs. It - will probably not be the case when mass-creating LVs, so common/default - tags will be set to ``"null"``. - - .. note:: LVs that are not in use can be detected by querying LVM for tags that are - set to ``"null"``. - - :param volume_group: The volume group (vg) to use for LV creation - :type group: ``VolumeGroup()`` object - :param parts: Number of LVs to create *instead of* ``size``. - :type parts: int - :param size: Size (in gigabytes) of LVs to create, e.g. "as many 10gb LVs as possible" - :type size: int - :param extents: The number of LVM extents to use to create the LV. Useful if looking to have - accurate LV sizes (LVM rounds sizes otherwise) - """ - if parts is None and size is None: - # fallback to just one part (using 100% of the vg) - parts = 1 - lvs = [] - tags = { - "ceph.osd_id": "null", - "ceph.type": "null", - "ceph.cluster_fsid": "null", - "ceph.osd_fsid": "null", - } - sizing = volume_group.sizing(parts=parts, size=size) - for part in range(0, sizing['parts']): - size = sizing['sizes'] - extents = sizing['extents'] - lv_name = '%s-%s' % (name_prefix, uuid.uuid4()) - lvs.append( - create_lv(lv_name, volume_group.name, extents=extents, tags=tags) - ) - return lvs - - -def get_vg(vg_name=None, vg_tags=None): - """ - Return a matching vg for the current system, requires ``vg_name`` or - ``tags``. Raises an error if more than one vg is found. - - It is useful to use ``tags`` when trying to find a specific volume group, - but it can also lead to multiple vgs being found. - """ - if not any([vg_name, vg_tags]): - return None - vgs = VolumeGroups() - return vgs.get(vg_name=vg_name, vg_tags=vg_tags) - - -class VolumeGroups(list): - """ - A list of all known volume groups for the current system, with the ability - to filter them via keyword arguments. - """ - - def __init__(self): - self._populate() + def __init__(self, populate=True): + if populate: + self._populate() def _populate(self): # get all the vgs in the current system @@ -760,15 +709,10 @@ class VolumeGroups(list): """ if not any([vg_name, vg_tags]): raise TypeError('.filter() requires vg_name or vg_tags (none given)') - # first find the filtered volumes with the values in self - filtered_groups = self._filter( - vg_name=vg_name, - vg_tags=vg_tags - ) - # then purge everything - self._purge() - # and add the filtered items - self.extend(filtered_groups) + + filtered_vgs = VolumeGroups(populate=False) + filtered_vgs.extend(self._filter(vg_name, vg_tags)) + return filtered_vgs def get(self, vg_name=None, vg_tags=None): """ @@ -797,318 +741,149 @@ class VolumeGroups(list): return vgs[0] -class Volumes(list): - """ - A list of all known (logical) volumes for the current system, with the ability - to filter them via keyword arguments. +def create_vg(devices, name=None, name_prefix=None): """ + Create a Volume Group. 
Command looks like:: - def __init__(self): - self._populate() + vgcreate --force --yes group_name device - def _populate(self): - # get all the lvs in the current system - for lv_item in get_api_lvs(): - self.append(Volume(**lv_item)) + Once created the volume group is returned as a ``VolumeGroup`` object - def _purge(self): - """ - Delete all the items in the list, used internally only so that we can - dynamically allocate the items when filtering without the concern of - messing up the contents - """ - self[:] = [] + :param devices: A list of devices to create a VG. Optionally, a single + device (as a string) can be used. + :param name: Optionally set the name of the VG, defaults to 'ceph-{uuid}' + :param name_prefix: Optionally prefix the name of the VG, which will get combined + with a UUID string + """ + if isinstance(devices, set): + devices = list(devices) + if not isinstance(devices, list): + devices = [devices] + if name_prefix: + name = "%s-%s" % (name_prefix, str(uuid.uuid4())) + elif name is None: + name = "ceph-%s" % str(uuid.uuid4()) + process.run([ + 'vgcreate', + '-s', + '1G', + '--force', + '--yes', + name] + devices + ) - def _filter(self, lv_name=None, vg_name=None, lv_path=None, lv_uuid=None, lv_tags=None): - """ - The actual method that filters using a new list. Useful so that other - methods that do not want to alter the contents of the list (e.g. - ``self.find``) can operate safely. - """ - filtered = [i for i in self] - if lv_name: - filtered = [i for i in filtered if i.lv_name == lv_name] + vg = get_vg(vg_name=name) + return vg - if vg_name: - filtered = [i for i in filtered if i.vg_name == vg_name] - if lv_uuid: - filtered = [i for i in filtered if i.lv_uuid == lv_uuid] +def extend_vg(vg, devices): + """ + Extend a Volume Group. Command looks like:: - if lv_path: - filtered = [i for i in filtered if i.lv_path == lv_path] + vgextend --force --yes group_name [device, ...] - # at this point, `filtered` has either all the volumes in self or is an - # actual filtered list if any filters were applied - if lv_tags: - tag_filtered = [] - for volume in filtered: - # all the tags we got need to match on the volume - matches = all(volume.tags.get(k) == str(v) for k, v in lv_tags.items()) - if matches: - tag_filtered.append(volume) - return tag_filtered + Once created the volume group is extended and returned as a ``VolumeGroup`` object - return filtered + :param vg: A VolumeGroup object + :param devices: A list of devices to extend the VG. Optionally, a single + device (as a string) can be used. + """ + if not isinstance(devices, list): + devices = [devices] + process.run([ + 'vgextend', + '--force', + '--yes', + vg.name] + devices + ) - def filter(self, lv_name=None, vg_name=None, lv_path=None, lv_uuid=None, lv_tags=None): - """ - Filter out volumes on top level attributes like ``lv_name`` or by - ``lv_tags`` where a dict is required. 
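``create_vg`` accepts either a single device string or a list, and generates a ``ceph-{uuid}`` name (or ``{prefix}-{uuid}``) when no explicit name is given; ``extend_vg`` and ``reduce_vg`` follow the same single-or-list convention. A hedged usage sketch with illustrative device paths::

    vg = create_vg(['/dev/sdb', '/dev/sdc'], name_prefix='ceph-block')
    vg = extend_vg(vg, '/dev/sdd')      # a single device is wrapped in a list
    print(vg.name, vg.free)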
For example, to find a volume - that has an OSD ID of 0, the filter would look like:: + vg = get_vg(vg_name=vg.name) + return vg - lv_tags={'ceph.osd_id': '0'} - """ - if not any([lv_name, vg_name, lv_path, lv_uuid, lv_tags]): - raise TypeError('.filter() requires lv_name, vg_name, lv_path, lv_uuid, or tags (none given)') - # first find the filtered volumes with the values in self - filtered_volumes = self._filter( - lv_name=lv_name, - vg_name=vg_name, - lv_path=lv_path, - lv_uuid=lv_uuid, - lv_tags=lv_tags - ) - # then purge everything - self._purge() - # and add the filtered items - self.extend(filtered_volumes) +def reduce_vg(vg, devices): + """ + Reduce a Volume Group. Command looks like:: - def get(self, lv_name=None, vg_name=None, lv_path=None, lv_uuid=None, lv_tags=None): - """ - This is a bit expensive, since it will try to filter out all the - matching items in the list, filter them out applying anything that was - added and return the matching item. + vgreduce --force --yes group_name [device, ...] - This method does *not* alter the list, and it will raise an error if - multiple LVs are matched + :param vg: A VolumeGroup object + :param devices: A list of devices to remove from the VG. Optionally, a + single device (as a string) can be used. + """ + if not isinstance(devices, list): + devices = [devices] + process.run([ + 'vgreduce', + '--force', + '--yes', + vg.name] + devices + ) - It is useful to use ``tags`` when trying to find a specific logical volume, - but it can also lead to multiple lvs being found, since a lot of metadata - is shared between lvs of a distinct OSD. - """ - if not any([lv_name, vg_name, lv_path, lv_uuid, lv_tags]): - return None - lvs = self._filter( - lv_name=lv_name, - vg_name=vg_name, - lv_path=lv_path, - lv_uuid=lv_uuid, - lv_tags=lv_tags - ) - if not lvs: - return None - if len(lvs) > 1: - raise MultipleLVsError(lv_name, lv_path) - return lvs[0] + vg = get_vg(vg_name=vg.name) + return vg -class PVolumes(list): +def remove_vg(vg_name): """ - A list of all known (physical) volumes for the current system, with the ability - to filter them via keyword arguments. + Removes a volume group. """ - - def __init__(self): - self._populate() - - def _populate(self): - # get all the pvs in the current system - for pv_item in get_api_pvs(): - self.append(PVolume(**pv_item)) - - def _purge(self): - """ - Deplete all the items in the list, used internally only so that we can - dynamically allocate the items when filtering without the concern of - messing up the contents - """ - self[:] = [] - - def _filter(self, pv_name=None, pv_uuid=None, pv_tags=None): - """ - The actual method that filters using a new list. Useful so that other - methods that do not want to alter the contents of the list (e.g. - ``self.find``) can operate safely. 
- """ - filtered = [i for i in self] - if pv_name: - filtered = [i for i in filtered if i.pv_name == pv_name] - - if pv_uuid: - filtered = [i for i in filtered if i.pv_uuid == pv_uuid] - - # at this point, `filtered` has either all the physical volumes in self - # or is an actual filtered list if any filters were applied - if pv_tags: - tag_filtered = [] - for pvolume in filtered: - matches = all(pvolume.tags.get(k) == str(v) for k, v in pv_tags.items()) - if matches: - tag_filtered.append(pvolume) - # return the tag_filtered pvolumes here, the `filtered` list is no - # longer usable - return tag_filtered - - return filtered - - def filter(self, pv_name=None, pv_uuid=None, pv_tags=None): - """ - Filter out volumes on top level attributes like ``pv_name`` or by - ``pv_tags`` where a dict is required. For example, to find a physical volume - that has an OSD ID of 0, the filter would look like:: - - pv_tags={'ceph.osd_id': '0'} - - """ - if not any([pv_name, pv_uuid, pv_tags]): - raise TypeError('.filter() requires pv_name, pv_uuid, or pv_tags (none given)') - # first find the filtered volumes with the values in self - filtered_volumes = self._filter( - pv_name=pv_name, - pv_uuid=pv_uuid, - pv_tags=pv_tags - ) - # then purge everything - self._purge() - # and add the filtered items - self.extend(filtered_volumes) - - def get(self, pv_name=None, pv_uuid=None, pv_tags=None): - """ - This is a bit expensive, since it will try to filter out all the - matching items in the list, filter them out applying anything that was - added and return the matching item. - - This method does *not* alter the list, and it will raise an error if - multiple pvs are matched - - It is useful to use ``tags`` when trying to find a specific logical volume, - but it can also lead to multiple pvs being found, since a lot of metadata - is shared between pvs of a distinct OSD. - """ - if not any([pv_name, pv_uuid, pv_tags]): - return None - pvs = self._filter( - pv_name=pv_name, - pv_uuid=pv_uuid, - pv_tags=pv_tags - ) - if not pvs: - return None - if len(pvs) > 1 and pv_tags: - raise MultiplePVsError(pv_name) - return pvs[0] + if not vg_name: + logger.warning('Skipping removal of invalid VG name: "%s"', vg_name) + return + fail_msg = "Unable to remove vg %s" % vg_name + process.run( + [ + 'vgremove', + '-v', # verbose + '-f', # force it + vg_name + ], + fail_msg=fail_msg, + ) -class VolumeGroup(object): +def get_vg(vg_name=None, vg_tags=None, vgs=None): """ - Represents an LVM group, with some top-level attributes like ``vg_name`` - """ - - def __init__(self, **kw): - for k, v in kw.items(): - setattr(self, k, v) - self.name = kw['vg_name'] - self.tags = parse_tags(kw.get('vg_tags', '')) - - def __str__(self): - return '<%s>' % self.name - - def __repr__(self): - return self.__str__() - - def _parse_size(self, size): - error_msg = "Unable to convert vg size to integer: '%s'" % str(size) - try: - integer, _ = size.split('g') - except ValueError: - logger.exception(error_msg) - raise RuntimeError(error_msg) - - return util.str_to_int(integer) - - @property - def free(self): - """ - Parse the available size in gigabytes from the ``vg_free`` attribute, that - will be a string with a character ('g') to indicate gigabytes in size. 
- Returns a rounded down integer to ease internal operations:: - - >>> data_vg.vg_free - '0.01g' - >>> data_vg.size - 0 - """ - return self._parse_size(self.vg_free) - - @property - def size(self): - """ - Parse the size in gigabytes from the ``vg_size`` attribute, that - will be a string with a character ('g') to indicate gigabytes in size. - Returns a rounded down integer to ease internal operations:: - - >>> data_vg.vg_size - '1024.9g' - >>> data_vg.size - 1024 - """ - return self._parse_size(self.vg_size) - - def sizing(self, parts=None, size=None): - """ - Calculate proper sizing to fully utilize the volume group in the most - efficient way possible. To prevent situations where LVM might accept - a percentage that is beyond the vg's capabilities, it will refuse with - an error when requesting a larger-than-possible parameter, in addition - to rounding down calculations. + Return a matching vg for the current system, requires ``vg_name`` or + ``tags``. Raises an error if more than one vg is found. - A dictionary with different sizing parameters is returned, to make it - easier for others to choose what they need in order to create logical - volumes:: + It is useful to use ``tags`` when trying to find a specific volume group, + but it can also lead to multiple vgs being found. + """ + if not any([vg_name, vg_tags]): + return None + if vgs is None or len(vgs) == 0: + vgs = VolumeGroups() - >>> data_vg.free - 1024 - >>> data_vg.sizing(parts=4) - {'parts': 4, 'sizes': 256, 'percentages': 25} - >>> data_vg.sizing(size=512) - {'parts': 2, 'sizes': 512, 'percentages': 50} + return vgs.get(vg_name=vg_name, vg_tags=vg_tags) - :param parts: Number of parts to create LVs from - :param size: Size in gigabytes to divide the VG into +################################# +# +# Code for LVM Logical Volumes +# +############################### - :raises SizeAllocationError: When requested size cannot be allocated with - :raises ValueError: If both ``parts`` and ``size`` are given - """ - if parts is not None and size is not None: - raise ValueError( - "Cannot process sizing with both parts (%s) and size (%s)" % (parts, size) - ) - # if size is given we need to map that to extents so that we avoid - # issues when trying to get this right with a size in gigabytes find - # the percentage first, cheating, because these values are thrown out - vg_free_count = util.str_to_int(self.vg_free_count) +def get_api_lvs(): + """ + Return the list of logical volumes available in the system using flags to include common + metadata associated with them - if size: - extents = int(size * vg_free_count / self.free) - disk_sizing = sizing(self.free, size=size, parts=parts) - else: - if parts is not None: - # Prevent parts being 0, falling back to 1 (100% usage) - parts = parts or 1 - size = int(self.free / parts) - extents = size * vg_free_count / self.free - disk_sizing = sizing(self.free, parts=parts) + Command and delimited output should look like:: - extent_sizing = sizing(vg_free_count, size=extents) + $ lvs --noheadings --readonly --separator=';' -a -o lv_tags,lv_path,lv_name,vg_name + ;/dev/ubuntubox-vg/root;root;ubuntubox-vg + ;/dev/ubuntubox-vg/swap_1;swap_1;ubuntubox-vg - disk_sizing['extents'] = int(extents) - disk_sizing['percentages'] = extent_sizing['percentages'] - return disk_sizing + """ + fields = 'lv_tags,lv_path,lv_name,vg_name,lv_uuid,lv_size' + stdout, stderr, returncode = process.call( + ['lvs', '--noheadings', '--readonly', '--separator=";"', '-a', '-o', fields], + verbose_on_failure=False + ) + return 
_output_parser(stdout, fields) class Volume(object): @@ -1209,64 +984,309 @@ class Volume(object): self.tags[key] = value -class PVolume(object): +class Volumes(list): """ - Represents a Physical Volume from LVM, with some top-level attributes like - ``pv_name`` and parsed tags as a dictionary of key/value pairs. + A list of all known (logical) volumes for the current system, with the ability + to filter them via keyword arguments. """ - def __init__(self, **kw): - for k, v in kw.items(): - setattr(self, k, v) - self.pv_api = kw - self.name = kw['pv_name'] - self.tags = parse_tags(kw['pv_tags']) - - def __str__(self): - return '<%s>' % self.pv_api['pv_name'] + def __init__(self): + self._populate() - def __repr__(self): - return self.__str__() + def _populate(self): + # get all the lvs in the current system + for lv_item in get_api_lvs(): + self.append(Volume(**lv_item)) - def set_tags(self, tags): + def _purge(self): """ - :param tags: A dictionary of tag names and values, like:: + Delete all the items in the list, used internally only so that we can + dynamically allocate the items when filtering without the concern of + messing up the contents + """ + self[:] = [] - { - "ceph.osd_fsid": "aaa-fff-bbbb", - "ceph.osd_id": "0" - } + def _filter(self, lv_name=None, vg_name=None, lv_path=None, lv_uuid=None, lv_tags=None): + """ + The actual method that filters using a new list. Useful so that other + methods that do not want to alter the contents of the list (e.g. + ``self.find``) can operate safely. + """ + filtered = [i for i in self] + if lv_name: + filtered = [i for i in filtered if i.lv_name == lv_name] - At the end of all modifications, the tags are refreshed to reflect - LVM's most current view. + if vg_name: + filtered = [i for i in filtered if i.vg_name == vg_name] + + if lv_uuid: + filtered = [i for i in filtered if i.lv_uuid == lv_uuid] + + if lv_path: + filtered = [i for i in filtered if i.lv_path == lv_path] + + # at this point, `filtered` has either all the volumes in self or is an + # actual filtered list if any filters were applied + if lv_tags: + tag_filtered = [] + for volume in filtered: + # all the tags we got need to match on the volume + matches = all(volume.tags.get(k) == str(v) for k, v in lv_tags.items()) + if matches: + tag_filtered.append(volume) + return tag_filtered + + return filtered + + def filter(self, lv_name=None, vg_name=None, lv_path=None, lv_uuid=None, lv_tags=None): """ - for k, v in tags.items(): - self.set_tag(k, v) - # after setting all the tags, refresh them for the current object, use the - # pv_* identifiers to filter because those shouldn't change - pv_object = get_pv(pv_name=self.pv_name, pv_uuid=self.pv_uuid) - self.tags = pv_object.tags + Filter out volumes on top level attributes like ``lv_name`` or by + ``lv_tags`` where a dict is required. For example, to find a volume + that has an OSD ID of 0, the filter would look like:: + + lv_tags={'ceph.osd_id': '0'} - def set_tag(self, key, value): """ - Set the key/value pair as an LVM tag. Does not "refresh" the values of - the current object for its tags. Meant to be a "fire and forget" type - of modification. 
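The listing changes later in this patch thread one ``Volumes()`` scan through ``get_lv(lv_uuid=..., lvs=lvs)`` instead of building a new list for every device, matching the ``pvs=`` and ``vgs=`` parameters above. A sketch of that caching pattern in isolation (UUIDs invented)::

    lvs = Volumes()                                # a single 'lvs' scan
    for device_uuid in ('aaaa-1111', 'bbbb-2222'):
        lv = get_lv(lv_uuid=device_uuid, lvs=lvs)  # reuses the cached list
        if lv is None:
            continue                               # not an LV; fall back to blkid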
+ if not any([lv_name, vg_name, lv_path, lv_uuid, lv_tags]): + raise TypeError('.filter() requires lv_name, vg_name, lv_path, lv_uuid, or tags (none given)') + # first find the filtered volumes with the values in self + filtered_volumes = self._filter( + lv_name=lv_name, + vg_name=vg_name, + lv_path=lv_path, + lv_uuid=lv_uuid, + lv_tags=lv_tags + ) + # then purge everything + self._purge() + # and add the filtered items + self.extend(filtered_volumes) - **warning**: Altering tags on a PV has to be done ensuring that the - device is actually the one intended. ``pv_name`` is *not* a persistent - value, only ``pv_uuid`` is. Using ``pv_uuid`` is the best way to make - sure the device getting changed is the one needed. + def get(self, lv_name=None, vg_name=None, lv_path=None, lv_uuid=None, lv_tags=None): """ - # remove it first if it exists - if self.tags.get(key): - current_value = self.tags[key] - tag = "%s=%s" % (key, current_value) - process.call(['pvchange', '--deltag', tag, self.pv_name]) + This is a bit expensive, since it will try to filter out all the + matching items in the list, filter them out applying anything that was + added and return the matching item. - process.call( - [ - 'pvchange', - '--addtag', '%s=%s' % (key, value), self.pv_name - ] + This method does *not* alter the list, and it will raise an error if + multiple LVs are matched + + It is useful to use ``tags`` when trying to find a specific logical volume, + but it can also lead to multiple lvs being found, since a lot of metadata + is shared between lvs of a distinct OSD. + """ + if not any([lv_name, vg_name, lv_path, lv_uuid, lv_tags]): + return None + lvs = self._filter( + lv_name=lv_name, + vg_name=vg_name, + lv_path=lv_path, + lv_uuid=lv_uuid, + lv_tags=lv_tags ) + if not lvs: + return None + if len(lvs) > 1: + raise MultipleLVsError(lv_name, lv_path) + return lvs[0] + + +def create_lv(name, group, extents=None, size=None, tags=None, uuid_name=False, pv=None): + """ + Create a Logical Volume in a Volume Group. Command looks like:: + + lvcreate -L 50G -n gfslv vg0 + + ``name``, ``group``, are required. If ``size`` is provided it must follow + lvm's size notation (like 1G, or 20M). Tags are an optional dictionary and is expected to + conform to the convention of prefixing them with "ceph." 
like:: + + {"ceph.block_device": "/dev/ceph/osd-1"} + + :param uuid_name: Optionally combine the ``name`` with UUID to ensure uniqueness + """ + if uuid_name: + name = '%s-%s' % (name, uuid.uuid4()) + if tags is None: + tags = { + "ceph.osd_id": "null", + "ceph.type": "null", + "ceph.cluster_fsid": "null", + "ceph.osd_fsid": "null", + } + + # XXX add CEPH_VOLUME_LVM_DEBUG to enable -vvvv on lv operations + type_path_tag = { + 'journal': 'ceph.journal_device', + 'data': 'ceph.data_device', + 'block': 'ceph.block_device', + 'wal': 'ceph.wal_device', + 'db': 'ceph.db_device', + 'lockbox': 'ceph.lockbox_device', # XXX might not ever need this lockbox sorcery + } + if size: + command = [ + 'lvcreate', + '--yes', + '-L', + '%s' % size, + '-n', name, group + ] + elif extents: + command = [ + 'lvcreate', + '--yes', + '-l', + '%s' % extents, + '-n', name, group + ] + # create the lv with all the space available, this is needed because the + # system call is different for LVM + else: + command = [ + 'lvcreate', + '--yes', + '-l', + '100%FREE', + '-n', name, group + ] + if pv: + command.append(pv) + process.run(command) + + lv = get_lv(lv_name=name, vg_name=group) + lv.set_tags(tags) + + # when creating a distinct type, the caller doesn't know what the path will + # be so this function will set it after creation using the mapping + path_tag = type_path_tag.get(tags.get('ceph.type')) + if path_tag: + lv.set_tags( + {path_tag: lv.lv_path} + ) + return lv + + +def remove_lv(lv): + """ + Removes a logical volume given it's absolute path. + + Will return True if the lv is successfully removed or + raises a RuntimeError if the removal fails. + + :param lv: A ``Volume`` object or the path for an LV + """ + if isinstance(lv, Volume): + path = lv.lv_path + else: + path = lv + + stdout, stderr, returncode = process.call( + [ + 'lvremove', + '-v', # verbose + '-f', # force it + path + ], + show_command=True, + terminal_verbose=True, + ) + if returncode != 0: + raise RuntimeError("Unable to remove %s" % path) + return True + + +def is_lv(dev, lvs=None): + """ + Boolean to detect if a device is an LV or not. + """ + splitname = dmsetup_splitname(dev) + # Allowing to optionally pass `lvs` can help reduce repetitive checks for + # multiple devices at once. + if lvs is None or len(lvs) == 0: + lvs = Volumes() + + if splitname.get('LV_NAME'): + lvs.filter(lv_name=splitname['LV_NAME'], vg_name=splitname['VG_NAME']) + return len(lvs) > 0 + return False + + +def get_lv(lv_name=None, vg_name=None, lv_path=None, lv_uuid=None, lv_tags=None, lvs=None): + """ + Return a matching lv for the current system, requiring ``lv_name``, + ``vg_name``, ``lv_path`` or ``tags``. Raises an error if more than one lv + is found. + + It is useful to use ``tags`` when trying to find a specific logical volume, + but it can also lead to multiple lvs being found, since a lot of metadata + is shared between lvs of a distinct OSD. 
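``create_lv`` picks one of three ``lvcreate`` forms (``-L <size>``, ``-l <extents>``, or ``-l 100%FREE``), applies the given tags, and, for known ``ceph.type`` values, records the resulting path under the matching ``ceph.*_device`` tag. A usage sketch with made-up names and tags::

    lv = create_lv('osd-block', 'ceph-block-vg', size='50G', uuid_name=True,
                   tags={'ceph.osd_id': '0', 'ceph.type': 'block'})
    # because ceph.type == 'block', create_lv also sets
    #   ceph.block_device == lv.lv_path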
+ """ + if not any([lv_name, vg_name, lv_path, lv_uuid, lv_tags]): + return None + if lvs is None: + lvs = Volumes() + return lvs.get( + lv_name=lv_name, vg_name=vg_name, lv_path=lv_path, lv_uuid=lv_uuid, + lv_tags=lv_tags + ) + + +def get_lv_from_argument(argument): + """ + Helper proxy function that consumes a possible logical volume passed in from the CLI + in the form of `vg/lv`, but with some validation so that an argument that is a full + path to a device can be ignored + """ + if argument.startswith('/'): + lv = get_lv(lv_path=argument) + return lv + try: + vg_name, lv_name = argument.split('/') + except (ValueError, AttributeError): + return None + return get_lv(lv_name=lv_name, vg_name=vg_name) + + +def create_lvs(volume_group, parts=None, size=None, name_prefix='ceph-lv'): + """ + Create multiple Logical Volumes from a Volume Group by calculating the + proper extents from ``parts`` or ``size``. A custom prefix can be used + (defaults to ``ceph-lv``), these names are always suffixed with a uuid. + + LV creation in ceph-volume will require tags, this is expected to be + pre-computed by callers who know Ceph metadata like OSD IDs and FSIDs. It + will probably not be the case when mass-creating LVs, so common/default + tags will be set to ``"null"``. + + .. note:: LVs that are not in use can be detected by querying LVM for tags that are + set to ``"null"``. + + :param volume_group: The volume group (vg) to use for LV creation + :type group: ``VolumeGroup()`` object + :param parts: Number of LVs to create *instead of* ``size``. + :type parts: int + :param size: Size (in gigabytes) of LVs to create, e.g. "as many 10gb LVs as possible" + :type size: int + :param extents: The number of LVM extents to use to create the LV. Useful if looking to have + accurate LV sizes (LVM rounds sizes otherwise) + """ + if parts is None and size is None: + # fallback to just one part (using 100% of the vg) + parts = 1 + lvs = [] + tags = { + "ceph.osd_id": "null", + "ceph.type": "null", + "ceph.cluster_fsid": "null", + "ceph.osd_fsid": "null", + } + sizing = volume_group.sizing(parts=parts, size=size) + for part in range(0, sizing['parts']): + size = sizing['sizes'] + extents = sizing['extents'] + lv_name = '%s-%s' % (name_prefix, uuid.uuid4()) + lvs.append( + create_lv(lv_name, volume_group.name, extents=extents, tags=tags) + ) + return lvs diff --git a/ceph/src/ceph-volume/ceph_volume/configuration.py b/ceph/src/ceph-volume/ceph_volume/configuration.py index 6379ef67a..2fee47ffa 100644 --- a/ceph/src/ceph-volume/ceph_volume/configuration.py +++ b/ceph/src/ceph-volume/ceph_volume/configuration.py @@ -1,13 +1,19 @@ -try: - import configparser -except ImportError: - import ConfigParser as configparser import contextlib import logging import os import re from ceph_volume import terminal, conf from ceph_volume import exceptions +from sys import version_info as sys_version_info + +if sys_version_info.major >= 3: + import configparser + conf_parentclass = configparser.ConfigParser +elif sys_version_info.major < 3: + import ConfigParser as configparser + conf_parentclass = configparser.SafeConfigParser +else: + raise RuntimeError('Not expecting python version > 3 yet.') logger = logging.getLogger(__name__) @@ -50,7 +56,7 @@ def load(abspath=None): ceph_file = open(abspath) trimmed_conf = _TrimIndentFile(ceph_file) with contextlib.closing(ceph_file): - parser.readfp(trimmed_conf) + parser.read_conf(trimmed_conf) conf.ceph = parser return parser except configparser.ParsingError as error: @@ -59,9 +65,9 @@ def 
load(abspath=None): raise RuntimeError('Unable to read configuration file: %s' % abspath) -class Conf(configparser.SafeConfigParser): +class Conf(conf_parentclass): """ - Subclasses from SafeConfigParser to give a few helpers for Ceph + Subclasses from ConfigParser to give a few helpers for Ceph configuration. """ @@ -215,3 +221,11 @@ class Conf(configparser.SafeConfigParser): for name, val in options.items(): if isinstance(val, list): options[name] = '\n'.join(val) + + def read_conf(self, conffile): + if sys_version_info.major >= 3: + self.read_file(conffile) + elif sys_version_info.major < 3: + self.readfp(conffile) + else: + raise RuntimeError('Not expecting python version > 3 yet.') diff --git a/ceph/src/ceph-volume/ceph_volume/devices/lvm/listing.py b/ceph/src/ceph-volume/ceph_volume/devices/lvm/listing.py index 968759363..f3416472a 100644 --- a/ceph/src/ceph-volume/ceph_volume/devices/lvm/listing.py +++ b/ceph/src/ceph-volume/ceph_volume/devices/lvm/listing.py @@ -112,8 +112,8 @@ class List(object): def list(self, args): # ensure everything is up to date before calling out # to list lv's - self.update() - report = self.generate(args) + lvs = self.update() + report = self.generate(args, lvs) if args.format == 'json': # If the report is empty, we don't return a non-zero exit status # because it is assumed this is going to be consumed by automated @@ -153,25 +153,27 @@ class List(object): # this means that the device has changed, so it must be updated # on the API to reflect this lv.set_tags({device_name: disk_device}) + return lvs - def generate(self, args): + def generate(self, args, lvs=None): """ Generate reports for an individual device or for all Ceph-related devices, logical or physical, as long as they have been prepared by this tool before and contain enough metadata. """ if args.device: - return self.single_report(args.device) + return self.single_report(args.device, lvs) else: - return self.full_report() + return self.full_report(lvs) - def single_report(self, device): + def single_report(self, device, lvs=None): """ Generate a report for a single device. 
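``Conf.read_conf`` hides the Python 2/3 split: ``readfp`` is all Python 2's ``SafeConfigParser`` offers, while Python 3 deprecates it in favour of ``read_file``. A rough standalone sketch of the same dispatch, assuming nothing beyond the standard library::

    import sys
    try:
        import configparser                  # Python 3
    except ImportError:
        import ConfigParser as configparser  # Python 2

    def read_conf(parser, fileobj):
        if sys.version_info.major >= 3:
            parser.read_file(fileobj)
        else:
            parser.readfp(fileobj)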
This can be either a logical volume in the form of vg/lv or a device with an absolute path like /dev/sda1 or /dev/sda """ - lvs = api.Volumes() + if lvs is None: + lvs = api.Volumes() report = {} lv = api.get_lv_from_argument(device) @@ -227,6 +229,7 @@ class List(object): if lvs is None: lvs = api.Volumes() report = {} + for lv in lvs: try: _id = lv.tags['ceph.osd_id'] @@ -246,7 +249,7 @@ class List(object): # bluestore will not have a journal, filestore will not have # a block/wal/db, so we must skip if not present continue - if not api.get_lv(lv_uuid=device_uuid): + if not api.get_lv(lv_uuid=device_uuid, lvs=lvs): # means we have a regular device, so query blkid disk_device = disk.get_device_from_partuuid(device_uuid) if disk_device: diff --git a/ceph/src/ceph-volume/ceph_volume/devices/lvm/zap.py b/ceph/src/ceph-volume/ceph_volume/devices/lvm/zap.py index 328a03615..bca32ac88 100644 --- a/ceph/src/ceph-volume/ceph_volume/devices/lvm/zap.py +++ b/ceph/src/ceph-volume/ceph_volume/devices/lvm/zap.py @@ -1,12 +1,13 @@ import argparse import os import logging +import time from textwrap import dedent from ceph_volume import decorators, terminal, process from ceph_volume.api import lvm as api -from ceph_volume.util import system, encryption, disk, arg_validators +from ceph_volume.util import system, encryption, disk, arg_validators, str_to_int from ceph_volume.util.device import Device from ceph_volume.systemd import systemctl @@ -17,12 +18,38 @@ mlogger = terminal.MultiLogger(__name__) def wipefs(path): """ Removes the filesystem from an lv or partition. + + Environment variables supported:: + + * ``CEPH_VOLUME_WIPEFS_TRIES``: Defaults to 8 + * ``CEPH_VOLUME_WIPEFS_INTERVAL``: Defaults to 5 + """ - process.run([ - 'wipefs', - '--all', - path - ]) + tries = str_to_int( + os.environ.get('CEPH_VOLUME_WIPEFS_TRIES', 8) + ) + interval = str_to_int( + os.environ.get('CEPH_VOLUME_WIPEFS_INTERVAL', 5) + ) + + for trying in range(tries): + stdout, stderr, exit_code = process.call([ + 'wipefs', + '--all', + path + ]) + if exit_code != 0: + # this could narrow the retry by poking in the stderr of the output + # to verify that 'probing initialization failed' appears, but + # better to be broad in this retry to prevent missing on + # a different message that needs to be retried as well + terminal.warning( + 'failed to wipefs device, will try again to workaround probable race condition' + ) + time.sleep(interval) + else: + return + raise RuntimeError("could not complete wipefs on device: %s" % path) def zap_data(path): @@ -77,7 +104,7 @@ def ensure_associated_lvs(lvs): wal_lvs = lvs._filter(lv_tags={'ceph.type': 'wal'}) backing_devices = [ (journal_lvs, 'journal'), - (db_lvs, 'block'), + (db_lvs, 'db'), (wal_lvs, 'wal') ] @@ -243,8 +270,9 @@ class Zap(object): "Zapping successful for: %s" % ", ".join([str(d) for d in self.args.devices]) ) else: + identifier = self.args.osd_id or self.args.osd_fsid terminal.success( - "Zapping successful for OSD: %s" % self.args.osd_id or self.args.osd_fsid + "Zapping successful for OSD: %s" % identifier ) @decorators.needs_root diff --git a/ceph/src/ceph-volume/ceph_volume/systemd/main.py b/ceph/src/ceph-volume/ceph_volume/systemd/main.py index bf24f0a01..e4d244f5d 100644 --- a/ceph/src/ceph-volume/ceph_volume/systemd/main.py +++ b/ceph/src/ceph-volume/ceph_volume/systemd/main.py @@ -99,7 +99,7 @@ def main(args=None): # don't log any output to the terminal, just rely on stderr/stdout # going to logging process.run(command, terminal_logging=False) - 
logger.info('successfully trggered activation for: %s', extra_data) + logger.info('successfully triggered activation for: %s', extra_data) break except RuntimeError as error: logger.warning(error) diff --git a/ceph/src/ceph-volume/ceph_volume/tests/api/test_lvm.py b/ceph/src/ceph-volume/ceph_volume/tests/api/test_lvm.py index ddf99f6ae..223ac5013 100644 --- a/ceph/src/ceph-volume/ceph_volume/tests/api/test_lvm.py +++ b/ceph/src/ceph-volume/ceph_volume/tests/api/test_lvm.py @@ -186,8 +186,8 @@ class TestPVolumes(object): pv_name='/dev/vg/foo', pv_uuid='1111', pv_tags=pv_tags, vg_name='vg') pvolumes.append(FooPVolume) - pvolumes.filter(pv_tags={'ceph.type': 'journal', 'ceph.osd_id': '2'}) - assert pvolumes == [] + assert pvolumes.filter(pv_tags={'ceph.type': 'journal', + 'ceph.osd_id': '2'}) == [] def test_filter_by_tags_matches(self, pvolumes, monkeypatch): pv_tags = "ceph.type=journal,ceph.osd_id=1" @@ -195,8 +195,8 @@ class TestPVolumes(object): pv_name='/dev/vg/foo', pv_uuid='1111', pv_tags=pv_tags, vg_name="vg") pvolumes.append(FooPVolume) - pvolumes.filter(pv_tags={'ceph.type': 'journal', 'ceph.osd_id': '1'}) - assert pvolumes == [FooPVolume] + assert pvolumes.filter(pv_tags={'ceph.type': 'journal', + 'ceph.osd_id': '1'}) == [FooPVolume] class TestGetVG(object): @@ -337,7 +337,7 @@ class TestVolumeGroups(object): journal = api.VolumeGroup(vg_name='volume2', vg_tags='ceph.group=plain') volume_groups.append(osd) volume_groups.append(journal) - volume_groups.filter(vg_tags={'ceph.group': 'dmcache'}) + volume_groups = volume_groups.filter(vg_tags={'ceph.group': 'dmcache'}) assert len(volume_groups) == 1 assert volume_groups[0].vg_name == 'volume1' @@ -345,7 +345,7 @@ class TestVolumeGroups(object): vg_tags = "ceph.group=dmcache,ceph.disk_type=ssd" osd = api.VolumeGroup(vg_name='volume1', vg_path='/dev/vg/lv', vg_tags=vg_tags) volume_groups.append(osd) - volume_groups.filter(vg_tags={'ceph.group': 'data', 'ceph.disk_type': 'ssd'}) + volume_groups = volume_groups.filter(vg_tags={'ceph.group': 'data', 'ceph.disk_type': 'ssd'}) assert volume_groups == [] def test_filter_by_vg_name(self, volume_groups): @@ -354,13 +354,13 @@ class TestVolumeGroups(object): journal = api.VolumeGroup(vg_name='volume2', vg_tags='ceph.type=journal') volume_groups.append(osd) volume_groups.append(journal) - volume_groups.filter(vg_name='ceph_vg') + volume_groups = volume_groups.filter(vg_name='ceph_vg') assert len(volume_groups) == 1 assert volume_groups[0].vg_name == 'ceph_vg' def test_filter_requires_params(self, volume_groups): with pytest.raises(TypeError): - volume_groups.filter() + volume_groups = volume_groups.filter() class TestVolumeGroupFree(object): @@ -928,6 +928,7 @@ class TestSplitNameParser(object): class TestIsLV(object): def test_is_not_an_lv(self, monkeypatch): + monkeypatch.setattr(api.process, 'call', lambda x, **kw: ('', '', 0)) monkeypatch.setattr(api, 'dmsetup_splitname', lambda x, **kw: {}) assert api.is_lv('/dev/sda1', lvs=[]) is False diff --git a/ceph/src/ceph-volume/ceph_volume/tests/conftest.py b/ceph/src/ceph-volume/ceph_volume/tests/conftest.py index fad7df44e..b108ce2f1 100644 --- a/ceph/src/ceph-volume/ceph_volume/tests/conftest.py +++ b/ceph/src/ceph-volume/ceph_volume/tests/conftest.py @@ -153,6 +153,10 @@ def volume_groups(monkeypatch): vgs._purge() return vgs +def volume_groups_empty(monkeypatch): + monkeypatch.setattr('ceph_volume.process.call', lambda x, **kw: ('', '', 0)) + vgs = lvm_api.VolumeGroups(populate=False) + return vgs @pytest.fixture def stub_vgs(monkeypatch, 
volume_groups): @@ -167,6 +171,12 @@ def pvolumes(monkeypatch): pvolumes = lvm_api.PVolumes() pvolumes._purge() return pvolumes +@pytest.fixture +def pvolumes_empty(monkeypatch): + monkeypatch.setattr('ceph_volume.process.call', lambda x, **kw: ('', '', 0)) + pvolumes = lvm_api.PVolumes(populate=False) + return pvolumes + @pytest.fixture diff --git a/ceph/src/ceph-volume/ceph_volume/tests/devices/lvm/strategies/test_bluestore.py b/ceph/src/ceph-volume/ceph_volume/tests/devices/lvm/strategies/test_bluestore.py index 8df5840b6..69be0d5c0 100644 --- a/ceph/src/ceph-volume/ceph_volume/tests/devices/lvm/strategies/test_bluestore.py +++ b/ceph/src/ceph-volume/ceph_volume/tests/devices/lvm/strategies/test_bluestore.py @@ -1,5 +1,6 @@ import pytest from ceph_volume.devices.lvm.strategies import bluestore +from ceph_volume.api import lvm class TestSingleType(object): @@ -51,7 +52,7 @@ class TestSingleType(object): class TestMixedType(object): - def test_filter_all_data_devs(self, fakedevice, factory): + def test_filter_all_data_devs(self, fakedevice, factory, monkeypatch): # in this scenario the user passed a already used device to be used for # data and an unused device to be used as db device. db_dev = fakedevice(used_by_ceph=False, is_lvm_member=False, rotational=False, sys_api=dict(size=6073740000)) @@ -59,6 +60,7 @@ class TestMixedType(object): args = factory(filtered_devices=[data_dev], osds_per_device=1, block_db_size=None, block_wal_size=None, osd_ids=[]) + monkeypatch.setattr(lvm, 'VolumeGroup', lambda x, **kw: []) bluestore.MixedType(args, [], [db_dev], []) diff --git a/ceph/src/ceph-volume/ceph_volume/tests/devices/lvm/test_listing.py b/ceph/src/ceph-volume/ceph_volume/tests/devices/lvm/test_listing.py index 1af1498f0..efbb460f9 100644 --- a/ceph/src/ceph-volume/ceph_volume/tests/devices/lvm/test_listing.py +++ b/ceph/src/ceph-volume/ceph_volume/tests/devices/lvm/test_listing.py @@ -79,6 +79,23 @@ class TestList(object): with pytest.raises(SystemExit): lvm.listing.List([]).list(args) + def test_lvs_list_is_created_just_once(self, monkeypatch, is_root, volumes, factory): + api.volumes_obj_create_count = 0 + + def monkey_populate(self): + api.volumes_obj_create_count += 1 + for lv_item in api.get_api_lvs(): + self.append(api.Volume(**lv_item)) + monkeypatch.setattr(api.Volumes, '_populate', monkey_populate) + + args = factory(format='pretty', device='/dev/sda1') + with pytest.raises(SystemExit): + lvm.listing.List([]).list(args) + + # XXX: Ideally, the count should be just 1. Volumes._populate() is + # being called thrice out of which only twice is moneky_populate. 
+ assert api.volumes_obj_create_count == 2 + class TestFullReport(object): diff --git a/ceph/src/ceph-volume/ceph_volume/tests/devices/lvm/test_zap.py b/ceph/src/ceph-volume/ceph_volume/tests/devices/lvm/test_zap.py index 55daa4f87..20ca56b54 100644 --- a/ceph/src/ceph-volume/ceph_volume/tests/devices/lvm/test_zap.py +++ b/ceph/src/ceph-volume/ceph_volume/tests/devices/lvm/test_zap.py @@ -1,3 +1,4 @@ +import os import pytest from ceph_volume.api import lvm as api from ceph_volume.devices.lvm import zap @@ -90,6 +91,22 @@ class TestEnsureAssociatedLVs(object): result = zap.ensure_associated_lvs(volumes) assert result == ['/dev/VolGroup/block'] + def test_success_message_for_fsid(self, factory, is_root, capsys): + cli_zap = zap.Zap([]) + args = factory(devices=[], osd_id=None, osd_fsid='asdf-lkjh') + cli_zap.args = args + cli_zap.zap() + out, err = capsys.readouterr() + assert "Zapping successful for OSD: asdf-lkjh" in err + + def test_success_message_for_id(self, factory, is_root, capsys): + cli_zap = zap.Zap([]) + args = factory(devices=[], osd_id='1', osd_fsid=None) + cli_zap.args = args + cli_zap.zap() + out, err = capsys.readouterr() + assert "Zapping successful for OSD: 1" in err + def test_block_and_partition_are_found(self, volumes, monkeypatch): monkeypatch.setattr(zap.disk, 'get_device_from_partuuid', lambda x: '/dev/sdb1') tags = 'ceph.osd_id=0,ceph.osd_fsid=asdf-lkjh,ceph.journal_uuid=x,ceph.type=block' @@ -151,3 +168,26 @@ class TestEnsureAssociatedLVs(object): assert '/dev/VolGroup/lvjournal' in result assert '/dev/VolGroup/lvwal' in result assert '/dev/VolGroup/lvdb' in result + + +class TestWipeFs(object): + + def setup(self): + os.environ['CEPH_VOLUME_WIPEFS_INTERVAL'] = '0' + + def test_works_on_second_try(self, stub_call): + os.environ['CEPH_VOLUME_WIPEFS_TRIES'] = '2' + stub_call([('wiping /dev/sda', '', 1), ('', '', 0)]) + result = zap.wipefs('/dev/sda') + assert result is None + + def test_does_not_work_after_several_tries(self, stub_call): + os.environ['CEPH_VOLUME_WIPEFS_TRIES'] = '2' + stub_call([('wiping /dev/sda', '', 1), ('', '', 1)]) + with pytest.raises(RuntimeError): + zap.wipefs('/dev/sda') + + def test_does_not_work_default_tries(self, stub_call): + stub_call([('wiping /dev/sda', '', 1)]*8) + with pytest.raises(RuntimeError): + zap.wipefs('/dev/sda') diff --git a/ceph/src/ceph-volume/ceph_volume/tests/functional/playbooks/deploy.yml b/ceph/src/ceph-volume/ceph_volume/tests/functional/playbooks/deploy.yml index c0fe8fea9..0b65a1725 100644 --- a/ceph/src/ceph-volume/ceph_volume/tests/functional/playbooks/deploy.yml +++ b/ceph/src/ceph-volume/ceph_volume/tests/functional/playbooks/deploy.yml @@ -17,6 +17,9 @@ delegate_facts_host: True dashboard_enabled: False + environment: + DEBIAN_FRONTEND: noninteractive + pre_tasks: # If we can't get python2 installed before any module is used we will fail # so just try what we can to get it installed @@ -32,6 +35,17 @@ when: - systempython2.stat is undefined or systempython2.stat.exists == false + # Ansible will try to auto-install python-apt, in some systems this might be + # python3-apt, or python-apt, and it has caused whole runs to fail because + # it is trying to do an interactive prompt + - name: install python-apt and aptitude in debian based systems + raw: sudo apt-get -y install "{{ item }}" + ignore_errors: yes + with_items: + - python3-apt + - python-apt + - aptitude + - name: install python2 for fedora raw: sudo dnf -y install python creates=/usr/bin/python ignore_errors: yes diff --git 
a/ceph/src/ceph-volume/ceph_volume/tests/test_configuration.py b/ceph/src/ceph-volume/ceph_volume/tests/test_configuration.py index 2e26ead7c..9af6cd9be 100644 --- a/ceph/src/ceph-volume/ceph_volume/tests/test_configuration.py +++ b/ceph/src/ceph-volume/ceph_volume/tests/test_configuration.py @@ -28,13 +28,13 @@ class TestConf(object): def test_get_non_existing_list(self): cfg = configuration.Conf() cfg.is_valid = lambda: True - cfg.readfp(self.conf_file) + cfg.read_conf(self.conf_file) assert cfg.get_list('global', 'key') == [] def test_get_non_existing_list_get_default(self): cfg = configuration.Conf() cfg.is_valid = lambda: True - cfg.readfp(self.conf_file) + cfg.read_conf(self.conf_file) assert cfg.get_list('global', 'key', ['a']) == ['a'] def test_get_rid_of_comments(self): @@ -45,7 +45,7 @@ class TestConf(object): default = 0 # this is a comment """)) - cfg.readfp(conf_file) + cfg.read_conf(conf_file) assert cfg.get_list('foo', 'default') == ['0'] def test_gets_split_on_commas(self): @@ -56,7 +56,7 @@ class TestConf(object): default = 0,1,2,3 # this is a comment """)) - cfg.readfp(conf_file) + cfg.read_conf(conf_file) assert cfg.get_list('foo', 'default') == ['0', '1', '2', '3'] def test_spaces_and_tabs_are_ignored(self): @@ -67,7 +67,7 @@ class TestConf(object): default = 0, 1, 2 ,3 # this is a comment """)) - cfg.readfp(conf_file) + cfg.read_conf(conf_file) assert cfg.get_list('foo', 'default') == ['0', '1', '2', '3'] diff --git a/ceph/src/ceph-volume/ceph_volume/tests/test_inventory.py b/ceph/src/ceph-volume/ceph_volume/tests/test_inventory.py index 71cb027ed..9721fccdb 100644 --- a/ceph/src/ceph-volume/ceph_volume/tests/test_inventory.py +++ b/ceph/src/ceph-volume/ceph_volume/tests/test_inventory.py @@ -25,7 +25,8 @@ def device_report_keys(device_info): 'sectorsize': '512', 'size': 1999844147200.0, 'support_discard': '', - 'vendor': 'DELL'} + 'vendor': 'DELL', + 'device_id': 'Vendor-Model-Serial'} } ) report = Devices().json_report()[0] @@ -67,6 +68,7 @@ class TestInventory(object): 'sys_api', 'available', 'lvs', + 'device_id', ] expected_sys_api_keys = [ diff --git a/ceph/src/ceph-volume/ceph_volume/tests/util/test_device.py b/ceph/src/ceph-volume/ceph_volume/tests/util/test_device.py index 3a098d7ed..7477777bc 100644 --- a/ceph/src/ceph-volume/ceph_volume/tests/util/test_device.py +++ b/ceph/src/ceph-volume/ceph_volume/tests/util/test_device.py @@ -32,19 +32,19 @@ class TestDevice(object): disk = device.Device("vg/lv") assert disk.is_lv - def test_vgs_is_empty(self, device_info, pvolumes, monkeypatch): + def test_vgs_is_empty(self, device_info, pvolumes, pvolumes_empty, monkeypatch): BarPVolume = api.PVolume(pv_name='/dev/sda', pv_uuid="0000", pv_tags={}) pvolumes.append(BarPVolume) - monkeypatch.setattr(api, 'PVolumes', lambda: pvolumes) + monkeypatch.setattr(api, 'PVolumes', lambda populate=True: pvolumes if populate else pvolumes_empty) lsblk = {"TYPE": "disk"} device_info(lsblk=lsblk) disk = device.Device("/dev/nvme0n1") assert disk.vgs == [] - def test_vgs_is_not_empty(self, device_info, pvolumes, monkeypatch): + def test_vgs_is_not_empty(self, device_info, pvolumes, pvolumes_empty, monkeypatch): BarPVolume = api.PVolume(vg_name='foo', lv_uuid='111', pv_name='/dev/nvme0n1', pv_uuid="0000", pv_tags={}) pvolumes.append(BarPVolume) - monkeypatch.setattr(api, 'PVolumes', lambda: pvolumes) + monkeypatch.setattr(api, 'PVolumes', lambda populate=True: pvolumes if populate else pvolumes_empty) lsblk = {"TYPE": "disk"} device_info(lsblk=lsblk) disk = device.Device("/dev/nvme0n1") 
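The test_device.py hunks around this point stub api.PVolumes with a lambda that honours the new populate= keyword, so no pvs/lvs subprocess is spawned during a unit test. A small sketch of that fixture-plus-monkeypatch pattern under invented names (Volumes, volumes_in_use and the api namespace are illustrations, not the ceph-volume API):

import types

import pytest


class Volumes(list):
    """Stand-in for an LVM-backed collection; populate=False skips the scan."""

    def __init__(self, populate=True):
        super(Volumes, self).__init__()
        if populate:
            self.append('real-lv')   # pretend this came from an `lvs` call


# stand-in for the api module that the code under test imports
api = types.SimpleNamespace(Volumes=Volumes)


def volumes_in_use():
    # code under test: always builds a fresh, fully populated collection
    return list(api.Volumes())


@pytest.fixture
def volumes():
    vols = Volumes(populate=False)
    vols.append('stub-lv')
    return vols


@pytest.fixture
def volumes_empty():
    return Volumes(populate=False)


def test_uses_stub_not_subprocess(volumes, volumes_empty, monkeypatch):
    # honour the populate keyword, exactly like the PVolumes stub in the
    # surrounding hunks
    monkeypatch.setattr(api, 'Volumes',
                        lambda populate=True: volumes if populate else volumes_empty)
    assert volumes_in_use() == ['stub-lv']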
@@ -179,10 +179,10 @@ class TestDevice(object): disk = device.Device("/dev/sda") assert disk.is_ceph_disk_member is False - def test_pv_api(self, device_info, pvolumes, monkeypatch): + def test_pv_api(self, device_info, pvolumes, pvolumes_empty, monkeypatch): FooPVolume = api.PVolume(pv_name='/dev/sda', pv_uuid="0000", lv_uuid="0000", pv_tags={}, vg_name="vg") pvolumes.append(FooPVolume) - monkeypatch.setattr(api, 'PVolumes', lambda: pvolumes) + monkeypatch.setattr(api, 'PVolumes', lambda populate=True: pvolumes if populate else pvolumes_empty) data = {"/dev/sda": {"foo": "bar"}} lsblk = {"TYPE": "part"} device_info(devices=data, lsblk=lsblk) @@ -190,10 +190,10 @@ class TestDevice(object): assert disk.pvs_api @pytest.mark.parametrize("ceph_type", ["data", "block"]) - def test_used_by_ceph(self, device_info, pvolumes, monkeypatch, ceph_type): + def test_used_by_ceph(self, device_info, pvolumes, pvolumes_empty, monkeypatch, ceph_type): FooPVolume = api.PVolume(pv_name='/dev/sda', pv_uuid="0000", lv_uuid="0000", pv_tags={}, vg_name="vg") pvolumes.append(FooPVolume) - monkeypatch.setattr(api, 'PVolumes', lambda: pvolumes) + monkeypatch.setattr(api, 'PVolumes', lambda populate=True: pvolumes if populate else pvolumes_empty) data = {"/dev/sda": {"foo": "bar"}} lsblk = {"TYPE": "part"} lv_data = {"lv_path": "vg/lv", "vg_name": "vg", "lv_uuid": "0000", "tags": {"ceph.osd_id": 0, "ceph.type": ceph_type}} @@ -201,10 +201,10 @@ class TestDevice(object): disk = device.Device("/dev/sda") assert disk.used_by_ceph - def test_not_used_by_ceph(self, device_info, pvolumes, monkeypatch): + def test_not_used_by_ceph(self, device_info, pvolumes, pvolumes_empty, monkeypatch): FooPVolume = api.PVolume(pv_name='/dev/sda', pv_uuid="0000", lv_uuid="0000", pv_tags={}, vg_name="vg") pvolumes.append(FooPVolume) - monkeypatch.setattr(api, 'PVolumes', lambda: pvolumes) + monkeypatch.setattr(api, 'PVolumes', lambda populate=True: pvolumes if populate else pvolumes_empty) data = {"/dev/sda": {"foo": "bar"}} lsblk = {"TYPE": "part"} lv_data = {"lv_path": "vg/lv", "vg_name": "vg", "lv_uuid": "0000", "tags": {"ceph.osd_id": 0, "ceph.type": "journal"}} @@ -248,7 +248,7 @@ class TestDeviceEncryption(object): disk = device.Device("/dev/sda") assert disk.is_encrypted is True - def test_mapper_is_encrypted_luks1(self, device_info, pvolumes, monkeypatch): + def test_mapper_is_encrypted_luks1(self, device_info, pvolumes, pvolumes_empty, monkeypatch): status = {'type': 'LUKS1'} monkeypatch.setattr(device, 'encryption_status', lambda x: status) lsblk = {'FSTYPE': 'xfs', 'TYPE': 'lvm'} @@ -257,7 +257,7 @@ class TestDeviceEncryption(object): disk = device.Device("/dev/mapper/uuid") assert disk.is_encrypted is True - def test_mapper_is_encrypted_luks2(self, device_info, pvolumes, monkeypatch): + def test_mapper_is_encrypted_luks2(self, device_info, pvolumes, pvolumes_empty, monkeypatch): status = {'type': 'LUKS2'} monkeypatch.setattr(device, 'encryption_status', lambda x: status) lsblk = {'FSTYPE': 'xfs', 'TYPE': 'lvm'} @@ -266,7 +266,7 @@ class TestDeviceEncryption(object): disk = device.Device("/dev/mapper/uuid") assert disk.is_encrypted is True - def test_mapper_is_encrypted_plain(self, device_info, pvolumes, monkeypatch): + def test_mapper_is_encrypted_plain(self, device_info, pvolumes, pvolumes_empty, monkeypatch): status = {'type': 'PLAIN'} monkeypatch.setattr(device, 'encryption_status', lambda x: status) lsblk = {'FSTYPE': 'xfs', 'TYPE': 'lvm'} @@ -275,7 +275,7 @@ class TestDeviceEncryption(object): disk = 
device.Device("/dev/mapper/uuid") assert disk.is_encrypted is True - def test_mapper_is_not_encrypted_plain(self, device_info, pvolumes, monkeypatch): + def test_mapper_is_not_encrypted_plain(self, device_info, pvolumes, pvolumes_empty, monkeypatch): monkeypatch.setattr(device, 'encryption_status', lambda x: {}) lsblk = {'FSTYPE': 'xfs', 'TYPE': 'lvm'} blkid = {'TYPE': 'mapper'} diff --git a/ceph/src/ceph-volume/ceph_volume/tests/util/test_system.py b/ceph/src/ceph-volume/ceph_volume/tests/util/test_system.py index d0919c998..4073cf381 100644 --- a/ceph/src/ceph-volume/ceph_volume/tests/util/test_system.py +++ b/ceph/src/ceph-volume/ceph_volume/tests/util/test_system.py @@ -4,6 +4,7 @@ import getpass import pytest from textwrap import dedent from ceph_volume.util import system +from mock.mock import patch class TestMkdirP(object): @@ -216,3 +217,64 @@ class TestWhich(object): cap = capsys.readouterr() assert 'Absolute path not found for executable: exedir' in cap.err assert 'Ensure $PATH environment variable contains common executable locations' in cap.err + + +@pytest.fixture +def stub_which(monkeypatch): + def apply(value='/bin/restorecon'): + monkeypatch.setattr(system, 'which', lambda x: value) + return apply + + +# python2 has no FileNotFoundError +try: + FileNotFoundError +except NameError: + FileNotFoundError = OSError + + +class TestSetContext(object): + + def setup(self): + try: + os.environ.pop('CEPH_VOLUME_SKIP_RESTORECON') + except KeyError: + pass + + @pytest.mark.parametrize('value', ['1', 'True', 'true', 'TRUE', 'yes']) + def test_set_context_skips(self, stub_call, fake_run, value): + stub_call(('', '', 0)) + os.environ['CEPH_VOLUME_SKIP_RESTORECON'] = value + system.set_context('/tmp/foo') + assert fake_run.calls == [] + + @pytest.mark.parametrize('value', ['0', 'False', 'false', 'FALSE', 'no']) + def test_set_context_doesnt_skip_with_env(self, stub_call, stub_which, fake_run, value): + stub_call(('', '', 0)) + stub_which() + os.environ['CEPH_VOLUME_SKIP_RESTORECON'] = value + system.set_context('/tmp/foo') + assert len(fake_run.calls) + + def test_set_context_skips_on_executable(self, stub_call, stub_which, fake_run): + stub_call(('', '', 0)) + stub_which('restorecon') + system.set_context('/tmp/foo') + assert fake_run.calls == [] + + def test_set_context_no_skip_on_executable(self, stub_call, stub_which, fake_run): + stub_call(('', '', 0)) + stub_which('/bin/restorecon') + system.set_context('/tmp/foo') + assert len(fake_run.calls) + + @patch('ceph_volume.process.call') + def test_selinuxenabled_doesnt_exist(self, mocked_call, fake_run): + mocked_call.side_effect = FileNotFoundError() + system.set_context('/tmp/foo') + assert fake_run.calls == [] + + def test_selinuxenabled_is_not_enabled(self, stub_call, fake_run): + stub_call(('', '', 1)) + system.set_context('/tmp/foo') + assert fake_run.calls == [] diff --git a/ceph/src/ceph-volume/ceph_volume/util/device.py b/ceph/src/ceph-volume/ceph_volume/util/device.py index ddec0e848..ea478f387 100644 --- a/ceph/src/ceph-volume/ceph_volume/util/device.py +++ b/ceph/src/ceph-volume/ceph_volume/util/device.py @@ -62,6 +62,7 @@ class Device(object): 'available', 'path', 'sys_api', + 'device_id', ] pretty_report_sys_fields = [ 'human_readable_size', @@ -232,8 +233,7 @@ class Device(object): for path in self._get_pv_paths(): # check if there was a pv created with the # name of device - pvs = lvm.PVolumes() - pvs.filter(pv_name=path) + pvs = lvm.PVolumes().filter(pv_name=path) has_vgs = [pv.vg_name for pv in pvs if pv.vg_name] if 
has_vgs: self.vgs = list(set(has_vgs)) diff --git a/ceph/src/ceph-volume/ceph_volume/util/encryption.py b/ceph/src/ceph-volume/ceph_volume/util/encryption.py index 4beac7c64..72a0ccf12 100644 --- a/ceph/src/ceph-volume/ceph_volume/util/encryption.py +++ b/ceph/src/ceph-volume/ceph_volume/util/encryption.py @@ -103,7 +103,8 @@ def dmcrypt_close(mapping): logger.debug('device mapper path does not exist %s' % mapping) logger.debug('will skip cryptsetup removal') return - process.run(['cryptsetup', 'remove', mapping]) + # don't be strict about the remove call, but still warn on the terminal if it fails + process.run(['cryptsetup', 'remove', mapping], stop_on_error=False) def get_dmcrypt_key(osd_id, osd_fsid, lockbox_keyring=None): diff --git a/ceph/src/ceph-volume/ceph_volume/util/system.py b/ceph/src/ceph-volume/ceph_volume/util/system.py index 98f6fc42d..5aaca59af 100644 --- a/ceph/src/ceph-volume/ceph_volume/util/system.py +++ b/ceph/src/ceph-volume/ceph_volume/util/system.py @@ -8,6 +8,12 @@ import uuid from ceph_volume import process, terminal from . import as_string +# python2 has no FileNotFoundError +try: + FileNotFoundError +except NameError: + FileNotFoundError = OSError + logger = logging.getLogger(__name__) mlogger = terminal.MultiLogger(__name__) @@ -275,7 +281,39 @@ def get_mounts(devices=False, paths=False, realpath=False): return paths_mounted -def set_context(path, recursive = False): +def set_context(path, recursive=False): + """ + Calls ``restorecon`` to set the proper context on SELinux systems. Only if + the ``restorecon`` executable is found anywhere in the path it will get + called. + + If the ``CEPH_VOLUME_SKIP_RESTORECON`` environment variable is set to + any of: "1", "true", "yes" the call will be skipped as well. + + Finally, if SELinux is not enabled, or not available in the system, + ``restorecon`` will not be called. This is checked by calling out to the + ``selinuxenabled`` executable. If that tool is not installed or returns + a non-zero exit status then no further action is taken and this function + will return. 
+ """ + skip = os.environ.get('CEPH_VOLUME_SKIP_RESTORECON', '') + if skip.lower() in ['1', 'true', 'yes']: + logger.info( + 'CEPH_VOLUME_SKIP_RESTORECON environ is set, will not call restorecon' + ) + return + + try: + stdout, stderr, code = process.call(['selinuxenabled'], + verbose_on_failure=False) + except FileNotFoundError: + logger.info('No SELinux found, skipping call to restorecon') + return + + if code != 0: + logger.info('SELinux is not enabled, will not call restorecon') + return + # restore selinux context to default policy values if which('restorecon').startswith('/'): if recursive: diff --git a/ceph/src/ceph-volume/shell_tox.ini b/ceph/src/ceph-volume/shell_tox.ini new file mode 100644 index 000000000..5cd4606e4 --- /dev/null +++ b/ceph/src/ceph-volume/shell_tox.ini @@ -0,0 +1,11 @@ +[tox] +envlist = py27, py35, py36 +skip_missing_interpreters = true + +[testenv] +passenv=* +whitelist_externals= + bash + grep + mktemp +commands=bash {posargs:ceph_volume/tests/functional/scripts/test_unicode.sh} {posargs:ceph_volume/tests/functional/scripts/output.py} diff --git a/ceph/src/ceph-volume/tox.ini b/ceph/src/ceph-volume/tox.ini index 71eb8c597..dc66681d2 100644 --- a/ceph/src/ceph-volume/tox.ini +++ b/ceph/src/ceph-volume/tox.ini @@ -5,6 +5,7 @@ skip_missing_interpreters = true [testenv] deps= pytest + mock commands=py.test -v {posargs:ceph_volume/tests} --ignore=ceph_volume/tests/functional [testenv:flake8] diff --git a/ceph/src/ceph.in b/ceph/src/ceph.in index f060023f5..785296168 100755 --- a/ceph/src/ceph.in +++ b/ceph/src/ceph.in @@ -96,24 +96,27 @@ def get_pythonlib_dir(): return "lib.{version[0]}".format(version=sys.version_info) -def get_cmake_variables(names): +def get_cmake_variables(*names): vars = dict((name, None) for name in names) for line in open(os.path.join(MYPDIR, "CMakeCache.txt")): # parse lines like "WITH_ASAN:BOOL=ON" for name in names: if line.startswith("{}:".format(name)): - vars[name] = line.split("=")[1].strip() + type_value = line.split(":")[1].strip() + t, v = type_value.split("=") + if t == 'BOOL': + v = v.upper() in ('TRUE', '1', 'Y', 'YES', 'ON') + vars[name] = v break if all(vars.values()): break - return vars + return [vars[name] for name in names] if os.path.exists(os.path.join(MYPDIR, "CMakeCache.txt")) \ and os.path.exists(os.path.join(MYPDIR, "bin/init-ceph")): - vars = get_cmake_variables(["ceph_SOURCE_DIR", "ASAN_LIBRARY"]) - src_path = vars["ceph_SOURCE_DIR"] - asan_lib_path = vars["ASAN_LIBRARY"] + src_path, with_asan, asan_lib_path = \ + get_cmake_variables("ceph_SOURCE_DIR", "WITH_ASAN", "ASAN_LIBRARY") if src_path is None: # Huh, maybe we're not really in a cmake environment? 
pass @@ -125,8 +128,8 @@ if os.path.exists(os.path.join(MYPDIR, "CMakeCache.txt")) \ pythonlib_path = os.path.join(lib_path, "cython_modules", get_pythonlib_dir()) - - respawn_in_path(lib_path, pybind_path, pythonlib_path, asan_lib_path) + respawn_in_path(lib_path, pybind_path, pythonlib_path, + asan_lib_path if with_asan else None) if 'PATH' in os.environ and bin_path not in os.environ['PATH']: os.environ['PATH'] = os.pathsep.join([bin_path, os.environ['PATH']]) diff --git a/ceph/src/client/Client.cc b/ceph/src/client/Client.cc index e821de07f..08bfb2f3d 100644 --- a/ceph/src/client/Client.cc +++ b/ceph/src/client/Client.cc @@ -2942,8 +2942,21 @@ void Client::kick_requests_closed(MetaSession *session) if (req->got_unsafe) { lderr(cct) << __func__ << " removing unsafe request " << req->get_tid() << dendl; req->unsafe_item.remove_myself(); - req->unsafe_dir_item.remove_myself(); - req->unsafe_target_item.remove_myself(); + if (is_dir_operation(req)) { + Inode *dir = req->inode(); + assert(dir); + dir->set_async_err(-EIO); + lderr(cct) << "kick_requests_closed drop req of inode(dir) : " + << dir->ino << " " << req->get_tid() << dendl; + req->unsafe_dir_item.remove_myself(); + } + if (req->target) { + InodeRef &in = req->target; + in->set_async_err(-EIO); + lderr(cct) << "kick_requests_closed drop req of inode : " + << in->ino << " " << req->get_tid() << dendl; + req->unsafe_target_item.remove_myself(); + } signal_cond_list(req->waitfor_safe); unregister_request(req); } @@ -3309,7 +3322,7 @@ void Client::cap_delay_requeue(Inode *in) } void Client::send_cap(Inode *in, MetaSession *session, Cap *cap, - bool sync, int used, int want, int retain, + int flags, int used, int want, int retain, int flush, ceph_tid_t flush_tid) { int held = cap->issued | cap->implemented; @@ -3320,7 +3333,6 @@ void Client::send_cap(Inode *in, MetaSession *session, Cap *cap, ldout(cct, 10) << __func__ << " " << *in << " mds." << session->mds_num << " seq " << cap->seq - << (sync ? " sync " : " async ") << " used " << ccap_string(used) << " want " << ccap_string(want) << " flush " << ccap_string(flush) @@ -3395,11 +3407,13 @@ void Client::send_cap(Inode *in, MetaSession *session, Cap *cap, m->btime = in->btime; m->time_warp_seq = in->time_warp_seq; m->change_attr = in->change_attr; - if (sync) - m->flags |= MClientCaps::FLAG_SYNC; - if (!in->cap_snaps.empty()) - m->flags |= MClientCaps::FLAG_PENDING_CAPSNAP; - + + if (!(flags & MClientCaps::FLAG_PENDING_CAPSNAP) && + !in->cap_snaps.empty() && + in->cap_snaps.rbegin()->second.flush_tid == 0) + flags |= MClientCaps::FLAG_PENDING_CAPSNAP; + m->flags = flags; + if (flush & CEPH_CAP_FILE_WR) { m->inline_version = in->inline_version; m->inline_data = in->inline_data; @@ -3527,8 +3541,6 @@ void Client::check_caps(Inode *in, unsigned flags) used &= ~(CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO); } - if (!in->cap_snaps.empty()) - flush_snaps(in); for (auto &p : in->caps) { mds_rank_t mds = p.first; @@ -3585,17 +3597,15 @@ void Client::check_caps(Inode *in, unsigned flags) } ack: - // re-send old cap/snapcap flushes first. - if (session->mds_state >= MDSMap::STATE_RECONNECT && - session->mds_state < MDSMap::STATE_ACTIVE && - session->early_flushing_caps.count(in) == 0) { - ldout(cct, 20) << " reflushing caps (check_caps) on " << *in - << " to mds." 
<< session->mds_num << dendl; - session->early_flushing_caps.insert(in); - if (in->cap_snaps.size()) - flush_snaps(in, true); - if (in->flushing_caps) - flush_caps(in, session, flags & CHECK_CAPS_SYNCHRONOUS); + if (&cap == in->auth_cap) { + if (in->flags & I_KICK_FLUSH) { + ldout(cct, 20) << " reflushing caps (check_caps) on " << *in + << " to mds." << mds << dendl; + kick_flushing_caps(in, session); + } + if (!in->cap_snaps.empty() && + in->cap_snaps.rbegin()->second.flush_tid == 0) + flush_snaps(in); } int flushing; @@ -3607,8 +3617,9 @@ void Client::check_caps(Inode *in, unsigned flags) flush_tid = 0; } - send_cap(in, session, &cap, flags & CHECK_CAPS_SYNCHRONOUS, cap_used, wanted, - retain, flushing, flush_tid); + int msg_flags = (flags & CHECK_CAPS_SYNCHRONOUS) ? MClientCaps::FLAG_SYNC : 0; + send_cap(in, session, &cap, msg_flags, cap_used, wanted, retain, + flushing, flush_tid); } } @@ -3693,23 +3704,63 @@ void Client::_flushed_cap_snap(Inode *in, snapid_t seq) flush_snaps(in); } -void Client::flush_snaps(Inode *in, bool all_again) +void Client::send_flush_snap(Inode *in, MetaSession *session, + snapid_t follows, CapSnap& capsnap) { - ldout(cct, 10) << "flush_snaps on " << *in << " all_again " << all_again << dendl; + auto m = MClientCaps::create(CEPH_CAP_OP_FLUSHSNAP, + in->ino, in->snaprealm->ino, 0, + in->auth_cap->mseq, cap_epoch_barrier); + m->caller_uid = capsnap.cap_dirtier_uid; + m->caller_gid = capsnap.cap_dirtier_gid; + + m->set_client_tid(capsnap.flush_tid); + m->head.snap_follows = follows; + + m->head.caps = capsnap.issued; + m->head.dirty = capsnap.dirty; + + m->head.uid = capsnap.uid; + m->head.gid = capsnap.gid; + m->head.mode = capsnap.mode; + m->btime = capsnap.btime; + + m->size = capsnap.size; + + m->head.xattr_version = capsnap.xattr_version; + encode(capsnap.xattrs, m->xattrbl); + + m->ctime = capsnap.ctime; + m->btime = capsnap.btime; + m->mtime = capsnap.mtime; + m->atime = capsnap.atime; + m->time_warp_seq = capsnap.time_warp_seq; + m->change_attr = capsnap.change_attr; + + if (capsnap.dirty & CEPH_CAP_FILE_WR) { + m->inline_version = in->inline_version; + m->inline_data = in->inline_data; + } + + ceph_assert(!session->flushing_caps_tids.empty()); + m->set_oldest_flush_tid(*session->flushing_caps_tids.begin()); + + session->con->send_message2(std::move(m)); +} + +void Client::flush_snaps(Inode *in) +{ + ldout(cct, 10) << "flush_snaps on " << *in << dendl; ceph_assert(in->cap_snaps.size()); // pick auth mds ceph_assert(in->auth_cap); MetaSession *session = in->auth_cap->session; - int mseq = in->auth_cap->mseq; for (auto &p : in->cap_snaps) { CapSnap &capsnap = p.second; - if (!all_again) { - // only flush once per session - if (capsnap.flush_tid > 0) - continue; - } + // only do new flush + if (capsnap.flush_tid > 0) + continue; ldout(cct, 10) << "flush_snaps mds." 
<< session->mds_num << " follows " << p.first @@ -3719,57 +3770,18 @@ void Client::flush_snaps(Inode *in, bool all_again) << " writing=" << capsnap.writing << " on " << *in << dendl; if (capsnap.dirty_data || capsnap.writing) - continue; + break; - if (capsnap.flush_tid == 0) { - capsnap.flush_tid = ++last_flush_tid; - if (!in->flushing_cap_item.is_on_list()) - session->flushing_caps.push_back(&in->flushing_cap_item); - session->flushing_caps_tids.insert(capsnap.flush_tid); - } - - auto m = MClientCaps::create(CEPH_CAP_OP_FLUSHSNAP, in->ino, in->snaprealm->ino, 0, mseq, - cap_epoch_barrier); - m->caller_uid = capsnap.cap_dirtier_uid; - m->caller_gid = capsnap.cap_dirtier_gid; - - m->set_client_tid(capsnap.flush_tid); - m->head.snap_follows = p.first; - - m->head.caps = capsnap.issued; - m->head.dirty = capsnap.dirty; - - m->head.uid = capsnap.uid; - m->head.gid = capsnap.gid; - m->head.mode = capsnap.mode; - m->btime = capsnap.btime; + capsnap.flush_tid = ++last_flush_tid; + session->flushing_caps_tids.insert(capsnap.flush_tid); + in->flushing_cap_tids[capsnap.flush_tid] = 0; + if (!in->flushing_cap_item.is_on_list()) + session->flushing_caps.push_back(&in->flushing_cap_item); - m->size = capsnap.size; - - m->head.xattr_version = capsnap.xattr_version; - encode(capsnap.xattrs, m->xattrbl); - - m->ctime = capsnap.ctime; - m->btime = capsnap.btime; - m->mtime = capsnap.mtime; - m->atime = capsnap.atime; - m->time_warp_seq = capsnap.time_warp_seq; - m->change_attr = capsnap.change_attr; - - if (capsnap.dirty & CEPH_CAP_FILE_WR) { - m->inline_version = in->inline_version; - m->inline_data = in->inline_data; - } - - ceph_assert(!session->flushing_caps_tids.empty()); - m->set_oldest_flush_tid(*session->flushing_caps_tids.begin()); - - session->con->send_message2(std::move(m)); + send_flush_snap(in, session, p.first, capsnap); } } - - void Client::wait_on_list(list& ls) { Cond cond; @@ -4119,9 +4131,8 @@ void Client::remove_session_caps(MetaSession *s) while (s->caps.size()) { Cap *cap = *s->caps.begin(); InodeRef in(&cap->inode); - bool dirty_caps = false, cap_snaps = false; + bool dirty_caps = false; if (in->auth_cap == cap) { - cap_snaps = !in->cap_snaps.empty(); dirty_caps = in->dirty_caps | in->flushing_caps; in->wanted_max_size = 0; in->requested_max_size = 0; @@ -4129,9 +4140,7 @@ void Client::remove_session_caps(MetaSession *s) if (cap->wanted | cap->issued) in->flags |= I_CAP_DROPPED; remove_cap(cap, false); - if (cap_snaps) { - in->cap_snaps.clear(); - } + in->cap_snaps.clear(); if (dirty_caps) { lderr(cct) << __func__ << " still has dirty|flushing caps on " << *in << dendl; if (in->flushing_caps) { @@ -4393,28 +4402,6 @@ void Client::flush_caps_sync() } } -void Client::flush_caps(Inode *in, MetaSession *session, bool sync) -{ - ldout(cct, 10) << __func__ << " " << in << " mds." 
<< session->mds_num << dendl; - Cap *cap = in->auth_cap; - ceph_assert(cap->session == session); - - for (map::iterator p = in->flushing_cap_tids.begin(); - p != in->flushing_cap_tids.end(); - ++p) { - bool req_sync = false; - - /* If this is a synchronous request, then flush the journal on last one */ - if (sync && (p->first == in->flushing_cap_tids.rbegin()->first)) - req_sync = true; - - send_cap(in, session, cap, req_sync, - (get_caps_used(in) | in->caps_dirty()), - in->caps_wanted(), (cap->issued | cap->implemented), - p->second, p->first); - } -} - void Client::wait_sync_caps(Inode *in, ceph_tid_t want) { while (in->flushing_caps) { @@ -4448,6 +4435,40 @@ void Client::wait_sync_caps(ceph_tid_t want) } } +void Client::kick_flushing_caps(Inode *in, MetaSession *session) +{ + in->flags &= ~I_KICK_FLUSH; + + Cap *cap = in->auth_cap; + ceph_assert(cap->session == session); + + ceph_tid_t last_snap_flush = 0; + for (auto p = in->flushing_cap_tids.rbegin(); + p != in->flushing_cap_tids.rend(); + ++p) { + if (!p->second) { + last_snap_flush = p->first; + break; + } + } + + int wanted = in->caps_wanted(); + int used = get_caps_used(in) | in->caps_dirty(); + auto it = in->cap_snaps.begin(); + for (auto& p : in->flushing_cap_tids) { + if (p.second) { + int msg_flags = p.first < last_snap_flush ? MClientCaps::FLAG_PENDING_CAPSNAP : 0; + send_cap(in, session, cap, msg_flags, used, wanted, (cap->issued | cap->implemented), + p.second, p.first); + } else { + ceph_assert(it != in->cap_snaps.end()); + ceph_assert(it->second.flush_tid == p.first); + send_flush_snap(in, session, it->first, it->second); + ++it; + } + } +} + void Client::kick_flushing_caps(MetaSession *session) { mds_rank_t mds = session->mds_num; @@ -4455,22 +4476,15 @@ void Client::kick_flushing_caps(MetaSession *session) for (xlist::iterator p = session->flushing_caps.begin(); !p.end(); ++p) { Inode *in = *p; - if (session->early_flushing_caps.count(in)) - continue; - ldout(cct, 20) << " reflushing caps on " << *in << " to mds." << mds << dendl; - if (in->cap_snaps.size()) - flush_snaps(in, true); - if (in->flushing_caps) - flush_caps(in, session); + if (in->flags & I_KICK_FLUSH) { + ldout(cct, 20) << " reflushing caps on " << *in << " to mds." << mds << dendl; + kick_flushing_caps(in, session); + } } - - session->early_flushing_caps.clear(); } void Client::early_kick_flushing_caps(MetaSession *session) { - session->early_flushing_caps.clear(); - for (xlist::iterator p = session->flushing_caps.begin(); !p.end(); ++p) { Inode *in = *p; Cap *cap = in->auth_cap; @@ -4479,14 +4493,13 @@ void Client::early_kick_flushing_caps(MetaSession *session) // if flushing caps were revoked, we re-send the cap flush in client reconnect // stage. This guarantees that MDS processes the cap flush message before issuing // the flushing caps to other client. - if ((in->flushing_caps & in->auth_cap->issued) == in->flushing_caps) + if ((in->flushing_caps & in->auth_cap->issued) == in->flushing_caps) { + in->flags |= I_KICK_FLUSH; continue; + } ldout(cct, 20) << " reflushing caps (early_kick) on " << *in << " to mds." << session->mds_num << dendl; - - session->early_flushing_caps.insert(in); - // send_reconnect() also will reset these sequence numbers. make sure // sequence numbers in cap flush message match later reconnect message. 
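The new per-inode kick_flushing_caps() above replays flushing_cap_tids in tid order: entries recorded with a zero value are snap flushes and go out via send_flush_snap(), the rest go out via send_cap(), and any cap flush older than the newest pending snap flush is tagged with FLAG_PENDING_CAPSNAP. A small Python model of just that ordering decision (the tid table is invented):

from collections import OrderedDict


def replay_flushes(flushing_cap_tids):
    """flushing_cap_tids maps tid -> dirty caps, where 0 marks a snap flush."""
    # newest tid that belongs to a pending snap flush, if any
    last_snap_flush = 0
    for tid in reversed(list(flushing_cap_tids)):
        if flushing_cap_tids[tid] == 0:
            last_snap_flush = tid
            break

    actions = []
    for tid, dirty in flushing_cap_tids.items():
        if dirty:
            # cap flushes older than the newest snap flush carry the
            # PENDING_CAPSNAP hint so the ordering is preserved on the MDS
            pending_capsnap = tid < last_snap_flush
            actions.append(('send_cap', tid, pending_capsnap))
        else:
            actions.append(('send_flush_snap', tid, None))
    return actions


tids = OrderedDict([(10, 0x4), (11, 0), (12, 0x8)])   # cap, snap, cap
assert replay_flushes(tids) == [
    ('send_cap', 10, True),          # precedes the snap flush
    ('send_flush_snap', 11, None),
    ('send_cap', 12, False),
]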
cap->seq = 0; @@ -4494,11 +4507,7 @@ void Client::early_kick_flushing_caps(MetaSession *session) cap->mseq = 0; cap->issued = cap->implemented; - if (in->cap_snaps.size()) - flush_snaps(in, true); - if (in->flushing_caps) - flush_caps(in, session); - + kick_flushing_caps(in, session); } } @@ -4894,12 +4903,9 @@ void Client::handle_cap_import(MetaSession *session, Inode *in, const MConstRef< if (realm) put_snap_realm(realm); - if (in->auth_cap && in->auth_cap->session->mds_num == mds) { + if (in->auth_cap && in->auth_cap->session == session) { // reflush any/all caps (if we are now the auth_cap) - if (in->cap_snaps.size()) - flush_snaps(in, true); - if (in->flushing_caps) - flush_caps(in, session); + kick_flushing_caps(in, session); } } @@ -4978,6 +4984,11 @@ void Client::handle_cap_flush_ack(MetaSession *session, Inode *in, Cap *cap, con << " expected is " << it->first << dendl; } for (; it != in->flushing_cap_tids.end(); ) { + if (!it->second) { + // cap snap + ++it; + continue; + } if (it->first == flush_ack_tid) cleaned = it->second; if (it->first <= flush_ack_tid) { @@ -5018,7 +5029,7 @@ void Client::handle_cap_flush_ack(MetaSession *session, Inode *in, Cap *cap, con if (in->flushing_caps == 0) { ldout(cct, 10) << " " << *in << " !flushing" << dendl; num_flushing_caps--; - if (in->cap_snaps.empty()) + if (in->flushing_cap_tids.empty()) in->flushing_cap_item.remove_myself(); } if (!in->caps_dirty()) @@ -5030,24 +5041,29 @@ void Client::handle_cap_flush_ack(MetaSession *session, Inode *in, Cap *cap, con void Client::handle_cap_flushsnap_ack(MetaSession *session, Inode *in, const MConstRef& m) { + ceph_tid_t flush_ack_tid = m->get_client_tid(); mds_rank_t mds = session->mds_num; ceph_assert(in->caps.count(mds)); snapid_t follows = m->get_snap_follows(); if (auto it = in->cap_snaps.find(follows); it != in->cap_snaps.end()) { auto& capsnap = it->second; - if (m->get_client_tid() != capsnap.flush_tid) { - ldout(cct, 10) << " tid " << m->get_client_tid() << " != " << capsnap.flush_tid << dendl; + if (flush_ack_tid != capsnap.flush_tid) { + ldout(cct, 10) << " tid " << flush_ack_tid << " != " << capsnap.flush_tid << dendl; } else { + InodeRef tmp_ref(in); ldout(cct, 5) << __func__ << " mds." << mds << " flushed snap follows " << follows << " on " << *in << dendl; - InodeRef tmp_ref; - if (in->get_num_ref() == 1) - tmp_ref = in; // make sure inode not get freed while erasing item from in->cap_snaps - if (in->flushing_caps == 0 && in->cap_snaps.empty()) - in->flushing_cap_item.remove_myself(); session->flushing_caps_tids.erase(capsnap.flush_tid); + in->flushing_cap_tids.erase(capsnap.flush_tid); + if (in->flushing_caps == 0 && in->flushing_cap_tids.empty()) + in->flushing_cap_item.remove_myself(); in->cap_snaps.erase(it); + + signal_cond_list(in->waitfor_caps); + if (session->flushing_caps_tids.empty() || + *session->flushing_caps_tids.begin() > flush_ack_tid) + sync_cond.Signal(); } } else { ldout(cct, 5) << __func__ << " DUP(?) mds." 
<< mds << " flushed snap follows " << follows @@ -8000,12 +8016,12 @@ int Client::_readdir_cache_cb(dir_result_t *dirp, add_dirent_cb_t cb, void *p, fill_statx(dn->inode, caps, &stx); uint64_t next_off = dn->offset + 1; + fill_dirent(&de, dn->name.c_str(), stx.stx_mode, stx.stx_ino, next_off); ++pd; if (pd == dir->readdir_cache.end()) next_off = dir_result_t::END; Inode *in = NULL; - fill_dirent(&de, dn->name.c_str(), stx.stx_mode, stx.stx_ino, next_off); if (getref) { in = dn->inode.get(); _ll_get(in); diff --git a/ceph/src/client/Client.h b/ceph/src/client/Client.h index 755088685..742cb6812 100644 --- a/ceph/src/client/Client.h +++ b/ceph/src/client/Client.h @@ -651,7 +651,7 @@ public: int mark_caps_flushing(Inode *in, ceph_tid_t *ptid); void adjust_session_flushing_caps(Inode *in, MetaSession *old_s, MetaSession *new_s); void flush_caps_sync(); - void flush_caps(Inode *in, MetaSession *session, bool sync=false); + void kick_flushing_caps(Inode *in, MetaSession *session); void kick_flushing_caps(MetaSession *session); void early_kick_flushing_caps(MetaSession *session); int get_caps(Inode *in, int need, int want, int *have, loff_t endoff); @@ -670,13 +670,16 @@ public: void handle_cap_flushsnap_ack(MetaSession *session, Inode *in, const MConstRef& m); void handle_cap_grant(MetaSession *session, Inode *in, Cap *cap, const MConstRef& m); void cap_delay_requeue(Inode *in); - void send_cap(Inode *in, MetaSession *session, Cap *cap, bool sync, + + void send_cap(Inode *in, MetaSession *session, Cap *cap, int flags, int used, int want, int retain, int flush, ceph_tid_t flush_tid); + void send_flush_snap(Inode *in, MetaSession *session, snapid_t follows, CapSnap& capsnap); + + void flush_snaps(Inode *in); void get_cap_ref(Inode *in, int cap); void put_cap_ref(Inode *in, int cap); - void flush_snaps(Inode *in, bool all_again=false); void wait_sync_caps(Inode *in, ceph_tid_t want); void wait_sync_caps(ceph_tid_t want); void queue_cap_snap(Inode *in, SnapContext &old_snapc); diff --git a/ceph/src/client/Inode.h b/ceph/src/client/Inode.h index e101596c7..0e6586cb6 100644 --- a/ceph/src/client/Inode.h +++ b/ceph/src/client/Inode.h @@ -113,6 +113,7 @@ struct CapSnap { #define I_DIR_ORDERED 2 #define I_CAP_DROPPED 4 #define I_SNAPDIR_OPEN 8 +#define I_KICK_FLUSH 16 struct Inode { Client *client; diff --git a/ceph/src/client/MetaSession.h b/ceph/src/client/MetaSession.h index af2d7acd7..ba1f8d80f 100644 --- a/ceph/src/client/MetaSession.h +++ b/ceph/src/client/MetaSession.h @@ -52,7 +52,6 @@ struct MetaSession { xlist requests; xlist unsafe_requests; std::set flushing_caps_tids; - std::set early_flushing_caps; MClientCapRelease::ref release; diff --git a/ceph/src/cls/rbd/cls_rbd.cc b/ceph/src/cls/rbd/cls_rbd.cc index 3a810e2e9..09f2e7901 100644 --- a/ceph/src/cls/rbd/cls_rbd.cc +++ b/ceph/src/cls/rbd/cls_rbd.cc @@ -4590,6 +4590,13 @@ int uuid_get(cls_method_context_t hctx, std::string *mirror_uuid) { return 0; } +void sanitize_entity_inst(entity_inst_t* entity_inst) { + // make all addrs of type ANY because the type isn't what uniquely + // identifies them and clients and on-disk formats can be encoded + // with different backwards compatibility settings. 
+ entity_inst->addr.set_type(entity_addr_t::TYPE_ANY); +} + int list_watchers(cls_method_context_t hctx, std::set *entities) { obj_list_watch_response_t watchers; @@ -4601,7 +4608,10 @@ int list_watchers(cls_method_context_t hctx, entities->clear(); for (auto &w : watchers.entries) { - entities->emplace(w.name, w.addr); + entity_inst_t entity_inst{w.name, w.addr}; + sanitize_entity_inst(&entity_inst); + + entities->insert(entity_inst); } return 0; } @@ -4804,7 +4814,9 @@ struct MirrorImageStatusOnDisk : cls::rbd::MirrorImageStatus { void encode_meta(bufferlist &bl, uint64_t features) const { ENCODE_START(1, 1, bl); - encode(origin, bl, features); + auto sanitized_origin = origin; + sanitize_entity_inst(&sanitized_origin); + encode(sanitized_origin, bl, features); ENCODE_FINISH(bl); } @@ -4816,6 +4828,7 @@ struct MirrorImageStatusOnDisk : cls::rbd::MirrorImageStatus { void decode_meta(bufferlist::const_iterator &it) { DECODE_START(1, it); decode(origin, it); + sanitize_entity_inst(&origin); DECODE_FINISH(it); } diff --git a/ceph/src/cls/rbd/cls_rbd_client.cc b/ceph/src/cls/rbd/cls_rbd_client.cc index ebf2104ad..9b868c55d 100644 --- a/ceph/src/cls/rbd/cls_rbd_client.cc +++ b/ceph/src/cls/rbd/cls_rbd_client.cc @@ -1808,8 +1808,8 @@ int mirror_peer_list(librados::IoCtx *ioctx, int mirror_peer_add(librados::IoCtx *ioctx, const std::string &uuid, const std::string &cluster_name, - const std::string &client_name, int64_t pool_id) { - cls::rbd::MirrorPeer peer(uuid, cluster_name, client_name, pool_id); + const std::string &client_name) { + cls::rbd::MirrorPeer peer(uuid, cluster_name, client_name, -1); bufferlist in_bl; encode(peer, in_bl); diff --git a/ceph/src/cls/rbd/cls_rbd_client.h b/ceph/src/cls/rbd/cls_rbd_client.h index e03f0f410..27a64cc23 100644 --- a/ceph/src/cls/rbd/cls_rbd_client.h +++ b/ceph/src/cls/rbd/cls_rbd_client.h @@ -385,8 +385,7 @@ int mirror_peer_list(librados::IoCtx *ioctx, std::vector *peers); int mirror_peer_add(librados::IoCtx *ioctx, const std::string &uuid, const std::string &cluster_name, - const std::string &client_name, - int64_t pool_id = -1); + const std::string &client_name); int mirror_peer_remove(librados::IoCtx *ioctx, const std::string &uuid); int mirror_peer_set_client(librados::IoCtx *ioctx, diff --git a/ceph/src/cls/rgw/cls_rgw_types.cc b/ceph/src/cls/rgw/cls_rgw_types.cc index 892ef5617..a94c1134b 100644 --- a/ceph/src/cls/rgw/cls_rgw_types.cc +++ b/ceph/src/cls/rgw/cls_rgw_types.cc @@ -481,7 +481,7 @@ void rgw_bi_log_entry::generate_test_instances(list& ls) ls.push_back(new rgw_bi_log_entry); ls.back()->id = "midf"; ls.back()->object = "obj"; - ls.back()->timestamp = ceph::real_clock::from_ceph_timespec({{2}, {3}}); + ls.back()->timestamp = ceph::real_clock::from_ceph_timespec({init_le32(2), init_le32(3)}); ls.back()->index_ver = 4323; ls.back()->tag = "tagasdfds"; ls.back()->op = CLS_RGW_OP_DEL; @@ -663,7 +663,7 @@ void cls_rgw_reshard_entry::generate_test_instances(list { ls.push_back(new cls_rgw_reshard_entry); ls.push_back(new cls_rgw_reshard_entry); - ls.back()->time = ceph::real_clock::from_ceph_timespec({{2}, {3}}); + ls.back()->time = ceph::real_clock::from_ceph_timespec({init_le32(2), init_le32(3)}); ls.back()->tenant = "tenant"; ls.back()->bucket_name = "bucket1"""; ls.back()->bucket_id = "bucket_id"; diff --git a/ceph/src/cls/rgw/cls_rgw_types.h b/ceph/src/cls/rgw/cls_rgw_types.h index d069e8f51..6098f1be6 100644 --- a/ceph/src/cls/rgw/cls_rgw_types.h +++ b/ceph/src/cls/rgw/cls_rgw_types.h @@ -1093,7 +1093,7 @@ struct cls_rgw_gc_obj_info 
ls.push_back(new cls_rgw_gc_obj_info); ls.push_back(new cls_rgw_gc_obj_info); ls.back()->tag = "footag"; - ceph_timespec ts{21, 32}; + ceph_timespec ts{init_le32(21), init_le32(32)}; ls.back()->time = ceph::real_clock::from_ceph_timespec(ts); } }; diff --git a/ceph/src/cls/user/cls_user.cc b/ceph/src/cls/user/cls_user.cc index 17e394b67..e80e6e231 100644 --- a/ceph/src/cls/user/cls_user.cc +++ b/ceph/src/cls/user/cls_user.cc @@ -370,11 +370,12 @@ static int cls_user_get_header(cls_method_context_t hctx, bufferlist *in, buffer return 0; } -/// A method to reset the user.buckets header stats in accordance to the values -/// seen in the user.buckets omap keys. This will not be equivalent to --sync-stats -/// which requires comparing the values with actual bucket meta stats supplied -/// by RGW -static int cls_user_reset_stats(cls_method_context_t hctx, bufferlist *in, bufferlist *out /*ignore*/) +/// A method to reset the user.buckets header stats in accordance to +/// the values seen in the user.buckets omap keys. This is not be +/// equivalent to --sync-stats which also re-calculates the stats for +/// each bucket. +static int cls_user_reset_stats(cls_method_context_t hctx, + bufferlist *in, bufferlist *out /*ignore*/) { cls_user_reset_stats_op op; @@ -382,27 +383,33 @@ static int cls_user_reset_stats(cls_method_context_t hctx, bufferlist *in, buffe auto bliter = in->cbegin(); decode(op, bliter); } catch (buffer::error& err) { - CLS_LOG(0, "ERROR: cls_user_reset_op(): failed to decode op"); + CLS_LOG(0, "ERROR: %s failed to decode op", __func__); return -EINVAL; } + cls_user_header header; bool truncated = false; string from_index, prefix; do { map keys; - int rc = cls_cxx_map_get_vals(hctx, from_index, prefix, MAX_ENTRIES, &keys, &truncated); - - if (rc < 0) + int rc = cls_cxx_map_get_vals(hctx, from_index, prefix, MAX_ENTRIES, + &keys, &truncated); + if (rc < 0) { + CLS_LOG(0, "ERROR: %s failed to retrieve omap key-values", __func__); return rc; + } + CLS_LOG(20, "%s: read %lu key-values, truncated=%d", + __func__, keys.size(), truncated); - for (const auto&kv : keys){ + for (const auto& kv : keys) { cls_user_bucket_entry e; try { auto bl = kv.second; auto bliter = bl.cbegin(); decode(e, bliter); } catch (buffer::error& err) { - CLS_LOG(0, "ERROR: failed to decode bucket entry for %s", kv.first.c_str()); + CLS_LOG(0, "ERROR: %s failed to decode bucket entry for %s", + __func__, kv.first.c_str()); return -EIO; } add_header_stats(&header.stats, e); @@ -413,6 +420,7 @@ static int cls_user_reset_stats(cls_method_context_t hctx, bufferlist *in, buffe header.last_stats_update = op.time; encode(header, bl); + CLS_LOG(20, "%s: updating header", __func__); return cls_cxx_map_write_header(hctx, &bl); } diff --git a/ceph/src/common/CMakeLists.txt b/ceph/src/common/CMakeLists.txt index 65ba10b0f..bd9276520 100644 --- a/ceph/src/common/CMakeLists.txt +++ b/ceph/src/common/CMakeLists.txt @@ -4,6 +4,9 @@ add_library(common_buffer_obj OBJECT add_library(common_texttable_obj OBJECT TextTable.cc) +add_library(common_prioritycache_obj OBJECT + PriorityCache.cc) + set(common_srcs AsyncOpTracker.cc BackTrace.cc @@ -20,7 +23,6 @@ set(common_srcs Mutex.cc OutputDataSocket.cc PluginRegistry.cc - PriorityCache.cc Readahead.cc SloppyCRCMap.cc SubProcess.cc @@ -175,7 +177,7 @@ target_link_libraries(crc32 add_library(common_utf8 STATIC utf8.c) -if(WITH_LIBCEPHFS OR WITH_KRBD) +if(HAVE_KEYUTILS) set(parse_secret_srcs secret.c) add_library(parse_secret_objs OBJECT ${parse_secret_srcs}) diff --git 
a/ceph/src/common/Checksummer.h b/ceph/src/common/Checksummer.h index 2137c1d66..ceb551bcb 100644 --- a/ceph/src/common/Checksummer.h +++ b/ceph/src/common/Checksummer.h @@ -5,6 +5,7 @@ #define CEPH_OS_BLUESTORE_CHECKSUMMER #include "xxHash/xxhash.h" +#include "include/byteorder.h" class Checksummer { public: @@ -69,7 +70,7 @@ public: struct crc32c { typedef uint32_t init_value_t; - typedef __le32 value_t; + typedef ceph_le32 value_t; // we have no execution context/state. typedef int state_t; @@ -78,7 +79,7 @@ public: static void fini(state_t *state) { } - static value_t calc( + static init_value_t calc( state_t state, init_value_t init_value, size_t len, @@ -90,7 +91,7 @@ public: struct crc32c_16 { typedef uint32_t init_value_t; - typedef __le16 value_t; + typedef ceph_le16 value_t; // we have no execution context/state. typedef int state_t; @@ -99,7 +100,7 @@ public: static void fini(state_t *state) { } - static value_t calc( + static init_value_t calc( state_t state, init_value_t init_value, size_t len, @@ -120,7 +121,7 @@ public: static void fini(state_t *state) { } - static value_t calc( + static init_value_t calc( state_t state, init_value_t init_value, size_t len, @@ -132,7 +133,7 @@ public: struct xxhash32 { typedef uint32_t init_value_t; - typedef __le32 value_t; + typedef ceph_le32 value_t; typedef XXH32_state_t *state_t; static void init(state_t *s) { @@ -142,7 +143,7 @@ public: XXH32_freeState(*s); } - static value_t calc( + static init_value_t calc( state_t state, init_value_t init_value, size_t len, @@ -161,7 +162,7 @@ public: struct xxhash64 { typedef uint64_t init_value_t; - typedef __le64 value_t; + typedef ceph_le64 value_t; typedef XXH64_state_t *state_t; static void init(state_t *s) { @@ -171,7 +172,7 @@ public: XXH64_freeState(*s); } - static value_t calc( + static init_value_t calc( state_t state, init_value_t init_value, size_t len, @@ -250,7 +251,7 @@ public: pv += offset / csum_block_size; size_t pos = offset; while (length > 0) { - typename Alg::value_t v = Alg::calc(state, -1, csum_block_size, p); + typename Alg::init_value_t v = Alg::calc(state, -1, csum_block_size, p); if (*pv != v) { if (bad_csum) { *bad_csum = v; diff --git a/ceph/src/common/Formatter.cc b/ceph/src/common/Formatter.cc index f2498c932..786272c16 100644 --- a/ceph/src/common/Formatter.cc +++ b/ceph/src/common/Formatter.cc @@ -25,6 +25,32 @@ // ----------------------- namespace ceph { +std::string +fixed_u_to_string(uint64_t num, int scale) +{ + std::ostringstream t; + + t.fill('0'); + t.width(scale + 1); + t << num; + int len = t.str().size(); + return t.str().substr(0,len - scale) + "." + t.str().substr(len - scale); +} + +std::string +fixed_to_string(int64_t num, int scale) +{ + std::ostringstream t; + bool neg = num < 0; + if (neg) num = -num; + + t.fill('0'); + t.width(scale + 1); + t << num; + int len = t.str().size(); + return (neg ? "-" : "") + t.str().substr(0,len - scale) + "." + t.str().substr(len - scale); +} + /* * FormatterAttrs(const char *attr, ...) 
* diff --git a/ceph/src/common/Formatter.h b/ceph/src/common/Formatter.h index 1363f1f67..c4cdd5523 100644 --- a/ceph/src/common/Formatter.h +++ b/ceph/src/common/Formatter.h @@ -299,5 +299,7 @@ namespace ceph { std::vector< std::string > m_column_name; }; + std::string fixed_to_string(int64_t num, int scale); + std::string fixed_u_to_string(uint64_t num, int scale); } #endif diff --git a/ceph/src/common/PriorityCache.cc b/ceph/src/common/PriorityCache.cc index cbcf17430..bb4366b6c 100644 --- a/ceph/src/common/PriorityCache.cc +++ b/ceph/src/common/PriorityCache.cc @@ -13,9 +13,17 @@ */ #include "PriorityCache.h" +#include "common/dout.h" +#include "perfglue/heap_profiler.h" +#define dout_context cct +#define dout_subsys ceph_subsys_prioritycache +#undef dout_prefix +#define dout_prefix *_dout << "prioritycache " -namespace PriorityCache { - int64_t get_chunk(uint64_t usage, uint64_t total_bytes) { +namespace PriorityCache +{ + int64_t get_chunk(uint64_t usage, uint64_t total_bytes) + { uint64_t chunk = total_bytes; // Find the nearest power of 2 @@ -50,6 +58,341 @@ namespace PriorityCache { return val; } - PriCache::~PriCache() { + Manager::Manager(CephContext *c, + uint64_t min, + uint64_t max, + uint64_t target, + bool reserve_extra) : + cct(c), + caches{}, + min_mem(min), + max_mem(max), + target_mem(target), + tuned_mem(min), + reserve_extra(reserve_extra) + { + PerfCountersBuilder b(cct, "prioritycache", + MallocStats::M_FIRST, MallocStats::M_LAST); + + b.add_u64(MallocStats::M_TARGET_BYTES, "target_bytes", + "target process memory usage in bytes", "t", + PerfCountersBuilder::PRIO_INTERESTING, unit_t(UNIT_BYTES)); + + b.add_u64(MallocStats::M_MAPPED_BYTES, "mapped_bytes", + "total bytes mapped by the process", "m", + PerfCountersBuilder::PRIO_INTERESTING, unit_t(UNIT_BYTES)); + + b.add_u64(MallocStats::M_UNMAPPED_BYTES, "unmapped_bytes", + "unmapped bytes that the kernel has yet to reclaimed", "u", + PerfCountersBuilder::PRIO_INTERESTING, unit_t(UNIT_BYTES)); + + b.add_u64(MallocStats::M_HEAP_BYTES, "heap_bytes", + "aggregate bytes in use by the heap", "h", + PerfCountersBuilder::PRIO_INTERESTING, unit_t(UNIT_BYTES)); + + b.add_u64(MallocStats::M_CACHE_BYTES, "cache_bytes", + "current memory available for caches.", "c", + PerfCountersBuilder::PRIO_INTERESTING, unit_t(UNIT_BYTES)); + + logger = b.create_perf_counters(); + cct->get_perfcounters_collection()->add(logger); + + tune_memory(); + } + + Manager::~Manager() + { + clear(); + cct->get_perfcounters_collection()->remove(logger); + delete logger; + } + + void Manager::tune_memory() + { + size_t heap_size = 0; + size_t unmapped = 0; + uint64_t mapped = 0; + + ceph_heap_release_free_memory(); + ceph_heap_get_numeric_property("generic.heap_size", &heap_size); + ceph_heap_get_numeric_property("tcmalloc.pageheap_unmapped_bytes", &unmapped); + mapped = heap_size - unmapped; + + uint64_t new_size = tuned_mem; + new_size = (new_size < max_mem) ? new_size : max_mem; + new_size = (new_size > min_mem) ? new_size : min_mem; + + // Approach the min/max slowly, but bounce away quickly. 
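The comment above summarises the heuristic implemented just below: grow the cache budget toward max_mem in proportion to how far mapped memory sits below the target, and shrink toward min_mem in proportion to the overshoot. A worked Python sketch of that arithmetic (the byte figures are made-up examples):

def tune(tuned_mem, mapped, min_mem, max_mem, target_mem):
    # clamp the previous budget into [min_mem, max_mem] first
    new_size = min(max(tuned_mem, min_mem), max_mem)
    if mapped < target_mem:
        # below target: move toward max in proportion to the headroom
        ratio = 1.0 - float(mapped) / target_mem
        new_size += ratio * (max_mem - new_size)
    else:
        # above target: move toward min in proportion to the overshoot
        ratio = 1.0 - float(target_mem) / mapped
        new_size -= ratio * (new_size - min_mem)
    return int(new_size)


GiB = 1 << 30
# mapped memory is half of the 4 GiB target, so move halfway toward max
assert tune(2 * GiB, 2 * GiB, 1 * GiB, 6 * GiB, 4 * GiB) == 4 * GiB
# mapped memory is double the target, so move halfway back toward min
assert tune(4 * GiB, 8 * GiB, 1 * GiB, 6 * GiB, 4 * GiB) == int(2.5 * GiB)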
+ if ((uint64_t) mapped < target_mem) { + double ratio = 1 - ((double) mapped / target_mem); + new_size += ratio * (max_mem - new_size); + } else { + double ratio = 1 - ((double) target_mem / mapped); + new_size -= ratio * (new_size - min_mem); + } + + ldout(cct, 5) << __func__ + << " target: " << target_mem + << " mapped: " << mapped + << " unmapped: " << unmapped + << " heap: " << heap_size + << " old mem: " << tuned_mem + << " new mem: " << new_size << dendl; + + tuned_mem = new_size; + + logger->set(MallocStats::M_TARGET_BYTES, target_mem); + logger->set(MallocStats::M_MAPPED_BYTES, mapped); + logger->set(MallocStats::M_UNMAPPED_BYTES, unmapped); + logger->set(MallocStats::M_HEAP_BYTES, heap_size); + logger->set(MallocStats::M_CACHE_BYTES, new_size); + } + + void Manager::insert(const std::string& name, std::shared_ptr c, + bool enable_perf_counters) + { + ceph_assert(!caches.count(name)); + ceph_assert(!indexes.count(name)); + + caches.emplace(name, c); + + if (!enable_perf_counters) { + return; + } + + // TODO: If we ever assign more than + // PERF_COUNTER_MAX_BOUND - PERF_COUNTER_LOWER_BOUND perf counters for + // priority caching we could run out of slots. Recycle them some day? + // Also note that start and end are *exclusive*. + int start = cur_index++; + int end = cur_index + Extra::E_LAST + 1; + + ceph_assert(end < PERF_COUNTER_MAX_BOUND); + indexes.emplace(name, std::vector(Extra::E_LAST + 1)); + + PerfCountersBuilder b(cct, "prioritycache:" + name, start, end); + + b.add_u64(cur_index + Priority::PRI0, "pri0_bytes", + "bytes allocated to pri0", "p0", + PerfCountersBuilder::PRIO_INTERESTING, unit_t(UNIT_BYTES)); + + b.add_u64(cur_index + Priority::PRI1, "pri1_bytes", + "bytes allocated to pri1", "p1", + PerfCountersBuilder::PRIO_INTERESTING, unit_t(UNIT_BYTES)); + + b.add_u64(cur_index + Priority::PRI2, "pri2_bytes", + "bytes allocated to pri2", "p2", + PerfCountersBuilder::PRIO_INTERESTING, unit_t(UNIT_BYTES)); + + b.add_u64(cur_index + Priority::PRI3, "pri3_bytes", + "bytes allocated to pri3", "p3", + PerfCountersBuilder::PRIO_INTERESTING, unit_t(UNIT_BYTES)); + + b.add_u64(cur_index + Priority::PRI4, "pri4_bytes", + "bytes allocated to pri4", "p4", + PerfCountersBuilder::PRIO_INTERESTING, unit_t(UNIT_BYTES)); + + b.add_u64(cur_index + Priority::PRI5, "pri5_bytes", + "bytes allocated to pri5", "p5", + PerfCountersBuilder::PRIO_INTERESTING, unit_t(UNIT_BYTES)); + + b.add_u64(cur_index + Priority::PRI6, "pri6_bytes", + "bytes allocated to pri6", "p6", + PerfCountersBuilder::PRIO_INTERESTING, unit_t(UNIT_BYTES)); + + b.add_u64(cur_index + Priority::PRI7, "pri7_bytes", + "bytes allocated to pri7", "p7", + PerfCountersBuilder::PRIO_INTERESTING, unit_t(UNIT_BYTES)); + + b.add_u64(cur_index + Priority::PRI8, "pri8_bytes", + "bytes allocated to pri8", "p8", + PerfCountersBuilder::PRIO_INTERESTING, unit_t(UNIT_BYTES)); + + b.add_u64(cur_index + Priority::PRI9, "pri9_bytes", + "bytes allocated to pri9", "p9", + PerfCountersBuilder::PRIO_INTERESTING, unit_t(UNIT_BYTES)); + + b.add_u64(cur_index + Priority::PRI10, "pri10_bytes", + "bytes allocated to pri10", "p10", + PerfCountersBuilder::PRIO_INTERESTING, unit_t(UNIT_BYTES)); + + b.add_u64(cur_index + Priority::PRI11, "pri11_bytes", + "bytes allocated to pri11", "p11", + PerfCountersBuilder::PRIO_INTERESTING, unit_t(UNIT_BYTES)); + + b.add_u64(cur_index + Extra::E_RESERVED, "reserved_bytes", + "bytes reserved for future growth.", "r", + PerfCountersBuilder::PRIO_INTERESTING, unit_t(UNIT_BYTES)); + + b.add_u64(cur_index + 
Extra::E_COMMITTED, "committed_bytes", + "total bytes committed,", "c", + PerfCountersBuilder::PRIO_CRITICAL, unit_t(UNIT_BYTES)); + + for (int i = 0; i < Extra::E_LAST+1; i++) { + indexes[name][i] = cur_index + i; + } + + auto l = b.create_perf_counters(); + loggers.emplace(name, l); + cct->get_perfcounters_collection()->add(l); + + cur_index = end; + } + + void Manager::erase(const std::string& name) + { + auto li = loggers.find(name); + if (li != loggers.end()) { + cct->get_perfcounters_collection()->remove(li->second); + delete li->second; + loggers.erase(li); + } + indexes.erase(name); + caches.erase(name); + } + + void Manager::clear() + { + auto li = loggers.begin(); + while (li != loggers.end()) { + cct->get_perfcounters_collection()->remove(li->second); + delete li->second; + li = loggers.erase(li); + } + indexes.clear(); + caches.clear(); + } + + void Manager::balance() + { + int64_t mem_avail = tuned_mem; + // Each cache is going to get a little extra from get_chunk, so shrink the + // available memory here to compensate. + if (reserve_extra) { + mem_avail -= get_chunk(1, tuned_mem) * caches.size(); + } + + if (mem_avail < 0) { + // There's so little memory available that just assigning a chunk per + // cache pushes us over the limit. Set mem_avail to 0 and continue to + // ensure each priority's byte counts are zeroed in balance_priority. + mem_avail = 0; + } + + // Assign memory for each priority level + for (int i = 0; i < Priority::LAST+1; i++) { + ldout(cct, 10) << __func__ << " assigning cache bytes for PRI: " << i << dendl; + + auto pri = static_cast(i); + balance_priority(&mem_avail, pri); + + // Update the per-priority perf counters + for (auto &l : loggers) { + auto it = caches.find(l.first); + ceph_assert(it != caches.end()); + + auto bytes = it->second->get_cache_bytes(pri); + l.second->set(indexes[it->first][pri], bytes); + } + } + // assert if we assigned more memory than is available. + ceph_assert(mem_avail >= 0); + + for (auto &l : loggers) { + auto it = caches.find(l.first); + ceph_assert(it != caches.end()); + + // Commit the new cache size + int64_t committed = it->second->commit_cache_size(tuned_mem); + + // Update the perf counters + int64_t alloc = it->second->get_cache_bytes(); + + l.second->set(indexes[it->first][Extra::E_RESERVED], committed - alloc); + l.second->set(indexes[it->first][Extra::E_COMMITTED], committed); + } + } + + void Manager::balance_priority(int64_t *mem_avail, Priority pri) + { + std::unordered_map> tmp_caches = caches; + double cur_ratios = 0; + double new_ratios = 0; + uint64_t round = 0; + + // First, zero this priority's bytes, sum the initial ratios. + for (auto it = caches.begin(); it != caches.end(); it++) { + it->second->set_cache_bytes(pri, 0); + cur_ratios += it->second->get_cache_ratio(); + } + + // For other priorities, loop until caches are satisified or we run out of + // memory (stop if we can't guarantee a full byte allocation). + while (!tmp_caches.empty() && *mem_avail > static_cast(tmp_caches.size())) { + uint64_t total_assigned = 0; + for (auto it = tmp_caches.begin(); it != tmp_caches.end();) { + int64_t cache_wants = it->second->request_cache_bytes(pri, tuned_mem); + // Usually the ratio should be set to the fraction of the current caches' + // assigned ratio compared to the total ratio of all caches that still + // want memory. There is a special case where the only caches left are + // all assigned 0% ratios but still want memory. 
+        double ratio = 1.0 / tmp_caches.size();
+        if (cur_ratios > 0) {
+          ratio = it->second->get_cache_ratio() / cur_ratios;
+        }
+        int64_t fair_share = static_cast<int64_t>(*mem_avail * ratio);
+
+        ldout(cct, 10) << __func__ << " " << it->first
+                       << " pri: " << (int) pri
+                       << " round: " << round
+                       << " wanted: " << cache_wants
+                       << " ratio: " << it->second->get_cache_ratio()
+                       << " cur_ratios: " << cur_ratios
+                       << " fair_share: " << fair_share
+                       << " mem_avail: " << *mem_avail
+                       << dendl;
+
+        if (cache_wants > fair_share) {
+          // If we want too much, take what we can get but stick around for more
+          it->second->add_cache_bytes(pri, fair_share);
+          total_assigned += fair_share;
+          new_ratios += it->second->get_cache_ratio();
+          ++it;
+        } else {
+          // Otherwise assign only what we want
+          if (cache_wants > 0) {
+            it->second->add_cache_bytes(pri, cache_wants);
+            total_assigned += cache_wants;
+          }
+          // Either the cache didn't want anything or got what it wanted, so
+          // remove it from the tmp list.
+          it = tmp_caches.erase(it);
+        }
+      }
+      // Reset the ratios
+      *mem_avail -= total_assigned;
+      cur_ratios = new_ratios;
+      new_ratios = 0;
+      ++round;
+    }
+
+    // If this is the last priority, divide up any remaining memory based
+    // solely on the ratios.
+    if (pri == Priority::LAST) {
+      uint64_t total_assigned = 0;
+      for (auto it = caches.begin(); it != caches.end(); it++) {
+        double ratio = it->second->get_cache_ratio();
+        int64_t fair_share = static_cast<int64_t>(*mem_avail * ratio);
+        it->second->set_cache_bytes(Priority::LAST, fair_share);
+        total_assigned += fair_share;
+      }
+      *mem_avail -= total_assigned;
+      return;
+    }
+  }
+
+  PriCache::~PriCache()
+  {
+  }
 }
diff --git a/ceph/src/common/PriorityCache.h b/ceph/src/common/PriorityCache.h
index 8dcb3e03a..6ac607022 100644
--- a/ceph/src/common/PriorityCache.h
+++ b/ceph/src/common/PriorityCache.h
@@ -17,14 +17,47 @@
 #include
 #include
+#include
+#include
+#include
+#include "common/perf_counters.h"
+#include "include/ceph_assert.h"
 
 namespace PriorityCache {
+  // Reserve 16384 slots for PriorityCache perf counters
+  const int PERF_COUNTER_LOWER_BOUND = 1073741824;
+  const int PERF_COUNTER_MAX_BOUND = 1073758208;
+
+  enum MallocStats {
+    M_FIRST = PERF_COUNTER_LOWER_BOUND,
+    M_TARGET_BYTES,
+    M_MAPPED_BYTES,
+    M_UNMAPPED_BYTES,
+    M_HEAP_BYTES,
+    M_CACHE_BYTES,
+    M_LAST,
+  };
+
   enum Priority {
-    PRI0,  // Reserved for special items
-    PRI1,  // High priority cache items
-    PRI2,  // Medium priority cache items
-    PRI3,  // Low priority cache items
-    LAST = PRI3,
+    PRI0,
+    PRI1,
+    PRI2,
+    PRI3,
+    PRI4,
+    PRI5,
+    PRI6,
+    PRI7,
+    PRI8,
+    PRI9,
+    PRI10,
+    PRI11,
+    LAST = PRI11,
+  };
+
+  enum Extra {
+    E_RESERVED = Priority::LAST+1,
+    E_COMMITTED,
+    E_LAST = E_COMMITTED,
   };
 
   int64_t get_chunk(uint64_t usage, uint64_t total_bytes);
@@ -68,6 +101,49 @@ namespace PriorityCache {
     // Get the name of this cache.
     virtual std::string get_cache_name() const = 0;
   };
+
+  class Manager {
+    CephContext* cct = nullptr;
+    PerfCounters* logger;
+    std::unordered_map<std::string, PerfCounters*> loggers;
+    std::unordered_map<std::string, std::vector<int>> indexes;
+    std::unordered_map<std::string, std::shared_ptr<PriCache>> caches;
+
+    // Start perf counter slots after the malloc stats.
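
A rough sanity check on the slot reservation annotated above, purely
illustrative and not part of the patch: 16384 slots are reserved, the
MallocStats block occupies the first few, and each insert() call consumes 15
more, so on the order of a thousand caches fit before the ceph_assert against
PERF_COUNTER_MAX_BOUND would trip.

    #include <iostream>

    int main() {
      const int lower = 1073741824;   // PERF_COUNTER_LOWER_BOUND
      const int upper = 1073758208;   // PERF_COUNTER_MAX_BOUND
      // cur_index starts at MallocStats::M_LAST, six slots past M_FIRST.
      const int malloc_offset = 6;
      // Each insert() advances cur_index by an exclusive start slot plus
      // twelve priority counters and two Extra counters.
      const int per_cache = 1 + 12 + 2;
      std::cout << "reserved slots: " << upper - lower << "\n"          // 16384
                << "caches before running out: "
                << (upper - lower - malloc_offset - 1) / per_cache      // 1091
                << "\n";
    }
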
+    int cur_index = MallocStats::M_LAST;
+
+    uint64_t min_mem = 0;
+    uint64_t max_mem = 0;
+    uint64_t target_mem = 0;
+    uint64_t tuned_mem = 0;
+    bool reserve_extra;
+
+  public:
+    Manager(CephContext *c, uint64_t min, uint64_t max, uint64_t target,
+            bool reserve_extra);
+    ~Manager();
+    void set_min_memory(uint64_t min) {
+      min_mem = min;
+    }
+    void set_max_memory(uint64_t max) {
+      max_mem = max;
+    }
+    void set_target_memory(uint64_t target) {
+      target_mem = target;
+    }
+    uint64_t get_tuned_mem() const {
+      return tuned_mem;
+    }
+    void insert(const std::string& name, const std::shared_ptr<PriCache> c,
+                bool enable_perf_counters);
+    void erase(const std::string& name);
+    void clear();
+    void tune_memory();
+    void balance();
+
+  private:
+    void balance_priority(int64_t *mem_avail, Priority pri);
+  };
 }
 
 #endif
diff --git a/ceph/src/common/Thread.h b/ceph/src/common/Thread.h
index bc32755c3..0ab65fca5 100644
--- a/ceph/src/common/Thread.h
+++ b/ceph/src/common/Thread.h
@@ -16,6 +16,8 @@
 #ifndef CEPH_THREAD_H
 #define CEPH_THREAD_H
 
+#include
+#include
 #include
 #include
 
@@ -68,13 +70,14 @@ std::string get_thread_name(const std::thread& t);
 void kill(std::thread& t, int signal);
 
 template<typename Fun, typename... Args>
-std::thread make_named_thread(const std::string& s,
+std::thread make_named_thread(std::string_view n,
                               Fun&& fun,
                               Args&& ...args) {
-  auto t = std::thread(std::forward<Fun>(fun),
-                       std::forward<Args>(args)...);
-  set_thread_name(t, s);
-  return t;
-}
+  return std::thread([n = std::string(n)](auto&& fun, auto&& ...args) {
+    ceph_pthread_setname(pthread_self(), n.data());
+    std::invoke(std::forward<Fun>(fun),
+                std::forward<Args>(args)...);
+  }, std::forward<Fun>(fun), std::forward<Args>(args)...);
+}
 
 #endif
diff --git a/ceph/src/common/WorkQueue.h b/ceph/src/common/WorkQueue.h
index a978a6d69..2ccdd9a10 100644
--- a/ceph/src/common/WorkQueue.h
+++ b/ceph/src/common/WorkQueue.h
@@ -41,6 +41,7 @@ class CephContext;
 
 /// Pool of threads that share work submitted to multiple work queues.
 class ThreadPool : public md_config_obs_t {
+protected:
   CephContext *cct;
   std::string name;
   std::string thread_name;
@@ -69,7 +70,7 @@ public:
     void reset_tp_timeout();
     void suspend_tp_timeout();
   };
-private:
+protected:
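
The WorkQueue.h hunks in this region relax ThreadPool's internals from private
to protected and make worker() virtual, which is the usual shape for letting a
derived pool reuse the base plumbing while specializing its worker loop. The
sketch below is a generic, self-contained illustration of that pattern; it is
not the actual Ceph ThreadPool API.

    #include <iostream>
    #include <string>
    #include <vector>

    class BasePool {
    protected:
      // formerly private state that a subclass may now need to see
      std::vector<std::string> work_queues;
      virtual void worker(int id) {            // formerly non-virtual
        std::cout << "base worker " << id << " draining "
                  << work_queues.size() << " queues\n";
      }
    public:
      virtual ~BasePool() = default;
      void add_queue(std::string name) { work_queues.push_back(std::move(name)); }
      void run() { worker(0); }
    };

    class TracingPool : public BasePool {
      void worker(int id) override {
        std::cout << "per-thread setup for worker " << id << "\n";
        BasePool::worker(id);                  // reuse the base loop
      }
    };

    int main() {
      TracingPool p;
      p.add_queue("osd_ops");
      p.run();
    }
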
 
   /// Basic interface to a work queue used by the worker threads.
   struct WorkQueue_ {
@@ -450,7 +451,7 @@ public:
     std::list<T *> m_items;
     uint32_t m_processing;
   };
-private:
+protected:
 
   std::vector<WorkQueue_*> work_queues;
   int next_work_queue = 0;
@@ -472,7 +473,7 @@ private:
   void start_threads();
   void join_old_threads();
-  void worker(WorkThread *wt);
+  virtual void worker(WorkThread *wt);
 
 public:
   ThreadPool(CephContext *cct_, std::string nm, std::string tn, int n, const char *option = NULL);
diff --git a/ceph/src/common/admin_socket.cc b/ceph/src/common/admin_socket.cc
index c7f21f7b0..25e8d8faf 100644
--- a/ceph/src/common/admin_socket.cc
+++ b/ceph/src/common/admin_socket.cc
@@ -317,6 +317,8 @@ bool AdminSocket::do_accept()
          c = "foo";
          break;
        }
+      //wrap command with new protocol
+      c = "{\"prefix\": \"" + c + "\"}";
       break;
     }
   } else {
@@ -334,8 +336,28 @@ bool AdminSocket::do_accept()
     }
   }
 
-  bool rval = false;
+  bool rval;
+  bufferlist out;
+  rval = execute_command(c, out);
+  if (rval) {
+    uint32_t len = htonl(out.length());
+    int ret = safe_write(connection_fd, &len, sizeof(len));
+    if (ret < 0) {
+      lderr(m_cct) << "AdminSocket: error writing response length "
+                   << cpp_strerror(ret) << dendl;
+      rval = false;
+    } else {
+      if (out.write_fd(connection_fd) >= 0)
+        rval = true;
+    }
+  }
+  retry_sys_call(::close, connection_fd);
+  return rval;
+}
+
+int AdminSocket::execute_command(const std::string& cmd, ceph::bufferlist& out)
+{
   cmdmap_t cmdmap;
   string format;
   vector<string> cmdvec;
@@ -343,14 +365,13 @@ bool AdminSocket::do_accept()
   cmdvec.push_back(cmd);
   if (!cmdmap_from_json(cmdvec, &cmdmap, errss)) {
     ldout(m_cct, 0) << "AdminSocket: " << errss.str() << dendl;
-    retry_sys_call(::close, connection_fd);
     return false;
   }
+  string match;
   try {
     cmd_getval(m_cct, cmdmap, "format", format);
-    cmd_getval(m_cct, cmdmap, "prefix", c);
+    cmd_getval(m_cct, cmdmap, "prefix", match);
   } catch (const bad_cmd_get& e) {
-    retry_sys_call(::close, connection_fd);
     return false;
   }
   if (format != "json" && format != "json-pretty" &&
@@ -359,7 +380,6 @@ bool AdminSocket::do_accept()
 
   std::unique_lock l(lock);
   decltype(hooks)::iterator p;
-  string match = c;
   while (match.size()) {
     p = hooks.find(match);
     if (p != hooks.cend())
@@ -375,53 +395,41 @@ bool AdminSocket::do_accept()
     }
   }
 
-  bufferlist out;
   if (p == hooks.cend()) {
-    lderr(m_cct) << "AdminSocket: request '" << c << "' not defined" << dendl;
-  } else {
-    string args;
-    if (match != c) {
-      args = c.substr(match.length() + 1);
-    }
-
-    // Drop lock to avoid cycles in cases where the hook takes
-    // the same lock that was held during calls to register/unregister,
-    // and set in_hook to allow unregister to wait for us before
-    // removing this hook.
-    in_hook = true;
-    auto match_hook = p->second.hook;
-    l.unlock();
-    bool success = (validate(match, cmdmap, out) &&
-                    match_hook->call(match, cmdmap, format, out));
-    l.lock();
-    in_hook = false;
-    in_hook_cond.notify_all();
-
-    if (!success) {
-      ldout(m_cct, 0) << "AdminSocket: request '" << match << "' args '" << args
-                      << "' to " << match_hook << " failed" << dendl;
-      out.append("failed");
-    } else {
-      ldout(m_cct, 5) << "AdminSocket: request '" << match << "' '" << args
-                      << "' to " << match_hook
-                      << " returned " << out.length() << " bytes" << dendl;
-    }
-    uint32_t len = htonl(out.length());
-    int ret = safe_write(connection_fd, &len, sizeof(len));
-    if (ret < 0) {
-      lderr(m_cct) << "AdminSocket: error writing response length "
-                   << cpp_strerror(ret) << dendl;
-    } else {
-      if (out.write_fd(connection_fd) >= 0)
-        rval = true;
-    }
+    lderr(m_cct) << "AdminSocket: request '" << cmd << "' not defined" << dendl;
+    return false;
+  }
+  string args;
+  if (match != cmd) {
+    args = cmd.substr(match.length() + 1);
   }
-  l.unlock();
-  retry_sys_call(::close, connection_fd);
-  return rval;
+  // Drop lock to avoid cycles in cases where the hook takes
+  // the same lock that was held during calls to register/unregister,
+  // and set in_hook to allow unregister to wait for us before
+  // removing this hook.
+  in_hook = true;
+  auto match_hook = p->second.hook;
+  l.unlock();
+  bool success = (validate(match, cmdmap, out) &&
+                  match_hook->call(match, cmdmap, format, out));
+  l.lock();
+  in_hook = false;
+  in_hook_cond.notify_all();
+  if (!success) {
+    ldout(m_cct, 0) << "AdminSocket: request '" << match << "' args '" << args
+                    << "' to " << match_hook << " failed" << dendl;
+    out.append("failed");
+  } else {
+    ldout(m_cct, 5) << "AdminSocket: request '" << match << "' '" << args
+                    << "' to " << match_hook
+                    << " returned " << out.length() << " bytes" << dendl;
+  }
+  return true;
 }
+
+
 bool AdminSocket::validate(const std::string& command, const cmdmap_t& cmdmap, bufferlist& out) const
diff --git a/ceph/src/common/admin_socket.h b/ceph/src/common/admin_socket.h
index 9f2c5ee87..3603fde35 100644
--- a/ceph/src/common/admin_socket.h
+++ b/ceph/src/common/admin_socket.h
@@ -94,6 +94,7 @@ public:
   void chown(uid_t uid, gid_t gid);
   void chmod(mode_t mode);
 
+  int execute_command(const std::string& cmd, ceph::bufferlist& out);
 
 private:
diff --git a/ceph/src/common/ceph_context.cc b/ceph/src/common/ceph_context.cc
index 3d70344d2..d45459368 100644
--- a/ceph/src/common/ceph_context.cc
+++ b/ceph/src/common/ceph_context.cc
@@ -196,6 +196,9 @@ public:
   {
     while (1) {
       std::unique_lock l(_lock);
+      if (_exit_thread) {
+        break;
+      }
       if (_cct->_conf->heartbeat_interval) {
         auto interval = ceph::make_timespan(_cct->_conf->heartbeat_interval);
diff --git a/ceph/src/common/config_proxy.h b/ceph/src/common/config_proxy.h
index 51aeaa842..ce7c35d83 100644
--- a/ceph/src/common/config_proxy.h
+++ b/ceph/src/common/config_proxy.h
@@ -81,12 +81,17 @@ class ConfigProxy {
   std::map obs_call_gate;
 
-  void call_observers(rev_obs_map_t &rev_obs) {
+  void call_observers(std::unique_lock& locker,
+                      rev_obs_map_t& rev_obs) {
+    // observers are notified outside of lock
+    locker.unlock();
     for (auto& [obs, keys] : rev_obs) {
       obs->handle_conf_change(*this, keys);
-      // this can be done outside the lock as call_gate_enter()
-      // and remove_observer() are serialized via lock
-      call_gate_leave(obs);
+    }
+    locker.lock();
+
+    for (auto& rev_ob : rev_obs) {
+      call_gate_leave(rev_ob.first);
     }
   }
 
@@ -184,16 +189,14 @@ public:
   }
   // for those want to reexpand special meta, e.g, $pid
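
The config_proxy.h hunks above and below converge on one locking shape: collect
the observers to notify while the lock is held, then drop the lock for the
callbacks so an observer that re-enters the config (or blocks) cannot deadlock
against it. A minimal standalone sketch of that shape follows, with illustrative
names rather than Ceph's.

    #include <functional>
    #include <iostream>
    #include <mutex>
    #include <vector>

    class Notifier {
      std::mutex lock;
      std::vector<std::function<void()>> observers;
    public:
      void add_observer(std::function<void()> cb) {
        std::lock_guard<std::mutex> g(lock);
        observers.push_back(std::move(cb));
      }
      void apply_changes() {
        std::unique_lock<std::mutex> locker(lock);
        auto to_call = observers;   // gather while the lock is held
        locker.unlock();            // observers are notified outside the lock
        for (auto& cb : to_call) {
          cb();
        }
        locker.lock();              // reacquire before touching shared state
      }
    };

    int main() {
      Notifier n;
      n.add_observer([] { std::cout << "observer notified\n"; });
      n.apply_changes();
    }
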
   void finalize_reexpand_meta() {
+    std::unique_lock locker(lock);
     rev_obs_map_t rev_obs;
-    {
-      std::lock_guard l(lock);
-      if (config.finalize_reexpand_meta(values, obs_mgr)) {
-        _gather_changes(values.changed, &rev_obs, nullptr);
-        values.changed.clear();
-      }
+    if (config.finalize_reexpand_meta(values, obs_mgr)) {
+      _gather_changes(values.changed, &rev_obs, nullptr);
+      values.changed.clear();
     }
 
-    call_observers(rev_obs);
+    call_observers(locker, rev_obs);
   }
   void add_observer(md_config_obs_t* obs) {
     std::lock_guard l(lock);
@@ -207,16 +210,14 @@ public:
     obs_mgr.remove_observer(obs);
   }
   void call_all_observers() {
+    std::unique_lock locker(lock);
     rev_obs_map_t rev_obs;
-    {
-      std::lock_guard l(lock);
-      obs_mgr.for_each_observer(
-        [this, &rev_obs](md_config_obs_t *obs, const std::string &key) {
-          map_observer_changes(obs, key, &rev_obs);
-        });
-    }
+    obs_mgr.for_each_observer(
+      [this, &rev_obs](md_config_obs_t *obs, const std::string &key) {
+        map_observer_changes(obs, key, &rev_obs);
+      });
 
-    call_observers(rev_obs);
+    call_observers(locker, rev_obs);
   }
   void set_safe_to_start_threads() {
     config.set_safe_to_start_threads();
@@ -242,18 +243,17 @@ public:
   }
   // Expand all metavariables. Make any pending observer callbacks.
   void apply_changes(std::ostream* oss) {
+    std::unique_lock locker(lock);
     rev_obs_map_t rev_obs;
-    {
-      std::lock_guard l{lock};
-      // apply changes until the cluster name is assigned
-      if (!values.cluster.empty()) {
-        // meta expands could have modified anything. Copy it all out again.
-        _gather_changes(values.changed, &rev_obs, oss);
-        values.changed.clear();
-      }
+
+    // apply changes until the cluster name is assigned
+    if (!values.cluster.empty()) {
+      // meta expands could have modified anything. Copy it all out again.
+      _gather_changes(values.changed, &rev_obs, oss);
+      values.changed.clear();
     }
 
-    call_observers(rev_obs);
+    call_observers(locker, rev_obs);
   }
   void _gather_changes(std::set<std::string> &changes, rev_obs_map_t *rev_obs, std::ostream* oss) {
@@ -279,29 +279,25 @@ public:
   int set_mon_vals(CephContext *cct, const map<std::string,std::string>& kv, md_config_t::config_callback config_cb) {
-    int ret;
+    std::unique_lock locker(lock);
+    int ret = config.set_mon_vals(cct, values, obs_mgr, kv, config_cb);
+
     rev_obs_map_t rev_obs;
-    {
-      std::lock_guard l{lock};
-      ret = config.set_mon_vals(cct, values, obs_mgr, kv, config_cb);
-      _gather_changes(values.changed, &rev_obs, nullptr);
-      values.changed.clear();
-    }
+    _gather_changes(values.changed, &rev_obs, nullptr);
+    values.changed.clear();
 
-    call_observers(rev_obs);
+    call_observers(locker, rev_obs);
     return ret;
   }
   int injectargs(const std::string &s, std::ostream *oss) {
-    int ret;
+    std::unique_lock locker(lock);
+    int ret = config.injectargs(values, obs_mgr, s, oss);
+
     rev_obs_map_t rev_obs;
-    {
-      std::lock_guard l{lock};
-      ret = config.injectargs(values, obs_mgr, s, oss);
-      _gather_changes(values.changed, &rev_obs, oss);
-      values.changed.clear();
-    }
+    _gather_changes(values.changed, &rev_obs, oss);
+    values.changed.clear();
 
-    call_observers(rev_obs);
+    call_observers(locker, rev_obs);
     return ret;
   }
   void parse_env(unsigned entity_type,
diff --git a/ceph/src/common/legacy_config_opts.h b/ceph/src/common/legacy_config_opts.h
index 340e740b4..79d9c1fa7 100644
--- a/ceph/src/common/legacy_config_opts.h
+++ b/ceph/src/common/legacy_config_opts.h
@@ -201,6 +201,9 @@ OPTION(mon_compact_on_bootstrap, OPT_BOOL) // trigger leveldb compaction on boo
 OPTION(mon_compact_on_trim, OPT_BOOL)       // compact (a prefix) when we trim old states
 OPTION(mon_osd_cache_size, OPT_INT)  // the size of osdmaps cache, not to rely on underlying store's cache
+OPTION(mon_osd_cache_size_min, OPT_U64) // minimum amount of memory to cache osdmaps
+OPTION(mon_memory_target, OPT_U64) // amount of mapped memory for osdmaps
+OPTION(mon_memory_autotune, OPT_BOOL) // autotune cache memory for osdmap
 OPTION(mon_cpu_threads, OPT_INT)
 OPTION(mon_osd_mapping_pgs_per_chunk, OPT_INT)
 OPTION(mon_clean_pg_upmaps_per_chunk, OPT_INT)
@@ -343,6 +346,7 @@ OPTION(mon_client_ping_timeout, OPT_DOUBLE)   // fail if we don't hear back
 OPTION(mon_client_hunt_interval_backoff, OPT_DOUBLE) // each time we reconnect to a monitor, double our timeout
 OPTION(mon_client_hunt_interval_max_multiple, OPT_DOUBLE) // up to a max of 10*default (30 seconds)
 OPTION(mon_client_max_log_entries_per_message, OPT_INT)
+OPTION(mon_client_directed_command_retry, OPT_INT)
 OPTION(client_cache_size, OPT_INT)
 OPTION(client_cache_mid, OPT_FLOAT)
 OPTION(client_use_random_mds, OPT_BOOL)
@@ -714,6 +718,7 @@ OPTION(osd_max_push_cost, OPT_U64) // max size of push message
 OPTION(osd_max_push_objects, OPT_U64)  // max objects in single push op
 OPTION(osd_max_scrubs, OPT_INT)
 OPTION(osd_scrub_during_recovery, OPT_BOOL) // Allow new scrubs to start while recovery is active on the OSD
+OPTION(osd_repair_during_recovery, OPT_BOOL) // Allow new requested repairs to start while recovery is active on the OSD
 OPTION(osd_scrub_begin_hour, OPT_INT)
 OPTION(osd_scrub_end_hour, OPT_INT)
 OPTION(osd_scrub_begin_week_day, OPT_INT)
@@ -781,6 +786,7 @@ OPTION(osd_debug_random_push_read_error, OPT_DOUBLE)
 OPTION(osd_debug_verify_cached_snaps, OPT_BOOL)
 OPTION(osd_debug_deep_scrub_sleep, OPT_FLOAT)
 OPTION(osd_debug_no_acting_change, OPT_BOOL)
+OPTION(osd_debug_pretend_recovery_active, OPT_BOOL)
 OPTION(osd_enable_op_tracker, OPT_BOOL) // enable/disable OSD op tracking
 OPTION(osd_num_op_tracker_shard, OPT_U32) // The number of shards for holding the ops
 OPTION(osd_op_history_size, OPT_U32)    // Max number of completed ops to track
@@ -931,6 +937,7 @@ OPTION(bdev_async_discard, OPT_BOOL)
 OPTION(objectstore_blackhole, OPT_BOOL)
 
 OPTION(bluefs_alloc_size, OPT_U64)
+OPTION(bluefs_shared_alloc_size, OPT_U64)
 OPTION(bluefs_max_prefetch, OPT_U64)
 OPTION(bluefs_min_log_runway, OPT_U64)  // alloc when we get this low
 OPTION(bluefs_max_log_runway, OPT_U64)  // alloc this much at a time
@@ -1038,12 +1045,14 @@ OPTION(bluestore_max_deferred_txc, OPT_U64)
 OPTION(bluestore_rocksdb_options, OPT_STR)
 OPTION(bluestore_fsck_on_mount, OPT_BOOL)
 OPTION(bluestore_fsck_on_mount_deep, OPT_BOOL)
+OPTION(bluestore_fsck_quick_fix_on_mount, OPT_BOOL)
 OPTION(bluestore_fsck_on_umount, OPT_BOOL)
 OPTION(bluestore_fsck_on_umount_deep, OPT_BOOL)
 OPTION(bluestore_fsck_on_mkfs, OPT_BOOL)
 OPTION(bluestore_fsck_on_mkfs_deep, OPT_BOOL)
 OPTION(bluestore_sync_submit_transaction, OPT_BOOL) // submit kv txn in queueing thread (not kv_sync_thread)
 OPTION(bluestore_fsck_read_bytes_cap, OPT_U64)
+OPTION(bluestore_fsck_quick_fix_threads, OPT_INT)
 OPTION(bluestore_throttle_bytes, OPT_U64)
 OPTION(bluestore_throttle_deferred_bytes, OPT_U64)
 OPTION(bluestore_throttle_cost_per_io_hdd, OPT_U64)
@@ -1072,7 +1081,7 @@ OPTION(bluestore_debug_permit_any_bdev_label, OPT_BOOL)
 OPTION(bluestore_debug_random_read_err, OPT_DOUBLE)
 OPTION(bluestore_debug_inject_bug21040, OPT_BOOL)
 OPTION(bluestore_debug_inject_csum_err_probability, OPT_FLOAT)
-OPTION(bluestore_no_per_pool_stats_tolerance, OPT_STR)
+OPTION(bluestore_fsck_error_on_no_per_pool_stats, OPT_BOOL)
 OPTION(bluestore_warn_on_bluefs_spillover, OPT_BOOL)
 OPTION(bluestore_warn_on_legacy_statfs, OPT_BOOL)
 OPTION(bluestore_log_op_age, OPT_DOUBLE)
@@ -1262,6 +1271,10 @@ OPTION(rados_tracing, OPT_BOOL) // true if LTTng-UST tracepoints should be enabl
 
 OPTION(nss_db_path, OPT_STR) // path to nss db
 
+OPTION(rgw_max_attr_name_len, OPT_SIZE)
+OPTION(rgw_max_attr_size, OPT_SIZE)
+OPTION(rgw_max_attrs_num_in_req, OPT_U64)
+
 OPTION(rgw_max_chunk_size, OPT_INT)
 OPTION(rgw_put_obj_min_window_size, OPT_INT)
 OPTION(rgw_put_obj_max_window_size, OPT_INT)
@@ -1397,6 +1410,10 @@ OPTION(rgw_nfs_max_gc, OPT_INT) /* max gc events per cycle */
 OPTION(rgw_nfs_write_completion_interval_s, OPT_INT) /* stateless (V3)
                                                       * commit
                                                       * delay */
+OPTION(rgw_nfs_s3_fast_attrs, OPT_BOOL) /* use fast S3 attrs from
+                                         * bucket index--currently
+                                         * assumes NFS mounts are
+                                         * immutable */
 
 OPTION(rgw_zone, OPT_STR) // zone name
 OPTION(rgw_zone_root_pool, OPT_STR)    // pool where zone specific info is stored
diff --git a/ceph/src/common/options.cc b/ceph/src/common/options.cc
index 4d42dfedc..cc10924dc 100644
--- a/ceph/src/common/options.cc
+++ b/ceph/src/common/options.cc
@@ -1370,6 +1370,23 @@ std::vector
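
Closing out this run of src/common changes, a worked example of the
proportional grow/shrink rule used by Manager::tune_memory() in
PriorityCache.cc earlier in the patch. The byte values are invented for
illustration and the snippet is standalone, not patch code.

    #include <cstdint>
    #include <iostream>

    int main() {
      uint64_t min_mem  = 128ull << 20;    // floor for the caches
      uint64_t max_mem  = 4096ull << 20;   // ceiling for the caches
      uint64_t target   = 2048ull << 20;   // desired mapped memory
      uint64_t mapped   = 1536ull << 20;   // what the allocator reports
      double   new_size = 1024ull << 20;   // currently tuned cache size

      if (mapped < target) {
        // under target: grow toward max_mem in proportion to the headroom
        double ratio = 1 - (double)mapped / target;    // 0.25
        new_size += ratio * (max_mem - new_size);      // +768 MiB
      } else {
        // over target: shrink toward min_mem in proportion to the overshoot
        double ratio = 1 - (double)target / mapped;
        new_size -= ratio * (new_size - min_mem);
      }
      std::cout << (uint64_t)new_size / (1 << 20) << " MiB\n";  // 1792 MiB
    }
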