From adb31ebba1b9f736f0a7e05b9be808af79cfce80 Mon Sep 17 00:00:00 2001 From: Thomas Lamprecht Date: Thu, 25 Feb 2021 16:56:55 +0100 Subject: [PATCH] import 15.2.9 Signed-off-by: Thomas Lamprecht --- ceph/CMakeLists.txt | 5 +- ceph/PendingReleaseNotes | 8 + ceph/admin/build-doc | 2 +- ceph/admin/doc-python-common-requirements.txt | 3 + ceph/alpine/APKBUILD | 6 +- ceph/ceph.spec | 20 +- ceph/ceph.spec.in | 14 +- ceph/changelog.upstream | 10 +- ceph/cmake/modules/CephChecks.cmake | 1 + ceph/debian/rules | 2 +- ceph/do_cmake.sh | 4 +- ceph/doc/cephadm/drivegroups.rst | 5 +- ceph/doc/cephadm/index.rst | 3 +- ceph/doc/cephadm/monitoring.rst | 129 +- ceph/doc/cephadm/operations.rst | 50 + ceph/doc/cephfs/add-remove-mds.rst | 2 +- ceph/doc/cephfs/cache-configuration.rst | 181 +++ ceph/doc/cephfs/cache-size-limits.rst | 14 - ceph/doc/cephfs/fs-volumes.rst | 43 +- ceph/doc/cephfs/index.rst | 2 +- ceph/doc/dev/cephadm/compliance-check.rst | 121 ++ ceph/doc/dev/cephadm/host-maintenance.rst | 104 ++ ceph/doc/dev/cephadm/index.rst | 13 + ceph/doc/man/8/rbd.rst | 43 +- ceph/doc/mgr/dashboard.rst | 63 +- ceph/doc/mgr/orchestrator.rst | 4 +- .../rados/configuration/osd-config-ref.rst | 2 +- .../rados/operations/erasure-code-clay.rst | 2 +- ceph/doc/rados/operations/health-checks.rst | 13 + ceph/install-deps.sh | 3 +- .../dashboards/osd-device-details.json | 2 +- .../prometheus/alerts/ceph_default_alerts.yml | 16 +- ceph/qa/distros/all/ubuntu_18.04_podman.yaml | 2 +- .../mon/osd-erasure-code-profile.sh | 11 + .../fs/basic_functional/tasks/volumes.yaml | 1 + ceph/qa/suites/krbd/basic/ms_mode/.qa | 1 + ceph/qa/suites/krbd/basic/ms_mode/crc.yaml | 5 + ceph/qa/suites/krbd/basic/ms_mode/legacy.yaml | 5 + ceph/qa/suites/krbd/basic/ms_mode/secure.yaml | 5 + ceph/qa/suites/krbd/fsx/conf.yaml | 2 - ceph/qa/suites/krbd/fsx/ms_mode$/.qa | 1 + ceph/qa/suites/krbd/fsx/ms_mode$/crc.yaml | 5 + ceph/qa/suites/krbd/fsx/ms_mode$/legacy.yaml | 5 + .../suites/krbd/fsx/ms_mode$/prefer-crc.yaml | 5 + ceph/qa/suites/krbd/fsx/ms_mode$/secure.yaml | 5 + ceph/qa/suites/krbd/rbd-nomount/ms_mode/.qa | 1 + .../suites/krbd/rbd-nomount/ms_mode/crc.yaml | 5 + .../krbd/rbd-nomount/ms_mode/legacy.yaml | 5 + .../krbd/rbd-nomount/ms_mode/secure.yaml | 5 + ceph/qa/suites/krbd/rbd/ms_mode/.qa | 1 + ceph/qa/suites/krbd/rbd/ms_mode/crc.yaml | 5 + ceph/qa/suites/krbd/rbd/ms_mode/legacy.yaml | 5 + ceph/qa/suites/krbd/rbd/ms_mode/secure.yaml | 5 + ceph/qa/suites/krbd/singleton/ms_mode$/.qa | 1 + .../suites/krbd/singleton/ms_mode$/crc.yaml | 5 + .../krbd/singleton/ms_mode$/legacy.yaml | 5 + .../krbd/singleton/ms_mode$/prefer-crc.yaml | 5 + .../krbd/singleton/ms_mode$/secure.yaml | 5 + ceph/qa/suites/krbd/thrash/ms_mode$/.qa | 1 + ceph/qa/suites/krbd/thrash/ms_mode$/crc.yaml | 5 + .../suites/krbd/thrash/ms_mode$/legacy.yaml | 5 + .../krbd/thrash/ms_mode$/prefer-crc.yaml | 5 + .../suites/krbd/thrash/ms_mode$/secure.yaml | 5 + .../sysfs/tasks/stable_pages_required.yaml | 5 - .../krbd/wac/sysfs/tasks/stable_writes.yaml | 5 + ceph/qa/suites/rados/cephadm/smoke/start.yaml | 1 + .../rados/dashboard/tasks/dashboard.yaml | 1 + .../all/pg-autoscaler-progress-off.yaml | 44 + ceph/qa/suites/rgw/multisite/overrides.yaml | 1 + .../1-install/nautilus.yaml | 1 + .../parallel/1-ceph-install/nautilus.yaml | 1 + .../stress-split/1-ceph-install/nautilus.yaml | 2 + ceph/qa/tasks/ceph.py | 19 +- ceph/qa/tasks/cephadm.py | 14 +- ceph/qa/tasks/cephfs/cephfs_test_case.py | 3 + ceph/qa/tasks/cephfs/mount.py | 5 +- ceph/qa/tasks/cephfs/test_client_limits.py | 30 + 
ceph/qa/tasks/cephfs/test_nfs.py | 4 +- ceph/qa/tasks/cephfs/test_volume_client.py | 4 +- ceph/qa/tasks/cephfs/test_volumes.py | 1384 +++++++++++++++-- ceph/qa/tasks/mgr/dashboard/helper.py | 95 +- ceph/qa/tasks/mgr/dashboard/test_auth.py | 203 +++ ceph/qa/tasks/mgr/dashboard/test_osd.py | 131 +- ceph/qa/tasks/mgr/mgr_test_case.py | 4 +- ceph/qa/tasks/mgr/test_dashboard.py | 18 +- ceph/qa/tasks/mgr/test_progress.py | 158 +- ceph/qa/tasks/radosgw_admin_rest.py | 5 + ceph/qa/tasks/vstart_runner.py | 5 +- ceph/qa/workunits/fs/misc/subvolume.sh | 63 + ...ages_required.sh => krbd_stable_writes.sh} | 8 +- ceph/qa/workunits/rbd/rbd_mirror_helpers.sh | 2 +- ceph/src/.git_version | 4 +- .../ceph_volume/devices/lvm/create.py | 2 +- ceph/src/ceph_fuse.cc | 3 +- ceph/src/cephadm/cephadm | 190 ++- ceph/src/cephadm/tests/test_cephadm.py | 9 + ceph/src/client/Client.cc | 158 +- ceph/src/client/Client.h | 7 +- ceph/src/client/MetaSession.cc | 10 +- ceph/src/client/MetaSession.h | 2 +- ceph/src/client/fuse_ll.cc | 3 +- ceph/src/cls/rgw/cls_rgw.cc | 5 +- ceph/src/cls/user/cls_user.cc | 3 + ceph/src/common/buffer.cc | 2 + ceph/src/common/config.cc | 24 +- ceph/src/common/config.h | 2 +- ceph/src/common/config_proxy.h | 5 +- ceph/src/common/legacy_config_opts.h | 4 + ceph/src/common/options.cc | 55 +- ceph/src/crimson/osd/pg.cc | 3 +- ceph/src/crimson/osd/pg.h | 3 +- ceph/src/global/global_init.cc | 3 + ceph/src/global/signal_handler.h | 8 +- ceph/src/include/config-h.in.cmake | 3 + ceph/src/krbd.cc | 34 +- ceph/src/librbd/CMakeLists.txt | 4 + ceph/src/librbd/api/Image.cc | 2 +- ceph/src/librbd/api/Migration.cc | 4 - ceph/src/librbd/api/PoolMetadata.cc | 55 +- ceph/src/librbd/deep_copy/ImageCopyRequest.cc | 51 +- ceph/src/librbd/deep_copy/ImageCopyRequest.h | 3 + ceph/src/librbd/image/CreateRequest.cc | 6 +- ceph/src/librbd/object_map/DiffRequest.cc | 107 +- ceph/src/librbd/object_map/DiffRequest.h | 5 +- ceph/src/mds/CInode.cc | 8 +- ceph/src/mds/MDCache.cc | 1 + ceph/src/mds/MDSDaemon.cc | 10 +- ceph/src/mds/MDSRank.cc | 20 +- ceph/src/mds/MDSRank.h | 2 +- ceph/src/mds/Server.cc | 177 ++- ceph/src/mds/Server.h | 9 +- ceph/src/mds/SessionMap.cc | 17 +- ceph/src/mds/SessionMap.h | 13 +- ceph/src/mds/SnapRealm.cc | 7 + ceph/src/mds/SnapRealm.h | 6 + ceph/src/mds/snap.h | 7 +- ceph/src/messages/MMonCommand.h | 25 +- ceph/src/messages/MMonCommandAck.h | 26 +- ceph/src/mgr/ActivePyModules.cc | 31 +- ceph/src/mgr/ClusterState.cc | 12 +- ceph/src/mgr/MgrStandby.cc | 6 +- ceph/src/mgr/PyModuleRegistry.cc | 5 +- ceph/src/mon/ConfigMonitor.cc | 12 +- ceph/src/mon/Monitor.cc | 18 +- ceph/src/mon/Monitor.h | 9 +- ceph/src/mon/OSDMonitor.cc | 12 + ceph/src/mon/Paxos.cc | 1 - ceph/src/mon/Paxos.h | 6 +- ceph/src/msg/async/ProtocolV2.cc | 6 +- ceph/src/mypy.ini | 2 +- ceph/src/ocf/rbd.in | 37 +- ceph/src/os/bluestore/AvlAllocator.cc | 10 +- ceph/src/os/bluestore/BlueFS.cc | 17 +- ceph/src/os/bluestore/BlueFS.h | 8 +- ceph/src/os/bluestore/BlueRocksEnv.cc | 4 +- ceph/src/os/bluestore/BlueStore.cc | 256 ++- ceph/src/os/bluestore/BlueStore.h | 29 +- ceph/src/os/bluestore/HybridAllocator.cc | 2 +- ceph/src/os/bluestore/KernelDevice.cc | 23 +- .../os/bluestore/fastbmap_allocator_impl.cc | 3 + .../os/bluestore/fastbmap_allocator_impl.h | 2 +- ceph/src/osd/PG.cc | 31 +- ceph/src/osd/PG.h | 3 +- ceph/src/osd/PeeringState.cc | 9 +- ceph/src/osd/PeeringState.h | 5 +- ceph/src/osdc/ObjectCacher.cc | 20 +- ceph/src/pybind/cephfs/cephfs.pyx | 2 +- ceph/src/pybind/mgr/balancer/module.py | 4 +- ceph/src/pybind/mgr/cephadm/inventory.py | 
166 +- ceph/src/pybind/mgr/cephadm/migrations.py | 8 +- ceph/src/pybind/mgr/cephadm/module.py | 270 ++-- ceph/src/pybind/mgr/cephadm/remotes.py | 8 +- ceph/src/pybind/mgr/cephadm/schedule.py | 6 +- ceph/src/pybind/mgr/cephadm/serve.py | 151 +- .../mgr/cephadm/services/cephadmservice.py | 5 +- ceph/src/pybind/mgr/cephadm/services/iscsi.py | 35 +- ceph/src/pybind/mgr/cephadm/services/nfs.py | 1 + ceph/src/pybind/mgr/cephadm/services/osd.py | 268 ++-- ceph/src/pybind/mgr/cephadm/template.py | 6 +- ceph/src/pybind/mgr/cephadm/tests/fixtures.py | 5 +- .../pybind/mgr/cephadm/tests/test_cephadm.py | 57 +- .../mgr/cephadm/tests/test_migration.py | 15 +- .../mgr/cephadm/tests/test_osd_removal.py | 24 +- .../pybind/mgr/cephadm/tests/test_services.py | 2 +- .../src/pybind/mgr/cephadm/tests/test_spec.py | 13 +- ceph/src/pybind/mgr/cephadm/upgrade.py | 42 +- ceph/src/pybind/mgr/cephadm/utils.py | 26 +- ceph/src/pybind/mgr/crash/module.py | 29 +- .../mgr/dashboard/cherrypy_backports.py | 11 +- ceph/src/pybind/mgr/dashboard/constraints.txt | 1 - .../mgr/dashboard/controllers/__init__.py | 9 + .../pybind/mgr/dashboard/controllers/auth.py | 74 +- .../pybind/mgr/dashboard/controllers/docs.py | 5 +- .../controllers/erasure_code_profile.py | 4 +- .../mgr/dashboard/controllers/grafana.py | 13 +- .../pybind/mgr/dashboard/controllers/osd.py | 65 +- .../pybind/mgr/dashboard/controllers/rgw.py | 16 +- .../pybind/mgr/dashboard/controllers/saml2.py | 5 +- .../mgr/dashboard/controllers/service.py | 49 +- ceph/src/pybind/mgr/dashboard/exceptions.py | 7 + .../integration/block/images.e2e-spec.ts | 3 + .../integration/block/iscsi.e2e-spec.ts | 1 + .../integration/block/mirroring.e2e-spec.ts | 1 + .../cluster/configuration.e2e-spec.ts | 7 +- .../integration/cluster/crush-map.e2e-spec.ts | 1 + .../integration/cluster/hosts.e2e-spec.ts | 1 + .../integration/cluster/logs.e2e-spec.ts | 17 +- .../cluster/mgr-modules.e2e-spec.ts | 1 + .../integration/cluster/monitors.e2e-spec.ts | 1 + .../integration/cluster/osds.e2e-spec.ts | 5 +- .../filesystems/filesystems.e2e-spec.ts | 1 + .../integration/pools/pools.e2e-spec.ts | 1 + .../integration/rgw/buckets.e2e-spec.ts | 1 + .../cypress/integration/rgw/buckets.po.ts | 2 +- .../integration/rgw/daemons.e2e-spec.ts | 1 + .../cypress/integration/rgw/users.e2e-spec.ts | 1 + .../integration/ui/dashboard.e2e-spec.ts | 1 + .../integration/ui/notification.e2e-spec.ts | 3 + .../integration/ui/role-mgmt.e2e-spec.ts | 1 + .../integration/ui/user-mgmt.e2e-spec.ts | 1 + .../frontend/cypress/support/commands.ts | 1 - .../dist/en-US/1.9e79c41bbaed982a50af.js | 1 - .../dist/en-US/1.a08d918239b8b76c4810.js | 1 + .../frontend/dist/en-US/3rdpartylicenses.txt | 101 +- .../dist/en-US/6.9a60f7741889f52ed7ae.js | 1 - .../dist/en-US/6.bbb14e8467017ca13aa5.js | 1 + .../dist/en-US/7.1891b10149a7c2d765ac.js | 1 - .../dist/en-US/7.7ae591a28c2c89c12020.js | 1 + .../dist/en-US/8.88af57a0fd5b75779391.js | 1 - .../dist/en-US/8.a5d6e73e48ae0cf89ca6.js | 1 + .../dashboard/frontend/dist/en-US/index.html | 4 +- .../dist/en-US/main.8b6127522c6248eda88e.js | 2 + ... 
main.8b6127522c6248eda88e.js.LICENSE.txt} | 2 - .../dist/en-US/main.c43d13b597196a5f022f.js | 2 - .../en-US/runtime.0e1c754813ff535e4bd5.js | 1 - .../en-US/runtime.c67af31016ccc1ccaa71.js | 1 + ...6a.css => styles.b61c4a8f9329c6b4f0c0.css} | 2 +- .../mgr/dashboard/frontend/package-lock.json | 8 - .../mgr/dashboard/frontend/package.json | 3 +- .../frontend/src/app/app-routing.module.ts | 12 +- .../dashboard/frontend/src/app/app.module.ts | 10 - .../rbd-snapshot-list.component.spec.ts | 2 +- .../src/app/ceph/cluster/cluster.module.ts | 9 +- .../cluster/crushmap/crushmap.component.html | 2 +- .../cluster/crushmap/crushmap.component.scss | 8 +- .../cluster/crushmap/crushmap.component.ts | 1 + .../inventory/inventory.component.html | 2 +- .../inventory/inventory.component.spec.ts | 4 + .../cluster/inventory/inventory.component.ts | 2 + .../osd-flags-indiv-modal.component.html | 52 + .../osd-flags-indiv-modal.component.scss | 0 .../osd-flags-indiv-modal.component.spec.ts | 353 +++++ .../osd-flags-indiv-modal.component.ts | 140 ++ .../osd-flags-modal.component.html | 2 +- .../osd-flags-modal.component.scss | 5 - .../osd/osd-list/osd-list.component.html | 8 + .../osd/osd-list/osd-list.component.spec.ts | 52 +- .../osd/osd-list/osd-list.component.ts | 36 +- .../cluster/services/placement.pipe.spec.ts | 92 ++ .../ceph/cluster/services/placement.pipe.ts | 44 + .../service-daemon-list.component.html | 4 +- .../service-daemon-list.component.spec.ts | 4 + .../service-daemon-list.component.ts | 2 + .../service-form/service-form.component.html | 415 +++++ .../service-form/service-form.component.scss | 0 .../service-form.component.spec.ts | 340 ++++ .../service-form/service-form.component.ts | 328 ++++ .../cluster/services/services.component.html | 12 +- .../services/services.component.spec.ts | 11 +- .../cluster/services/services.component.ts | 88 +- .../dashboard/health/health.component.spec.ts | 5 +- .../ceph/dashboard/health/health.component.ts | 2 +- .../src/app/ceph/nfs/nfs-cluster-type.enum.ts | 4 + .../ceph/nfs/nfs-form/nfs-form.component.html | 27 +- .../nfs/nfs-form/nfs-form.component.spec.ts | 36 +- .../ceph/nfs/nfs-form/nfs-form.component.ts | 61 +- ...ure-code-profile-form-modal.component.html | 73 +- ...-code-profile-form-modal.component.spec.ts | 219 ++- ...asure-code-profile-form-modal.component.ts | 146 +- .../rgw-user-form/rgw-user-form.component.ts | 6 +- .../app/core/auth/login/login.component.ts | 1 - .../auth/user-form/user-form.component.html | 4 +- .../user-password-form.component.html | 2 +- .../dashboard-help.component.ts | 9 +- .../src/app/shared/api/auth.service.spec.ts | 3 +- .../src/app/shared/api/auth.service.ts | 3 +- .../app/shared/api/ceph-service.service.ts | 23 + .../api/erasure-code-profile.service.ts | 17 +- .../src/app/shared/api/host.service.ts | 4 +- .../src/app/shared/api/osd.service.spec.ts | 9 + .../src/app/shared/api/osd.service.ts | 4 + .../back-button/back-button.component.html | 1 - .../src/app/shared/constants/app.constants.ts | 2 + .../datatable/table/table.component.html | 2 +- .../datatable/table/table.component.spec.ts | 19 +- .../shared/datatable/table/table.component.ts | 3 +- .../app/shared/forms/cd-validators.spec.ts | 143 ++ .../src/app/shared/forms/cd-validators.ts | 78 +- .../app/shared/models/erasure-code-profile.ts | 2 + .../frontend/src/app/shared/models/flag.ts | 8 + .../src/app/shared/models/login-response.ts | 1 - .../services/auth-storage.service.spec.ts | 4 +- .../shared/services/auth-storage.service.ts | 7 - 
.../shared/services/task-message.service.ts | 11 + .../mgr/dashboard/frontend/src/styles.scss | 5 +- ceph/src/pybind/mgr/dashboard/module.py | 11 +- ceph/src/pybind/mgr/dashboard/openapi.yaml | 0 .../src/pybind/mgr/dashboard/plugins/debug.py | 30 +- .../mgr/dashboard/plugins/interfaces.py | 10 + .../mgr/dashboard/services/access_control.py | 20 + .../src/pybind/mgr/dashboard/services/auth.py | 20 +- .../mgr/dashboard/services/orchestrator.py | 14 +- .../mgr/dashboard/services/rgw_client.py | 24 +- ceph/src/pybind/mgr/dashboard/settings.py | 4 + .../pybind/mgr/dashboard/tests/test_rgw.py | 18 +- ceph/src/pybind/mgr/devicehealth/module.py | 2 +- .../src/pybind/mgr/orchestrator/_interface.py | 17 +- ceph/src/pybind/mgr/orchestrator/module.py | 86 +- .../orchestrator/tests/test_orchestrator.py | 14 +- ceph/src/pybind/mgr/progress/module.py | 44 +- ceph/src/pybind/mgr/prometheus/module.py | 121 +- ceph/src/pybind/mgr/rbd_support/schedule.py | 18 +- ceph/src/pybind/mgr/rbd_support/task.py | 39 +- ceph/src/pybind/mgr/restful/common.py | 3 +- .../src/pybind/mgr/volumes/fs/async_cloner.py | 135 +- .../pybind/mgr/volumes/fs/operations/group.py | 8 + .../pybind/mgr/volumes/fs/operations/op_sm.py | 68 - .../mgr/volumes/fs/operations/resolver.py | 9 + .../mgr/volumes/fs/operations/subvolume.py | 24 +- .../mgr/volumes/fs/operations/template.py | 37 +- .../pybind/mgr/volumes/fs/operations/trash.py | 19 +- .../fs/operations/versions/__init__.py | 42 +- .../volumes/fs/operations/versions/op_sm.py | 114 ++ .../fs/operations/versions/subvolume_attrs.py | 61 + .../fs/operations/versions/subvolume_base.py | 119 +- .../fs/operations/versions/subvolume_v1.py | 158 +- .../fs/operations/versions/subvolume_v2.py | 363 +++++ ceph/src/pybind/mgr/volumes/fs/purge_queue.py | 57 +- ceph/src/pybind/mgr/volumes/fs/volume.py | 120 +- ceph/src/pybind/mgr/volumes/module.py | 9 +- .../ceph/deployment/drive_group.py | 2 +- .../ceph/deployment/service_spec.py | 12 +- .../python-common/ceph/tests/test_datetime.py | 61 + ceph/src/python-common/ceph/utils.py | 68 + ceph/src/rgw/CMakeLists.txt | 1 + ceph/src/rgw/rgw_auth.cc | 5 + ceph/src/rgw/rgw_auth.h | 10 +- ceph/src/rgw/rgw_auth_s3.h | 5 +- ceph/src/rgw/rgw_bucket.cc | 37 +- ceph/src/rgw/rgw_common.h | 3 + ceph/src/rgw/rgw_compression.cc | 45 +- ceph/src/rgw/rgw_compression.h | 7 +- ceph/src/rgw/rgw_etag_verifier.cc | 185 +++ ceph/src/rgw/rgw_etag_verifier.h | 85 + ceph/src/rgw/rgw_file.h | 237 ++- ceph/src/rgw/rgw_lc.cc | 8 +- ceph/src/rgw/rgw_log.cc | 17 + ceph/src/rgw/rgw_log.h | 9 +- ceph/src/rgw/rgw_obj_manifest.h | 5 + ceph/src/rgw/rgw_op.cc | 4 + ceph/src/rgw/rgw_rados.cc | 81 +- ceph/src/rgw/rgw_rest_s3.cc | 4 +- ceph/src/rgw/rgw_sts.cc | 13 + ceph/src/rgw/rgw_sts.h | 10 +- ceph/src/rgw/services/svc_notify.cc | 20 +- ceph/src/rgw/services/svc_sys_obj_cache.cc | 15 +- ceph/src/test/common/CMakeLists.txt | 6 + ceph/src/test/common/test_counter.cc | 40 + .../cram-0.5.0ceph.2011-01-14.tar.gz | Bin 23497 -> 0 bytes ceph/src/test/libcephfs/test.cc | 25 + ceph/src/test/librbd/CMakeLists.txt | 1 + .../object_map/test_mock_DiffRequest.cc | 490 ++++++ ceph/src/test/librbd/test_DeepCopy.cc | 1 - ceph/src/test/librbd/test_Migration.cc | 2 - ceph/src/test/objectstore/Allocator_test.cc | 18 + .../objectstore/fastbmap_allocator_test.cc | 5 + ceph/src/test/objectstore/store_test.cc | 35 +- ceph/src/test/objectstore/test_bluefs.cc | 27 +- ceph/src/test/old/testcounter.cc | 73 - ceph/src/test/rbd_mirror/test_ImageDeleter.cc | 3 +- ceph/src/test/rbd_mirror/test_fixture.cc | 4 +- 
ceph/src/test/run-cli-tests | 3 +- ceph/src/test/test_mempool.cc | 15 + ceph/src/tools/CMakeLists.txt | 3 + ceph/src/tools/rbd/action/Kernel.cc | 13 + .../image_replayer/CreateImageRequest.cc | 3 +- 386 files changed, 10872 insertions(+), 2173 deletions(-) create mode 100644 ceph/admin/doc-python-common-requirements.txt create mode 100644 ceph/doc/cephfs/cache-configuration.rst delete mode 100644 ceph/doc/cephfs/cache-size-limits.rst create mode 100644 ceph/doc/dev/cephadm/compliance-check.rst create mode 100644 ceph/doc/dev/cephadm/host-maintenance.rst create mode 100644 ceph/doc/dev/cephadm/index.rst create mode 120000 ceph/qa/suites/krbd/basic/ms_mode/.qa create mode 100644 ceph/qa/suites/krbd/basic/ms_mode/crc.yaml create mode 100644 ceph/qa/suites/krbd/basic/ms_mode/legacy.yaml create mode 100644 ceph/qa/suites/krbd/basic/ms_mode/secure.yaml create mode 120000 ceph/qa/suites/krbd/fsx/ms_mode$/.qa create mode 100644 ceph/qa/suites/krbd/fsx/ms_mode$/crc.yaml create mode 100644 ceph/qa/suites/krbd/fsx/ms_mode$/legacy.yaml create mode 100644 ceph/qa/suites/krbd/fsx/ms_mode$/prefer-crc.yaml create mode 100644 ceph/qa/suites/krbd/fsx/ms_mode$/secure.yaml create mode 120000 ceph/qa/suites/krbd/rbd-nomount/ms_mode/.qa create mode 100644 ceph/qa/suites/krbd/rbd-nomount/ms_mode/crc.yaml create mode 100644 ceph/qa/suites/krbd/rbd-nomount/ms_mode/legacy.yaml create mode 100644 ceph/qa/suites/krbd/rbd-nomount/ms_mode/secure.yaml create mode 120000 ceph/qa/suites/krbd/rbd/ms_mode/.qa create mode 100644 ceph/qa/suites/krbd/rbd/ms_mode/crc.yaml create mode 100644 ceph/qa/suites/krbd/rbd/ms_mode/legacy.yaml create mode 100644 ceph/qa/suites/krbd/rbd/ms_mode/secure.yaml create mode 120000 ceph/qa/suites/krbd/singleton/ms_mode$/.qa create mode 100644 ceph/qa/suites/krbd/singleton/ms_mode$/crc.yaml create mode 100644 ceph/qa/suites/krbd/singleton/ms_mode$/legacy.yaml create mode 100644 ceph/qa/suites/krbd/singleton/ms_mode$/prefer-crc.yaml create mode 100644 ceph/qa/suites/krbd/singleton/ms_mode$/secure.yaml create mode 120000 ceph/qa/suites/krbd/thrash/ms_mode$/.qa create mode 100644 ceph/qa/suites/krbd/thrash/ms_mode$/crc.yaml create mode 100644 ceph/qa/suites/krbd/thrash/ms_mode$/legacy.yaml create mode 100644 ceph/qa/suites/krbd/thrash/ms_mode$/prefer-crc.yaml create mode 100644 ceph/qa/suites/krbd/thrash/ms_mode$/secure.yaml delete mode 100644 ceph/qa/suites/krbd/wac/sysfs/tasks/stable_pages_required.yaml create mode 100644 ceph/qa/suites/krbd/wac/sysfs/tasks/stable_writes.yaml create mode 100644 ceph/qa/suites/rados/singleton/all/pg-autoscaler-progress-off.yaml create mode 100755 ceph/qa/workunits/fs/misc/subvolume.sh rename ceph/qa/workunits/rbd/{krbd_stable_pages_required.sh => krbd_stable_writes.sh} (91%) delete mode 100644 ceph/src/pybind/mgr/dashboard/frontend/dist/en-US/1.9e79c41bbaed982a50af.js create mode 100644 ceph/src/pybind/mgr/dashboard/frontend/dist/en-US/1.a08d918239b8b76c4810.js delete mode 100644 ceph/src/pybind/mgr/dashboard/frontend/dist/en-US/6.9a60f7741889f52ed7ae.js create mode 100644 ceph/src/pybind/mgr/dashboard/frontend/dist/en-US/6.bbb14e8467017ca13aa5.js delete mode 100644 ceph/src/pybind/mgr/dashboard/frontend/dist/en-US/7.1891b10149a7c2d765ac.js create mode 100644 ceph/src/pybind/mgr/dashboard/frontend/dist/en-US/7.7ae591a28c2c89c12020.js delete mode 100644 ceph/src/pybind/mgr/dashboard/frontend/dist/en-US/8.88af57a0fd5b75779391.js create mode 100644 ceph/src/pybind/mgr/dashboard/frontend/dist/en-US/8.a5d6e73e48ae0cf89ca6.js create mode 100644 
ceph/src/pybind/mgr/dashboard/frontend/dist/en-US/main.8b6127522c6248eda88e.js rename ceph/src/pybind/mgr/dashboard/frontend/dist/en-US/{main.c43d13b597196a5f022f.js.LICENSE.txt => main.8b6127522c6248eda88e.js.LICENSE.txt} (98%) delete mode 100644 ceph/src/pybind/mgr/dashboard/frontend/dist/en-US/main.c43d13b597196a5f022f.js delete mode 100644 ceph/src/pybind/mgr/dashboard/frontend/dist/en-US/runtime.0e1c754813ff535e4bd5.js create mode 100644 ceph/src/pybind/mgr/dashboard/frontend/dist/en-US/runtime.c67af31016ccc1ccaa71.js rename ceph/src/pybind/mgr/dashboard/frontend/dist/en-US/{styles.0d3cd206c82d5fe7076a.css => styles.b61c4a8f9329c6b4f0c0.css} (98%) create mode 100644 ceph/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-flags-indiv-modal/osd-flags-indiv-modal.component.html create mode 100644 ceph/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-flags-indiv-modal/osd-flags-indiv-modal.component.scss create mode 100644 ceph/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-flags-indiv-modal/osd-flags-indiv-modal.component.spec.ts create mode 100644 ceph/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-flags-indiv-modal/osd-flags-indiv-modal.component.ts create mode 100644 ceph/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/placement.pipe.spec.ts create mode 100644 ceph/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/placement.pipe.ts create mode 100644 ceph/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/service-form/service-form.component.html create mode 100644 ceph/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/service-form/service-form.component.scss create mode 100644 ceph/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/service-form/service-form.component.spec.ts create mode 100644 ceph/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/service-form/service-form.component.ts create mode 100644 ceph/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/nfs-cluster-type.enum.ts create mode 100644 ceph/src/pybind/mgr/dashboard/frontend/src/app/shared/models/flag.ts create mode 100644 ceph/src/pybind/mgr/dashboard/openapi.yaml delete mode 100644 ceph/src/pybind/mgr/volumes/fs/operations/op_sm.py create mode 100644 ceph/src/pybind/mgr/volumes/fs/operations/versions/op_sm.py create mode 100644 ceph/src/pybind/mgr/volumes/fs/operations/versions/subvolume_attrs.py create mode 100644 ceph/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v2.py create mode 100644 ceph/src/python-common/ceph/tests/test_datetime.py create mode 100644 ceph/src/python-common/ceph/utils.py create mode 100644 ceph/src/rgw/rgw_etag_verifier.cc create mode 100644 ceph/src/rgw/rgw_etag_verifier.h create mode 100644 ceph/src/test/common/test_counter.cc delete mode 100644 ceph/src/test/downloads/cram-0.5.0ceph.2011-01-14.tar.gz create mode 100644 ceph/src/test/librbd/object_map/test_mock_DiffRequest.cc delete mode 100644 ceph/src/test/old/testcounter.cc diff --git a/ceph/CMakeLists.txt b/ceph/CMakeLists.txt index 7a71e53ad..0de31a5a8 100644 --- a/ceph/CMakeLists.txt +++ b/ceph/CMakeLists.txt @@ -13,7 +13,8 @@ foreach(policy CMP0054 CMP0056 CMP0065 - CMP0075) + CMP0075 + CMP0093) if(POLICY ${policy}) cmake_policy(SET ${policy} NEW) endif() @@ -667,4 +668,4 @@ add_custom_target(tags DEPENDS ctags) find_package(CppCheck) find_package(IWYU) -set(VERSION 15.2.8) +set(VERSION 15.2.9) diff --git a/ceph/PendingReleaseNotes b/ceph/PendingReleaseNotes index a1f05a987..9715be569 
100644 --- a/ceph/PendingReleaseNotes +++ b/ceph/PendingReleaseNotes @@ -1,5 +1,13 @@ +15.2.9 +------ +* MGR: progress module can now be turned on/off, using the commands: + ``ceph progress on`` and ``ceph progress off``. + 15.2.8 ------ +* $pid expansion in config paths like `admin_socket` will now properly expand + to the daemon pid for commands like `ceph-mds` or `ceph-osd`. Previously only + `ceph-fuse`/`rbd-nbd` expanded `$pid` with the actual daemon pid. * ceph-volume: The ``lvm batch` subcommand received a major rewrite. This closed a number of bugs and improves usability in terms of size specification and diff --git a/ceph/admin/build-doc b/ceph/admin/build-doc index 289a2a688..aa8baae6c 100755 --- a/ceph/admin/build-doc +++ b/ceph/admin/build-doc @@ -59,7 +59,7 @@ cd build-doc if [ ! -e $vdir ]; then virtualenv --python=python3 $vdir fi -$vdir/bin/pip install --quiet -r $TOPDIR/admin/doc-requirements.txt +$vdir/bin/pip install --use-feature=2020-resolver --quiet -r $TOPDIR/admin/doc-requirements.txt -r $TOPDIR/admin/doc-python-common-requirements.txt install -d -m0755 \ $TOPDIR/build-doc/output/html \ diff --git a/ceph/admin/doc-python-common-requirements.txt b/ceph/admin/doc-python-common-requirements.txt new file mode 100644 index 000000000..8cd8ec78c --- /dev/null +++ b/ceph/admin/doc-python-common-requirements.txt @@ -0,0 +1,3 @@ +pcpp +Jinja2 +-e../src/python-common diff --git a/ceph/alpine/APKBUILD b/ceph/alpine/APKBUILD index 8b1d3e98d..2bf1923ce 100644 --- a/ceph/alpine/APKBUILD +++ b/ceph/alpine/APKBUILD @@ -1,7 +1,7 @@ # Contributor: John Coyle # Maintainer: John Coyle pkgname=ceph -pkgver=15.2.8 +pkgver=15.2.9 pkgrel=0 pkgdesc="Ceph is a distributed object store and file system" pkgusers="ceph" @@ -63,7 +63,7 @@ makedepends=" xmlstarlet yasm " -source="ceph-15.2.8.tar.bz2" +source="ceph-15.2.9.tar.bz2" subpackages=" $pkgname-base $pkgname-common @@ -116,7 +116,7 @@ _sysconfdir=/etc _udevrulesdir=/etc/udev/rules.d _python_sitelib=/usr/lib/python2.7/site-packages -builddir=$srcdir/ceph-15.2.8 +builddir=$srcdir/ceph-15.2.9 build() { export CEPH_BUILD_VIRTUALENV=$builddir diff --git a/ceph/ceph.spec b/ceph/ceph.spec index d41e5b110..6e5bf4a8e 100644 --- a/ceph/ceph.spec +++ b/ceph/ceph.spec @@ -98,7 +98,7 @@ # main package definition ################################################################################# Name: ceph -Version: 15.2.8 +Version: 15.2.9 Release: 0%{?dist} %if 0%{?fedora} || 0%{?rhel} Epoch: 2 @@ -114,7 +114,7 @@ License: LGPL-2.1 and LGPL-3.0 and CC-BY-SA-3.0 and GPL-2.0 and BSL-1.0 and BSD- Group: System/Filesystems %endif URL: http://ceph.com/ -Source0: %{?_remote_tarball_prefix}ceph-15.2.8.tar.bz2 +Source0: %{?_remote_tarball_prefix}ceph-15.2.9.tar.bz2 %if 0%{?suse_version} # _insert_obs_source_lines_here ExclusiveArch: x86_64 aarch64 ppc64le s390x @@ -414,10 +414,8 @@ Base is the package that includes all the files shared amongst ceph servers %package -n cephadm Summary: Utility to bootstrap Ceph clusters +BuildArch: noarch Requires: lvm2 -%if 0%{?suse_version} -Requires: apparmor-abstractions -%endif Requires: python%{python3_pkgversion} %if 0%{?weak_deps} Recommends: podman @@ -477,8 +475,12 @@ Provides: ceph-test:/usr/bin/ceph-monstore-tool Requires: ceph-base = %{_epoch_prefix}%{version}-%{release} %if 0%{?weak_deps} Recommends: nvme-cli +%if 0%{?suse_version} +Requires: smartmontools +%else Recommends: smartmontools %endif +%endif %description mon ceph-mon is the cluster monitor daemon for the Ceph distributed file system. 
One or more instances of ceph-mon form a Paxos part-time @@ -757,8 +759,12 @@ Requires: libstoragemgmt Requires: python%{python3_pkgversion}-ceph-common = %{_epoch_prefix}%{version}-%{release} %if 0%{?weak_deps} Recommends: nvme-cli +%if 0%{?suse_version} +Requires: smartmontools +%else Recommends: smartmontools %endif +%endif %description osd ceph-osd is the object storage daemon for the Ceph distributed file system. It is responsible for storing objects on a local file system @@ -1134,7 +1140,7 @@ This package provides Ceph’s default alerts for Prometheus. # common ################################################################################# %prep -%autosetup -p1 -n ceph-15.2.8 +%autosetup -p1 -n ceph-15.2.9 %build # LTO can be enabled as soon as the following GCC bug is fixed: @@ -1309,7 +1315,7 @@ ln -sf %{_sbindir}/mount.ceph %{buildroot}/sbin/mount.ceph install -m 0644 -D udev/50-rbd.rules %{buildroot}%{_udevrulesdir}/50-rbd.rules # sudoers.d -install -m 0600 -D sudoers.d/ceph-osd-smartctl %{buildroot}%{_sysconfdir}/sudoers.d/ceph-osd-smartctl +install -m 0440 -D sudoers.d/ceph-osd-smartctl %{buildroot}%{_sysconfdir}/sudoers.d/ceph-osd-smartctl %if 0%{?rhel} >= 8 pathfix.py -pni "%{__python3} %{py3_shbang_opts}" %{buildroot}%{_bindir}/* diff --git a/ceph/ceph.spec.in b/ceph/ceph.spec.in index 70942aca7..39a948960 100644 --- a/ceph/ceph.spec.in +++ b/ceph/ceph.spec.in @@ -414,10 +414,8 @@ Base is the package that includes all the files shared amongst ceph servers %package -n cephadm Summary: Utility to bootstrap Ceph clusters +BuildArch: noarch Requires: lvm2 -%if 0%{?suse_version} -Requires: apparmor-abstractions -%endif Requires: python%{python3_pkgversion} %if 0%{?weak_deps} Recommends: podman @@ -477,8 +475,12 @@ Provides: ceph-test:/usr/bin/ceph-monstore-tool Requires: ceph-base = %{_epoch_prefix}%{version}-%{release} %if 0%{?weak_deps} Recommends: nvme-cli +%if 0%{?suse_version} +Requires: smartmontools +%else Recommends: smartmontools %endif +%endif %description mon ceph-mon is the cluster monitor daemon for the Ceph distributed file system. One or more instances of ceph-mon form a Paxos part-time @@ -757,8 +759,12 @@ Requires: libstoragemgmt Requires: python%{python3_pkgversion}-ceph-common = %{_epoch_prefix}%{version}-%{release} %if 0%{?weak_deps} Recommends: nvme-cli +%if 0%{?suse_version} +Requires: smartmontools +%else Recommends: smartmontools %endif +%endif %description osd ceph-osd is the object storage daemon for the Ceph distributed file system. 
It is responsible for storing objects on a local file system @@ -1309,7 +1315,7 @@ ln -sf %{_sbindir}/mount.ceph %{buildroot}/sbin/mount.ceph install -m 0644 -D udev/50-rbd.rules %{buildroot}%{_udevrulesdir}/50-rbd.rules # sudoers.d -install -m 0600 -D sudoers.d/ceph-osd-smartctl %{buildroot}%{_sysconfdir}/sudoers.d/ceph-osd-smartctl +install -m 0440 -D sudoers.d/ceph-osd-smartctl %{buildroot}%{_sysconfdir}/sudoers.d/ceph-osd-smartctl %if 0%{?rhel} >= 8 pathfix.py -pni "%{__python3} %{py3_shbang_opts}" %{buildroot}%{_bindir}/* diff --git a/ceph/changelog.upstream b/ceph/changelog.upstream index 8f14a46c1..51d93f8a0 100644 --- a/ceph/changelog.upstream +++ b/ceph/changelog.upstream @@ -1,7 +1,13 @@ -ceph (15.2.8-1bionic) bionic; urgency=medium +ceph (15.2.9-1bionic) bionic; urgency=medium - -- Jenkins Build Slave User Wed, 16 Dec 2020 18:29:12 +0000 + -- Jenkins Build Slave User Tue, 23 Feb 2021 14:23:03 +0000 + +ceph (15.2.9-1) stable; urgency=medium + + * New upstream release + + -- Ceph Release Team Tue, 23 Feb 2021 14:10:13 +0000 ceph (15.2.8-1) stable; urgency=medium diff --git a/ceph/cmake/modules/CephChecks.cmake b/ceph/cmake/modules/CephChecks.cmake index 23687283a..ca86dcbc7 100644 --- a/ceph/cmake/modules/CephChecks.cmake +++ b/ceph/cmake/modules/CephChecks.cmake @@ -24,6 +24,7 @@ check_function_exists(strerror_r HAVE_Strerror_R) check_function_exists(name_to_handle_at HAVE_NAME_TO_HANDLE_AT) check_function_exists(pipe2 HAVE_PIPE2) check_function_exists(accept4 HAVE_ACCEPT4) +check_function_exists(sigdescr_np HAVE_SIGDESCR_NP) include(CMakePushCheckState) cmake_push_check_state(RESET) diff --git a/ceph/debian/rules b/ceph/debian/rules index dc32da308..3a40f99dc 100755 --- a/ceph/debian/rules +++ b/ceph/debian/rules @@ -61,7 +61,7 @@ override_dh_auto_install: install -D -m 644 udev/50-rbd.rules $(DESTDIR)/lib/udev/rules.d/50-rbd.rules install -D -m 644 src/etc-rbdmap $(DESTDIR)/etc/ceph/rbdmap install -D -m 644 etc/sysctl/90-ceph-osd.conf $(DESTDIR)/etc/sysctl.d/30-ceph-osd.conf - install -D -m 600 sudoers.d/ceph-osd-smartctl $(DESTDIR)/etc/sudoers.d/ceph-osd-smartctl + install -D -m 440 sudoers.d/ceph-osd-smartctl $(DESTDIR)/etc/sudoers.d/ceph-osd-smartctl install -m 755 src/cephadm/cephadm $(DESTDIR)/usr/sbin/cephadm diff --git a/ceph/do_cmake.sh b/ceph/do_cmake.sh index cc68e560a..a6a6e8b37 100755 --- a/ceph/do_cmake.sh +++ b/ceph/do_cmake.sh @@ -17,8 +17,10 @@ if [ -r /etc/os-release ]; then case "$ID" in fedora) PYBUILD="3.7" - if [ "$VERSION_ID" -ge "32" ] ; then + if [ "$VERSION_ID" -eq "32" ] ; then PYBUILD="3.8" + elif [ "$VERSION_ID" -ge "33" ] ; then + PYBUILD="3.9" fi ;; rhel|centos) diff --git a/ceph/doc/cephadm/drivegroups.rst b/ceph/doc/cephadm/drivegroups.rst index a1397af01..845898843 100644 --- a/ceph/doc/cephadm/drivegroups.rst +++ b/ceph/doc/cephadm/drivegroups.rst @@ -319,7 +319,7 @@ This can be described with two layouts. db_devices: model: MC-55-44-XZ limit: 2 (db_slots is actually to be favoured here, but it's not implemented yet) - + --- service_type: osd service_id: osd_spec_ssd placement: @@ -376,8 +376,7 @@ You can use the 'host_pattern' key in the layout to target certain nodes. Salt t rotational: 1 db_devices: rotational: 0 - - + --- service_type: osd service_id: osd_spec_six_to_ten placement: diff --git a/ceph/doc/cephadm/index.rst b/ceph/doc/cephadm/index.rst index 3156721df..730fef3a5 100644 --- a/ceph/doc/cephadm/index.rst +++ b/ceph/doc/cephadm/index.rst @@ -37,4 +37,5 @@ versions of Ceph. 
Client Setup DriveGroups troubleshooting - concepts \ No newline at end of file + concepts + Cephadm Feature Planning <../dev/cephadm/index> \ No newline at end of file diff --git a/ceph/doc/cephadm/monitoring.rst b/ceph/doc/cephadm/monitoring.rst index b1a415773..a694efaed 100644 --- a/ceph/doc/cephadm/monitoring.rst +++ b/ceph/doc/cephadm/monitoring.rst @@ -72,8 +72,20 @@ monitoring by following the steps below. ceph orch apply grafana 1 -Cephadm handles the prometheus, grafana, and alertmanager -configurations automatically. +Cephadm takes care of the configuration of Prometheus, Grafana, and Alertmanager +automatically. + +However, there is one exception to this rule. In some setups, the Dashboard +user's browser might not be able to access the Grafana URL configured in Ceph +Dashboard. One such scenario is when the cluster and the accessing user are each +in a different DNS zone. + +For this case, there is an extra configuration option for Ceph Dashboard, which +can be used to configure the URL for accessing Grafana by the user's browser. +This value will never be altered by cephadm. To set this configuration option, +issue the following command:: + + $ ceph dashboard set-grafana-frontend-api-url <grafana-server-url> It may take a minute or two for services to be deployed. Once completed, you should see something like this from ``ceph orch ls`` @@ -88,6 +100,37 @@ completed, you should see something like this from ``ceph orch ls`` node-exporter 2/2 6s ago docker.io/prom/node-exporter:latest e5a616e4b9cf present prometheus 1/1 6s ago docker.io/prom/prometheus:latest e935122ab143 present +Configuring SSL/TLS for Grafana +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +``cephadm`` will deploy Grafana using the certificate defined in the ceph +key/value store. If a certificate is not specified, ``cephadm`` will generate a +self-signed certificate during deployment of the Grafana service. + +A custom certificate can be configured using the following commands. + +.. code-block:: bash + + ceph config-key set mgr/cephadm/grafana_key -i $PWD/key.pem + ceph config-key set mgr/cephadm/grafana_crt -i $PWD/certificate.pem + +The ``cephadm`` manager module needs to be restarted to be able to read updates +to these keys. + +.. code-block:: bash + + ceph orch restart mgr + +If you already deployed Grafana, you need to redeploy the service for the +configuration to be updated. + +.. code-block:: bash + + ceph orch redeploy grafana + +The ``redeploy`` command also takes care of setting the right URL for Ceph +Dashboard. + Using custom images ~~~~~~~~~~~~~~~~~~~ @@ -120,7 +163,7 @@ For example you have set the custom image for automatically. You will need to manually update the configuration (image name and tag) to be able to install updates. - + If you choose to go with the recommendations instead, you can reset the custom image you have set before. After that, the default value will be used again. Use ``ceph config rm`` to reset the configuration option @@ -135,6 +178,86 @@ For example ceph config rm mgr mgr/cephadm/container_image_prometheus +Using custom configuration files +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +By overriding cephadm templates, it is possible to completely customize the +configuration files for monitoring services. + +Internally, cephadm already uses `Jinja2 +<https://jinja.palletsprojects.com/>`_ templates to generate the +configuration files for all monitoring components.
To be able to customize the +configuration of Prometheus, Grafana or the Alertmanager, it is possible to store +a Jinja2 template for each service that will be used for configuration +generation instead. This template will be evaluated every time a service of that +kind is deployed or reconfigured. That way, the custom configuration is +preserved and automatically applied on future deployments of these services. + +.. note:: + + The configuration of the custom template is also preserved when the default + configuration of cephadm changes. If the updated configuration is to be used, + the custom template needs to be migrated *manually*. + +Option names +"""""""""""" + +The following templates for files that will be generated by cephadm can be +overridden. These are the names to be used when storing with ``ceph config-key +set``: + +- ``alertmanager_alertmanager.yml`` +- ``grafana_ceph-dashboard.yml`` +- ``grafana_grafana.ini`` +- ``prometheus_prometheus.yml`` + +You can look up the file templates that are currently used by cephadm in +``src/pybind/mgr/cephadm/templates``: + +- ``services/alertmanager/alertmanager.yml.j2`` +- ``services/grafana/ceph-dashboard.yml.j2`` +- ``services/grafana/grafana.ini.j2`` +- ``services/prometheus/prometheus.yml.j2`` + +Usage +""""" + +The following command applies a single line value: + +.. code-block:: bash + + ceph config-key set mgr/cephadm/<option_name> <value> + +To set contents of files as template use the ``-i`` argument: + +.. code-block:: bash + + ceph config-key set mgr/cephadm/<option_name> -i $PWD/<filename> + +.. note:: + + When using files as input to ``config-key``, an absolute path to the file must + be used. + +It is required to restart the cephadm mgr module after a configuration option +has been set. Then the configuration file for the service needs to be recreated. +This is done using `redeploy`. For more details see the following example. + +Example +""""""" + +.. code-block:: bash + + # set the contents of ./prometheus.yml.j2 as template + ceph config-key set mgr/cephadm/services_prometheus_prometheus.yml \ + -i $PWD/prometheus.yml.j2 + + # restart cephadm mgr module + ceph orch restart mgr + + # redeploy the prometheus service + ceph orch redeploy prometheus + Disabling monitoring -------------------- diff --git a/ceph/doc/cephadm/operations.rst b/ceph/doc/cephadm/operations.rst index 456f48628..f852ef944 100644 --- a/ceph/doc/cephadm/operations.rst +++ b/ceph/doc/cephadm/operations.rst @@ -305,3 +305,53 @@ the cluster, create an initial ``ceph.conf`` file. For example:: Then, run bootstrap referencing this file:: cephadm bootstrap -c /root/ceph.conf ... + + +.. _cephadm-removing-hosts: + +Removing Hosts +============== + +If the node that you want to remove is running OSDs, make sure you remove the OSDs from the node. + +To remove a host from a cluster, do the following: + +For all Ceph service types, except for ``node-exporter`` and ``crash``, remove +the host from the placement specification file (for example, cluster.yml). +For example, if you are removing the host named host2, remove all occurrences of +``- host2`` from all ``placement:`` sections. + +Update: + +.. code-block:: yaml + + service_type: rgw + placement: + hosts: + - host1 + - host2 + +To: + +.. code-block:: yaml + + + service_type: rgw + placement: + hosts: + - host1 + +Remove the host from cephadm's environment: + +.. code-block:: bash + + ceph orch host rm host2 + +See also :ref:`orchestrator-cli-host-management`.
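As a hedged illustration of the placement edit described above, the snippet below automates the same change: it strips a host from every ``placement: hosts:`` list in a multi-document spec file such as ``cluster.yml`` (the helper name and the use of PyYAML are choices made for this sketch, not part of cephadm), after which the spec can be re-applied with ``ceph orch apply -i cluster.yml``.

.. code-block:: python

    import yaml  # PyYAML

    def drop_host_from_placements(spec_path: str, host: str) -> None:
        """Remove `host` from every placement.hosts list in a multi-document
        service spec file, mirroring the manual edit shown above."""
        with open(spec_path) as f:
            specs = list(yaml.safe_load_all(f))
        for spec in specs:
            hosts = (spec or {}).get("placement", {}).get("hosts")
            if hosts and host in hosts:
                hosts.remove(host)
        with open(spec_path, "w") as f:
            yaml.safe_dump_all(specs, f)

    # Example (hypothetical): drop_host_from_placements("cluster.yml", "host2")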
+ +If the host is running ``node-exporter`` and crash services, remove them by running +the following command on the host: + +.. code-block:: bash + + cephadm rm-daemon --fsid CLUSTER_ID --name SERVICE_NAME diff --git a/ceph/doc/cephfs/add-remove-mds.rst b/ceph/doc/cephfs/add-remove-mds.rst index 545779a6e..6a6bc08de 100644 --- a/ceph/doc/cephfs/add-remove-mds.rst +++ b/ceph/doc/cephfs/add-remove-mds.rst @@ -28,7 +28,7 @@ The other dimension to MDS performance is the available RAM for caching. The MDS necessarily manages a distributed and cooperative metadata cache among all clients and other active MDSs. Therefore it is essential to provide the MDS with sufficient RAM to enable faster metadata access and mutation. The default -MDS cache size (see also :doc:`/cephfs/cache-size-limits`) is 4GB. It is +MDS cache size (see also :doc:`/cephfs/cache-configuration`) is 4GB. It is recommended to provision at least 8GB of RAM for the MDS to support this cache size. diff --git a/ceph/doc/cephfs/cache-configuration.rst b/ceph/doc/cephfs/cache-configuration.rst new file mode 100644 index 000000000..480bb562d --- /dev/null +++ b/ceph/doc/cephfs/cache-configuration.rst @@ -0,0 +1,181 @@ +======================= +MDS Cache Configuration +======================= + +The Metadata Server coordinates a distributed cache among all MDS and CephFS +clients. The cache serves to improve metadata access latency and allow clients +to safely (coherently) mutate metadata state (e.g. via `chmod`). The MDS issues +**capabilities** and **directory entry leases** to indicate what state clients +may cache and what manipulations clients may perform (e.g. writing to a file). + +The MDS and clients both try to enforce a cache size. The mechanism for +specifying the MDS cache size is described below. Note that the MDS cache size +is not a hard limit. The MDS always allows clients to lookup new metadata +which is loaded into the cache. This is an essential policy as it avoids +deadlock in client requests (some requests may rely on held capabilities before +capabilities are released). + +When the MDS cache is too large, the MDS will **recall** client state so cache +items become unpinned and eligible to be dropped. The MDS can only drop cache +state when no clients refer to the metadata to be dropped. Also described below +is how to configure the MDS recall settings for your workload's needs. This is +necessary if the internal throttles on the MDS recall cannot keep up with the +client workload. + + +MDS Cache Size +-------------- + +You can limit the size of the Metadata Server (MDS) cache by a byte count. This +is done through the `mds_cache_memory_limit` configuration. For example:: + + ceph config set mds mds_cache_memory_limit 8GB + +In addition, you can specify a cache reservation by using the +`mds_cache_reservation` parameter for MDS operations. The cache reservation is +limited as a percentage of the memory and is set to 5% by default. The intent +of this parameter is to have the MDS maintain an extra reserve of memory for +its cache for new metadata operations to use. As a consequence, the MDS should +in general operate below its memory limit because it will recall old state from +clients in order to drop unused metadata in its cache. + +If the MDS cannot keep its cache under the target size, the MDS will send a +health alert to the Monitors indicating the cache is too large. This is +controlled by the `mds_health_cache_threshold` configuration which is by +default 150% of the maximum cache size.
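To make the relationship between these settings concrete, here is a small sketch using the defaults quoted above (5% reservation, 150% health threshold); the helper function is purely illustrative and not part of Ceph.

.. code-block:: python

    def mds_cache_targets(mds_cache_memory_limit: int,
                          mds_cache_reservation: float = 0.05,
                          mds_health_cache_threshold: float = 1.5):
        """Return (working_target, warn_at) for a given cache memory limit."""
        # The MDS tries to keep usage below the limit minus the reservation ...
        working_target = int(mds_cache_memory_limit * (1.0 - mds_cache_reservation))
        # ... and a health alert is raised once usage exceeds limit * threshold.
        warn_at = int(mds_cache_memory_limit * mds_health_cache_threshold)
        return working_target, warn_at

    target, warn = mds_cache_targets(8 * 1024**3)  # the 8GB example above
    print(target, warn)  # ~7.6 GiB working target, 12 GiB warning size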
+ +Because the cache limit is not a hard limit, potential bugs in the CephFS +client, MDS, or misbehaving applications might cause the MDS to exceed its +cache size. The health warnings are intended to help the operator detect this +situation and make necessary adjustments or investigate buggy clients. + +MDS Cache Trimming +------------------ + +There are two configurations for throttling the rate of cache trimming in the MDS: + +:: + + mds_cache_trim_threshold (default 64k) + + +and + +:: + + mds_cache_trim_decay_rate (default 1) + + +The intent of the throttle is to prevent the MDS from spending too much time +trimming its cache. This may limit its ability to handle client requests or +perform other upkeep. + +The trim configurations control an internal **decay counter**. Anytime metadata +is trimmed from the cache, the counter is incremented. The threshold sets the +maximum size of the counter while the decay rate indicates the exponential half +life for the counter. If the MDS is continually removing items from its cache, +it will reach a steady state of ``-ln(0.5)/rate*threshold`` items removed per +second. + +The defaults are conservative and may need to be changed for production MDS with +large cache sizes. + + +MDS Recall +---------- + +MDS limits its recall of client state (capabilities/leases) to prevent creating +too much work for itself handling release messages from clients. This is controlled +via the following configurations: + + +The maximum number of capabilities to recall from a single client in a given recall +event:: + + mds_recall_max_caps (default: 5000) + +The threshold and decay rate for the decay counter on a session:: + + mds_recall_max_decay_threshold (default: 16k) + +and:: + + mds_recall_max_decay_rate (default: 2.5 seconds) + +The session decay counter controls the rate of recall for an individual +session. The behavior of the counter works the same as for cache trimming +above. Each capability that is recalled increments the counter. + +There is also a global decay counter that throttles recall for all sessions:: + + mds_recall_global_max_decay_threshold (default: 64k) + +Its decay rate is the same as ``mds_recall_max_decay_rate``. Any recalled +capability for any session also increments this counter. + +If clients are slow to release state, the warning "failing to respond to cache +pressure" or ``MDS_HEALTH_CLIENT_RECALL`` will be reported. Each session's rate +of release is monitored by another decay counter configured by:: + + mds_recall_warning_threshold (default: 32k) + +and:: + + mds_recall_warning_decay_rate (default: 60.0 seconds) + +Each time a capability is released, the counter is incremented. If clients do +not release capabilities quickly enough and there is cache pressure, the +counter will indicate if the client is slow to release state. + +Some workloads and client behaviors may require faster recall of client state +to keep up with capability acquisition. It is recommended to increase the above +counters as needed to resolve any slow recall warnings in the cluster health +state. + + +Session Liveness +---------------- + +The MDS also keeps track of whether sessions are quiescent. If a client session +is not utilizing its capabilities or is otherwise quiet, the MDS will begin +recalling state from the session even if it is not under cache pressure. This +helps the MDS avoid future work when the cluster workload is hot and cache +pressure is forcing the MDS to recall state.
The expectation is that a client +not utilizing its capabilities is unlikely to use those capabilities anytime +in the near future. + +Determining whether a given session is quiescent is controlled by the following +configuration variables:: + + mds_session_cache_liveness_magnitude (default: 10) + +and:: + + mds_session_cache_liveness_decay_rate (default: 5min) + +The configuration ``mds_session_cache_liveness_decay_rate`` indicates the +half-life for the decay counter tracking the use of capabilities by the client. +Each time a client manipulates or acquires a capability, the MDS will increment +the counter. This is a rough but effective way to monitor utilization of the +client cache. + +The ``mds_session_cache_liveness_magnitude`` is a base-2 magnitude difference +of the liveness decay counter and the number of capabilities outstanding for +the session. So if the client has ``1*2^20`` (1M) capabilities outstanding and +only uses **less** than ``1*2^(20-mds_session_cache_liveness_magnitude)`` (1K +using defaults), the MDS will consider the client to be quiescent and begin +recall. + + +Capability Limit +---------------- + +The MDS also tries to prevent a single client from acquiring too many +capabilities. This helps prevent recovery from taking a long time in some +situations. It is not generally necessary for a client to have such a large +cache. The limit is configured via:: + + mds_max_caps_per_client (default: 1M) + +It is not recommended to set this value above 5M but it may be helpful with +some workloads. diff --git a/ceph/doc/cephfs/cache-size-limits.rst b/ceph/doc/cephfs/cache-size-limits.rst deleted file mode 100644 index 1f6f5d93b..000000000 --- a/ceph/doc/cephfs/cache-size-limits.rst +++ /dev/null @@ -1,14 +0,0 @@ -Understanding MDS Cache Size Limits -=================================== - -This section describes ways to limit MDS cache size. - -You can limit the size of the Metadata Server (MDS) cache by: - -* *A memory limit*: A new behavior introduced in the Luminous release. Use the `mds_cache_memory_limit` parameters. - -In addition, you can specify a cache reservation by using the `mds_cache_reservation` parameter for MDS operations. The cache reservation is limited as a percentage of the memory and is set to 5% by default. The intent of this parameter is to have the MDS maintain an extra reserve of memory for its cache for new metadata operations to use. As a consequence, the MDS should in general operate below its memory limit because it will recall old state from clients in order to drop unused metadata in its cache. - -The `mds_cache_reservation` parameter replaces the `mds_health_cache_threshold` in all situations except when MDS nodes sends a health alert to the Monitors indicating the cache is too large. By default, `mds_health_cache_threshold` is 150% of the maximum cache size. - -Be aware that the cache limit is not a hard limit. Potential bugs in the CephFS client or MDS or misbehaving applications might cause the MDS to exceed its cache size. The `mds_health_cache_threshold` configures the cluster health warning message so that operators can investigate why the MDS cannot shrink its cache. 
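The decay-counter arithmetic used by the trimming, recall and liveness sections above can be illustrated with a short sketch; the function names are ours and the constants are the defaults quoted in the text.

.. code-block:: python

    import math

    def steady_state_trim_rate(threshold: float, halflife: float) -> float:
        """Items removed per second when continually trimming, per the
        -ln(0.5)/rate*threshold relationship described above."""
        return -math.log(0.5) / halflife * threshold

    def session_is_quiescent(caps_outstanding: int, liveness: float,
                             magnitude: int = 10) -> bool:
        """Quiescent when the liveness decay counter is below
        caps_outstanding / 2**mds_session_cache_liveness_magnitude."""
        return liveness < caps_outstanding / (2 ** magnitude)

    # mds_cache_trim_threshold=64k, mds_cache_trim_decay_rate=1 -> ~45k items/s
    print(steady_state_trim_rate(64 * 1024, 1.0))
    # 1M caps outstanding with a liveness counter below ~1K -> quiescent
    print(session_is_quiescent(2 ** 20, 900))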
diff --git a/ceph/doc/cephfs/fs-volumes.rst b/ceph/doc/cephfs/fs-volumes.rst index 807340334..dd38e38e4 100644 --- a/ceph/doc/cephfs/fs-volumes.rst +++ b/ceph/doc/cephfs/fs-volumes.rst @@ -85,7 +85,7 @@ FS Subvolume groups Create a subvolume group using:: - $ ceph fs subvolumegroup create [--pool_layout --uid --gid --mode ] + $ ceph fs subvolumegroup create [--pool_layout ] [--uid ] [--gid ] [--mode ] The command succeeds even if the subvolume group already exists. @@ -111,12 +111,8 @@ List subvolume groups using:: $ ceph fs subvolumegroup ls -Create a snapshot (see :doc:`/cephfs/experimental-features`) of a -subvolume group using:: - - $ ceph fs subvolumegroup snapshot create - -This implicitly snapshots all the subvolumes under the subvolume group. +.. note:: Subvolume group snapshot feature is no longer supported in mainline CephFS (existing group + snapshots can still be listed and deleted) Remove a snapshot of a subvolume group using:: @@ -135,7 +131,7 @@ FS Subvolumes Create a subvolume using:: - $ ceph fs subvolume create [--size --group_name --pool_layout --uid --gid --mode --namespace-isolated] + $ ceph fs subvolume create [--size ] [--group_name ] [--pool_layout ] [--uid ] [--gid ] [--mode ] [--namespace-isolated] The command succeeds even if the subvolume already exists. @@ -150,16 +146,24 @@ its parent directory and no size limit. Remove a subvolume using:: - $ ceph fs subvolume rm [--group_name --force] + $ ceph fs subvolume rm [--group_name ] [--force] [--retain-snapshots] The command removes the subvolume and its contents. It does this in two steps. -First, it move the subvolume to a trash folder, and then asynchronously purges +First, it moves the subvolume to a trash folder, and then asynchronously purges its contents. The removal of a subvolume fails if it has snapshots, or is non-existent. '--force' flag allows the non-existent subvolume remove command to succeed. +A subvolume can be removed retaining existing snapshots of the subvolume using the +'--retain-snapshots' option. If snapshots are retained, the subvolume is considered +empty for all operations not involving the retained snapshots. + +.. note:: Snapshot retained subvolumes can be recreated using 'ceph fs subvolume create' + +.. note:: Retained snapshots can be used as a clone source to recreate the subvolume, or clone to a newer subvolume. + Resize a subvolume using:: $ ceph fs subvolume resize [--group_name ] [--no_shrink] @@ -195,17 +199,32 @@ The output format is json and contains fields as follows. * type: subvolume type indicating whether it's clone or subvolume * pool_namespace: RADOS namespace of the subvolume * features: features supported by the subvolume +* state: current state of the subvolume + +If a subvolume has been removed retaining its snapshots, the output only contains fields as follows. + +* type: subvolume type indicating whether it's clone or subvolume +* features: features supported by the subvolume +* state: current state of the subvolume The subvolume "features" are based on the internal version of the subvolume and is a list containing a subset of the following features, * "snapshot-clone": supports cloning using a subvolumes snapshot as the source * "snapshot-autoprotect": supports automatically protecting snapshots, that are active clone sources, from deletion +* "snapshot-retention": supports removing subvolume contents, retaining any existing snapshots + +The subvolume "state" is based on the current state of the subvolume and contains one of the following values. 
+ +* "complete": subvolume is ready for all operations +* "snapshot-retained": subvolume is removed but its snapshots are retained List subvolumes using:: $ ceph fs subvolume ls [--group_name ] +.. note:: subvolumes that are removed but have snapshots retained, are also listed. + Create a snapshot of a subvolume using:: $ ceph fs subvolume snapshot create [--group_name ] @@ -213,11 +232,13 @@ Create a snapshot of a subvolume using:: Remove a snapshot of a subvolume using:: - $ ceph fs subvolume snapshot rm [--group_name --force] + $ ceph fs subvolume snapshot rm [--group_name ] [--force] Using the '--force' flag allows the command to succeed that would otherwise fail if the snapshot did not exist. +.. note:: if the last snapshot within a snapshot retained subvolume is removed, the subvolume is also removed + List snapshots of a subvolume using:: $ ceph fs subvolume snapshot ls [--group_name ] diff --git a/ceph/doc/cephfs/index.rst b/ceph/doc/cephfs/index.rst index 58839a3f1..629c961bc 100644 --- a/ceph/doc/cephfs/index.rst +++ b/ceph/doc/cephfs/index.rst @@ -79,7 +79,7 @@ Administration Administrative commands Provision/Add/Remove MDS(s) MDS failover and standby configuration - MDS Cache Size Limits + MDS Cache Configuration MDS Configuration Settings Manual: ceph-mds <../../man/8/ceph-mds> Export over NFS diff --git a/ceph/doc/dev/cephadm/compliance-check.rst b/ceph/doc/dev/cephadm/compliance-check.rst new file mode 100644 index 000000000..eea462445 --- /dev/null +++ b/ceph/doc/dev/cephadm/compliance-check.rst @@ -0,0 +1,121 @@ +================ +Compliance Check +================ + +The stability and reliability of a Ceph cluster is dependent not just upon the Ceph daemons, but +also the OS and hardware that Ceph is installed on. This document is intended to promote a design +discussion for providing a "compliance" feature within mgr/cephadm, which would be responsible for +identifying common platform-related issues that could impact Ceph stability and operation. + +The ultimate goal of these checks is to identify issues early and raise a healthcheck WARN +event, to alert the Administrator to the issue. + +Prerequisites +============= +In order to effectively analyse the hosts that Ceph is deployed to, this feature requires a cache +of host-related metadata. The metadata is already available from cephadm's HostFacts class and the +``gather-facts`` cephadm command. For the purposes of this document, we will assume that this +data is available within the mgr/cephadm "cache" structure. + +Some checks will require that the host status is also populated e.g. ONLINE, OFFLINE, MAINTENANCE + +Administrator Interaction +========================= +Not all users will require this feature, and must be able to 'opt out'. For this reason, +mgr/cephadm must provide controls, such as the following; + +.. code-block:: + + ceph cephadm compliance enable | disable | status [--format json] + ceph cephadm compliance ls [--format json] + ceph cephadm compliance enable-check + ceph cephadm compliance disable-check + ceph cephadm compliance set-check-interval + ceph cephadm compliance get-check-interval + +The status option would show the enabled/disabled state of the feature, along with the +check-interval. + +The ``ls`` subcommand would show all checks in the following format; + +``check-name status description`` + +Proposed Integration +==================== +The compliance checks are not required to run all the time, but instead should run at discrete +intervals. 
The interval would be configurable via the :code:`set-check-interval` +subcommand (default would be every 12 hours). + + +mgr/cephadm currently executes an event-driven (time-based) serve loop to act on deploy/remove and +reconcile activity. In order to execute the compliance checks, the compliance check code would be +called from this main serve loop - when the :code:`set-check-interval` is met. + + +Proposed Checks +=============== +All checks would push any errors to a list, so multiple issues can be escalated to the Admin at +the same time. The list below provides a description of each check, with the text following the +name indicating a shortname version *(the shortname is the reference for command interaction +when enabling or disabling a check)* + + +OS Consistency (OS) +___________________ +* all hosts must use the same vendor +* all hosts must be on the same major release (this check would only be applicable to distributions that + offer a long-term-support strategy, such as RHEL, CentOS, SLES or Ubuntu) + + +*src: gather-facts output* + +Linux Kernel Security Mode (LSM) +________________________________ +* All hosts should have a consistent SELINUX/AppArmor configuration + +*src: gather-facts output* + +Services Check (SERVICES) +_________________________ +Hosts that are in an ONLINE state should adhere to the following; + +* all daemons (systemd units) should be enabled +* all daemons should be running (not dead) + +*src: list_daemons output* + +Support Status (SUPPORT) +________________________ +If support status has been detected, it should be consistent across all hosts. At this point +support status is available only for Red Hat machines. + +*src: gather-facts output* + +Network : MTU (MTU) +________________________________ +All network interfaces on the same Ceph network (public/cluster) should have the same MTU + +*src: gather-facts output* + +Network : LinkSpeed (LINKSPEED) +____________________________________________ +All network interfaces on the same Ceph network (public/cluster) should have the same Linkspeed + +*src: gather-facts output* + +Network : Consistency (INTERFACE) +______________________________________________ +All hosts with OSDs should have consistent network configuration - e.g. if some hosts do +not separate cluster/public traffic but others do, that is an anomaly that would generate a +compliance check warning. + +*src: gather-facts output* + +Notification Strategy +===================== +If any of the checks fail, mgr/cephadm would raise a WARN level alert + +Futures +======= +The checks highlighted here serve only as a starting point, and we should expect to expand +on the checks over time. diff --git a/ceph/doc/dev/cephadm/host-maintenance.rst b/ceph/doc/dev/cephadm/host-maintenance.rst new file mode 100644 index 000000000..af48dee01 --- /dev/null +++ b/ceph/doc/dev/cephadm/host-maintenance.rst @@ -0,0 +1,104 @@ +================ +Host Maintenance +================ + +All hosts that support Ceph daemons need to support maintenance activity, whether the host +is physical or virtual. This means that management workflows should provide +a simple and consistent way to support this operational requirement. This document defines +the maintenance strategy that could be implemented in cephadm and mgr/cephadm. + + +High Level Design +================= +Placing a host into maintenance adopts the following workflow; + +#. 
confirm that the removal of the host does not impact data availability (the following + steps will assume it is safe to proceed) + + * orch host ok-to-stop would be used here + +#. if the host has osd daemons, apply noout to the host subtree to prevent data migration + from triggering during the planned maintenance slot. +#. Stop the ceph target (all daemons stop) +#. Disable the ceph target on that host to prevent a reboot from automatically starting + ceph services again + + +Exiting maintenance is basically the reverse of the above sequence + +Admin Interaction +================= +The ceph orch command will be extended to support maintenance. + +.. code-block:: + + ceph orch host enter-maintenance [ --check ] + ceph orch host exit-maintenance + +.. note:: In addition, the host's status should be updated to reflect whether it + is in maintenance or not. + +The 'check' Option +__________________ +The orch host ok-to-stop command focuses on ceph daemons (mon, osd, mds), which +provides the first check. However, a ceph cluster also uses other types of daemons +for monitoring, management and non-native protocol support, which means the +logic will need to consider service impact too. The 'check' option provides +this additional layer to alert the user of service impact to *secondary* +daemons. + +The list below shows some of these additional daemons. + +* mgr (not included in ok-to-stop checks) +* prometheus, grafana, alertmanager +* rgw +* haproxy +* iscsi gateways +* ganesha gateways + +By using the --check option first, the Admin can choose whether to proceed. This +workflow is obviously optional for the CLI user, but could be integrated into the +UI workflow to help less experienced Administrators manage the cluster. + +By adopting this two-phase approach, a UI-based workflow would look something +like this. + +#. User selects a host to place into maintenance + + * orchestrator checks for data **and** service impact +#. If potential impact is shown, the next steps depend on the impact type + + * **data availability** : maintenance is denied, informing the user of the issue + * **service availability** : user is provided a list of affected services and + asked to confirm + + +Components Impacted +=================== +Implementing this capability will require changes to the following; + +* cephadm + + * Add maintenance subcommand with the following 'verbs'; enter, exit, check + +* mgr/cephadm + + * add methods to CephadmOrchestrator for enter/exit and check + * data gathering would be skipped for hosts in a maintenance state + +* mgr/orchestrator + + * add CLI commands to OrchestratorCli which expose the enter/exit and check interaction + + +Ideas for Future Work +===================== +#. When a host is placed into maintenance, the time of the event could be persisted. This + would allow the orchestrator layer to establish a maintenance window for the task and + alert if the maintenance window has been exceeded. +#. The maintenance process could support plugins to allow other integration tasks to be + initiated as part of the transition to and from maintenance. 
This plugin capability could + support actions like; + + * alert suppression to 3rd party monitoring framework(s) + * service level reporting, to record outage windows diff --git a/ceph/doc/dev/cephadm/index.rst b/ceph/doc/dev/cephadm/index.rst new file mode 100644 index 000000000..5f281a2dd --- /dev/null +++ b/ceph/doc/dev/cephadm/index.rst @@ -0,0 +1,13 @@ +=================================== +CEPHADM Developer Documentation +=================================== + +.. rubric:: Contents + +.. toctree:: + :maxdepth: 1 + + + host-maintenance + compliance-check + diff --git a/ceph/doc/man/8/rbd.rst b/ceph/doc/man/8/rbd.rst index cc920a1af..407bf36b7 100644 --- a/ceph/doc/man/8/rbd.rst +++ b/ceph/doc/man/8/rbd.rst @@ -732,24 +732,32 @@ Per client instance `rbd device map` options: * noshare - Disable sharing of client instances with other mappings. -* crc - Enable CRC32C checksumming for data writes (default). +* crc - Enable CRC32C checksumming for msgr1 on-the-wire protocol (default). + For msgr2.1 protocol this option is ignored: full checksumming is always on + in 'crc' mode and always off in 'secure' mode. -* nocrc - Disable CRC32C checksumming for data writes. +* nocrc - Disable CRC32C checksumming for msgr1 on-the-wire protocol. Note + that only payload checksumming is disabled, header checksumming is always on. + For msgr2.1 protocol this option is ignored. -* cephx_require_signatures - Require cephx message signing (since 3.19, - default). +* cephx_require_signatures - Require msgr1 message signing feature (since 3.19, + default). This option is deprecated and will be removed in the future as the + feature has been supported since the Bobtail release. -* nocephx_require_signatures - Don't require cephx message signing (since - 3.19). +* nocephx_require_signatures - Don't require msgr1 message signing feature + (since 3.19). This option is deprecated and will be removed in the future. * tcp_nodelay - Disable Nagle's algorithm on client sockets (since 4.0, default). * notcp_nodelay - Enable Nagle's algorithm on client sockets (since 4.0). -* cephx_sign_messages - Enable message signing (since 4.4, default). +* cephx_sign_messages - Enable message signing for msgr1 on-the-wire protocol + (since 4.4, default). For msgr2.1 protocol this option is ignored: message + signing is built into 'secure' mode and not offered in 'crc' mode. -* nocephx_sign_messages - Disable message signing (since 4.4). +* nocephx_sign_messages - Disable message signing for msgr1 on-the-wire protocol + (since 4.4). For msgr2.1 protocol this option is ignored. * mount_timeout=x - A timeout on various steps in `rbd device map` and `rbd device unmap` sequences (default is 60 seconds). In particular, @@ -844,6 +852,25 @@ Per mapping (block device) `rbd device map` options: backend that the data is incompressible, disabling compression in aggressive mode (since 5.8). +* ms_mode=legacy - Use msgr1 on-the-wire protocol (since 5.11, default). + +* ms_mode=crc - Use msgr2.1 on-the-wire protocol, select 'crc' mode, also + referred to as plain mode (since 5.11). If the daemon denies 'crc' mode, + fail the connection. + +* ms_mode=secure - Use msgr2.1 on-the-wire protocol, select 'secure' mode + (since 5.11). 'secure' mode provides full in-transit encryption ensuring + both confidentiality and authenticity. If the daemon denies 'secure' mode, + fail the connection. + +* ms_mode=prefer-crc - Use msgr2.1 on-the-wire protocol, select 'crc' + mode (since 5.11). 
If the daemon denies 'crc' mode in favor of 'secure' + mode, agree to 'secure' mode. + +* ms_mode=prefer-secure - Use msgr2.1 on-the-wire protocol, select 'secure' + mode (since 5.11). If the daemon denies 'secure' mode in favor of 'crc' + mode, agree to 'crc' mode. + * udev - Wait for udev device manager to finish executing all matching "add" rules and release the device before exiting (default). This option is not passed to the kernel. diff --git a/ceph/doc/mgr/dashboard.rst b/ceph/doc/mgr/dashboard.rst index 7d815d8b9..cdedfd8e8 100644 --- a/ceph/doc/mgr/dashboard.rst +++ b/ceph/doc/mgr/dashboard.rst @@ -270,6 +270,34 @@ commands:: $ ceph dashboard ac-user-create administrator +Account Lock-out +^^^^^^^^^^^^^^^^ + +Account lock-out disables a user account if a user repeatedly enters the wrong +credentials. It is enabled by default to prevent brute-force or dictionary +attacks. The user can get or set the default number of lock-out attempts using +these commands respectively:: + + $ ceph dashboard get-account-lockout-attempts + $ ceph dashboard set-account-lockout-attempts + +.. warning:: + + This feature can be disabled by setting the default number of lock-out attempts to 0. + However, by disabling this feature, the account is more vulnerable to brute-force or + dictionary-based attacks. To disable it, run:: + + $ ceph dashboard set-account-lockout-attempts 0 + +Enable a Locked User +^^^^^^^^^^^^^^^^^^^^ + +If a user account is disabled as a result of multiple invalid login attempts, then +it needs to be manually enabled by the administrator. This can be done with the following +command:: + + $ ceph dashboard ac-user-enable + Accessing the Dashboard ^^^^^^^^^^^^^^^^^^^^^^^ @@ -479,7 +507,8 @@ will not be visible in Prometheus. After you have set up Grafana and Prometheus, you will need to configure the connection information that the Ceph Dashboard will use to access Grafana. -You need to tell the dashboard on which url Grafana instance is running/deployed:: +You need to tell the dashboard on which URL the Grafana instance is +running/deployed:: $ ceph dashboard set-grafana-api-url # default: '' @@ -503,6 +532,38 @@ e.g. caused by certificates signed by unknown CA or not matching the host name:: You can directly access Grafana Instance as well to monitor your cluster. +Alternative URL for Browsers +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The Ceph Dashboard backend requires the Grafana URL to be able to verify the +existence of Grafana Dashboards before the frontend even loads them. Due to the +nature of how Grafana is implemented in Ceph Dashboard, this means that two +working connections are required in order to be able to see Grafana graphs in +Ceph Dashboard: + +- The backend (Ceph Mgr module) needs to verify the existence of the requested + graph. If this request succeeds, it lets the frontend know that it can safely + access Grafana. +- The frontend then requests the Grafana graphs directly from the user's + browser using an iframe. The Grafana instance is accessed directly without any + detour through Ceph Dashboard. + +Now, it might be the case that your environment makes it difficult for the +user's browser to directly access the URL configured in Ceph Dashboard. To solve +this issue, a separate URL can be configured which will solely be used to tell +the frontend (the user's browser) which URL it should use to access Grafana. 
+This setting won't ever be changed automatically, unlike the GRAFANA_API_URL +which is set by :ref:`cephadm` (only if cephadm is used to deploy monitoring +services). + +To change the URL that is returned to the frontend, issue the following command:: + + $ ceph dashboard set-grafana-frontend-api-url + +If no value is set for that option, it will simply fall back to the value of the +GRAFANA_API_URL option. If set, it will instruct the browser to use this URL to +access Grafana. + .. _dashboard-sso-support: Enabling Single Sign-On (SSO) diff --git a/ceph/doc/mgr/orchestrator.rst b/ceph/doc/mgr/orchestrator.rst index fba1b5ce0..4cf695992 100644 --- a/ceph/doc/mgr/orchestrator.rst +++ b/ceph/doc/mgr/orchestrator.rst @@ -58,6 +58,8 @@ Status Show current orchestrator mode and high-level status (whether the orchestrator plugin is available and operational) +.. _orchestrator-cli-host-management: + Host Management =============== @@ -70,7 +72,7 @@ Add and remove hosts:: ceph orch host add [] [...] ceph orch host rm -For cephadm, see also :ref:`cephadm-fqdn`. +For cephadm, see also :ref:`cephadm-fqdn` and :ref:`cephadm-removing-hosts`. Host Specification ------------------ diff --git a/ceph/doc/rados/configuration/osd-config-ref.rst b/ceph/doc/rados/configuration/osd-config-ref.rst index 344599fe7..f06ad6bcd 100644 --- a/ceph/doc/rados/configuration/osd-config-ref.rst +++ b/ceph/doc/rados/configuration/osd-config-ref.rst @@ -1127,7 +1127,7 @@ Miscellaneous when osd data is on HDD and osd journal is on SSD. :Type: Float -:Default: ``2`` +:Default: ``1`` ``osd command max records`` diff --git a/ceph/doc/rados/operations/erasure-code-clay.rst b/ceph/doc/rados/operations/erasure-code-clay.rst index ccf3b309c..3c0931423 100644 --- a/ceph/doc/rados/operations/erasure-code-clay.rst +++ b/ceph/doc/rados/operations/erasure-code-clay.rst @@ -88,7 +88,7 @@ Where: :Description: Number of OSDs requested to send data during recovery of a single chunk. *d* needs to be chosen such that - k+1 <= d <= k+m-1. Larger the *d*, the better the savings. + k+1 <= d <= k+m-1. The larger the *d*, the better the savings. :Type: Integer :Required: No. diff --git a/ceph/doc/rados/operations/health-checks.rst b/ceph/doc/rados/operations/health-checks.rst index 4b3d5a7a2..32169dab3 100644 --- a/ceph/doc/rados/operations/health-checks.rst +++ b/ceph/doc/rados/operations/health-checks.rst @@ -1175,3 +1175,16 @@ This warning can be silenced by setting the ``mon_warn_on_osd_down_out_interval_zero`` to false:: ceph config global mon mon_warn_on_osd_down_out_interval_zero false + +DASHBOARD_DEBUG +_______________ + +The Dashboard debug mode is enabled. This means that if there is an error +while processing a REST API request, the HTTP error response contains +a Python traceback. This behaviour should be disabled in production +environments because such a traceback might contain and expose sensitive +information. 
+ +The debug mode can be disabled with:: + + ceph dashboard debug disable diff --git a/ceph/install-deps.sh b/ceph/install-deps.sh index fb377af75..f56ffd6ff 100755 --- a/ceph/install-deps.sh +++ b/ceph/install-deps.sh @@ -365,7 +365,8 @@ else --enable rhel-7-server-devtools-rpms dts_ver=8 elif test $ID = centos -a $MAJOR_VERSION = 8 ; then - $SUDO dnf config-manager --set-enabled PowerTools + # Enable 'powertools' or 'PowerTools' repo + $SUDO dnf config-manager --set-enabled $(dnf repolist --all 2>/dev/null|gawk 'tolower($0) ~ /^powertools\s/{print $1}') # before EPEL8 and PowerTools provide all dependencies, we use sepia for the dependencies $SUDO dnf config-manager --add-repo http://apt-mirror.front.sepia.ceph.com/lab-extras/8/ $SUDO dnf config-manager --setopt=apt-mirror.front.sepia.ceph.com_lab-extras_8_.gpgcheck=0 --save diff --git a/ceph/monitoring/grafana/dashboards/osd-device-details.json b/ceph/monitoring/grafana/dashboards/osd-device-details.json index 8b819e902..deb6ed7d2 100644 --- a/ceph/monitoring/grafana/dashboards/osd-device-details.json +++ b/ceph/monitoring/grafana/dashboards/osd-device-details.json @@ -126,7 +126,7 @@ "label": "Read (-) / Write (+)", "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { diff --git a/ceph/monitoring/prometheus/alerts/ceph_default_alerts.yml b/ceph/monitoring/prometheus/alerts/ceph_default_alerts.yml index 51d19bfca..b07ea80ea 100644 --- a/ceph/monitoring/prometheus/alerts/ceph_default_alerts.yml +++ b/ceph/monitoring/prometheus/alerts/ceph_default_alerts.yml @@ -230,8 +230,8 @@ groups: - alert: pool filling up expr: | ( - predict_linear(ceph_pool_stored[2d], 3600 * 24 * 5) >= - ceph_pool_max_avail + predict_linear(ceph_pool_stored[2d], 3600 * 24 * 5) + >= ceph_pool_stored + ceph_pool_max_avail ) * on(pool_id) group_left(name) ceph_pool_metadata labels: severity: warning @@ -241,3 +241,15 @@ groups: description: > Pool {{ $labels.name }} will be full in less than 5 days assuming the average fill-up rate of the past 48 hours. 
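The reworked ``pool filling up`` expression above compares a five-day linear projection of ``ceph_pool_stored`` (fitted over the last two days of samples) against total pool capacity, i.e. current usage plus ``ceph_pool_max_avail``. As a rough, standalone illustration of that arithmetic only (not of how Prometheus evaluates the rule; the function and variable names below are hypothetical), a Python sketch could look like this::

    def predict_linear(samples, horizon_secs):
        # least-squares linear extrapolation, mirroring PromQL's predict_linear();
        # samples is a list of (unix_timestamp, bytes_stored) pairs covering ~2 days
        n = len(samples)
        mean_t = sum(t for t, _ in samples) / n
        mean_v = sum(v for _, v in samples) / n
        var = sum((t - mean_t) ** 2 for t, _ in samples)
        slope = sum((t - mean_t) * (v - mean_v) for t, v in samples) / var if var else 0.0
        intercept = mean_v - slope * mean_t
        # value predicted horizon_secs after the newest sample
        return intercept + slope * (samples[-1][0] + horizon_secs)

    def pool_filling_up(samples, max_avail_bytes, horizon_days=5):
        # alert condition: projected usage reaches current stored + max avail
        projected = predict_linear(samples, horizon_days * 24 * 3600)
        return projected >= samples[-1][1] + max_avail_bytes

Feeding synthetic samples into such a helper makes it easy to sanity-check the threshold behaviour (for example, that a pool growing at a steady rate only trips the condition once the five-day projection crosses capacity) before touching the rule itself.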
+ + - name: healthchecks + rules: + - alert: Slow OSD Ops + expr: ceph_healthcheck_slow_ops > 0 + for: 30s + labels: + severity: warning + type: ceph_default + annotations: + description: > + {{ $value }} OSD requests are taking too long to process (osd_op_complaint_time exceeded) diff --git a/ceph/qa/distros/all/ubuntu_18.04_podman.yaml b/ceph/qa/distros/all/ubuntu_18.04_podman.yaml index 3d3d99642..214e964ac 100644 --- a/ceph/qa/distros/all/ubuntu_18.04_podman.yaml +++ b/ceph/qa/distros/all/ubuntu_18.04_podman.yaml @@ -9,4 +9,4 @@ tasks: - echo "deb https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/xUbuntu_18.04/ /" | sudo tee /etc/apt/sources.list.d/devel:kubic:libcontainers:stable.list - sudo apt update - sudo apt -y install podman - - echo -e "[registries.search]\nregistries = ['docker.io']" | sudo tee /etc/containers/registries.conf + - echo -e "[[registry]]\nlocation = 'docker.io'\n\n[[registry.mirror]]\nlocation='docker-mirror.front.sepia.ceph.com:5000'\n" | sudo tee /etc/containers/registries.conf diff --git a/ceph/qa/standalone/mon/osd-erasure-code-profile.sh b/ceph/qa/standalone/mon/osd-erasure-code-profile.sh index 364b4c169..0afc5fc0b 100755 --- a/ceph/qa/standalone/mon/osd-erasure-code-profile.sh +++ b/ceph/qa/standalone/mon/osd-erasure-code-profile.sh @@ -222,6 +222,17 @@ function TEST_profile_k_sanity() { m=1 || return 1 } +function TEST_invalid_crush_failure_domain() { + local dir=$1 + + run_mon $dir a || return 1 + + local profile=ec_profile + local crush_failure_domain=invalid_failure_domain + + ! ceph osd erasure-code-profile set $profile k=4 m=2 crush-failure-domain=$crush_failure_domain 2>&1 || return 1 +} + main osd-erasure-code-profile "$@" # Local Variables: diff --git a/ceph/qa/suites/fs/basic_functional/tasks/volumes.yaml b/ceph/qa/suites/fs/basic_functional/tasks/volumes.yaml index e94728f9e..1315980ed 100644 --- a/ceph/qa/suites/fs/basic_functional/tasks/volumes.yaml +++ b/ceph/qa/suites/fs/basic_functional/tasks/volumes.yaml @@ -15,5 +15,6 @@ overrides: tasks: - cephfs_test_runner: + fail_on_skip: false modules: - tasks.cephfs.test_volumes diff --git a/ceph/qa/suites/krbd/basic/ms_mode/.qa b/ceph/qa/suites/krbd/basic/ms_mode/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/ceph/qa/suites/krbd/basic/ms_mode/.qa @@ -0,0 +1 @@ +../.qa/ \ No newline at end of file diff --git a/ceph/qa/suites/krbd/basic/ms_mode/crc.yaml b/ceph/qa/suites/krbd/basic/ms_mode/crc.yaml new file mode 100644 index 000000000..3b072578f --- /dev/null +++ b/ceph/qa/suites/krbd/basic/ms_mode/crc.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=crc diff --git a/ceph/qa/suites/krbd/basic/ms_mode/legacy.yaml b/ceph/qa/suites/krbd/basic/ms_mode/legacy.yaml new file mode 100644 index 000000000..0048dcb0c --- /dev/null +++ b/ceph/qa/suites/krbd/basic/ms_mode/legacy.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=legacy diff --git a/ceph/qa/suites/krbd/basic/ms_mode/secure.yaml b/ceph/qa/suites/krbd/basic/ms_mode/secure.yaml new file mode 100644 index 000000000..a735db18d --- /dev/null +++ b/ceph/qa/suites/krbd/basic/ms_mode/secure.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=secure diff --git a/ceph/qa/suites/krbd/fsx/conf.yaml b/ceph/qa/suites/krbd/fsx/conf.yaml index d4863aa51..30da870b2 100644 --- a/ceph/qa/suites/krbd/fsx/conf.yaml +++ b/ceph/qa/suites/krbd/fsx/conf.yaml @@ -3,5 +3,3 @@ overrides: conf: 
global: ms die on skipped message: false - client: - rbd default map options: read_from_replica=balance diff --git a/ceph/qa/suites/krbd/fsx/ms_mode$/.qa b/ceph/qa/suites/krbd/fsx/ms_mode$/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/ceph/qa/suites/krbd/fsx/ms_mode$/.qa @@ -0,0 +1 @@ +../.qa/ \ No newline at end of file diff --git a/ceph/qa/suites/krbd/fsx/ms_mode$/crc.yaml b/ceph/qa/suites/krbd/fsx/ms_mode$/crc.yaml new file mode 100644 index 000000000..d11be3887 --- /dev/null +++ b/ceph/qa/suites/krbd/fsx/ms_mode$/crc.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=crc,read_from_replica=balance diff --git a/ceph/qa/suites/krbd/fsx/ms_mode$/legacy.yaml b/ceph/qa/suites/krbd/fsx/ms_mode$/legacy.yaml new file mode 100644 index 000000000..2b7116c03 --- /dev/null +++ b/ceph/qa/suites/krbd/fsx/ms_mode$/legacy.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=legacy,read_from_replica=balance diff --git a/ceph/qa/suites/krbd/fsx/ms_mode$/prefer-crc.yaml b/ceph/qa/suites/krbd/fsx/ms_mode$/prefer-crc.yaml new file mode 100644 index 000000000..a346c7548 --- /dev/null +++ b/ceph/qa/suites/krbd/fsx/ms_mode$/prefer-crc.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=prefer-crc,read_from_replica=balance diff --git a/ceph/qa/suites/krbd/fsx/ms_mode$/secure.yaml b/ceph/qa/suites/krbd/fsx/ms_mode$/secure.yaml new file mode 100644 index 000000000..671b73f9c --- /dev/null +++ b/ceph/qa/suites/krbd/fsx/ms_mode$/secure.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=secure,read_from_replica=balance diff --git a/ceph/qa/suites/krbd/rbd-nomount/ms_mode/.qa b/ceph/qa/suites/krbd/rbd-nomount/ms_mode/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/ceph/qa/suites/krbd/rbd-nomount/ms_mode/.qa @@ -0,0 +1 @@ +../.qa/ \ No newline at end of file diff --git a/ceph/qa/suites/krbd/rbd-nomount/ms_mode/crc.yaml b/ceph/qa/suites/krbd/rbd-nomount/ms_mode/crc.yaml new file mode 100644 index 000000000..3b072578f --- /dev/null +++ b/ceph/qa/suites/krbd/rbd-nomount/ms_mode/crc.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=crc diff --git a/ceph/qa/suites/krbd/rbd-nomount/ms_mode/legacy.yaml b/ceph/qa/suites/krbd/rbd-nomount/ms_mode/legacy.yaml new file mode 100644 index 000000000..0048dcb0c --- /dev/null +++ b/ceph/qa/suites/krbd/rbd-nomount/ms_mode/legacy.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=legacy diff --git a/ceph/qa/suites/krbd/rbd-nomount/ms_mode/secure.yaml b/ceph/qa/suites/krbd/rbd-nomount/ms_mode/secure.yaml new file mode 100644 index 000000000..a735db18d --- /dev/null +++ b/ceph/qa/suites/krbd/rbd-nomount/ms_mode/secure.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=secure diff --git a/ceph/qa/suites/krbd/rbd/ms_mode/.qa b/ceph/qa/suites/krbd/rbd/ms_mode/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/ceph/qa/suites/krbd/rbd/ms_mode/.qa @@ -0,0 +1 @@ +../.qa/ \ No newline at end of file diff --git a/ceph/qa/suites/krbd/rbd/ms_mode/crc.yaml b/ceph/qa/suites/krbd/rbd/ms_mode/crc.yaml new file mode 100644 index 000000000..3b072578f --- /dev/null +++ b/ceph/qa/suites/krbd/rbd/ms_mode/crc.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=crc diff --git 
a/ceph/qa/suites/krbd/rbd/ms_mode/legacy.yaml b/ceph/qa/suites/krbd/rbd/ms_mode/legacy.yaml new file mode 100644 index 000000000..0048dcb0c --- /dev/null +++ b/ceph/qa/suites/krbd/rbd/ms_mode/legacy.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=legacy diff --git a/ceph/qa/suites/krbd/rbd/ms_mode/secure.yaml b/ceph/qa/suites/krbd/rbd/ms_mode/secure.yaml new file mode 100644 index 000000000..a735db18d --- /dev/null +++ b/ceph/qa/suites/krbd/rbd/ms_mode/secure.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=secure diff --git a/ceph/qa/suites/krbd/singleton/ms_mode$/.qa b/ceph/qa/suites/krbd/singleton/ms_mode$/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/ceph/qa/suites/krbd/singleton/ms_mode$/.qa @@ -0,0 +1 @@ +../.qa/ \ No newline at end of file diff --git a/ceph/qa/suites/krbd/singleton/ms_mode$/crc.yaml b/ceph/qa/suites/krbd/singleton/ms_mode$/crc.yaml new file mode 100644 index 000000000..3b072578f --- /dev/null +++ b/ceph/qa/suites/krbd/singleton/ms_mode$/crc.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=crc diff --git a/ceph/qa/suites/krbd/singleton/ms_mode$/legacy.yaml b/ceph/qa/suites/krbd/singleton/ms_mode$/legacy.yaml new file mode 100644 index 000000000..0048dcb0c --- /dev/null +++ b/ceph/qa/suites/krbd/singleton/ms_mode$/legacy.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=legacy diff --git a/ceph/qa/suites/krbd/singleton/ms_mode$/prefer-crc.yaml b/ceph/qa/suites/krbd/singleton/ms_mode$/prefer-crc.yaml new file mode 100644 index 000000000..1054473af --- /dev/null +++ b/ceph/qa/suites/krbd/singleton/ms_mode$/prefer-crc.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=prefer-crc diff --git a/ceph/qa/suites/krbd/singleton/ms_mode$/secure.yaml b/ceph/qa/suites/krbd/singleton/ms_mode$/secure.yaml new file mode 100644 index 000000000..a735db18d --- /dev/null +++ b/ceph/qa/suites/krbd/singleton/ms_mode$/secure.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=secure diff --git a/ceph/qa/suites/krbd/thrash/ms_mode$/.qa b/ceph/qa/suites/krbd/thrash/ms_mode$/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/ceph/qa/suites/krbd/thrash/ms_mode$/.qa @@ -0,0 +1 @@ +../.qa/ \ No newline at end of file diff --git a/ceph/qa/suites/krbd/thrash/ms_mode$/crc.yaml b/ceph/qa/suites/krbd/thrash/ms_mode$/crc.yaml new file mode 100644 index 000000000..3b072578f --- /dev/null +++ b/ceph/qa/suites/krbd/thrash/ms_mode$/crc.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=crc diff --git a/ceph/qa/suites/krbd/thrash/ms_mode$/legacy.yaml b/ceph/qa/suites/krbd/thrash/ms_mode$/legacy.yaml new file mode 100644 index 000000000..0048dcb0c --- /dev/null +++ b/ceph/qa/suites/krbd/thrash/ms_mode$/legacy.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=legacy diff --git a/ceph/qa/suites/krbd/thrash/ms_mode$/prefer-crc.yaml b/ceph/qa/suites/krbd/thrash/ms_mode$/prefer-crc.yaml new file mode 100644 index 000000000..1054473af --- /dev/null +++ b/ceph/qa/suites/krbd/thrash/ms_mode$/prefer-crc.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=prefer-crc diff --git a/ceph/qa/suites/krbd/thrash/ms_mode$/secure.yaml b/ceph/qa/suites/krbd/thrash/ms_mode$/secure.yaml new file 
mode 100644 index 000000000..a735db18d --- /dev/null +++ b/ceph/qa/suites/krbd/thrash/ms_mode$/secure.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=secure diff --git a/ceph/qa/suites/krbd/wac/sysfs/tasks/stable_pages_required.yaml b/ceph/qa/suites/krbd/wac/sysfs/tasks/stable_pages_required.yaml deleted file mode 100644 index 3d23227a0..000000000 --- a/ceph/qa/suites/krbd/wac/sysfs/tasks/stable_pages_required.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tasks: -- workunit: - clients: - all: - - rbd/krbd_stable_pages_required.sh diff --git a/ceph/qa/suites/krbd/wac/sysfs/tasks/stable_writes.yaml b/ceph/qa/suites/krbd/wac/sysfs/tasks/stable_writes.yaml new file mode 100644 index 000000000..cd1ba930f --- /dev/null +++ b/ceph/qa/suites/krbd/wac/sysfs/tasks/stable_writes.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - rbd/krbd_stable_writes.sh diff --git a/ceph/qa/suites/rados/cephadm/smoke/start.yaml b/ceph/qa/suites/rados/cephadm/smoke/start.yaml index b3eb77e80..f45b922bc 100644 --- a/ceph/qa/suites/rados/cephadm/smoke/start.yaml +++ b/ceph/qa/suites/rados/cephadm/smoke/start.yaml @@ -11,3 +11,4 @@ tasks: - ceph orch ls - ceph orch host ls - ceph orch device ls + - ceph orch ls --format yaml diff --git a/ceph/qa/suites/rados/dashboard/tasks/dashboard.yaml b/ceph/qa/suites/rados/dashboard/tasks/dashboard.yaml index f210fc1c8..27f466ebd 100644 --- a/ceph/qa/suites/rados/dashboard/tasks/dashboard.yaml +++ b/ceph/qa/suites/rados/dashboard/tasks/dashboard.yaml @@ -22,6 +22,7 @@ tasks: - \(OSD_HOST_DOWN\) - \(POOL_APP_NOT_ENABLED\) - \(OSDMAP_FLAGS\) + - \(OSD_FLAGS\) - pauserd,pausewr flag\(s\) set - Monitor daemon marked osd\.[[:digit:]]+ down, but it is still running - evicting unresponsive client .+ diff --git a/ceph/qa/suites/rados/singleton/all/pg-autoscaler-progress-off.yaml b/ceph/qa/suites/rados/singleton/all/pg-autoscaler-progress-off.yaml new file mode 100644 index 000000000..042c3d78e --- /dev/null +++ b/ceph/qa/suites/rados/singleton/all/pg-autoscaler-progress-off.yaml @@ -0,0 +1,44 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - osd.3 + - client.0 +- - mon.b + - mon.c + - osd.4 + - osd.5 + - osd.6 + - osd.7 +openstack: + - volumes: # attached to each instance + count: 4 + size: 10 # GB +tasks: +- install: +- ceph: + create_rbd_pool: false + pre-mgr-commands: + - sudo ceph config set mgr mgr/devicehealth/enable_monitoring false --force + log-ignorelist: + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(POOL_ + - \(CACHE_POOL_ + - \(OBJECT_ + - \(SLOW_OPS\) + - \(REQUEST_SLOW\) + - \(TOO_FEW_PGS\) + - slow request +- exec: + client.0: + - ceph progress off + +- workunit: + clients: + all: + - mon/pg_autoscaler.sh diff --git a/ceph/qa/suites/rgw/multisite/overrides.yaml b/ceph/qa/suites/rgw/multisite/overrides.yaml index 54e3db24f..c9019e154 100644 --- a/ceph/qa/suites/rgw/multisite/overrides.yaml +++ b/ceph/qa/suites/rgw/multisite/overrides.yaml @@ -13,5 +13,6 @@ overrides: rgw curl low speed time: 300 rgw md log max shards: 4 rgw data log num shards: 4 + rgw sync obj etag verify: true rgw: compression type: random diff --git a/ceph/qa/suites/upgrade/nautilus-x-singleton/1-install/nautilus.yaml b/ceph/qa/suites/upgrade/nautilus-x-singleton/1-install/nautilus.yaml index 08133fe34..e869063ca 100644 --- a/ceph/qa/suites/upgrade/nautilus-x-singleton/1-install/nautilus.yaml +++ b/ceph/qa/suites/upgrade/nautilus-x-singleton/1-install/nautilus.yaml @@ -4,6 +4,7 @@ overrides: - \(MON_DOWN\) - \(MGR_DOWN\) 
- slow request + - evicting unresponsive client meta: - desc: install ceph/nautilus latest tasks: diff --git a/ceph/qa/suites/upgrade/nautilus-x/parallel/1-ceph-install/nautilus.yaml b/ceph/qa/suites/upgrade/nautilus-x/parallel/1-ceph-install/nautilus.yaml index 4ee426870..2bbbfa9d5 100644 --- a/ceph/qa/suites/upgrade/nautilus-x/parallel/1-ceph-install/nautilus.yaml +++ b/ceph/qa/suites/upgrade/nautilus-x/parallel/1-ceph-install/nautilus.yaml @@ -26,6 +26,7 @@ tasks: - Monitor daemon marked osd - Behind on trimming - Manager daemon + - evicting unresponsive client conf: global: mon warn on pool no app: false diff --git a/ceph/qa/suites/upgrade/nautilus-x/stress-split/1-ceph-install/nautilus.yaml b/ceph/qa/suites/upgrade/nautilus-x/stress-split/1-ceph-install/nautilus.yaml index 8e6f84534..59b666315 100644 --- a/ceph/qa/suites/upgrade/nautilus-x/stress-split/1-ceph-install/nautilus.yaml +++ b/ceph/qa/suites/upgrade/nautilus-x/stress-split/1-ceph-install/nautilus.yaml @@ -14,6 +14,8 @@ tasks: bluestore_warn_on_legacy_statfs: false bluestore warn on no per pool omap: false mon pg warn min per osd: 0 + log-whitelist: + - evicting unresponsive client - exec: osd.0: - ceph osd require-osd-release nautilus diff --git a/ceph/qa/tasks/ceph.py b/ceph/qa/tasks/ceph.py index 5cfffba52..3654ffa27 100644 --- a/ceph/qa/tasks/ceph.py +++ b/ceph/qa/tasks/ceph.py @@ -155,13 +155,12 @@ def ceph_log(ctx, config): while not self.stop_event.is_set(): self.stop_event.wait(timeout=30) try: - run.wait( - ctx.cluster.run( - args=['sudo', 'logrotate', '/etc/logrotate.d/ceph-test.conf' - ], - wait=False, - ) + procs = ctx.cluster.run( + args=['sudo', 'logrotate', '/etc/logrotate.d/ceph-test.conf'], + wait=False, + stderr=StringIO() ) + run.wait(procs) except exceptions.ConnectionLostError as e: # Some tests may power off nodes during test, in which # case we will see connection errors that we should ignore. 
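The ceph.py hunk above reworks the logrotate call so that stderr is captured, allowing a known transient failure to be tolerated instead of failing the whole log task (the follow-up hunk below adds the matching except branch). A self-contained sketch of the same pattern, using plain ``subprocess`` rather than teuthology's remote runner (so the helper name and command invocation here are illustrative only), might look like this::

    import logging
    import subprocess

    log = logging.getLogger(__name__)

    TRANSIENT_MSG = 'error: error renaming temp state file'

    def rotate_logs_tolerating_transient_errors():
        # run logrotate with stderr captured so the failure reason can be inspected
        proc = subprocess.run(
            ['sudo', 'logrotate', '/etc/logrotate.d/ceph-test.conf'],
            stderr=subprocess.PIPE, text=True)
        if proc.returncode != 0:
            if TRANSIENT_MSG in proc.stderr:
                # logrotate raced with another invocation; safe to skip this cycle
                log.info('ignoring transient logrotate state error: %s',
                         proc.stderr.strip())
            else:
                raise subprocess.CalledProcessError(
                    proc.returncode, proc.args, stderr=proc.stderr)

The design point is the same as in the task code: only the one known-benign message is swallowed, and any other non-zero exit is re-raised so real logrotate failures still surface.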
@@ -175,6 +174,14 @@ def ceph_log(ctx, config): log.debug("Missed logrotate, EOFError") except SSHException: log.debug("Missed logrotate, SSHException") + except run.CommandFailedError as e: + for p in procs: + if p.finished and p.exitstatus != 0: + err = p.stderr.getvalue() + if 'error: error renaming temp state file' in err: + log.info('ignoring transient state error: %s', e) + else: + raise except socket.error as e: if e.errno in (errno.EHOSTUNREACH, errno.ECONNRESET): log.debug("Missed logrotate, host unreachable") diff --git a/ceph/qa/tasks/cephadm.py b/ceph/qa/tasks/cephadm.py index a9bc2cbfc..aaf0e68ff 100644 --- a/ceph/qa/tasks/cephadm.py +++ b/ceph/qa/tasks/cephadm.py @@ -1317,13 +1317,17 @@ def registries_add_mirror_to_docker_io(conf, mirror): } else: v2 = config # type: ignore - dockers = [r for r in v2['registry'] if r['prefix'] == 'docker.io'] + dockers = [ + r for r in v2['registry'] if + r.get('prefix') == 'docker.io' or r.get('location') == 'docker.io' + ] if dockers: docker = dockers[0] - docker['mirror'] = [{ - "location": mirror, - "insecure": True, - }] + if 'mirror' not in docker: + docker['mirror'] = [{ + "location": mirror, + "insecure": True, + }] return v2 diff --git a/ceph/qa/tasks/cephfs/cephfs_test_case.py b/ceph/qa/tasks/cephfs/cephfs_test_case.py index 42d78f8ca..3b3263588 100644 --- a/ceph/qa/tasks/cephfs/cephfs_test_case.py +++ b/ceph/qa/tasks/cephfs/cephfs_test_case.py @@ -227,6 +227,9 @@ class CephFSTestCase(CephTestCase): def _session_by_id(self, session_ls): return dict([(s['id'], s) for s in session_ls]) + def perf_dump(self, rank=None, status=None): + return self.fs.rank_asok(['perf', 'dump'], rank=rank, status=status) + def wait_until_evicted(self, client_id, timeout=30): def is_client_evicted(): ls = self._session_list() diff --git a/ceph/qa/tasks/cephfs/mount.py b/ceph/qa/tasks/cephfs/mount.py index 7a833a118..7d9edda27 100644 --- a/ceph/qa/tasks/cephfs/mount.py +++ b/ceph/qa/tasks/cephfs/mount.py @@ -201,7 +201,7 @@ class CephFSMount(object): return self.run_shell(["bash", "-c", Raw(f"'{payload}'")], **kwargs) def run_shell(self, args, wait=True, stdin=None, check_status=True, - omit_sudo=True): + omit_sudo=True, timeout=10800): if isinstance(args, str): args = args.split() @@ -209,7 +209,8 @@ class CephFSMount(object): return self.client_remote.run(args=args, stdout=StringIO(), stderr=StringIO(), wait=wait, stdin=stdin, check_status=check_status, - omit_sudo=omit_sudo) + omit_sudo=omit_sudo, + timeout=timeout) def open_no_data(self, basename): """ diff --git a/ceph/qa/tasks/cephfs/test_client_limits.py b/ceph/qa/tasks/cephfs/test_client_limits.py index 51c3048bd..9cfda4254 100644 --- a/ceph/qa/tasks/cephfs/test_client_limits.py +++ b/ceph/qa/tasks/cephfs/test_client_limits.py @@ -156,6 +156,36 @@ class TestClientLimits(CephFSTestCase): else: raise RuntimeError("expected no client recall warning") + def test_cap_acquisition_throttle_readdir(self): + """ + Mostly readdir acquires caps faster than the mds recalls, so the cap + acquisition via readdir is throttled by retrying the readdir after + a fraction of second (0.5) by default when throttling condition is met. + """ + + max_caps_per_client = 500 + cap_acquisition_throttle = 250 + + self.config_set('mds', 'mds_max_caps_per_client', max_caps_per_client) + self.config_set('mds', 'mds_session_cap_acquisition_throttle', cap_acquisition_throttle) + + # Create 1500 files split across 6 directories, 250 each. 
+ for i in range(1, 7): + self.mount_a.create_n_files("dir{0}/file".format(i), cap_acquisition_throttle, sync=True) + + mount_a_client_id = self.mount_a.get_global_id() + + # recursive readdir + self.mount_a.run_shell_payload("find | wc") + + # validate cap_acquisition decay counter after readdir to exceed throttle count i.e 250 + cap_acquisition_value = self.get_session(mount_a_client_id)['cap_acquisition']['value'] + self.assertGreaterEqual(cap_acquisition_value, cap_acquisition_throttle) + + # validate the throttle condition to be hit atleast once + cap_acquisition_throttle_hit_count = self.perf_dump()['mds_server']['cap_acquisition_throttle'] + self.assertGreaterEqual(cap_acquisition_throttle_hit_count, 1) + def test_client_release_bug(self): """ When a client has a bug (which we will simulate) preventing it from releasing caps, diff --git a/ceph/qa/tasks/cephfs/test_nfs.py b/ceph/qa/tasks/cephfs/test_nfs.py index a90116c6b..0d26d9be1 100644 --- a/ceph/qa/tasks/cephfs/test_nfs.py +++ b/ceph/qa/tasks/cephfs/test_nfs.py @@ -425,12 +425,14 @@ class TestNFS(MgrTestCase): ''' self._test_create_cluster() info_output = json.loads(self._nfs_cmd('cluster', 'info', self.cluster_id)) + info_ip = info_output[self.cluster_id][0].pop("ip") host_details = {self.cluster_id: [{ "hostname": self._sys_cmd(['hostname']).decode("utf-8").strip(), - "ip": list(set(self._sys_cmd(['hostname', '-I']).decode("utf-8").split())), "port": 2049 }]} + host_ip = self._sys_cmd(['hostname', '-I']).decode("utf-8").split() self.assertDictEqual(info_output, host_details) + self.assertTrue(any([ip in info_ip for ip in host_ip])) self._test_delete_cluster() def test_cluster_set_reset_user_config(self): diff --git a/ceph/qa/tasks/cephfs/test_volume_client.py b/ceph/qa/tasks/cephfs/test_volume_client.py index 08dd2e2bb..3e6c7d63b 100644 --- a/ceph/qa/tasks/cephfs/test_volume_client.py +++ b/ceph/qa/tasks/cephfs/test_volume_client.py @@ -856,7 +856,7 @@ vc.disconnect() volume_id = "volumeid" # Create auth_id - out = self.fs.mon_manager.raw_cluster_cmd( + self.fs.mon_manager.raw_cluster_cmd( "auth", "get-or-create", "client.guest1", "mds", "allow *", "osd", "allow rw", @@ -918,7 +918,7 @@ vc.disconnect() volume_id = "volumeid" # Create auth_id - out = self.fs.mon_manager.raw_cluster_cmd( + self.fs.mon_manager.raw_cluster_cmd( "auth", "get-or-create", "client.guest1", "mds", "allow *", "osd", "allow rw", diff --git a/ceph/qa/tasks/cephfs/test_volumes.py b/ceph/qa/tasks/cephfs/test_volumes.py index 7984cea92..ed78775b6 100644 --- a/ceph/qa/tasks/cephfs/test_volumes.py +++ b/ceph/qa/tasks/cephfs/test_volumes.py @@ -5,9 +5,14 @@ import errno import random import logging import collections +import uuid +import unittest +from hashlib import md5 +from textwrap import dedent from tasks.cephfs.cephfs_test_case import CephFSTestCase from teuthology.exceptions import CommandFailedError +from teuthology.misc import sudo_write_file log = logging.getLogger(__name__) @@ -56,9 +61,22 @@ class TestVolumes(CephFSTestCase): def _check_clone_canceled(self, clone, clone_group=None): self.__check_clone_state("canceled", clone, clone_group, timo=1) - def _verify_clone_attrs(self, subvolume, clone, source_group=None, clone_group=None): - path1 = self._get_subvolume_path(self.volname, subvolume, group_name=source_group) - path2 = self._get_subvolume_path(self.volname, clone, group_name=clone_group) + def _get_subvolume_snapshot_path(self, subvolume, snapshot, source_group, subvol_path, source_version): + if source_version == 2: + # v2 + if subvol_path 
is not None: + (base_path, uuid_str) = os.path.split(subvol_path) + else: + (base_path, uuid_str) = os.path.split(self._get_subvolume_path(self.volname, subvolume, group_name=source_group)) + return os.path.join(base_path, ".snap", snapshot, uuid_str) + + # v1 + base_path = self._get_subvolume_path(self.volname, subvolume, group_name=source_group) + return os.path.join(base_path, ".snap", snapshot) + + def _verify_clone_attrs(self, source_path, clone_path): + path1 = source_path + path2 = clone_path p = self.mount_a.run_shell(["find", path1]) paths = p.stdout.getvalue().strip().split() @@ -92,12 +110,38 @@ class TestVolumes(CephFSTestCase): cval = int(self.mount_a.run_shell(['stat', '-c' '%Y', sink_path]).stdout.getvalue().strip()) self.assertEqual(sval, cval) - def _verify_clone(self, subvolume, clone, source_group=None, clone_group=None, timo=120): - path1 = self._get_subvolume_path(self.volname, subvolume, group_name=source_group) + def _verify_clone_root(self, source_path, clone_path, clone, clone_group, clone_pool): + # verifies following clone root attrs quota, data_pool and pool_namespace + # remaining attributes of clone root are validated in _verify_clone_attrs + + clone_info = json.loads(self._get_subvolume_info(self.volname, clone, clone_group)) + + # verify quota is inherited from source snapshot + src_quota = self.mount_a.getfattr(source_path, "ceph.quota.max_bytes") + self.assertEqual(clone_info["bytes_quota"], "infinite" if src_quota is None else int(src_quota)) + + if clone_pool: + # verify pool is set as per request + self.assertEqual(clone_info["data_pool"], clone_pool) + else: + # verify pool and pool namespace are inherited from snapshot + self.assertEqual(clone_info["data_pool"], + self.mount_a.getfattr(source_path, "ceph.dir.layout.pool")) + self.assertEqual(clone_info["pool_namespace"], + self.mount_a.getfattr(source_path, "ceph.dir.layout.pool_namespace")) + + def _verify_clone(self, subvolume, snapshot, clone, + source_group=None, clone_group=None, clone_pool=None, + subvol_path=None, source_version=2, timo=120): + # pass in subvol_path (subvolume path when snapshot was taken) when subvolume is removed + # but snapshots are retained for clone verification + path1 = self._get_subvolume_snapshot_path(subvolume, snapshot, source_group, subvol_path, source_version) path2 = self._get_subvolume_path(self.volname, clone, group_name=clone_group) check = 0 - while check < timo: + # TODO: currently snapshot rentries are not stable if snapshot source entries + # are removed, https://tracker.ceph.com/issues/46747 + while check < timo and subvol_path is None: val1 = int(self.mount_a.getfattr(path1, "ceph.dir.rentries")) val2 = int(self.mount_a.getfattr(path2, "ceph.dir.rentries")) if val1 == val2: @@ -106,7 +150,8 @@ class TestVolumes(CephFSTestCase): time.sleep(1) self.assertTrue(check < timo) - self._verify_clone_attrs(subvolume, clone, source_group=source_group, clone_group=clone_group) + self._verify_clone_root(path1, path2, clone, clone_group, clone_pool) + self._verify_clone_attrs(path1, path2) def _generate_random_volume_name(self, count=1): n = self.volume_start @@ -184,6 +229,25 @@ class TestVolumes(CephFSTestCase): def _delete_test_volume(self): self._fs_cmd("volume", "rm", self.volname, "--yes-i-really-mean-it") + def _do_subvolume_pool_and_namespace_update(self, subvolume, pool=None, pool_namespace=None, subvolume_group=None): + subvolpath = self._get_subvolume_path(self.volname, subvolume, group_name=subvolume_group) + + if pool is not None: + 
self.mount_a.setfattr(subvolpath, 'ceph.dir.layout.pool', pool) + + if pool_namespace is not None: + self.mount_a.setfattr(subvolpath, 'ceph.dir.layout.pool_namespace', pool_namespace) + + def _do_subvolume_attr_update(self, subvolume, uid, gid, mode, subvolume_group=None): + subvolpath = self._get_subvolume_path(self.volname, subvolume, group_name=subvolume_group) + + # mode + self.mount_a.run_shell(['chmod', mode, subvolpath]) + + # ownership + self.mount_a.run_shell(['chown', uid, subvolpath]) + self.mount_a.run_shell(['chgrp', gid, subvolpath]) + def _do_subvolume_io(self, subvolume, subvolume_group=None, create_dir=None, number_of_files=DEFAULT_NUMBER_OF_FILES, file_size=DEFAULT_FILE_SIZE): # get subvolume path for IO @@ -228,6 +292,60 @@ class TestVolumes(CephFSTestCase): trashdir = os.path.join("./", "volumes", "_deleting") self.mount_a.wait_for_dir_empty(trashdir, timeout=timeout) + def _assert_meta_location_and_version(self, vol_name, subvol_name, subvol_group=None, version=2, legacy=False): + if legacy: + subvol_path = self._get_subvolume_path(vol_name, subvol_name, group_name=subvol_group) + m = md5() + m.update(("/"+subvol_path).encode('utf-8')) + meta_filename = "{0}.meta".format(m.digest().hex()) + metapath = os.path.join(".", "volumes", "_legacy", meta_filename) + else: + group = subvol_group if subvol_group is not None else '_nogroup' + metapath = os.path.join(".", "volumes", group, subvol_name, ".meta") + + out = self.mount_a.run_shell(['cat', metapath]) + lines = out.stdout.getvalue().strip().split('\n') + sv_version = -1 + for line in lines: + if line == "version = " + str(version): + sv_version = version + break + self.assertEqual(sv_version, version, "version expected was '{0}' but got '{1}' from meta file at '{2}'".format( + version, sv_version, metapath)) + + def _create_v1_subvolume(self, subvol_name, subvol_group=None, has_snapshot=True, subvol_type='subvolume', state='complete'): + group = subvol_group if subvol_group is not None else '_nogroup' + basepath = os.path.join("volumes", group, subvol_name) + uuid_str = str(uuid.uuid4()) + createpath = os.path.join(basepath, uuid_str) + self.mount_a.run_shell(['mkdir', '-p', createpath]) + + # create a v1 snapshot, to prevent auto upgrades + if has_snapshot: + snappath = os.path.join(createpath, ".snap", "fake") + self.mount_a.run_shell(['mkdir', '-p', snappath]) + + # add required xattrs to subvolume + default_pool = self.mount_a.getfattr(".", "ceph.dir.layout.pool") + self.mount_a.setfattr(createpath, 'ceph.dir.layout.pool', default_pool) + + # create a v1 .meta file + meta_contents = "[GLOBAL]\nversion = 1\ntype = {0}\npath = {1}\nstate = {2}\n".format(subvol_type, "/" + createpath, state) + if state == 'pending': + # add a fake clone source + meta_contents = meta_contents + '[source]\nvolume = fake\nsubvolume = fake\nsnapshot = fake\n' + meta_filepath1 = os.path.join(self.mount_a.mountpoint, basepath, ".meta") + sudo_write_file(self.mount_a.client_remote, meta_filepath1, meta_contents) + return createpath + + def _update_fake_trash(self, subvol_name, subvol_group=None, trash_name='fake', create=True): + group = subvol_group if subvol_group is not None else '_nogroup' + trashpath = os.path.join("volumes", group, subvol_name, '.trash', trash_name) + if create: + self.mount_a.run_shell(['mkdir', '-p', trashpath]) + else: + self.mount_a.run_shell(['rmdir', trashpath]) + def setUp(self): super(TestVolumes, self).setUp() self.volname = None @@ -308,6 +426,8 @@ class TestVolumes(CephFSTestCase): That the volume can only be 
removed when --yes-i-really-mean-it is used and verify that the deleted volume is not listed anymore. """ + for m in self.mounts: + m.umount_wait() try: self._fs_cmd("volume", "rm", self.volname) except CommandFailedError as ce: @@ -325,11 +445,49 @@ class TestVolumes(CephFSTestCase): else: raise RuntimeError("expected the 'fs volume rm' command to fail.") + def test_subvolume_marked(self): + """ + ensure a subvolume is marked with the ceph.dir.subvolume xattr + """ + subvolume = self._generate_random_subvolume_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # getpath + subvolpath = self._get_subvolume_path(self.volname, subvolume) + + # subdirectory of a subvolume cannot be moved outside the subvolume once marked with + # the xattr ceph.dir.subvolume, hence test by attempting to rename subvol path (incarnation) + # outside the subvolume + dstpath = os.path.join(self.mount_a.mountpoint, 'volumes', '_nogroup', 'new_subvol_location') + srcpath = os.path.join(self.mount_a.mountpoint, subvolpath) + rename_script = dedent(""" + import os + import errno + try: + os.rename("{src}", "{dst}") + except OSError as e: + if e.errno != errno.EXDEV: + raise RuntimeError("invalid error code on renaming subvolume incarnation out of subvolume directory") + else: + raise RuntimeError("expected renaming subvolume incarnation out of subvolume directory to fail") + """) + self.mount_a.run_python(rename_script.format(src=srcpath, dst=dstpath)) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + def test_volume_rm_arbitrary_pool_removal(self): """ That the arbitrary pool added to the volume out of band is removed successfully on volume removal. """ + for m in self.mounts: + m.umount_wait() new_pool = "new_pool" # add arbitrary data pool self.fs.add_data_pool(new_pool) @@ -351,6 +509,8 @@ class TestVolumes(CephFSTestCase): That the volume can only be removed when mon_allowd_pool_delete is set to true and verify that the pools are removed after volume deletion. """ + for m in self.mounts: + m.umount_wait() self.config_set('mon', 'mon_allow_pool_delete', False) try: self._fs_cmd("volume", "rm", self.volname, "--yes-i-really-mean-it") @@ -420,6 +580,12 @@ class TestVolumes(CephFSTestCase): size = int(self.mount_a.getfattr(subvolpath, "ceph.quota.max_bytes")) self.assertEqual(size, nsize) + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolname) + + # verify trash dir is clean + self._wait_for_trash_empty() + def test_subvolume_shrink(self): """ That a subvolume can be shrinked in size and its quota matches the expected size. 
@@ -442,6 +608,12 @@ class TestVolumes(CephFSTestCase): size = int(self.mount_a.getfattr(subvolpath, "ceph.quota.max_bytes")) self.assertEqual(size, nsize) + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolname) + + # verify trash dir is clean + self._wait_for_trash_empty() + def test_subvolume_resize_fail_invalid_size(self): """ That a subvolume cannot be resized to an invalid size and the quota did not change @@ -461,15 +633,20 @@ class TestVolumes(CephFSTestCase): try: self._fs_cmd("subvolume", "resize", self.volname, subvolname, str(nsize)) except CommandFailedError as ce: - if ce.exitstatus != errno.EINVAL: - raise + self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on resize of subvolume with invalid size") else: - raise RuntimeError("expected the 'fs subvolume resize' command to fail") + self.fail("expected the 'fs subvolume resize' command to fail") # verify the quota did not change size = int(self.mount_a.getfattr(subvolpath, "ceph.quota.max_bytes")) self.assertEqual(size, osize) + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolname) + + # verify trash dir is clean + self._wait_for_trash_empty() + def test_subvolume_resize_fail_zero_size(self): """ That a subvolume cannot be resized to a zero size and the quota did not change @@ -489,15 +666,20 @@ class TestVolumes(CephFSTestCase): try: self._fs_cmd("subvolume", "resize", self.volname, subvolname, str(nsize)) except CommandFailedError as ce: - if ce.exitstatus != errno.EINVAL: - raise + self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on resize of subvolume with invalid size") else: - raise RuntimeError("expected the 'fs subvolume resize' command to fail") + self.fail("expected the 'fs subvolume resize' command to fail") # verify the quota did not change size = int(self.mount_a.getfattr(subvolpath, "ceph.quota.max_bytes")) self.assertEqual(size, osize) + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolname) + + # verify trash dir is clean + self._wait_for_trash_empty() + def test_subvolume_resize_quota_lt_used_size(self): """ That a subvolume can be resized to a size smaller than the current used size @@ -531,12 +713,18 @@ class TestVolumes(CephFSTestCase): try: self._fs_cmd("subvolume", "resize", self.volname, subvolname, str(nsize)) except CommandFailedError: - raise RuntimeError("expected the 'fs subvolume resize' command to succeed") + self.fail("expected the 'fs subvolume resize' command to succeed") # verify the quota size = int(self.mount_a.getfattr(subvolpath, "ceph.quota.max_bytes")) self.assertEqual(size, nsize) + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolname) + + # verify trash dir is clean + self._wait_for_trash_empty() + def test_subvolume_resize_fail_quota_lt_used_size_no_shrink(self): """ @@ -571,15 +759,20 @@ class TestVolumes(CephFSTestCase): try: self._fs_cmd("subvolume", "resize", self.volname, subvolname, str(nsize), "--no_shrink") except CommandFailedError as ce: - if ce.exitstatus != errno.EINVAL: - raise + self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on resize of subvolume with invalid size") else: - raise RuntimeError("expected the 'fs subvolume resize' command to fail") + self.fail("expected the 'fs subvolume resize' command to fail") # verify the quota did not change size = int(self.mount_a.getfattr(subvolpath, "ceph.quota.max_bytes")) self.assertEqual(size, osize) + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolname) + 
+ # verify trash dir is clean + self._wait_for_trash_empty() + def test_subvolume_resize_expand_on_full_subvolume(self): """ That the subvolume can be expanded from a full subvolume and future writes succeed. @@ -617,12 +810,18 @@ class TestVolumes(CephFSTestCase): try: self.mount_a.write_n_mb(os.path.join(subvolpath, filename), file_size) except CommandFailedError: - raise RuntimeError("expected filling subvolume {0} with {1} file of size {2}MB" + self.fail("expected filling subvolume {0} with {1} file of size {2}MB" "to succeed".format(subvolname, number_of_files, file_size)) else: - raise RuntimeError("expected filling subvolume {0} with {1} file of size {2}MB" + self.fail("expected filling subvolume {0} with {1} file of size {2}MB" "to fail".format(subvolname, number_of_files, file_size)) + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolname) + + # verify trash dir is clean + self._wait_for_trash_empty() + def test_subvolume_create_idempotence(self): # create subvolume subvolume = self._generate_random_subvolume_name() @@ -668,6 +867,12 @@ class TestVolumes(CephFSTestCase): self._get_subtrees(status=status, rank=1) self._wait_subtrees([(path, 1)], status=status) + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + def test_subvolumegroup_pin_distributed(self): self.fs.set_max_mds(2) status = self.fs.wait_for_daemons() @@ -683,6 +888,13 @@ class TestVolumes(CephFSTestCase): self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) self._wait_distributed_subtrees(10, status=status) + # remove subvolumes + for subvolume in subvolumes: + self._fs_cmd("subvolume", "rm", self.volname, subvolume, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + def test_subvolume_pin_random(self): self.fs.set_max_mds(2) self.fs.wait_for_daemons() @@ -693,6 +905,12 @@ class TestVolumes(CephFSTestCase): self._fs_cmd("subvolume", "pin", self.volname, subvolume, "random", ".01") # no verification + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + def test_subvolume_create_isolated_namespace(self): """ Create subvolume in separate rados namespace @@ -720,10 +938,12 @@ class TestVolumes(CephFSTestCase): try: self._fs_cmd("subvolume", "create", self.volname, subvolume, "--pool_layout", data_pool) except CommandFailedError as ce: - if ce.exitstatus != errno.EINVAL: - raise + self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on create of subvolume with invalid pool layout") else: - raise RuntimeError("expected the 'fs subvolume create' command to fail") + self.fail("expected the 'fs subvolume create' command to fail") + + # verify trash dir is clean + self._wait_for_trash_empty() def test_subvolume_rm_force(self): # test removing non-existing subvolume with --force @@ -731,7 +951,7 @@ class TestVolumes(CephFSTestCase): try: self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--force") except CommandFailedError: - raise RuntimeError("expected the 'fs subvolume rm --force' command to succeed") + self.fail("expected the 'fs subvolume rm --force' command to succeed") def test_subvolume_create_with_auto_cleanup_on_fail(self): subvolume = self._generate_random_subvolume_name() @@ -744,10 +964,12 @@ class TestVolumes(CephFSTestCase): try: self._fs_cmd("subvolume", "getpath", self.volname, subvolume) except CommandFailedError as ce: - if 
ce.exitstatus != errno.ENOENT: - raise + self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on getpath of non-existent subvolume") else: - raise RuntimeError("expected the 'fs subvolume getpath' command to fail") + self.fail("expected the 'fs subvolume getpath' command to fail") + + # verify trash dir is clean + self._wait_for_trash_empty() def test_subvolume_create_with_invalid_size(self): # create subvolume with an invalid size -1 @@ -755,10 +977,12 @@ class TestVolumes(CephFSTestCase): try: self._fs_cmd("subvolume", "create", self.volname, subvolume, "--size", "-1") except CommandFailedError as ce: - if ce.exitstatus != errno.EINVAL: - raise + self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on create of subvolume with invalid size") else: - raise RuntimeError("expected the 'fs subvolume create' command to fail") + self.fail("expected the 'fs subvolume create' command to fail") + + # verify trash dir is clean + self._wait_for_trash_empty() def test_nonexistent_subvolume_rm(self): # remove non-existing subvolume @@ -803,6 +1027,9 @@ class TestVolumes(CephFSTestCase): # remove subvolume self._fs_cmd("subvolume", "rm", self.volname, subvolume) + # verify trash dir is clean + self._wait_for_trash_empty() + def test_subvolume_ls(self): # tests the 'fs subvolume ls' command @@ -816,11 +1043,18 @@ class TestVolumes(CephFSTestCase): # list subvolumes subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname)) if len(subvolumels) == 0: - raise RuntimeError("Expected the 'fs subvolume ls' command to list the created subvolumes.") + self.fail("Expected the 'fs subvolume ls' command to list the created subvolumes.") else: subvolnames = [subvolume['name'] for subvolume in subvolumels] if collections.Counter(subvolnames) != collections.Counter(subvolumes): - raise RuntimeError("Error creating or listing subvolumes") + self.fail("Error creating or listing subvolumes") + + # remove subvolume + for subvolume in subvolumes: + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() def test_subvolume_ls_for_notexistent_default_group(self): # tests the 'fs subvolume ls' command when the default group '_nogroup' doesn't exist @@ -853,6 +1087,12 @@ class TestVolumes(CephFSTestCase): size = self.mount_a.getfattr(subvolpath, "ceph.quota.max_bytes") self.assertEqual(size, None) + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolname) + + # verify trash dir is clean + self._wait_for_trash_empty() + def test_subvolume_resize_infinite_size_future_writes(self): """ That a subvolume can be resized to an infinite size and the future writes succeed. 
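For readers following the quota-related tests above and below: subvolume size limits are enforced through the ceph.quota.max_bytes extended attribute on the subvolume directory, and resizing with "inf"/"infinite" clears that limit, which is why the tests expect getfattr to return nothing afterwards. A minimal standalone sketch of the same check, outside the teuthology harness, is shown here; it assumes a client mount at /mnt/cephfs, a volume "a" with subvolume "sub0", and the ceph and getfattr binaries on PATH (all of these names are illustrative, not part of the patch).

# Illustrative sketch only (not part of the upstream patch). Assumes a CephFS
# client mount at /mnt/cephfs, a volume named "a" with subvolume "sub0", and
# the `ceph` and `getfattr` binaries available on PATH.
import subprocess

MOUNT = "/mnt/cephfs"
VOL, SUBVOL = "a", "sub0"

def ceph(*args):
    # thin wrapper around the ceph CLI, returning trimmed stdout
    return subprocess.check_output(("ceph",) + args, text=True).strip()

# resolve the subvolume path (e.g. /volumes/_nogroup/sub0/<uuid>) and
# resize it to an unlimited size, mirroring what the tests exercise
subvolpath = ceph("fs", "subvolume", "getpath", VOL, SUBVOL)
ceph("fs", "subvolume", "resize", VOL, SUBVOL, "inf")

# with no limit set, the quota xattr is absent, so getfattr exits non-zero;
# this mirrors the tests' expectation that getfattr(...) returns None
res = subprocess.run(
    ["getfattr", "-n", "ceph.quota.max_bytes", "--only-values", MOUNT + subvolpath],
    capture_output=True, text=True)
print("quota still set?", res.returncode == 0 and res.stdout.strip() != "")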
@@ -885,15 +1125,21 @@ class TestVolumes(CephFSTestCase): try: self.mount_a.write_n_mb(os.path.join(subvolpath, filename), file_size) except CommandFailedError: - raise RuntimeError("expected filling subvolume {0} with {1} file of size {2}MB " + self.fail("expected filling subvolume {0} with {1} file of size {2}MB " "to succeed".format(subvolname, number_of_files, file_size)) + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolname) + + # verify trash dir is clean + self._wait_for_trash_empty() + def test_subvolume_info(self): # tests the 'fs subvolume info' command subvol_md = ["atime", "bytes_pcent", "bytes_quota", "bytes_used", "created_at", "ctime", "data_pool", "gid", "mode", "mon_addrs", "mtime", "path", "pool_namespace", - "type", "uid", "features"] + "type", "uid", "features", "state"] # create subvolume subvolume = self._generate_random_subvolume_name() @@ -901,17 +1147,17 @@ class TestVolumes(CephFSTestCase): # get subvolume metadata subvol_info = json.loads(self._get_subvolume_info(self.volname, subvolume)) - self.assertNotEqual(len(subvol_info), 0, "expected the 'fs subvolume info' command to list metadata of subvolume") for md in subvol_md: - self.assertIn(md, subvol_info.keys(), "'{0}' key not present in metadata of subvolume".format(md)) + self.assertIn(md, subvol_info, "'{0}' key not present in metadata of subvolume".format(md)) self.assertEqual(subvol_info["bytes_pcent"], "undefined", "bytes_pcent should be set to undefined if quota is not set") self.assertEqual(subvol_info["bytes_quota"], "infinite", "bytes_quota should be set to infinite if quota is not set") self.assertEqual(subvol_info["pool_namespace"], "", "expected pool namespace to be empty") + self.assertEqual(subvol_info["state"], "complete", "expected state to be complete") - self.assertEqual(len(subvol_info["features"]), 2, - msg="expected 2 features, found '{0}' ({1})".format(len(subvol_info["features"]), subvol_info["features"])) - for feature in ['snapshot-clone', 'snapshot-autoprotect']: + self.assertEqual(len(subvol_info["features"]), 3, + msg="expected 3 features, found '{0}' ({1})".format(len(subvol_info["features"]), subvol_info["features"])) + for feature in ['snapshot-clone', 'snapshot-autoprotect', 'snapshot-retention']: self.assertIn(feature, subvol_info["features"], msg="expected feature '{0}' in subvolume".format(feature)) nsize = self.DEFAULT_FILE_SIZE*1024*1024 @@ -919,15 +1165,17 @@ class TestVolumes(CephFSTestCase): # get subvolume metadata after quota set subvol_info = json.loads(self._get_subvolume_info(self.volname, subvolume)) - self.assertNotEqual(len(subvol_info), 0, "expected the 'fs subvolume info' command to list metadata of subvolume") + for md in subvol_md: + self.assertIn(md, subvol_info, "'{0}' key not present in metadata of subvolume".format(md)) self.assertNotEqual(subvol_info["bytes_pcent"], "undefined", "bytes_pcent should not be set to undefined if quota is not set") - self.assertNotEqual(subvol_info["bytes_quota"], "infinite", "bytes_quota should not be set to infinite if quota is not set") + self.assertEqual(subvol_info["bytes_quota"], nsize, "bytes_quota should be set to '{0}'".format(nsize)) self.assertEqual(subvol_info["type"], "subvolume", "type should be set to subvolume") + self.assertEqual(subvol_info["state"], "complete", "expected state to be complete") - self.assertEqual(len(subvol_info["features"]), 2, - msg="expected 2 features, found '{0}' ({1})".format(len(subvol_info["features"]), subvol_info["features"])) - for feature in 
['snapshot-clone', 'snapshot-autoprotect']: + self.assertEqual(len(subvol_info["features"]), 3, + msg="expected 3 features, found '{0}' ({1})".format(len(subvol_info["features"]), subvol_info["features"])) + for feature in ['snapshot-clone', 'snapshot-autoprotect', 'snapshot-retention']: self.assertIn(feature, subvol_info["features"], msg="expected feature '{0}' in subvolume".format(feature)) # remove subvolumes @@ -1095,6 +1343,9 @@ class TestVolumes(CephFSTestCase): self._fs_cmd("subvolume", "rm", self.volname, subvol1, group) self._fs_cmd("subvolumegroup", "rm", self.volname, group) + # verify trash dir is clean + self._wait_for_trash_empty() + def test_subvolume_group_create_with_desired_mode(self): group1, group2 = self._generate_random_group_name(2) # default mode @@ -1178,6 +1429,9 @@ class TestVolumes(CephFSTestCase): self._fs_cmd("subvolume", "rm", self.volname, subvol3, group) self._fs_cmd("subvolumegroup", "rm", self.volname, group) + # verify trash dir is clean + self._wait_for_trash_empty() + def test_subvolume_create_with_desired_uid_gid(self): """ That the subvolume can be created with the desired uid and gid and its uid and gid matches the @@ -1203,6 +1457,9 @@ class TestVolumes(CephFSTestCase): # remove subvolume self._fs_cmd("subvolume", "rm", self.volname, subvolname) + # verify trash dir is clean + self._wait_for_trash_empty() + def test_nonexistent_subvolume_group_rm(self): group = "non_existent_group" @@ -1286,10 +1543,10 @@ class TestVolumes(CephFSTestCase): tests the 'fs subvolume snapshot info' command """ - snap_metadata = ["created_at", "data_pool", "has_pending_clones", "size"] + snap_md = ["created_at", "data_pool", "has_pending_clones", "size"] subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() + snapshot, snap_missing = self._generate_random_snapshot_name(2) # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume) @@ -1301,12 +1558,18 @@ class TestVolumes(CephFSTestCase): self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) snap_info = json.loads(self._get_subvolume_snapshot_info(self.volname, subvolume, snapshot)) - self.assertNotEqual(len(snap_info), 0) - for md in snap_metadata: - if md not in snap_info: - raise RuntimeError("%s not present in the metadata of subvolume snapshot" % md) + for md in snap_md: + self.assertIn(md, snap_info, "'{0}' key not present in metadata of snapshot".format(md)) self.assertEqual(snap_info["has_pending_clones"], "no") + # snapshot info for non-existent snapshot + try: + self._get_subvolume_snapshot_info(self.volname, subvolume, snap_missing) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on snapshot info of non-existent snapshot") + else: + self.fail("expected snapshot info of non-existent snapshot to fail") + # remove snapshot self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) @@ -1419,12 +1682,41 @@ class TestVolumes(CephFSTestCase): subvolsnapshotls = json.loads(self._fs_cmd('subvolume', 'snapshot', 'ls', self.volname, subvolume)) if len(subvolsnapshotls) == 0: - raise RuntimeError("Expected the 'fs subvolume snapshot ls' command to list the created subvolume snapshots") + self.fail("Expected the 'fs subvolume snapshot ls' command to list the created subvolume snapshots") else: snapshotnames = [snapshot['name'] for snapshot in subvolsnapshotls] if collections.Counter(snapshotnames) != collections.Counter(snapshots): - raise 
RuntimeError("Error creating or listing subvolume snapshots") + self.fail("Error creating or listing subvolume snapshots") + + # remove snapshot + for snapshot in snapshots: + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_group_snapshot_unsupported_status(self): + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # snapshot group + try: + self._fs_cmd("subvolumegroup", "snapshot", "create", self.volname, group, snapshot) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOSYS, "invalid error code on subvolumegroup snapshot create") + else: + self.fail("expected subvolumegroup snapshot create command to fail") + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + @unittest.skip("skipping subvolumegroup snapshot tests") def test_subvolume_group_snapshot_create_and_rm(self): subvolume = self._generate_random_subvolume_name() group = self._generate_random_group_name() @@ -1451,6 +1743,7 @@ class TestVolumes(CephFSTestCase): # remove group self._fs_cmd("subvolumegroup", "rm", self.volname, group) + @unittest.skip("skipping subvolumegroup snapshot tests") def test_subvolume_group_snapshot_idempotence(self): subvolume = self._generate_random_subvolume_name() group = self._generate_random_group_name() @@ -1480,6 +1773,7 @@ class TestVolumes(CephFSTestCase): # remove group self._fs_cmd("subvolumegroup", "rm", self.volname, group) + @unittest.skip("skipping subvolumegroup snapshot tests") def test_nonexistent_subvolume_group_snapshot_rm(self): subvolume = self._generate_random_subvolume_name() group = self._generate_random_group_name() @@ -1515,6 +1809,7 @@ class TestVolumes(CephFSTestCase): # remove group self._fs_cmd("subvolumegroup", "rm", self.volname, group) + @unittest.skip("skipping subvolumegroup snapshot tests") def test_subvolume_group_snapshot_rm_force(self): # test removing non-existing subvolume group snapshot with --force group = self._generate_random_group_name() @@ -1525,6 +1820,7 @@ class TestVolumes(CephFSTestCase): except CommandFailedError: raise RuntimeError("expected the 'fs subvolumegroup snapshot rm --force' command to succeed") + @unittest.skip("skipping subvolumegroup snapshot tests") def test_subvolume_group_snapshot_ls(self): # tests the 'fs subvolumegroup snapshot ls' command @@ -1583,11 +1879,12 @@ class TestVolumes(CephFSTestCase): self.mgr_cluster.mgr_fail(mgr) self.wait_until_evicted(sessions[0]['id']) - def test_subvolume_upgrade(self): + def test_subvolume_upgrade_legacy_to_v1(self): """ poor man's upgrade test -- rather than going through a full upgrade cycle, emulate subvolumes by going through the wormhole and verify if they are accessible. + further ensure that a legacy volume is not updated to v2. 
""" subvolume1, subvolume2 = self._generate_random_subvolume_name(2) group = self._generate_random_group_name() @@ -1614,6 +1911,10 @@ class TestVolumes(CephFSTestCase): self.assertEqual(createpath1[1:], subvolpath1) self.assertEqual(createpath2[1:], subvolpath2) + # ensure metadata file is in legacy location, with required version v1 + self._assert_meta_location_and_version(self.volname, subvolume1, version=1, legacy=True) + self._assert_meta_location_and_version(self.volname, subvolume2, subvol_group=group, version=1, legacy=True) + # remove subvolume self._fs_cmd("subvolume", "rm", self.volname, subvolume1) self._fs_cmd("subvolume", "rm", self.volname, subvolume2, group) @@ -1624,78 +1925,785 @@ class TestVolumes(CephFSTestCase): # remove group self._fs_cmd("subvolumegroup", "rm", self.volname, group) - def test_subvolume_rm_with_snapshots(self): + def test_subvolume_no_upgrade_v1_sanity(self): + """ + poor man's upgrade test -- theme continues... + + This test is to ensure v1 subvolumes are retained as is, due to a snapshot being present, and runs through + a series of operations on the v1 subvolume to ensure they work as expected. + """ + subvol_md = ["atime", "bytes_pcent", "bytes_quota", "bytes_used", "created_at", "ctime", + "data_pool", "gid", "mode", "mon_addrs", "mtime", "path", "pool_namespace", + "type", "uid", "features", "state"] + snap_md = ["created_at", "data_pool", "has_pending_clones", "size"] + subvolume = self._generate_random_subvolume_name() snapshot = self._generate_random_snapshot_name() + clone1, clone2 = self._generate_random_clone_name(2) + mode = "777" + uid = "1000" + gid = "1000" - # create subvolume - self._fs_cmd("subvolume", "create", self.volname, subvolume) - - # snapshot subvolume - self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + # emulate a v1 subvolume -- in the default group + subvolume_path = self._create_v1_subvolume(subvolume) - # remove subvolume -- should fail with ENOTEMPTY since it has snapshots - try: - self._fs_cmd("subvolume", "rm", self.volname, subvolume) - except CommandFailedError as ce: - if ce.exitstatus != errno.ENOTEMPTY: - raise RuntimeError("invalid error code returned when deleting subvolume with snapshots") - else: - raise RuntimeError("expected subvolume deletion to fail") + # getpath + subvolpath = self._get_subvolume_path(self.volname, subvolume) + self.assertEqual(subvolpath, subvolume_path) - # remove snapshot - self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + # ls + subvolumes = json.loads(self._fs_cmd('subvolume', 'ls', self.volname)) + self.assertEqual(len(subvolumes), 1, "subvolume ls count mismatch, expected '1', found {0}".format(len(subvolumes))) + self.assertEqual(subvolumes[0]['name'], subvolume, + "subvolume name mismatch in ls output, expected '{0}', found '{1}'".format(subvolume, subvolumes[0]['name'])) - # remove subvolume - self._fs_cmd("subvolume", "rm", self.volname, subvolume) + # info + subvol_info = json.loads(self._get_subvolume_info(self.volname, subvolume)) + for md in subvol_md: + self.assertIn(md, subvol_info, "'{0}' key not present in metadata of subvolume".format(md)) - # verify trash dir is clean - self._wait_for_trash_empty() + self.assertEqual(subvol_info["state"], "complete", + msg="expected state to be 'complete', found '{0}".format(subvol_info["state"])) + self.assertEqual(len(subvol_info["features"]), 2, + msg="expected 1 feature, found '{0}' ({1})".format(len(subvol_info["features"]), subvol_info["features"])) + for 
feature in ['snapshot-clone', 'snapshot-autoprotect']: + self.assertIn(feature, subvol_info["features"], msg="expected feature '{0}' in subvolume".format(feature)) - def test_subvolume_snapshot_protect_unprotect_sanity(self): - """ - Snapshot protect/unprotect commands are deprecated. This test exists to ensure that - invoking the command does not cause errors, till they are removed from a subsequent release. - """ - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone = self._generate_random_clone_name() + # resize + nsize = self.DEFAULT_FILE_SIZE*1024*1024*10 + self._fs_cmd("subvolume", "resize", self.volname, subvolume, str(nsize)) + subvol_info = json.loads(self._get_subvolume_info(self.volname, subvolume)) + for md in subvol_md: + self.assertIn(md, subvol_info, "'{0}' key not present in metadata of subvolume".format(md)) + self.assertEqual(subvol_info["bytes_quota"], nsize, "bytes_quota should be set to '{0}'".format(nsize)) - # create subvolume - self._fs_cmd("subvolume", "create", self.volname, subvolume) + # create (idempotent) (change some attrs, to ensure attrs are preserved from the snapshot on clone) + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode", mode, "--uid", uid, "--gid", gid) # do some IO - self._do_subvolume_io(subvolume, number_of_files=64) + self._do_subvolume_io(subvolume, number_of_files=8) - # snapshot subvolume + # snap-create self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) - # now, protect snapshot - self._fs_cmd("subvolume", "snapshot", "protect", self.volname, subvolume, snapshot) - - # schedule a clone - self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + # clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone1) # check clone status - self._wait_for_clone_to_complete(clone) - - # now, unprotect snapshot - self._fs_cmd("subvolume", "snapshot", "unprotect", self.volname, subvolume, snapshot) + self._wait_for_clone_to_complete(clone1) - # remove snapshot - self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + # ensure clone is v2 + self._assert_meta_location_and_version(self.volname, clone1, version=2) # verify clone - self._verify_clone(subvolume, clone) + self._verify_clone(subvolume, snapshot, clone1, source_version=1) - # remove subvolumes - self._fs_cmd("subvolume", "rm", self.volname, subvolume) - self._fs_cmd("subvolume", "rm", self.volname, clone) + # clone (older snapshot) + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, 'fake', clone2) + + # check clone status + self._wait_for_clone_to_complete(clone2) + + # ensure clone is v2 + self._assert_meta_location_and_version(self.volname, clone2, version=2) + + # verify clone + # TODO: rentries will mismatch till this is fixed https://tracker.ceph.com/issues/46747 + #self._verify_clone(subvolume, 'fake', clone2, source_version=1) + + # snap-info + snap_info = json.loads(self._get_subvolume_snapshot_info(self.volname, subvolume, snapshot)) + for md in snap_md: + self.assertIn(md, snap_info, "'{0}' key not present in metadata of snapshot".format(md)) + self.assertEqual(snap_info["has_pending_clones"], "no") + + # snap-ls + subvol_snapshots = json.loads(self._fs_cmd('subvolume', 'snapshot', 'ls', self.volname, subvolume)) + self.assertEqual(len(subvol_snapshots), 2, "subvolume ls count mismatch, expected 2', found {0}".format(len(subvol_snapshots))) + snapshotnames = 
[snapshot['name'] for snapshot in subvol_snapshots] + for name in [snapshot, 'fake']: + self.assertIn(name, snapshotnames, msg="expected snapshot '{0}' in subvolume snapshot ls".format(name)) + + # snap-rm + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, "fake") + + # ensure volume is still at version 1 + self._assert_meta_location_and_version(self.volname, subvolume, version=1) + + # rm + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone1) + self._fs_cmd("subvolume", "rm", self.volname, clone2) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_no_upgrade_v1_to_v2(self): + """ + poor man's upgrade test -- theme continues... + ensure v1 to v2 upgrades are not done automatically due to various states of v1 + """ + subvolume1, subvolume2, subvolume3 = self._generate_random_subvolume_name(3) + group = self._generate_random_group_name() + + # emulate a v1 subvolume -- in the default group + subvol1_path = self._create_v1_subvolume(subvolume1) + + # emulate a v1 subvolume -- in a custom group + subvol2_path = self._create_v1_subvolume(subvolume2, subvol_group=group) + + # emulate a v1 subvolume -- in a clone pending state + self._create_v1_subvolume(subvolume3, subvol_type='clone', has_snapshot=False, state='pending') + + # this would attempt auto-upgrade on access, but fail to do so as snapshots exist + subvolpath1 = self._get_subvolume_path(self.volname, subvolume1) + self.assertEqual(subvolpath1, subvol1_path) + + subvolpath2 = self._get_subvolume_path(self.volname, subvolume2, group_name=group) + self.assertEqual(subvolpath2, subvol2_path) + + # this would attempt auto-upgrade on access, but fail to do so as volume is not complete + # use clone status, as only certain operations are allowed in pending state + status = json.loads(self._fs_cmd("clone", "status", self.volname, subvolume3)) + self.assertEqual(status["status"]["state"], "pending") + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume1, "fake") + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume2, "fake", group) + + # ensure metadata file is in v1 location, with version retained as v1 + self._assert_meta_location_and_version(self.volname, subvolume1, version=1) + self._assert_meta_location_and_version(self.volname, subvolume2, subvol_group=group, version=1) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume1) + self._fs_cmd("subvolume", "rm", self.volname, subvolume2, group) + try: + self._fs_cmd("subvolume", "rm", self.volname, subvolume3) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EAGAIN, "invalid error code on rm of subvolume undergoing clone") + else: + self.fail("expected rm of subvolume undergoing clone to fail") + + # ensure metadata file is in v1 location, with version retained as v1 + self._assert_meta_location_and_version(self.volname, subvolume3, version=1) + self._fs_cmd("subvolume", "rm", self.volname, subvolume3, "--force") + + # verify list subvolumes returns an empty list + subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname)) + self.assertEqual(len(subvolumels), 0) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_upgrade_v1_to_v2(self): + """ + poor man's upgrade test -- theme continues... 
+ ensure v1 to v2 upgrades work + """ + subvolume1, subvolume2 = self._generate_random_subvolume_name(2) + group = self._generate_random_group_name() + + # emulate a v1 subvolume -- in the default group + subvol1_path = self._create_v1_subvolume(subvolume1, has_snapshot=False) + + # emulate a v1 subvolume -- in a custom group + subvol2_path = self._create_v1_subvolume(subvolume2, subvol_group=group, has_snapshot=False) + + # this would attempt auto-upgrade on access + subvolpath1 = self._get_subvolume_path(self.volname, subvolume1) + self.assertEqual(subvolpath1, subvol1_path) + + subvolpath2 = self._get_subvolume_path(self.volname, subvolume2, group_name=group) + self.assertEqual(subvolpath2, subvol2_path) + + # ensure metadata file is in v2 location, with version retained as v2 + self._assert_meta_location_and_version(self.volname, subvolume1, version=2) + self._assert_meta_location_and_version(self.volname, subvolume2, subvol_group=group, version=2) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume1) + self._fs_cmd("subvolume", "rm", self.volname, subvolume2, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_rm_with_snapshots(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # remove subvolume -- should fail with ENOTEMPTY since it has snapshots + try: + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + except CommandFailedError as ce: + if ce.exitstatus != errno.ENOTEMPTY: + raise RuntimeError("invalid error code returned when deleting subvolume with snapshots") + else: + raise RuntimeError("expected subvolume deletion to fail") + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_retain_snapshot_without_snapshots(self): + """ + ensure retain snapshots based delete of a subvolume with no snapshots, deletes the subbvolume + """ + subvolume = self._generate_random_subvolume_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # remove with snapshot retention (should remove volume, no snapshots to retain) + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots") + + # verify list subvolumes returns an empty list + subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname)) + self.assertEqual(len(subvolumels), 0) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_retain_snapshot_with_snapshots(self): + """ + ensure retain snapshots based delete of a subvolume with snapshots retains the subvolume + also test allowed and dis-allowed operations on a retained subvolume + """ + snap_md = ["created_at", "data_pool", "has_pending_clones", "size"] + + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # remove subvolume -- should fail with ENOTEMPTY since it has snapshots + try: + 
self._fs_cmd("subvolume", "rm", self.volname, subvolume) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOTEMPTY, "invalid error code on rm of retained subvolume with snapshots") + else: + self.fail("expected rm of subvolume with retained snapshots to fail") + + # remove with snapshot retention + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots") + + # fetch info + subvol_info = json.loads(self._fs_cmd("subvolume", "info", self.volname, subvolume)) + self.assertEqual(subvol_info["state"], "snapshot-retained", + msg="expected state to be 'snapshot-retained', found '{0}".format(subvol_info["state"])) + + ## test allowed ops in retained state + # ls + subvolumes = json.loads(self._fs_cmd('subvolume', 'ls', self.volname)) + self.assertEqual(len(subvolumes), 1, "subvolume ls count mismatch, expected '1', found {0}".format(len(subvolumes))) + self.assertEqual(subvolumes[0]['name'], subvolume, + "subvolume name mismatch in ls output, expected '{0}', found '{1}'".format(subvolume, subvolumes[0]['name'])) + + # snapshot info + snap_info = json.loads(self._get_subvolume_snapshot_info(self.volname, subvolume, snapshot)) + for md in snap_md: + self.assertIn(md, snap_info, "'{0}' key not present in metadata of snapshot".format(md)) + self.assertEqual(snap_info["has_pending_clones"], "no") + + # rm --force (allowed but should fail) + try: + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--force") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOTEMPTY, "invalid error code on rm of subvolume with retained snapshots") + else: + self.fail("expected rm of subvolume with retained snapshots to fail") + + # rm (allowed but should fail) + try: + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOTEMPTY, "invalid error code on rm of subvolume with retained snapshots") + else: + self.fail("expected rm of subvolume with retained snapshots to fail") + + ## test disallowed ops + # getpath + try: + self._fs_cmd("subvolume", "getpath", self.volname, subvolume) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on getpath of subvolume with retained snapshots") + else: + self.fail("expected getpath of subvolume with retained snapshots to fail") + + # resize + nsize = self.DEFAULT_FILE_SIZE*1024*1024 + try: + self._fs_cmd("subvolume", "resize", self.volname, subvolume, str(nsize)) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on resize of subvolume with retained snapshots") + else: + self.fail("expected resize of subvolume with retained snapshots to fail") + + # snap-create + try: + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, "fail") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on snapshot create of subvolume with retained snapshots") + else: + self.fail("expected snapshot create of subvolume with retained snapshots to fail") + + # remove snapshot (should remove volume) + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # verify list subvolumes returns an empty list + subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname)) + self.assertEqual(len(subvolumels), 0) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_retain_snapshot_invalid_recreate(self): + """ + ensure 
retained subvolume recreate does not leave any incarnations in the subvolume and trash + """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # remove with snapshot retention + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots") + + # recreate subvolume with an invalid pool + data_pool = "invalid_pool" + try: + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--pool_layout", data_pool) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on recreate of subvolume with invalid poolname") + else: + self.fail("expected recreate of subvolume with invalid poolname to fail") + + # fetch info + subvol_info = json.loads(self._fs_cmd("subvolume", "info", self.volname, subvolume)) + self.assertEqual(subvol_info["state"], "snapshot-retained", + msg="expected state to be 'snapshot-retained', found '{0}".format(subvol_info["state"])) + + # getpath + try: + self._fs_cmd("subvolume", "getpath", self.volname, subvolume) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on getpath of subvolume with retained snapshots") + else: + self.fail("expected getpath of subvolume with retained snapshots to fail") + + # remove snapshot (should remove volume) + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_retain_snapshot_trash_busy_recreate(self): + """ + ensure retained subvolume recreate fails if its trash is not yet purged + """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # remove with snapshot retention + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots") + + # fake a trash entry + self._update_fake_trash(subvolume) + + # recreate subvolume + try: + self._fs_cmd("subvolume", "create", self.volname, subvolume) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EAGAIN, "invalid error code on recreate of subvolume with purge pending") + else: + self.fail("expected recreate of subvolume with purge pending to fail") + + # clear fake trash entry + self._update_fake_trash(subvolume, create=False) + + # recreate subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_retain_snapshot_trash_busy_recreate_clone(self): + """ + ensure retained clone recreate fails if its trash is not yet purged + """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, 
snapshot) + + # clone subvolume snapshot + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # snapshot clone + self._fs_cmd("subvolume", "snapshot", "create", self.volname, clone, snapshot) + + # remove clone with snapshot retention + self._fs_cmd("subvolume", "rm", self.volname, clone, "--retain-snapshots") + + # fake a trash entry + self._update_fake_trash(clone) + + # clone subvolume snapshot (recreate) + try: + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EAGAIN, "invalid error code on recreate of clone with purge pending") + else: + self.fail("expected recreate of clone with purge pending to fail") + + # clear fake trash entry + self._update_fake_trash(clone, create=False) + + # recreate subvolume + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, clone, snapshot) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_retain_snapshot_recreate_subvolume(self): + """ + ensure a retained subvolume can be recreated and further snapshotted + """ + snap_md = ["created_at", "data_pool", "has_pending_clones", "size"] + + subvolume = self._generate_random_subvolume_name() + snapshot1, snapshot2 = self._generate_random_snapshot_name(2) + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot1) + + # remove with snapshot retention + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots") + + # fetch info + subvol_info = json.loads(self._fs_cmd("subvolume", "info", self.volname, subvolume)) + self.assertEqual(subvol_info["state"], "snapshot-retained", + msg="expected state to be 'snapshot-retained', found '{0}".format(subvol_info["state"])) + + # recreate retained subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # fetch info + subvol_info = json.loads(self._fs_cmd("subvolume", "info", self.volname, subvolume)) + self.assertEqual(subvol_info["state"], "complete", + msg="expected state to be 'snapshot-retained', found '{0}".format(subvol_info["state"])) + + # snapshot info (older snapshot) + snap_info = json.loads(self._get_subvolume_snapshot_info(self.volname, subvolume, snapshot1)) + for md in snap_md: + self.assertIn(md, snap_info, "'{0}' key not present in metadata of snapshot".format(md)) + self.assertEqual(snap_info["has_pending_clones"], "no") + + # snap-create (new snapshot) + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot2) + + # remove with retain snapshots + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots") + + # list snapshots + subvolsnapshotls = json.loads(self._fs_cmd('subvolume', 'snapshot', 'ls', self.volname, subvolume)) + self.assertEqual(len(subvolsnapshotls), 2, "Expected the 'fs subvolume snapshot ls' command to list the" + " created subvolume snapshots") + snapshotnames = 
[snapshot['name'] for snapshot in subvolsnapshotls] + for snap in [snapshot1, snapshot2]: + self.assertIn(snap, snapshotnames, "Missing snapshot '{0}' in snapshot list".format(snap)) + + # remove snapshots (should remove volume) + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot1) + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot2) + + # verify list subvolumes returns an empty list + subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname)) + self.assertEqual(len(subvolumels), 0) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_retain_snapshot_clone(self): + """ + clone a snapshot from a snapshot retained subvolume + """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # store path for clone verification + subvol_path = self._get_subvolume_path(self.volname, subvolume) + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=16) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # remove with snapshot retention + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots") + + # clone retained subvolume snapshot + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # verify clone + self._verify_clone(subvolume, snapshot, clone, subvol_path=subvol_path) + + # remove snapshots (removes retained volume) + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify list subvolumes returns an empty list + subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname)) + self.assertEqual(len(subvolumels), 0) # verify trash dir is clean self._wait_for_trash_empty() - def test_subvolume_snapshot_clone(self): + def test_subvolume_retain_snapshot_recreate(self): + """ + recreate a subvolume from one of its retained snapshots + """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # store path for clone verification + subvol_path = self._get_subvolume_path(self.volname, subvolume) + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=16) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # remove with snapshot retention + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots") + + # recreate retained subvolume using its own snapshot to clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, subvolume) + + # check clone status + self._wait_for_clone_to_complete(subvolume) + + # verify clone + self._verify_clone(subvolume, snapshot, subvolume, subvol_path=subvol_path) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify list subvolumes returns an empty list + subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname)) + self.assertEqual(len(subvolumels), 0) + + # verify 
trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_clone_retain_snapshot_with_snapshots(self): + """ + retain snapshots of a cloned subvolume and check disallowed operations + """ + subvolume = self._generate_random_subvolume_name() + snapshot1, snapshot2 = self._generate_random_snapshot_name(2) + clone = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # store path for clone verification + subvol1_path = self._get_subvolume_path(self.volname, subvolume) + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=16) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot1) + + # remove with snapshot retention + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots") + + # clone retained subvolume snapshot + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot1, clone) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # verify clone + self._verify_clone(subvolume, snapshot1, clone, subvol_path=subvol1_path) + + # create a snapshot on the clone + self._fs_cmd("subvolume", "snapshot", "create", self.volname, clone, snapshot2) + + # retain a clone + self._fs_cmd("subvolume", "rm", self.volname, clone, "--retain-snapshots") + + # list snapshots + clonesnapshotls = json.loads(self._fs_cmd('subvolume', 'snapshot', 'ls', self.volname, clone)) + self.assertEqual(len(clonesnapshotls), 1, "Expected the 'fs subvolume snapshot ls' command to list the" + " created subvolume snapshots") + snapshotnames = [snapshot['name'] for snapshot in clonesnapshotls] + for snap in [snapshot2]: + self.assertIn(snap, snapshotnames, "Missing snapshot '{0}' in snapshot list".format(snap)) + + ## check disallowed operations on retained clone + # clone-status + try: + self._fs_cmd("clone", "status", self.volname, clone) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on clone status of clone with retained snapshots") + else: + self.fail("expected clone status of clone with retained snapshots to fail") + + # clone-cancel + try: + self._fs_cmd("clone", "cancel", self.volname, clone) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on clone cancel of clone with retained snapshots") + else: + self.fail("expected clone cancel of clone with retained snapshots to fail") + + # remove snapshots (removes subvolumes as all are in retained state) + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot1) + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, clone, snapshot2) + + # verify list subvolumes returns an empty list + subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname)) + self.assertEqual(len(subvolumels), 0) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_retain_snapshot_clone_from_newer_snapshot(self): + """ + clone a subvolume from recreated subvolume's latest snapshot + """ + subvolume = self._generate_random_subvolume_name() + snapshot1, snapshot2 = self._generate_random_snapshot_name(2) + clone = self._generate_random_clone_name(1) + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=16) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot1) + + # 
remove with snapshot retention + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots") + + # recreate subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # get and store path for clone verification + subvol2_path = self._get_subvolume_path(self.volname, subvolume) + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=16) + + # snapshot newer subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot2) + + # remove with snapshot retention + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots") + + # clone retained subvolume's newer snapshot + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot2, clone) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # verify clone + self._verify_clone(subvolume, snapshot2, clone, subvol_path=subvol2_path) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot1) + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot2) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify list subvolumes returns an empty list + subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname)) + self.assertEqual(len(subvolumels), 0) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_protect_unprotect_sanity(self): + """ + Snapshot protect/unprotect commands are deprecated. This test exists to ensure that + invoking the command does not cause errors, till they are removed from a subsequent release. + """ subvolume = self._generate_random_subvolume_name() snapshot = self._generate_random_snapshot_name() clone = self._generate_random_clone_name() @@ -1709,17 +2717,56 @@ class TestVolumes(CephFSTestCase): # snapshot subvolume self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + # now, protect snapshot + self._fs_cmd("subvolume", "snapshot", "protect", self.volname, subvolume, snapshot) + # schedule a clone self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) # check clone status self._wait_for_clone_to_complete(clone) + # now, unprotect snapshot + self._fs_cmd("subvolume", "snapshot", "unprotect", self.volname, subvolume, snapshot) + + # verify clone + self._verify_clone(subvolume, snapshot, clone) + # remove snapshot self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_clone(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=64) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # check clone status + self._wait_for_clone_to_complete(clone) + # verify clone - self._verify_clone(subvolume, clone) + self._verify_clone(subvolume, snapshot, clone) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", 
"rm", self.volname, subvolume, snapshot) # remove subvolumes self._fs_cmd("subvolume", "rm", self.volname, subvolume) @@ -1771,12 +2818,12 @@ class TestVolumes(CephFSTestCase): # check clone status self._wait_for_clone_to_complete(clone) + # verify clone + self._verify_clone(subvolume, snapshot, clone, clone_pool=new_pool) + # remove snapshot self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) - # verify clone - self._verify_clone(subvolume, clone) - subvol_path = self._get_subvolume_path(self.volname, clone) desired_pool = self.mount_a.getfattr(subvol_path, "ceph.dir.layout.pool") self.assertEqual(desired_pool, new_pool) @@ -1796,6 +2843,9 @@ class TestVolumes(CephFSTestCase): mode = "777" uid = "1000" gid = "1000" + new_uid = "1001" + new_gid = "1001" + new_mode = "700" # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode", mode, "--uid", uid, "--gid", gid) @@ -1806,17 +2856,64 @@ class TestVolumes(CephFSTestCase): # snapshot subvolume self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + # change subvolume attrs (to ensure clone picks up snapshot attrs) + self._do_subvolume_attr_update(subvolume, new_uid, new_gid, new_mode) + # schedule a clone self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) # check clone status self._wait_for_clone_to_complete(clone) + # verify clone + self._verify_clone(subvolume, snapshot, clone) + # remove snapshot self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_clone_inherit_snapshot_namespace_and_size(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + osize = self.DEFAULT_FILE_SIZE*1024*1024*12 + + # create subvolume, in an isolated namespace with a specified size + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--namespace-isolated", "--size", str(osize)) + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=8) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # create a pool different from current subvolume pool + subvol_path = self._get_subvolume_path(self.volname, subvolume) + default_pool = self.mount_a.getfattr(subvol_path, "ceph.dir.layout.pool") + new_pool = "new_pool" + self.assertNotEqual(default_pool, new_pool) + self.fs.add_data_pool(new_pool) + + # update source subvolume pool + self._do_subvolume_pool_and_namespace_update(subvolume, pool=new_pool, pool_namespace="") + + # schedule a clone, with NO --pool specification + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # check clone status + self._wait_for_clone_to_complete(clone) + # verify clone - self._verify_clone(subvolume, clone) + self._verify_clone(subvolume, snapshot, clone) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) # remove subvolumes self._fs_cmd("subvolume", "rm", self.volname, subvolume) @@ -1845,12 +2942,12 @@ class TestVolumes(CephFSTestCase): # check clone status self._wait_for_clone_to_complete(clone1) + # verify clone + self._verify_clone(subvolume, snapshot, clone1) + # remove snapshot 
self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) - # verify clone - self._verify_clone(subvolume, clone1) - # now the clone is just like a normal subvolume -- snapshot the clone and fork # another clone. before that do some IO so it's can be differentiated. self._do_subvolume_io(clone1, create_dir="data", number_of_files=32) @@ -1864,12 +2961,12 @@ class TestVolumes(CephFSTestCase): # check clone status self._wait_for_clone_to_complete(clone2) + # verify clone + self._verify_clone(clone1, snapshot, clone2) + # remove snapshot self._fs_cmd("subvolume", "snapshot", "rm", self.volname, clone1, snapshot) - # verify clone - self._verify_clone(clone1, clone2) - # remove subvolumes self._fs_cmd("subvolume", "rm", self.volname, subvolume) self._fs_cmd("subvolume", "rm", self.volname, clone1) @@ -1902,12 +2999,12 @@ class TestVolumes(CephFSTestCase): # check clone status self._wait_for_clone_to_complete(clone, clone_group=group) + # verify clone + self._verify_clone(subvolume, snapshot, clone, clone_group=group) + # remove snapshot self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) - # verify clone - self._verify_clone(subvolume, clone, clone_group=group) - # remove subvolumes self._fs_cmd("subvolume", "rm", self.volname, subvolume) self._fs_cmd("subvolume", "rm", self.volname, clone, group) @@ -1942,12 +3039,12 @@ class TestVolumes(CephFSTestCase): # check clone status self._wait_for_clone_to_complete(clone) + # verify clone + self._verify_clone(subvolume, snapshot, clone, source_group=group) + # remove snapshot self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot, group) - # verify clone - self._verify_clone(subvolume, clone, source_group=group) - # remove subvolumes self._fs_cmd("subvolume", "rm", self.volname, subvolume, group) self._fs_cmd("subvolume", "rm", self.volname, clone) @@ -1984,12 +3081,12 @@ class TestVolumes(CephFSTestCase): # check clone status self._wait_for_clone_to_complete(clone, clone_group=c_group) + # verify clone + self._verify_clone(subvolume, snapshot, clone, source_group=s_group, clone_group=c_group) + # remove snapshot self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot, s_group) - # verify clone - self._verify_clone(subvolume, clone, source_group=s_group, clone_group=c_group) - # remove subvolumes self._fs_cmd("subvolume", "rm", self.volname, subvolume, s_group) self._fs_cmd("subvolume", "rm", self.volname, clone, c_group) @@ -2006,6 +3103,7 @@ class TestVolumes(CephFSTestCase): yet another poor man's upgrade test -- rather than going through a full upgrade cycle, emulate old types subvolumes by going through the wormhole and verify clone operation. + further ensure that a legacy volume is not updated to v2, but clone is. 
""" subvolume = self._generate_random_subvolume_name() snapshot = self._generate_random_snapshot_name() @@ -2015,12 +3113,19 @@ class TestVolumes(CephFSTestCase): createpath = os.path.join(".", "volumes", "_nogroup", subvolume) self.mount_a.run_shell(['mkdir', '-p', createpath]) + # add required xattrs to subvolume + default_pool = self.mount_a.getfattr(".", "ceph.dir.layout.pool") + self.mount_a.setfattr(createpath, 'ceph.dir.layout.pool', default_pool) + # do some IO self._do_subvolume_io(subvolume, number_of_files=64) # snapshot subvolume self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + # ensure metadata file is in legacy location, with required version v1 + self._assert_meta_location_and_version(self.volname, subvolume, version=1, legacy=True) + # schedule a clone self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) @@ -2035,11 +3140,14 @@ class TestVolumes(CephFSTestCase): # check clone status self._wait_for_clone_to_complete(clone) + # verify clone + self._verify_clone(subvolume, snapshot, clone, source_version=1) + # remove snapshot self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) - # verify clone - self._verify_clone(subvolume, clone) + # ensure metadata file is in v2 location, with required version v2 + self._assert_meta_location_and_version(self.volname, clone) # remove subvolumes self._fs_cmd("subvolume", "rm", self.volname, subvolume) @@ -2081,12 +3189,12 @@ class TestVolumes(CephFSTestCase): subvolpath = self._get_subvolume_path(self.volname, clone) self.assertNotEqual(subvolpath, None) + # verify clone + self._verify_clone(subvolume, snapshot, clone) + # remove snapshot self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) - # verify clone - self._verify_clone(subvolume, clone) - # remove subvolumes self._fs_cmd("subvolume", "rm", self.volname, subvolume) self._fs_cmd("subvolume", "rm", self.volname, clone) @@ -2126,12 +3234,12 @@ class TestVolumes(CephFSTestCase): subvolpath = self._get_subvolume_path(self.volname, clone) self.assertNotEqual(subvolpath, None) + # verify clone + self._verify_clone(subvolume, snapshot, clone) + # remove snapshot self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) - # verify clone - self._verify_clone(subvolume, clone) - # remove subvolumes self._fs_cmd("subvolume", "rm", self.volname, subvolume) self._fs_cmd("subvolume", "rm", self.volname, clone) @@ -2171,12 +3279,12 @@ class TestVolumes(CephFSTestCase): subvolpath = self._get_subvolume_path(self.volname, clone) self.assertNotEqual(subvolpath, None) + # verify clone + self._verify_clone(subvolume, snapshot, clone) + # remove snapshot self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) - # verify clone - self._verify_clone(subvolume, clone) - # remove subvolumes self._fs_cmd("subvolume", "rm", self.volname, subvolume) self._fs_cmd("subvolume", "rm", self.volname, clone) @@ -2242,12 +3350,12 @@ class TestVolumes(CephFSTestCase): # check clone status self._wait_for_clone_to_complete(clone) + # verify clone + self._verify_clone(subvolume1, snapshot, clone) + # remove snapshot self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume1, snapshot) - # verify clone - self._verify_clone(subvolume1, clone) - # remove subvolumes self._fs_cmd("subvolume", "rm", self.volname, subvolume1) self._fs_cmd("subvolume", "rm", self.volname, subvolume2) @@ -2288,7 +3396,7 @@ class TestVolumes(CephFSTestCase): 
self._wait_for_clone_to_complete(clone1) # verify clone - self._verify_clone(subvolume, clone1) + self._verify_clone(subvolume, snapshot, clone1, clone_pool=new_pool) # wait a bit so that subsequent I/O will give pool full error time.sleep(120) @@ -2339,12 +3447,12 @@ class TestVolumes(CephFSTestCase): # check clone status self._wait_for_clone_to_complete(clone) + # verify clone + self._verify_clone(subvolume, snapshot, clone) + # remove snapshot self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) - # verify clone - self._verify_clone(subvolume, clone) - # remove subvolumes self._fs_cmd("subvolume", "rm", self.volname, subvolume) self._fs_cmd("subvolume", "rm", self.volname, clone) diff --git a/ceph/qa/tasks/mgr/dashboard/helper.py b/ceph/qa/tasks/mgr/dashboard/helper.py index 1a7a6951c..64cbba9e3 100644 --- a/ceph/qa/tasks/mgr/dashboard/helper.py +++ b/ceph/qa/tasks/mgr/dashboard/helper.py @@ -96,18 +96,19 @@ class DashboardTestCase(MgrTestCase): cls._ceph_cmd(set_roles_args) @classmethod - def login(cls, username, password): + def login(cls, username, password, set_cookies=False): if cls._loggedin: cls.logout() - cls._post('/api/auth', {'username': username, 'password': password}) + cls._post('/api/auth', {'username': username, + 'password': password}, set_cookies=set_cookies) cls._assertEq(cls._resp.status_code, 201) cls._token = cls.jsonBody()['token'] cls._loggedin = True @classmethod - def logout(cls): + def logout(cls, set_cookies=False): if cls._loggedin: - cls._post('/api/auth/logout') + cls._post('/api/auth/logout', set_cookies=set_cookies) cls._assertEq(cls._resp.status_code, 200) cls._token = None cls._loggedin = False @@ -195,29 +196,49 @@ class DashboardTestCase(MgrTestCase): def tearDownClass(cls): super(DashboardTestCase, cls).tearDownClass() - # pylint: disable=inconsistent-return-statements + # pylint: disable=inconsistent-return-statements, too-many-branches @classmethod - def _request(cls, url, method, data=None, params=None): + def _request(cls, url, method, data=None, params=None, set_cookies=False): url = "{}{}".format(cls._base_uri, url) log.info("Request %s to %s", method, url) headers = {} + cookies = {} if cls._token: - headers['Authorization'] = "Bearer {}".format(cls._token) - - if method == 'GET': - cls._resp = cls._session.get(url, params=params, verify=False, - headers=headers) - elif method == 'POST': - cls._resp = cls._session.post(url, json=data, params=params, - verify=False, headers=headers) - elif method == 'DELETE': - cls._resp = cls._session.delete(url, json=data, params=params, - verify=False, headers=headers) - elif method == 'PUT': - cls._resp = cls._session.put(url, json=data, params=params, - verify=False, headers=headers) + if set_cookies: + cookies['token'] = cls._token + else: + headers['Authorization'] = "Bearer {}".format(cls._token) + + if set_cookies: + if method == 'GET': + cls._resp = cls._session.get(url, params=params, verify=False, + headers=headers, cookies=cookies) + elif method == 'POST': + cls._resp = cls._session.post(url, json=data, params=params, + verify=False, headers=headers, cookies=cookies) + elif method == 'DELETE': + cls._resp = cls._session.delete(url, json=data, params=params, + verify=False, headers=headers, cookies=cookies) + elif method == 'PUT': + cls._resp = cls._session.put(url, json=data, params=params, + verify=False, headers=headers, cookies=cookies) + else: + assert False else: - assert False + if method == 'GET': + cls._resp = cls._session.get(url, params=params, verify=False, + 
headers=headers) + elif method == 'POST': + cls._resp = cls._session.post(url, json=data, params=params, + verify=False, headers=headers) + elif method == 'DELETE': + cls._resp = cls._session.delete(url, json=data, params=params, + verify=False, headers=headers) + elif method == 'PUT': + cls._resp = cls._session.put(url, json=data, params=params, + verify=False, headers=headers) + else: + assert False try: if not cls._resp.ok: # Output response for easier debugging. @@ -231,8 +252,8 @@ class DashboardTestCase(MgrTestCase): raise ex @classmethod - def _get(cls, url, params=None): - return cls._request(url, 'GET', params=params) + def _get(cls, url, params=None, set_cookies=False): + return cls._request(url, 'GET', params=params, set_cookies=set_cookies) @classmethod def _view_cache_get(cls, url, retries=5): @@ -253,16 +274,16 @@ class DashboardTestCase(MgrTestCase): return res @classmethod - def _post(cls, url, data=None, params=None): - cls._request(url, 'POST', data, params) + def _post(cls, url, data=None, params=None, set_cookies=False): + cls._request(url, 'POST', data, params, set_cookies=set_cookies) @classmethod - def _delete(cls, url, data=None, params=None): - cls._request(url, 'DELETE', data, params) + def _delete(cls, url, data=None, params=None, set_cookies=False): + cls._request(url, 'DELETE', data, params, set_cookies=set_cookies) @classmethod - def _put(cls, url, data=None, params=None): - cls._request(url, 'PUT', data, params) + def _put(cls, url, data=None, params=None, set_cookies=False): + cls._request(url, 'PUT', data, params, set_cookies=set_cookies) @classmethod def _assertEq(cls, v1, v2): @@ -281,8 +302,8 @@ class DashboardTestCase(MgrTestCase): # pylint: disable=too-many-arguments @classmethod - def _task_request(cls, method, url, data, timeout): - res = cls._request(url, method, data) + def _task_request(cls, method, url, data, timeout, set_cookies=False): + res = cls._request(url, method, data, set_cookies=set_cookies) cls._assertIn(cls._resp.status_code, [200, 201, 202, 204, 400, 403, 404]) if cls._resp.status_code == 403: @@ -334,16 +355,16 @@ class DashboardTestCase(MgrTestCase): return res_task['exception'] @classmethod - def _task_post(cls, url, data=None, timeout=60): - return cls._task_request('POST', url, data, timeout) + def _task_post(cls, url, data=None, timeout=60, set_cookies=False): + return cls._task_request('POST', url, data, timeout, set_cookies=set_cookies) @classmethod - def _task_delete(cls, url, timeout=60): - return cls._task_request('DELETE', url, None, timeout) + def _task_delete(cls, url, timeout=60, set_cookies=False): + return cls._task_request('DELETE', url, None, timeout, set_cookies=set_cookies) @classmethod - def _task_put(cls, url, data=None, timeout=60): - return cls._task_request('PUT', url, data, timeout) + def _task_put(cls, url, data=None, timeout=60, set_cookies=False): + return cls._task_request('PUT', url, data, timeout, set_cookies=set_cookies) @classmethod def cookies(cls): diff --git a/ceph/qa/tasks/mgr/dashboard/test_auth.py b/ceph/qa/tasks/mgr/dashboard/test_auth.py index e76708a9c..e1c9b8e63 100644 --- a/ceph/qa/tasks/mgr/dashboard/test_auth.py +++ b/ceph/qa/tasks/mgr/dashboard/test_auth.py @@ -30,6 +30,7 @@ class AuthTest(DashboardTestCase): self.assertIn('delete', perms) def test_a_set_login_credentials(self): + # test with Authorization header self.create_user('admin2', 'admin2', ['administrator']) self._post("/api/auth", {'username': 'admin2', 'password': 'admin2'}) self.assertStatus(201) @@ -37,7 +38,16 @@ class 
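The helper change above boils down to one decision: present the JWT either as an Authorization header or as a "token" cookie. A condensed, self-contained version of that pattern with plain requests follows; the URL and token here are placeholders, not values from the suite.

import requests

def authed_get(url, token, set_cookies=False):
    headers = {}
    cookies = {}
    if set_cookies:
        # the dashboard also accepts the JWT as a cookie named "token"
        cookies['token'] = token
    else:
        headers['Authorization'] = 'Bearer {}'.format(token)
    return requests.get(url, headers=headers, cookies=cookies, verify=False)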
AuthTest(DashboardTestCase): self._validate_jwt_token(data['token'], "admin2", data['permissions']) self.delete_user('admin2') + # test with Cookies set + self.create_user('admin2', 'admin2', ['administrator']) + self._post("/api/auth", {'username': 'admin2', 'password': 'admin2'}, set_cookies=True) + self.assertStatus(201) + data = self.jsonBody() + self._validate_jwt_token(data['token'], "admin2", data['permissions']) + self.delete_user('admin2') + def test_login_valid(self): + # test with Authorization header self._post("/api/auth", {'username': 'admin', 'password': 'admin'}) self.assertStatus(201) data = self.jsonBody() @@ -51,7 +61,22 @@ class AuthTest(DashboardTestCase): }, allow_unknown=False)) self._validate_jwt_token(data['token'], "admin", data['permissions']) + # test with Cookies set + self._post("/api/auth", {'username': 'admin', 'password': 'admin'}, set_cookies=True) + self.assertStatus(201) + data = self.jsonBody() + self.assertSchema(data, JObj(sub_elems={ + 'token': JLeaf(str), + 'username': JLeaf(str), + 'permissions': JObj(sub_elems={}, allow_unknown=True), + 'sso': JLeaf(bool), + 'pwdExpirationDate': JLeaf(int, none=True), + 'pwdUpdateRequired': JLeaf(bool) + }, allow_unknown=False)) + self._validate_jwt_token(data['token'], "admin", data['permissions']) + def test_login_invalid(self): + # test with Authorization header self._post("/api/auth", {'username': 'admin', 'password': 'inval'}) self.assertStatus(400) self.assertJsonBody({ @@ -60,7 +85,17 @@ class AuthTest(DashboardTestCase): "detail": "Invalid credentials" }) + # test with Cookies set + self._post("/api/auth", {'username': 'admin', 'password': 'inval'}, set_cookies=True) + self.assertStatus(400) + self.assertJsonBody({ + "component": "auth", + "code": "invalid_credentials", + "detail": "Invalid credentials" + }) + def test_login_without_password(self): + # test with Authorization header self.create_user('admin2', '', ['administrator']) self._post("/api/auth", {'username': 'admin2', 'password': ''}) self.assertStatus(400) @@ -71,7 +106,70 @@ class AuthTest(DashboardTestCase): }) self.delete_user('admin2') + # test with Cookies set + self.create_user('admin2', '', ['administrator']) + self._post("/api/auth", {'username': 'admin2', 'password': ''}, set_cookies=True) + self.assertStatus(400) + self.assertJsonBody({ + "component": "auth", + "code": "invalid_credentials", + "detail": "Invalid credentials" + }) + self.delete_user('admin2') + + def test_lockout_user(self): + # test with Authorization header + self._ceph_cmd(['dashboard', 'set-account-lockout-attempts', '3']) + for _ in range(3): + self._post("/api/auth", {'username': 'admin', 'password': 'inval'}) + self._post("/api/auth", {'username': 'admin', 'password': 'admin'}) + self.assertStatus(400) + self.assertJsonBody({ + "component": "auth", + "code": "invalid_credentials", + "detail": "Invalid credentials" + }) + self._ceph_cmd(['dashboard', 'ac-user-enable', 'admin']) + self._post("/api/auth", {'username': 'admin', 'password': 'admin'}) + self.assertStatus(201) + data = self.jsonBody() + self.assertSchema(data, JObj(sub_elems={ + 'token': JLeaf(str), + 'username': JLeaf(str), + 'permissions': JObj(sub_elems={}, allow_unknown=True), + 'sso': JLeaf(bool), + 'pwdExpirationDate': JLeaf(int, none=True), + 'pwdUpdateRequired': JLeaf(bool) + }, allow_unknown=False)) + self._validate_jwt_token(data['token'], "admin", data['permissions']) + + # test with Cookies set + self._ceph_cmd(['dashboard', 'set-account-lockout-attempts', '3']) + for _ in range(3): + 
self._post("/api/auth", {'username': 'admin', 'password': 'inval'}, set_cookies=True) + self._post("/api/auth", {'username': 'admin', 'password': 'admin'}, set_cookies=True) + self.assertStatus(400) + self.assertJsonBody({ + "component": "auth", + "code": "invalid_credentials", + "detail": "Invalid credentials" + }) + self._ceph_cmd(['dashboard', 'ac-user-enable', 'admin']) + self._post("/api/auth", {'username': 'admin', 'password': 'admin'}, set_cookies=True) + self.assertStatus(201) + data = self.jsonBody() + self.assertSchema(data, JObj(sub_elems={ + 'token': JLeaf(str), + 'username': JLeaf(str), + 'permissions': JObj(sub_elems={}, allow_unknown=True), + 'sso': JLeaf(bool), + 'pwdExpirationDate': JLeaf(int, none=True), + 'pwdUpdateRequired': JLeaf(bool) + }, allow_unknown=False)) + self._validate_jwt_token(data['token'], "admin", data['permissions']) + def test_logout(self): + # test with Authorization header self._post("/api/auth", {'username': 'admin', 'password': 'admin'}) self.assertStatus(201) data = self.jsonBody() @@ -86,7 +184,23 @@ class AuthTest(DashboardTestCase): self.assertStatus(401) self.set_jwt_token(None) + # test with Cookies set + self._post("/api/auth", {'username': 'admin', 'password': 'admin'}, set_cookies=True) + self.assertStatus(201) + data = self.jsonBody() + self._validate_jwt_token(data['token'], "admin", data['permissions']) + self.set_jwt_token(data['token']) + self._post("/api/auth/logout", set_cookies=True) + self.assertStatus(200) + self.assertJsonBody({ + "redirect_url": "#/login" + }) + self._get("/api/host", set_cookies=True) + self.assertStatus(401) + self.set_jwt_token(None) + def test_token_ttl(self): + # test with Authorization header self._ceph_cmd(['dashboard', 'set-jwt-token-ttl', '5']) self._post("/api/auth", {'username': 'admin', 'password': 'admin'}) self.assertStatus(201) @@ -99,7 +213,21 @@ class AuthTest(DashboardTestCase): self._ceph_cmd(['dashboard', 'set-jwt-token-ttl', '28800']) self.set_jwt_token(None) + # test with Cookies set + self._ceph_cmd(['dashboard', 'set-jwt-token-ttl', '5']) + self._post("/api/auth", {'username': 'admin', 'password': 'admin'}, set_cookies=True) + self.assertStatus(201) + self.set_jwt_token(self.jsonBody()['token']) + self._get("/api/host", set_cookies=True) + self.assertStatus(200) + time.sleep(6) + self._get("/api/host", set_cookies=True) + self.assertStatus(401) + self._ceph_cmd(['dashboard', 'set-jwt-token-ttl', '28800']) + self.set_jwt_token(None) + def test_remove_from_blacklist(self): + # test with Authorization header self._ceph_cmd(['dashboard', 'set-jwt-token-ttl', '5']) self._post("/api/auth", {'username': 'admin', 'password': 'admin'}) self.assertStatus(201) @@ -119,11 +247,37 @@ class AuthTest(DashboardTestCase): self._post("/api/auth/logout") self.assertStatus(200) + # test with Cookies set + self._ceph_cmd(['dashboard', 'set-jwt-token-ttl', '5']) + self._post("/api/auth", {'username': 'admin', 'password': 'admin'}, set_cookies=True) + self.assertStatus(201) + self.set_jwt_token(self.jsonBody()['token']) + # the following call adds the token to the blocklist + self._post("/api/auth/logout", set_cookies=True) + self.assertStatus(200) + self._get("/api/host", set_cookies=True) + self.assertStatus(401) + time.sleep(6) + self._ceph_cmd(['dashboard', 'set-jwt-token-ttl', '28800']) + self.set_jwt_token(None) + self._post("/api/auth", {'username': 'admin', 'password': 'admin'}, set_cookies=True) + self.assertStatus(201) + self.set_jwt_token(self.jsonBody()['token']) + # the following call removes 
expired tokens from the blocklist + self._post("/api/auth/logout", set_cookies=True) + self.assertStatus(200) + def test_unauthorized(self): + # test with Authorization header self._get("/api/host") self.assertStatus(401) + # test with Cookies set + self._get("/api/host", set_cookies=True) + self.assertStatus(401) + def test_invalidate_token_by_admin(self): + # test with Authorization header self._get("/api/host") self.assertStatus(401) self.create_user('user', 'user', ['read-only']) @@ -147,7 +301,32 @@ class AuthTest(DashboardTestCase): self.assertStatus(200) self.delete_user("user") + # test with Cookies set + self._get("/api/host", set_cookies=True) + self.assertStatus(401) + self.create_user('user', 'user', ['read-only']) + time.sleep(1) + self._post("/api/auth", {'username': 'user', 'password': 'user'}, set_cookies=True) + self.assertStatus(201) + self.set_jwt_token(self.jsonBody()['token']) + self._get("/api/host", set_cookies=True) + self.assertStatus(200) + time.sleep(1) + self._ceph_cmd(['dashboard', 'ac-user-set-password', '--force-password', + 'user', 'user2']) + time.sleep(1) + self._get("/api/host", set_cookies=True) + self.assertStatus(401) + self.set_jwt_token(None) + self._post("/api/auth", {'username': 'user', 'password': 'user2'}, set_cookies=True) + self.assertStatus(201) + self.set_jwt_token(self.jsonBody()['token']) + self._get("/api/host", set_cookies=True) + self.assertStatus(200) + self.delete_user("user") + def test_check_token(self): + # test with Authorization header self.login("admin", "admin") self._post("/api/auth/check", {"token": self.jsonBody()["token"]}) self.assertStatus(200) @@ -160,7 +339,21 @@ class AuthTest(DashboardTestCase): }, allow_unknown=False)) self.logout() + # test with Cookies set + self.login("admin", "admin", set_cookies=True) + self._post("/api/auth/check", {"token": self.jsonBody()["token"]}, set_cookies=True) + self.assertStatus(200) + data = self.jsonBody() + self.assertSchema(data, JObj(sub_elems={ + "username": JLeaf(str), + "permissions": JObj(sub_elems={}, allow_unknown=True), + "sso": JLeaf(bool), + "pwdUpdateRequired": JLeaf(bool) + }, allow_unknown=False)) + self.logout(set_cookies=True) + def test_check_wo_token(self): + # test with Authorization header self.login("admin", "admin") self._post("/api/auth/check", {"token": ""}) self.assertStatus(200) @@ -169,3 +362,13 @@ class AuthTest(DashboardTestCase): "login_url": JLeaf(str) }, allow_unknown=False)) self.logout() + + # test with Cookies set + self.login("admin", "admin", set_cookies=True) + self._post("/api/auth/check", {"token": ""}, set_cookies=True) + self.assertStatus(200) + data = self.jsonBody() + self.assertSchema(data, JObj(sub_elems={ + "login_url": JLeaf(str) + }, allow_unknown=False)) + self.logout(set_cookies=True) diff --git a/ceph/qa/tasks/mgr/dashboard/test_osd.py b/ceph/qa/tasks/mgr/dashboard/test_osd.py index 3f6c03e85..914b84cc2 100644 --- a/ceph/qa/tasks/mgr/dashboard/test_osd.py +++ b/ceph/qa/tasks/mgr/dashboard/test_osd.py @@ -237,36 +237,139 @@ class OsdTest(DashboardTestCase): class OsdFlagsTest(DashboardTestCase): def __init__(self, *args, **kwargs): super(OsdFlagsTest, self).__init__(*args, **kwargs) - self._initial_flags = sorted( # These flags cannot be unset - ['sortbitwise', 'recovery_deletes', 'purged_snapdirs', - 'pglog_hardlimit']) + self._initial_flags = ['sortbitwise', 'recovery_deletes', 'purged_snapdirs', + 'pglog_hardlimit'] # These flags cannot be unset @classmethod - def _get_cluster_osd_flags(cls): - return sorted( - 
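The token assertions in the auth tests above go through _validate_jwt_token, whose body is not shown in this hunk. As a rough illustration only, not the suite's actual helper, a JWT's claims can be inspected by base64-decoding its payload segment:

import base64
import json

def decode_jwt_payload(token):
    # a JWT is header.payload.signature; the payload is URL-safe base64 without padding
    payload_b64 = token.split('.')[1]
    payload_b64 += '=' * (-len(payload_b64) % 4)
    return json.loads(base64.urlsafe_b64decode(payload_b64))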
json.loads(cls._ceph_cmd(['osd', 'dump', - '--format=json']))['flags_set']) + def _put_flags(cls, flags, ids=None): + url = '/api/osd/flags' + data = {'flags': flags} - @classmethod - def _put_flags(cls, flags): - cls._put('/api/osd/flags', data={'flags': flags}) - return sorted(cls._resp.json()) + if ids: + url = url + '/individual' + data['ids'] = ids + + cls._put(url, data=data) + return cls._resp.json() def test_list_osd_flags(self): flags = self._get('/api/osd/flags') self.assertStatus(200) self.assertEqual(len(flags), 4) - self.assertEqual(sorted(flags), self._initial_flags) + self.assertCountEqual(flags, self._initial_flags) def test_add_osd_flag(self): flags = self._put_flags([ 'sortbitwise', 'recovery_deletes', 'purged_snapdirs', 'noout', 'pause', 'pglog_hardlimit' ]) - self.assertEqual(flags, sorted([ + self.assertCountEqual(flags, [ 'sortbitwise', 'recovery_deletes', 'purged_snapdirs', 'noout', 'pause', 'pglog_hardlimit' - ])) + ]) # Restore flags self._put_flags(self._initial_flags) + + def test_get_indiv_flag(self): + initial = self._get('/api/osd/flags/individual') + self.assertStatus(200) + self.assertSchema(initial, JList(JObj({ + 'osd': int, + 'flags': JList(str) + }))) + + self._ceph_cmd(['osd', 'set-group', 'noout,noin', 'osd.0', 'osd.1', 'osd.2']) + flags_added = self._get('/api/osd/flags/individual') + self.assertStatus(200) + for osd in flags_added: + if osd['osd'] in [0, 1, 2]: + self.assertIn('noout', osd['flags']) + self.assertIn('noin', osd['flags']) + for osd_initial in initial: + if osd['osd'] == osd_initial['osd']: + self.assertGreater(len(osd['flags']), len(osd_initial['flags'])) + + self._ceph_cmd(['osd', 'unset-group', 'noout,noin', 'osd.0', 'osd.1', 'osd.2']) + flags_removed = self._get('/api/osd/flags/individual') + self.assertStatus(200) + for osd in flags_removed: + if osd['osd'] in [0, 1, 2]: + self.assertNotIn('noout', osd['flags']) + self.assertNotIn('noin', osd['flags']) + + def test_add_indiv_flag(self): + flags_update = {'noup': None, 'nodown': None, 'noin': None, 'noout': True} + svc_id = 0 + + resp = self._put_flags(flags_update, [svc_id]) + self._check_indiv_flags_resp(resp, [svc_id], ['noout'], [], ['noup', 'nodown', 'noin']) + self._check_indiv_flags_osd([svc_id], ['noout'], ['noup', 'nodown', 'noin']) + + self._ceph_cmd(['osd', 'unset-group', 'noout', 'osd.{}'.format(svc_id)]) + + def test_add_multiple_indiv_flags(self): + flags_update = {'noup': None, 'nodown': None, 'noin': True, 'noout': True} + svc_id = 0 + + resp = self._put_flags(flags_update, [svc_id]) + self._check_indiv_flags_resp(resp, [svc_id], ['noout', 'noin'], [], ['noup', 'nodown']) + self._check_indiv_flags_osd([svc_id], ['noout', 'noin'], ['noup', 'nodown']) + + self._ceph_cmd(['osd', 'unset-group', 'noout,noin', 'osd.{}'.format(svc_id)]) + + def test_add_multiple_indiv_flags_multiple_osds(self): + flags_update = {'noup': None, 'nodown': None, 'noin': True, 'noout': True} + svc_id = [0, 1, 2] + + resp = self._put_flags(flags_update, svc_id) + self._check_indiv_flags_resp(resp, svc_id, ['noout', 'noin'], [], ['noup', 'nodown']) + self._check_indiv_flags_osd([svc_id], ['noout', 'noin'], ['noup', 'nodown']) + + self._ceph_cmd(['osd', 'unset-group', 'noout,noin', 'osd.0', 'osd.1', 'osd.2']) + + def test_remove_indiv_flag(self): + flags_update = {'noup': None, 'nodown': None, 'noin': None, 'noout': False} + svc_id = 0 + self._ceph_cmd(['osd', 'set-group', 'noout', 'osd.{}'.format(svc_id)]) + + resp = self._put_flags(flags_update, [svc_id]) + self._check_indiv_flags_resp(resp, 
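For reference, the individual-flag tests above all funnel through _put_flags, which amounts to a PUT against /api/osd/flags/individual carrying the flag map and the target OSD ids. A hedged sketch of that request shape with plain requests; the base URL and token are placeholders.

import requests

def put_individual_osd_flags(base_url, token, flags, ids):
    # flags is e.g. {'noout': True, 'noin': None}; ids is a list of OSD ids
    return requests.put(
        '{}/api/osd/flags/individual'.format(base_url),
        json={'flags': flags, 'ids': ids},
        headers={'Authorization': 'Bearer {}'.format(token)},
        verify=False)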
[svc_id], [], ['noout'], ['noup', 'nodown', 'noin']) + self._check_indiv_flags_osd([svc_id], [], ['noup', 'nodown', 'noin', 'noout']) + + def test_remove_multiple_indiv_flags(self): + flags_update = {'noup': None, 'nodown': None, 'noin': False, 'noout': False} + svc_id = 0 + self._ceph_cmd(['osd', 'set-group', 'noout,noin', 'osd.{}'.format(svc_id)]) + + resp = self._put_flags(flags_update, [svc_id]) + self._check_indiv_flags_resp(resp, [svc_id], [], ['noout', 'noin'], ['noup', 'nodown']) + self._check_indiv_flags_osd([svc_id], [], ['noout', 'noin', 'noup', 'nodown']) + + def test_remove_multiple_indiv_flags_multiple_osds(self): + flags_update = {'noup': None, 'nodown': None, 'noin': False, 'noout': False} + svc_id = [0, 1, 2] + self._ceph_cmd(['osd', 'unset-group', 'noout,noin', 'osd.0', 'osd.1', 'osd.2']) + + resp = self._put_flags(flags_update, svc_id) + self._check_indiv_flags_resp(resp, svc_id, [], ['noout', 'noin'], ['noup', 'nodown']) + self._check_indiv_flags_osd([svc_id], [], ['noout', 'noin', 'noup', 'nodown']) + + def _check_indiv_flags_resp(self, resp, ids, added, removed, ignored): + self.assertStatus(200) + self.assertCountEqual(resp['ids'], ids) + self.assertCountEqual(resp['added'], added) + self.assertCountEqual(resp['removed'], removed) + + for flag in ignored: + self.assertNotIn(flag, resp['added']) + self.assertNotIn(flag, resp['removed']) + + def _check_indiv_flags_osd(self, ids, activated_flags, deactivated_flags): + osds = json.loads(self._ceph_cmd(['osd', 'dump', '--format=json']))['osds'] + for osd in osds: + if osd['osd'] in ids: + for flag in activated_flags: + self.assertIn(flag, osd['state']) + for flag in deactivated_flags: + self.assertNotIn(flag, osd['state']) diff --git a/ceph/qa/tasks/mgr/mgr_test_case.py b/ceph/qa/tasks/mgr/mgr_test_case.py index 37baeb20e..8687b5f29 100644 --- a/ceph/qa/tasks/mgr/mgr_test_case.py +++ b/ceph/qa/tasks/mgr/mgr_test_case.py @@ -1,6 +1,8 @@ import json import logging +from unittest import SkipTest + from teuthology import misc from tasks.ceph_test_case import CephTestCase @@ -99,7 +101,7 @@ class MgrTestCase(CephTestCase): assert cls.mgr_cluster is not None if len(cls.mgr_cluster.mgr_ids) < cls.MGRS_REQUIRED: - cls.skipTest( + raise SkipTest( "Only have {0} manager daemons, {1} are required".format( len(cls.mgr_cluster.mgr_ids), cls.MGRS_REQUIRED)) diff --git a/ceph/qa/tasks/mgr/test_dashboard.py b/ceph/qa/tasks/mgr/test_dashboard.py index 9f904a6e0..b30175f4f 100644 --- a/ceph/qa/tasks/mgr/test_dashboard.py +++ b/ceph/qa/tasks/mgr/test_dashboard.py @@ -28,6 +28,16 @@ class TestDashboard(MgrTestCase): "mgr/dashboard/standby_error_status_code", "500") + def wait_until_webserver_available(self, url): + def _check_connection(): + try: + requests.get(url, allow_redirects=False, verify=False) + return True + except requests.ConnectionError: + pass + return False + self.wait_until_true(_check_connection, timeout=30) + def test_standby(self): original_active_id = self.mgr_cluster.get_active_id() original_uri = self._get_uri("dashboard") @@ -48,6 +58,9 @@ class TestDashboard(MgrTestCase): self.assertNotEqual(original_uri, failed_over_uri) + # Wait until web server of the standby node is settled. + self.wait_until_webserver_available(original_uri) + # The original active daemon should have come back up as a standby # and be doing redirects to the new active daemon. 
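wait_until_webserver_available, added in the dashboard test above, is a poll-until-reachable loop. A standalone sketch of the same idea; the timeout and interval values here are arbitrary.

import time
import requests

def wait_for_webserver(url, timeout=30, interval=1):
    # keep probing until the web server answers; ignore connection errors
    # while the (standby) server is still coming up
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            requests.get(url, allow_redirects=False, verify=False)
            return True
        except requests.ConnectionError:
            time.sleep(interval)
    return False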
r = requests.get(original_uri, allow_redirects=False, verify=False) @@ -55,7 +68,7 @@ class TestDashboard(MgrTestCase): self.assertEqual(r.headers['Location'], failed_over_uri) # Ensure that every URL redirects to the active daemon. - r = requests.get("{}/runtime.js".format(original_uri), + r = requests.get("{}/runtime.js".format(original_uri.strip('/')), allow_redirects=False, verify=False) self.assertEqual(r.status_code, 303) @@ -85,6 +98,9 @@ class TestDashboard(MgrTestCase): self.assertNotEqual(original_uri, failed_over_uri) + # Wait until web server of the standby node is settled. + self.wait_until_webserver_available(original_uri) + # Redirection should be disabled now, instead a 500 must be returned. r = requests.get(original_uri, allow_redirects=False, verify=False) self.assertEqual(r.status_code, 500) diff --git a/ceph/qa/tasks/mgr/test_progress.py b/ceph/qa/tasks/mgr/test_progress.py index 40cd3a0ca..9ba549e6b 100644 --- a/ceph/qa/tasks/mgr/test_progress.py +++ b/ceph/qa/tasks/mgr/test_progress.py @@ -44,6 +44,88 @@ class TestProgress(MgrTestCase): log.info(json.dumps(p, indent=2)) return p['events'] + def _completed_events(self): + """ + This function returns all events that are completed + """ + p = self._get_progress() + log.info(json.dumps(p, indent=2)) + return p['completed'] + + def is_osd_marked_out(self, ev): + return ev['message'].endswith('marked out') + + def is_osd_marked_in(self, ev): + return ev['message'].endswith('marked in') + + def _get_osd_in_out_events(self, marked='both'): + """ + Return the event that deals with OSDs being + marked in, out or both + """ + + marked_in_events = [] + marked_out_events = [] + + events_in_progress = self._events_in_progress() + for ev in events_in_progress: + if self.is_osd_marked_out(ev): + marked_out_events.append(ev) + elif self.is_osd_marked_in(ev): + marked_in_events.append(ev) + + if marked == 'both': + return [marked_in_events] + [marked_out_events] + elif marked == 'in': + return marked_in_events + else: + return marked_out_events + + def _osd_in_out_events_count(self, marked='both'): + """ + Return the event that deals with OSDs being + marked in, out or both + """ + + marked_in_events = [] + marked_out_events = [] + + events_in_progress = self._events_in_progress() + for ev in events_in_progress: + if self.is_osd_marked_out(ev): + marked_out_events.append(ev) + elif self.is_osd_marked_in(ev): + marked_in_events.append(ev) + + if marked == 'both': + return [marked_in_events] + [marked_out_events] + elif marked == 'in': + return marked_in_events + else: + return marked_out_events + + def _osd_in_out_events_count(self, marked='both'): + """ + Count the number of on going recovery events that deals with + OSDs being marked in, out or both. 
+ """ + events_in_progress = self._events_in_progress() + marked_in_count = 0 + marked_out_count = 0 + + for ev in events_in_progress: + if self.is_osd_marked_out(ev): + marked_out_count += 1 + elif self.is_osd_marked_in(ev): + marked_in_count += 1 + + if marked == 'both': + return marked_in_count + marked_out_count + elif marked == 'in': + return marked_in_count + else: + return marked_out_count + def _setup_pool(self, size=None): self.mgr_cluster.mon_manager.create_pool(self.POOL) if size is not None: @@ -105,9 +187,10 @@ class TestProgress(MgrTestCase): 'osd', 'out', str(osd_id)) # Wait for a progress event to pop up - self.wait_until_equal(lambda: len(self._all_events()), 1, - timeout=self.EVENT_CREATION_PERIOD) - ev = self._all_events()[0] + self.wait_until_equal(lambda: self._osd_in_out_events_count('out'), 1, + timeout=self.EVENT_CREATION_PERIOD*2, + period=1) + ev = self._get_osd_in_out_events('out')[0] log.info(json.dumps(ev, indent=1)) self.assertIn("Rebalancing after osd.0 marked out", ev['message']) @@ -125,8 +208,9 @@ class TestProgress(MgrTestCase): try: # Wait for progress event marked in to pop up - self.wait_until_equal(lambda: len(self._events_in_progress()), 1, - timeout=self.EVENT_CREATION_PERIOD) + self.wait_until_equal(lambda: self._osd_in_out_events_count('in'), 1, + timeout=self.EVENT_CREATION_PERIOD*2, + period=1) except RuntimeError as ex: if not "Timed out after" in str(ex): raise ex @@ -134,12 +218,17 @@ class TestProgress(MgrTestCase): log.info("There was no PGs affected by osd being marked in") return None - new_event = self._events_in_progress()[0] - log.info(json.dumps(new_event, indent=1)) - self.assertIn("Rebalancing after osd.0 marked in", new_event['message']) - + new_event = self._get_osd_in_out_events('in')[0] return new_event + def _no_events_anywhere(self): + """ + Whether there are any live or completed events + """ + p = self._get_progress() + total_events = len(p['events']) + len(p['completed']) + return total_events == 0 + def _is_quiet(self): """ Whether any progress events are live. @@ -261,4 +350,53 @@ class TestProgress(MgrTestCase): # Check that no event is created time.sleep(self.EVENT_CREATION_PERIOD) - self.assertEqual(len(self._all_events()), osd_count - pool_size) + self.assertEqual( + self._osd_in_out_completed_events_count('out'), + osd_count - pool_size) + + def test_turn_off_module(self): + """ + When the the module is turned off, there should not + be any on going events or completed events. + Also module should not accept any kind of Remote Event + coming in from other module, however, once it is turned + back, on creating an event should be working as it is. 
+ """ + + pool_size = 3 + self._setup_pool(size=pool_size) + self._write_some_data(self.WRITE_PERIOD) + + self.mgr_cluster.mon_manager.raw_cluster_cmd("progress", "off") + + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'out', '0') + + time.sleep(self.EVENT_CREATION_PERIOD) + + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'in', '0') + + time.sleep(self.EVENT_CREATION_PERIOD) + + self.assertTrue(self._no_events_anywhere()) + + self.mgr_cluster.mon_manager.raw_cluster_cmd("progress", "on") + + self._write_some_data(self.WRITE_PERIOD) + + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'out', '0') + + # Wait for a progress event to pop up + self.wait_until_equal(lambda: self._osd_in_out_events_count('out'), 1, + timeout=self.EVENT_CREATION_PERIOD*2, + period=1) + + ev1 = self._get_osd_in_out_events('out')[0] + + log.info(json.dumps(ev1, indent=1)) + + self.wait_until_true(lambda: self._is_complete(ev1['id']), + timeout=self.RECOVERY_PERIOD) + self.assertTrue(self._is_quiet()) diff --git a/ceph/qa/tasks/radosgw_admin_rest.py b/ceph/qa/tasks/radosgw_admin_rest.py index df3fa7a1d..95fe5b8ac 100644 --- a/ceph/qa/tasks/radosgw_admin_rest.py +++ b/ceph/qa/tasks/radosgw_admin_rest.py @@ -464,6 +464,11 @@ def task(ctx, config): assert out['usage']['rgw.main']['num_objects'] == 1 assert out['usage']['rgw.main']['size_kb'] > 0 + # TESTCASE 'bucket-stats6', 'bucket', 'stats', 'non-existent bucket', 'fails, 'bucket not found error' + (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'bucket' : 'doesnotexist'}) + assert ret == 404 + assert out['Code'] == 'NoSuchBucket' + # reclaim it key.delete() diff --git a/ceph/qa/tasks/vstart_runner.py b/ceph/qa/tasks/vstart_runner.py index 50f5cc48f..14cc5939d 100644 --- a/ceph/qa/tasks/vstart_runner.py +++ b/ceph/qa/tasks/vstart_runner.py @@ -1268,7 +1268,10 @@ class LocalContext(object): self.daemons.daemons[prefixed_type][svc_id] = LocalDaemon(svc_type, svc_id) def __del__(self): - shutil.rmtree(self.teuthology_config['test_path']) + test_path = self.teuthology_config['test_path'] + # opt_create_cluster_only does not create the test path + if test_path: + shutil.rmtree(test_path) def teardown_cluster(): log.info('\ntearing down the cluster...') diff --git a/ceph/qa/workunits/fs/misc/subvolume.sh b/ceph/qa/workunits/fs/misc/subvolume.sh new file mode 100755 index 000000000..75716a6cf --- /dev/null +++ b/ceph/qa/workunits/fs/misc/subvolume.sh @@ -0,0 +1,63 @@ +#!/bin/sh -x + +expect_failure() { + if "$@"; then return 1; else return 0; fi +} + +set -e + +mkdir group +mkdir group/subvol1 + +setfattr -n ceph.dir.subvolume -v 1 group/subvol1 + +# rename subvolume +mv group/subvol1 group/subvol2 + +# move file out of the subvolume +touch group/subvol2/file1 +expect_failure python3 -c "import os; os.rename('group/subvol2/file1', 'group/file1')" +# move file into the subvolume +touch group/file2 +expect_failure python3 -c "import os; os.rename('group/file2', 'group/subvol2/file2')" + +# create hardlink within subvolume +ln group/subvol2/file1 group/subvol2/file1_ + +# create hardlink out of subvolume +expect_failure ln group/subvol2/file1 group/file1_ +expect_failure ln group/file2 group/subvol1/file2_ + +# create snapshot at subvolume root +mkdir group/subvol2/.snap/s1 + +# create snapshot at descendent dir of subvolume +mkdir group/subvol2/dir +expect_failure mkdir group/subvol2/dir/.snap/s2 + +mkdir group/subvol3 +setfattr -n ceph.dir.subvolume -v 1 group/subvol3 + +# move file across subvolumes +expect_failure python3 -c "import os; 
os.rename('group/subvol2/file1', 'group/subvol3/file1')" + +# create hardlink across subvolumes +expect_failure ln group/subvol2/file1 group/subvol3/file1 + +# create subvolume inside existing subvolume +expect_failure setfattr -n ceph.dir.subvolume -v 1 group/subvol2/dir + +# clear subvolume flag +setfattr -n ceph.dir.subvolume -v 0 group/subvol2 +mkdir group/subvol2/dir/.snap/s2 + +# parent subvolume override child subvolume +setfattr -n ceph.dir.subvolume -v 1 group/subvol2/dir +setfattr -n ceph.dir.subvolume -v 1 group/subvol2 +expect_failure mkdir group/subvol2/dir/.snap/s3 + +rmdir group/subvol2/.snap/s1 +rmdir group/subvol2/dir/.snap/s2 +rm -rf group + +echo OK diff --git a/ceph/qa/workunits/rbd/krbd_stable_pages_required.sh b/ceph/qa/workunits/rbd/krbd_stable_writes.sh similarity index 91% rename from ceph/qa/workunits/rbd/krbd_stable_pages_required.sh rename to ceph/qa/workunits/rbd/krbd_stable_writes.sh index 28b545f6d..d00e5fd04 100755 --- a/ceph/qa/workunits/rbd/krbd_stable_pages_required.sh +++ b/ceph/qa/workunits/rbd/krbd_stable_writes.sh @@ -8,7 +8,7 @@ function assert_dm() { local devno devno=$(sudo dmsetup info -c --noheadings -o Major,Minor $name) - grep -q $val /sys/dev/block/$devno/bdi/stable_pages_required + grep -q $val /sys/dev/block/$devno/queue/stable_writes } function dmsetup_reload() { @@ -22,7 +22,7 @@ function dmsetup_reload() { sudo dmsetup resume $name } -IMAGE_NAME="stable-pages-required-test" +IMAGE_NAME="stable-writes-test" rbd create --size 1 $IMAGE_NAME DEV=$(sudo rbd map $IMAGE_NAME) @@ -31,11 +31,11 @@ fallocate -l 1M loopfile LOOP_DEV=$(sudo losetup -f --show loopfile) [[ $(blockdev --getsize64 $DEV) -eq 1048576 ]] -grep -q 1 /sys/block/${DEV#/dev/}/bdi/stable_pages_required +grep -q 1 /sys/block/${DEV#/dev/}/queue/stable_writes rbd resize --size 2 $IMAGE_NAME [[ $(blockdev --getsize64 $DEV) -eq 2097152 ]] -grep -q 1 /sys/block/${DEV#/dev/}/bdi/stable_pages_required +grep -q 1 /sys/block/${DEV#/dev/}/queue/stable_writes cat <_conf.finalize_reexpand_meta(); common_init_finish(g_ceph_context); - + init_async_signal_handler(); register_async_signal_handler(SIGHUP, sighup_handler); diff --git a/ceph/src/cephadm/cephadm b/ceph/src/cephadm/cephadm index ea45474b2..8de809d75 100755 --- a/ceph/src/cephadm/cephadm +++ b/ceph/src/cephadm/cephadm @@ -48,7 +48,6 @@ import os import platform import pwd import random -import re import select import shutil import socket @@ -59,6 +58,7 @@ import tempfile import time import errno import struct +from enum import Enum try: from typing import Dict, List, Tuple, Optional, Union, Any, NoReturn, Callable, IO except ImportError: @@ -93,7 +93,7 @@ if sys.version_info > (3, 0): container_path = '' cached_stdin = None -DATEFMT = '%Y-%m-%dT%H:%M:%S.%f' +DATEFMT = '%Y-%m-%dT%H:%M:%S.%fZ' # Log and console output config logging_config = { @@ -924,12 +924,22 @@ class FileLock(object): ################################## # Popen wrappers, lifted from ceph-volume -def call(command, # type: List[str] - desc=None, # type: Optional[str] - verbose=False, # type: bool - verbose_on_failure=True, # type: bool - timeout=DEFAULT_TIMEOUT, # type: Optional[int] - **kwargs): +class CallVerbosity(Enum): + SILENT = 0 + # log stdout/stderr to logger.debug + DEBUG = 1 + # On a non-zero exit status, it will forcefully set + # logging ON for the terminal + VERBOSE_ON_FAILURE = 2 + # log at info (instead of debug) level. 
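The subvolume workunit above exercises the ceph.dir.subvolume directory flag: once set, operations that would move or hard-link files across a subvolume boundary are refused. A small Python sketch of the check the script performs with expect_failure; the paths are placeholders on a mounted CephFS and the exact errno is left to the filesystem.

import os

def rename_is_blocked(src, dst):
    # True if the rename was rejected (expected for a cross-subvolume move),
    # False if it unexpectedly succeeded
    try:
        os.rename(src, dst)
    except OSError:
        return True
    return False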
+ VERBOSE = 3 + + +def call(command: List[str], + desc: Optional[str] = None, + verbosity: CallVerbosity = CallVerbosity.VERBOSE_ON_FAILURE, + timeout: Optional[int] = DEFAULT_TIMEOUT, + **kwargs) -> Tuple[str, str, int]: """ Wrap subprocess.Popen to @@ -937,14 +947,12 @@ def call(command, # type: List[str] - decode utf-8 - cleanly return out, err, returncode - If verbose=True, log at info (instead of debug) level. - - :param verbose_on_failure: On a non-zero exit status, it will forcefully set - logging ON for the terminal :param timeout: timeout in seconds """ - if not desc: + if desc is None: desc = command[0] + if desc: + desc += ': ' timeout = timeout or args.timeout logger.debug("Running command: %s" % ' '.join(command)) @@ -977,7 +985,7 @@ def call(command, # type: List[str] if end_time and (time.time() >= end_time): stop = True if process.poll() is None: - logger.info(desc + ':timeout after %s seconds' % timeout) + logger.info(desc + 'timeout after %s seconds' % timeout) process.kill() if reads and process.poll() is not None: # we want to stop, but first read off anything remaining @@ -1007,55 +1015,58 @@ def call(command, # type: List[str] lines = message.split('\n') out_buffer = lines.pop() for line in lines: - if verbose: - logger.info(desc + ':stdout ' + line) - else: - logger.debug(desc + ':stdout ' + line) + if verbosity == CallVerbosity.VERBOSE: + logger.info(desc + 'stdout ' + line) + elif verbosity != CallVerbosity.SILENT: + logger.debug(desc + 'stdout ' + line) elif fd == process.stderr.fileno(): err += message message = err_buffer + message lines = message.split('\n') err_buffer = lines.pop() for line in lines: - if verbose: - logger.info(desc + ':stderr ' + line) - else: - logger.debug(desc + ':stderr ' + line) + if verbosity == CallVerbosity.VERBOSE: + logger.info(desc + 'stderr ' + line) + elif verbosity != CallVerbosity.SILENT: + logger.debug(desc + 'stderr ' + line) else: assert False except (IOError, OSError): pass - if verbose: - logger.debug(desc + ':profile rt=%s, stop=%s, exit=%s, reads=%s' + if verbosity == CallVerbosity.VERBOSE: + logger.debug(desc + 'profile rt=%s, stop=%s, exit=%s, reads=%s' % (time.time()-start_time, stop, process.poll(), reads)) returncode = process.wait() if out_buffer != '': - if verbose: - logger.info(desc + ':stdout ' + out_buffer) - else: - logger.debug(desc + ':stdout ' + out_buffer) + if verbosity == CallVerbosity.VERBOSE: + logger.info(desc + 'stdout ' + out_buffer) + elif verbosity != CallVerbosity.SILENT: + logger.debug(desc + 'stdout ' + out_buffer) if err_buffer != '': - if verbose: - logger.info(desc + ':stderr ' + err_buffer) - else: - logger.debug(desc + ':stderr ' + err_buffer) + if verbosity == CallVerbosity.VERBOSE: + logger.info(desc + 'stderr ' + err_buffer) + elif verbosity != CallVerbosity.SILENT: + logger.debug(desc + 'stderr ' + err_buffer) - if returncode != 0 and verbose_on_failure and not verbose: + if returncode != 0 and verbosity == CallVerbosity.VERBOSE_ON_FAILURE: # dump stdout + stderr logger.info('Non-zero exit code %d from %s' % (returncode, ' '.join(command))) for line in out.splitlines(): - logger.info(desc + ':stdout ' + line) + logger.info(desc + 'stdout ' + line) for line in err.splitlines(): - logger.info(desc + ':stderr ' + line) + logger.info(desc + 'stderr ' + line) return out, err, returncode -def call_throws(command, **kwargs): - # type: (List[str], Any) -> Tuple[str, str, int] - out, err, ret = call(command, **kwargs) +def call_throws(command: List[str], + desc: Optional[str] = None, + 
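A trimmed, self-contained illustration of the CallVerbosity pattern introduced above; this is not cephadm's full call() implementation (which streams output with select and handles timeouts). The enum value decides whether per-line output goes to logger.info, logger.debug, or nowhere, and VERBOSE_ON_FAILURE dumps everything at info level after a non-zero exit.

import logging
import subprocess
from enum import Enum

logger = logging.getLogger(__name__)

class CallVerbosity(Enum):
    SILENT = 0
    DEBUG = 1
    VERBOSE_ON_FAILURE = 2
    VERBOSE = 3

def call(command, verbosity=CallVerbosity.VERBOSE_ON_FAILURE):
    proc = subprocess.run(command, capture_output=True, text=True)
    for name, stream in (('stdout', proc.stdout), ('stderr', proc.stderr)):
        for line in stream.splitlines():
            if verbosity == CallVerbosity.VERBOSE:
                logger.info('%s %s', name, line)
            elif verbosity != CallVerbosity.SILENT:
                logger.debug('%s %s', name, line)
    if proc.returncode != 0 and verbosity == CallVerbosity.VERBOSE_ON_FAILURE:
        # on failure, force the captured output onto the log at info level
        logger.info('Non-zero exit code %d from %s', proc.returncode, ' '.join(command))
        for line in (proc.stdout + proc.stderr).splitlines():
            logger.info(line)
    return proc.stdout, proc.stderr, proc.returncode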
verbosity: CallVerbosity = CallVerbosity.VERBOSE_ON_FAILURE, + timeout: Optional[int] = DEFAULT_TIMEOUT, + **kwargs) -> Tuple[str, str, int]: + out, err, ret = call(command, desc, verbosity, timeout, **kwargs) if ret: raise RuntimeError('Failed command: %s' % ' '.join(command)) return out, err, ret @@ -1166,7 +1177,7 @@ def get_file_timestamp(fn): return datetime.datetime.fromtimestamp( mt, tz=datetime.timezone.utc ).strftime(DATEFMT) - except Exception as e: + except Exception: return None @@ -1188,11 +1199,11 @@ def try_convert_datetime(s): p = re.compile(r'(\.[\d]{6})[\d]*') s = p.sub(r'\1', s) - # replace trailling Z with -0000, since (on python 3.6.8) it won't parse + # replace trailing Z with -0000, since (on python 3.6.8) it won't parse if s and s[-1] == 'Z': s = s[:-1] + '-0000' - # cut off the redundnat 'CST' part that strptime can't parse, if + # cut off the redundant 'CST' part that strptime can't parse, if # present. v = s.split(' ') s = ' '.join(v[0:3]) @@ -1409,13 +1420,16 @@ def get_last_local_ceph_image(): [container_path, 'images', '--filter', 'label=ceph=True', '--filter', 'dangling=false', - '--format', '{{.Repository}} {{.Tag}}']) - for line in out.splitlines(): - if len(line.split()) == 2: - repository, tag = line.split() - r = '{}:{}'.format(repository, tag) - logger.info('Using recent ceph image %s' % r) - return r + '--format', '{{.Repository}}@{{.Digest}}']) + return _filter_last_local_ceph_image(out) + + +def _filter_last_local_ceph_image(out): + # str -> Optional[str] + for image in out.splitlines(): + if image and not image.endswith('@'): + logger.info('Using recent ceph image %s' % image) + return image return None @@ -1627,7 +1641,7 @@ def check_unit(unit_name): installed = False try: out, err, code = call(['systemctl', 'is-enabled', unit_name], - verbose_on_failure=False) + verbosity=CallVerbosity.DEBUG) if code == 0: enabled = True installed = True @@ -1641,7 +1655,7 @@ def check_unit(unit_name): state = 'unknown' try: out, err, code = call(['systemctl', 'is-active', unit_name], - verbose_on_failure=False) + verbosity=CallVerbosity.DEBUG) out = out.strip() if out in ['active']: state = 'running' @@ -2177,10 +2191,10 @@ def _write_container_cmd_to_bash(file_obj, container, comment=None, background=F # unit file, makes it easier to read and grok. file_obj.write('# ' + comment + '\n') # Sometimes, adding `--rm` to a run_cmd doesn't work. Let's remove the container manually - file_obj.write('! '+ ' '.join(container.rm_cmd()) + '\n') + file_obj.write('! '+ ' '.join(container.rm_cmd()) + ' 2> /dev/null\n') # Sometimes, `podman rm` doesn't find the container. Then you'll have to add `--storage` if 'podman' in container_path: - file_obj.write('! '+ ' '.join(container.rm_cmd(storage=True)) + '\n') + file_obj.write('! 
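For context on the DATEFMT and timestamp handling touched above, a minimal standalone version of the pattern cephadm uses: mtimes are converted through UTC and rendered with the trailing 'Z' now carried in the format string. The filename argument is a placeholder.

import datetime
import os

DATEFMT = '%Y-%m-%dT%H:%M:%S.%fZ'

def get_file_timestamp(fn):
    try:
        mt = os.path.getmtime(fn)
        return datetime.datetime.fromtimestamp(
            mt, tz=datetime.timezone.utc).strftime(DATEFMT)
    except Exception:
        return None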
'+ ' '.join(container.rm_cmd(storage=True)) + ' 2> /dev/null\n') # container run command file_obj.write(' '.join(container.run_cmd()) + (' &' if background else '') + '\n') @@ -2292,9 +2306,9 @@ def deploy_daemon_units(fsid, uid, gid, daemon_type, daemon_id, c, unit_name = get_unit_name(fsid, daemon_type, daemon_id) call(['systemctl', 'stop', unit_name], - verbose_on_failure=False) + verbosity=CallVerbosity.DEBUG) call(['systemctl', 'reset-failed', unit_name], - verbose_on_failure=False) + verbosity=CallVerbosity.DEBUG) if enable: call_throws(['systemctl', 'enable', unit_name]) if start: @@ -2339,7 +2353,7 @@ class Firewalld(object): else: return - out, err, ret = call([self.cmd, '--permanent', '--query-service', svc], verbose_on_failure=False) + out, err, ret = call([self.cmd, '--permanent', '--query-service', svc], verbosity=CallVerbosity.DEBUG) if ret: logger.info('Enabling firewalld service %s in current zone...' % svc) out, err, ret = call([self.cmd, '--permanent', '--add-service', svc]) @@ -2357,7 +2371,7 @@ class Firewalld(object): for port in fw_ports: tcp_port = str(port) + '/tcp' - out, err, ret = call([self.cmd, '--permanent', '--query-port', tcp_port], verbose_on_failure=False) + out, err, ret = call([self.cmd, '--permanent', '--query-port', tcp_port], verbosity=CallVerbosity.DEBUG) if ret: logger.info('Enabling firewalld port %s in current zone...' % tcp_port) out, err, ret = call([self.cmd, '--permanent', '--add-port', tcp_port]) @@ -2367,6 +2381,7 @@ class Firewalld(object): else: logger.debug('firewalld port %s is enabled in current zone' % tcp_port) + out, err, ret = call([self.cmd, '--permanent', '--query-port', tcp_port], verbose_on_failure=False) def apply_rules(self): # type: () -> None if not self.available: @@ -2485,7 +2500,6 @@ Before=ceph-{fsid}.target LimitNOFILE=1048576 LimitNPROC=1048576 EnvironmentFile=-/etc/environment -ExecStartPre=-{container_path} rm ceph-{fsid}-%i ExecStart=/bin/bash {data_dir}/{fsid}/%i/unit.run ExecStop=-{container_path} stop ceph-{fsid}-%i ExecStopPost=-/bin/bash {data_dir}/{fsid}/%i/unit.poststop @@ -2792,7 +2806,14 @@ def command_bootstrap(): '--allow-overwrite to overwrite' % f) dirname = os.path.dirname(f) if dirname and not os.path.exists(dirname): - raise Error('%s directory %s does not exist' % (f, dirname)) + fname = os.path.basename(f) + logger.info(f"Creating directory {dirname} for {fname}") + try: + # use makedirs to create intermediate missing dirs + os.makedirs(dirname, 0o755) + except PermissionError: + raise Error(f"Unable to create {dirname} due to permissions failure. 
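The bootstrap change above replaces a hard failure on a missing output directory with directory creation plus a clearer permission error. A generic sketch of that behaviour, as a standalone helper rather than cephadm's exact code path:

import os

def ensure_parent_dir(path):
    dirname = os.path.dirname(path)
    if dirname and not os.path.exists(dirname):
        try:
            # makedirs creates any missing intermediate directories as well
            os.makedirs(dirname, 0o755)
        except PermissionError:
            raise RuntimeError(
                'Unable to create %s due to a permissions failure; retry as '
                'root/sudo or pre-create the directory' % dirname)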
Retry with root, or sudo or preallocate the directory.") + if not args.skip_prepare_host: command_prepare_host() @@ -3608,7 +3629,7 @@ def command_ceph_volume(): privileged=True, volume_mounts=mounts, ) - out, err, code = call_throws(c.run_cmd(), verbose=True) + out, err, code = call_throws(c.run_cmd(), verbosity=CallVerbosity.VERBOSE) if not code: print(out) @@ -3626,7 +3647,10 @@ def command_unit(): call_throws([ 'systemctl', args.command, - unit_name]) + unit_name], + verbosity=CallVerbosity.VERBOSE, + desc='' + ) ################################## @@ -3813,7 +3837,7 @@ def list_daemons(detail=True, legacy_dir=None): '--format', '{{.Id}},{{.Config.Image}},{{%s}},{{.Created}},{{index .Config.Labels "io.ceph.version"}}' % image_field, 'ceph-%s-%s' % (fsid, j) ], - verbose_on_failure=False) + verbosity=CallVerbosity.DEBUG) if not code: (container_id, image_name, image_id, start, version) = out.strip().split(',') @@ -3975,7 +3999,7 @@ class AdoptOsd(object): args=['lvm', 'list', '--format=json'], privileged=True ) - out, err, code = call_throws(c.run_cmd(), verbose=False) + out, err, code = call_throws(c.run_cmd()) if not code: try: js = json.loads(out) @@ -4305,11 +4329,11 @@ def command_rm_daemon(): 'this command may destroy precious data!') call(['systemctl', 'stop', unit_name], - verbose_on_failure=False) + verbosity=CallVerbosity.DEBUG) call(['systemctl', 'reset-failed', unit_name], - verbose_on_failure=False) + verbosity=CallVerbosity.DEBUG) call(['systemctl', 'disable', unit_name], - verbose_on_failure=False) + verbosity=CallVerbosity.DEBUG) data_dir = get_data_dir(args.fsid, daemon_type, daemon_id) if daemon_type in ['mon', 'osd', 'prometheus'] and \ not args.force_delete_data: @@ -4344,25 +4368,25 @@ def command_rm_cluster(): continue unit_name = get_unit_name(args.fsid, d['name']) call(['systemctl', 'stop', unit_name], - verbose_on_failure=False) + verbosity=CallVerbosity.DEBUG) call(['systemctl', 'reset-failed', unit_name], - verbose_on_failure=False) + verbosity=CallVerbosity.DEBUG) call(['systemctl', 'disable', unit_name], - verbose_on_failure=False) + verbosity=CallVerbosity.DEBUG) # cluster units for unit_name in ['ceph-%s.target' % args.fsid]: call(['systemctl', 'stop', unit_name], - verbose_on_failure=False) + verbosity=CallVerbosity.DEBUG) call(['systemctl', 'reset-failed', unit_name], - verbose_on_failure=False) + verbosity=CallVerbosity.DEBUG) call(['systemctl', 'disable', unit_name], - verbose_on_failure=False) + verbosity=CallVerbosity.DEBUG) slice_name = 'system-%s.slice' % (('ceph-%s' % args.fsid).replace('-', '\\x2d')) call(['systemctl', 'stop', slice_name], - verbose_on_failure=False) + verbosity=CallVerbosity.DEBUG) # rm units call_throws(['rm', '-f', args.unit_dir + @@ -4655,13 +4679,13 @@ class Apt(Packager): def install(self, ls): logger.info('Installing packages %s...' 
% ls) - call_throws(['apt', 'install', '-y'] + ls) + call_throws(['apt-get', 'install', '-y'] + ls) def install_podman(self): if self.distro == 'ubuntu': logger.info('Setting up repo for podman...') self.add_kubic_repo() - call_throws(['apt', 'update']) + call_throws(['apt-get', 'update']) logger.info('Attempting podman install...') try: @@ -5436,7 +5460,6 @@ class HostFacts(): up_secs, _ = raw_time.split() return float(up_secs) - @property def kernel_security(self): # type: () -> Dict[str, str] """Determine the security features enabled in the kernel - SELinux, AppArmor""" @@ -5501,6 +5524,23 @@ class HostFacts(): "description": "Linux Security Module framework is not available" } + @property + def kernel_parameters(self): + # type: () -> Dict[str, str] + """Get kernel parameters required/used in Ceph clusters""" + + k_param = {} + out, _, _ = call_throws(['sysctl', '-a'], verbosity=CallVerbosity.SILENT) + if out: + param_list = out.split('\n') + param_dict = { param.split(" = ")[0]:param.split(" = ")[-1] for param in param_list} + + # return only desired parameters + if 'net.ipv4.ip_nonlocal_bind' in param_dict: + k_param['net.ipv4.ip_nonlocal_bind'] = param_dict['net.ipv4.ip_nonlocal_bind'] + + return k_param + def dump(self): # type: () -> str """Return the attributes of this HostFacts object as json""" diff --git a/ceph/src/cephadm/tests/test_cephadm.py b/ceph/src/cephadm/tests/test_cephadm.py index c9e2769ed..5487f43b3 100644 --- a/ceph/src/cephadm/tests/test_cephadm.py +++ b/ceph/src/cephadm/tests/test_cephadm.py @@ -278,6 +278,15 @@ default via fe80::2480:28ec:5097:3fe2 dev wlp2s0 proto ra metric 20600 pref medi result = cd.dict_get_join({'a': 1}, 'a') assert result == 1 + def test_last_local_images(self): + out = ''' +docker.io/ceph/daemon-base@ +docker.io/ceph/ceph:v15.2.5 +docker.io/ceph/daemon-base:octopus + ''' + image = cd._filter_last_local_ceph_image(out) + assert image == 'docker.io/ceph/ceph:v15.2.5' + class TestCustomContainer(unittest.TestCase): cc: cd.CustomContainer diff --git a/ceph/src/client/Client.cc b/ceph/src/client/Client.cc index e34c99b2f..06254493e 100755 --- a/ceph/src/client/Client.cc +++ b/ceph/src/client/Client.cc @@ -124,6 +124,8 @@ #define DEBUG_GETATTR_CAPS (CEPH_CAP_XATTR_SHARED) +using namespace TOPNSPC::common; + void client_flush_set_callback(void *p, ObjectCacher::ObjectSet *oset) { Client *client = static_cast(p); @@ -150,9 +152,11 @@ int Client::CommandHook::call( std::lock_guard l{m_client->client_lock}; if (command == "mds_requests") m_client->dump_mds_requests(f); - else if (command == "mds_sessions") - m_client->dump_mds_sessions(f); - else if (command == "dump_cache") + else if (command == "mds_sessions") { + bool cap_dump = false; + cmd_getval(cmdmap, "cap_dump", cap_dump); + m_client->dump_mds_sessions(f, cap_dump); + } else if (command == "dump_cache") m_client->dump_cache(f); else if (command == "kick_stale_sessions") m_client->_kick_stale_sessions(); @@ -468,6 +472,7 @@ void Client::dump_status(Formatter *f) f->dump_int("osd_epoch", osd_epoch); f->dump_int("osd_epoch_barrier", cap_epoch_barrier); f->dump_bool("blacklisted", blacklisted); + f->dump_string("fs_name", mdsmap->get_fs_name()); } } @@ -519,7 +524,8 @@ void Client::_finish_init() lderr(cct) << "error registering admin socket command: " << cpp_strerror(-ret) << dendl; } - ret = admin_socket->register_command("mds_sessions", + ret = admin_socket->register_command("mds_sessions " + "name=cap_dump,type=CephBool,req=false", &m_command_hook, "show mds session state"); if (ret < 0) 
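The HostFacts.kernel_parameters property added above parses `sysctl -a` output and keeps only the parameters Ceph cares about (just net.ipv4.ip_nonlocal_bind in this patch). A small sketch of that filtering, taking the raw sysctl text as input:

def filter_kernel_parameters(sysctl_output, wanted=('net.ipv4.ip_nonlocal_bind',)):
    # sysctl -a prints one "key = value" pair per line
    params = {}
    for line in sysctl_output.splitlines():
        key, sep, value = line.partition(' = ')
        if sep:
            params[key] = value
    return {k: params[k] for k in wanted if k in params}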
 {
@@ -1534,7 +1540,7 @@ void Client::connect_mds_targets(mds_rank_t mds)
   }
 }
 
-void Client::dump_mds_sessions(Formatter *f)
+void Client::dump_mds_sessions(Formatter *f, bool cap_dump)
 {
   f->dump_int("id", get_nodeid().v);
   entity_inst_t inst(messenger->get_myname(), messenger->get_myaddr_legacy());
@@ -1544,7 +1550,7 @@ void Client::dump_mds_sessions(Formatter *f)
   f->open_array_section("sessions");
   for (const auto &p : mds_sessions) {
     f->open_object_section("session");
-    p.second.dump(f);
+    p.second.dump(f, cap_dump);
     f->close_section();
   }
   f->close_section();
@@ -1912,6 +1918,7 @@ void Client::encode_dentry_release(Dentry *dn, MetaRequest *req,
     rel.item.dname_len = dn->name.length();
     rel.item.dname_seq = dn->lease_seq;
     rel.dname = dn->name;
+    dn->lease_mds = -1;
   }
   ldout(cct, 25) << __func__ << " exit(dn:" << dn << ")" << dendl;
@@ -4211,7 +4218,7 @@ void Client::remove_session_caps(MetaSession *s, int err)
 
 int Client::_do_remount(bool retry_on_error)
 {
-  uint64_t max_retries = g_conf().get_val<uint64_t>("mds_max_retries_on_remount_failure");
+  uint64_t max_retries = cct->_conf.get_val<uint64_t>("mds_max_retries_on_remount_failure");
 
   errno = 0;
   int r = remount_cb(callback_handle);
@@ -7345,7 +7352,7 @@ unsigned Client::statx_to_mask(unsigned int flags, unsigned int want)
     mask |= CEPH_CAP_AUTH_SHARED;
   if (want & (CEPH_STATX_NLINK|CEPH_STATX_CTIME|CEPH_STATX_VERSION))
     mask |= CEPH_CAP_LINK_SHARED;
-  if (want & (CEPH_STATX_ATIME|CEPH_STATX_MTIME|CEPH_STATX_CTIME|CEPH_STATX_SIZE|CEPH_STATX_BLOCKS|CEPH_STATX_VERSION))
+  if (want & (CEPH_STATX_NLINK|CEPH_STATX_ATIME|CEPH_STATX_MTIME|CEPH_STATX_CTIME|CEPH_STATX_SIZE|CEPH_STATX_BLOCKS|CEPH_STATX_VERSION))
     mask |= CEPH_CAP_FILE_SHARED;
   if (want & (CEPH_STATX_VERSION|CEPH_STATX_CTIME))
     mask |= CEPH_CAP_XATTR_SHARED;
@@ -8130,6 +8137,7 @@ int Client::_readdir_cache_cb(dir_result_t *dirp, add_dirent_cb_t cb, void *p,
 
   string dn_name;
   while (true) {
+    int mask = caps;
     if (!dirp->inode->is_complete_and_ordered())
       return -EAGAIN;
     if (pd == dir->readdir_cache.end())
@@ -8147,7 +8155,10 @@ int Client::_readdir_cache_cb(dir_result_t *dirp, add_dirent_cb_t cb, void *p,
     }
 
     int idx = pd - dir->readdir_cache.begin();
-    int r = _getattr(dn->inode, caps, dirp->perms);
+    if (dn->inode->is_dir()) {
+      mask |= CEPH_STAT_RSTAT;
+    }
+    int r = _getattr(dn->inode, mask, dirp->perms);
     if (r < 0)
       return r;
 
@@ -8231,7 +8242,7 @@ int Client::readdir_r_cb(dir_result_t *d, add_dirent_cb_t cb, void *p,
     uint64_t next_off = 1;
 
     int r;
-    r = _getattr(diri, caps, dirp->perms);
+    r = _getattr(diri, caps | CEPH_STAT_RSTAT, dirp->perms);
     if (r < 0)
       return r;
 
@@ -8264,7 +8275,7 @@ int Client::readdir_r_cb(dir_result_t *d, add_dirent_cb_t cb, void *p,
       in = diri->get_first_parent()->dir->parent_inode;
 
     int r;
-    r = _getattr(in, caps, dirp->perms);
+    r = _getattr(in, caps | CEPH_STAT_RSTAT, dirp->perms);
     if (r < 0)
       return r;
 
@@ -8330,7 +8341,11 @@ int Client::readdir_r_cb(dir_result_t *d, add_dirent_cb_t cb, void *p,
 
       int r;
       if (check_caps) {
-        r = _getattr(entry.inode, caps, dirp->perms);
+        int mask = caps;
+        if(entry.inode->is_dir()){
+          mask |= CEPH_STAT_RSTAT;
+        }
+        r = _getattr(entry.inode, mask, dirp->perms);
         if (r < 0)
           return r;
       }
@@ -9224,7 +9239,7 @@ int64_t Client::_read(Fh *f, int64_t offset, uint64_t size, bufferlist *bl)
   int want, have = 0;
   bool movepos = false;
   std::unique_ptr onuninline;
-  int64_t r = 0;
+  int64_t rc = 0;
   const auto& conf = cct->_conf;
   Inode *in = f->inode.get();
   utime_t lat;
@@ -9242,8 +9257,9 @@ int64_t Client::_read(Fh *f, int64_t offset, uint64_t size, bufferlist *bl)
   loff_t start_pos = offset;
 
   if (in->inline_version == 0) {
-    r = _getattr(in, CEPH_STAT_CAP_INLINE_DATA, f->actor_perms, true);
+    auto r = _getattr(in, CEPH_STAT_CAP_INLINE_DATA, f->actor_perms, true);
     if (r < 0) {
+      rc = r;
       goto done;
     }
     ceph_assert(in->inline_version > 0);
@@ -9254,9 +9270,12 @@ retry:
     want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
   else
     want = CEPH_CAP_FILE_CACHE;
-  r = get_caps(f, CEPH_CAP_FILE_RD, want, &have, -1);
-  if (r < 0) {
-    goto done;
+  {
+    auto r = get_caps(f, CEPH_CAP_FILE_RD, want, &have, -1);
+    if (r < 0) {
+      rc = r;
+      goto done;
+    }
   }
   if (f->flags & O_DIRECT)
     have &= ~(CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO);
@@ -9278,12 +9297,12 @@ retry:
         bl->substr_of(in->inline_data, offset, len - offset);
         bl->append_zero(endoff - len);
       }
-      r = endoff - offset;
+      rc = endoff - offset;
     } else if ((uint64_t)offset < endoff) {
       bl->append_zero(endoff - offset);
-      r = endoff - offset;
+      rc = endoff - offset;
     } else {
-      r = 0;
+      rc = 0;
     }
     goto success;
   }
@@ -9296,27 +9315,31 @@ retry:
     if (f->flags & O_RSYNC) {
       _flush_range(in, offset, size);
     }
-    r = _read_async(f, offset, size, bl);
-    if (r < 0)
+    rc = _read_async(f, offset, size, bl);
+    if (rc < 0)
       goto done;
   } else {
     if (f->flags & O_DIRECT)
      _flush_range(in, offset, size);
 
    bool checkeof = false;
-    r = _read_sync(f, offset, size, bl, &checkeof);
-    if (r < 0)
+    rc = _read_sync(f, offset, size, bl, &checkeof);
+    if (rc < 0)
      goto done;
    if (checkeof) {
-      offset += r;
-      size -= r;
+      offset += rc;
+      size -= rc;
 
      put_cap_ref(in, CEPH_CAP_FILE_RD);
      have = 0;
      // reverify size
-      r = _getattr(in, CEPH_STAT_CAP_SIZE, f->actor_perms);
-      if (r < 0)
-        goto done;
+      {
+        auto r = _getattr(in, CEPH_STAT_CAP_SIZE, f->actor_perms);
+        if (r < 0) {
+          rc = r;
+          goto done;
+        }
+      }
 
      // eof? short read.
      if ((uint64_t)offset < in->size)
@@ -9325,10 +9348,10 @@ retry:
   }
 
 success:
-  ceph_assert(r >= 0);
+  ceph_assert(rc >= 0);
   if (movepos) {
     // adjust fd pos
-    f->pos = start_pos + r;
+    f->pos = start_pos + rc;
   }
 
   lat = ceph_clock_now();
@@ -9348,7 +9371,7 @@ done:
       in->mark_caps_dirty(CEPH_CAP_FILE_WR);
       check_caps(in, 0);
     } else
-      r = ret;
+      rc = ret;
   }
   if (have) {
     put_cap_ref(in, CEPH_CAP_FILE_RD);
@@ -9356,7 +9379,7 @@ done:
   if (movepos) {
     unlock_fh_pos(f);
   }
-  return r;
+  return rc;
 }
 
 Client::C_Readahead::C_Readahead(Client *c, Fh *f) :
@@ -9874,6 +9897,8 @@ int Client::ftruncate(int fd, loff_t length, const UserPerm& perms)
   if (f->flags & O_PATH)
     return -EBADF;
 #endif
+  if ((f->mode & CEPH_FILE_MODE_WR) == 0)
+    return -EBADF;
   struct stat attr;
   attr.st_size = length;
   return _setattr(f->inode, &attr, CEPH_SETATTR_SIZE, perms);
@@ -11463,6 +11488,9 @@ int Client::_getxattr(Inode *in, const char *name, void *value, size_t size,
       if (vxattr->flags & VXATTR_RSTAT) {
         flags |= CEPH_STAT_RSTAT;
       }
+      if (vxattr->flags & VXATTR_DIRSTAT) {
+        flags |= CEPH_CAP_FILE_SHARED;
+      }
       r = _getattr(in, flags, perms, true);
       if (r != 0) {
         // Error from getattr!
@@ -11998,18 +12026,21 @@ size_t Client::_vxattrcb_snap_btime(Inode *in, char *val, size_t size)
       (long unsigned)in->snap_btime.nsec());
 }
 
+size_t Client::_vxattrcb_cluster_fsid(Inode *in, char *val, size_t size)
+{
+  return snprintf(val, size, "%s", monclient->get_fsid().to_string().c_str());
+}
+
+size_t Client::_vxattrcb_client_id(Inode *in, char *val, size_t size)
+{
+  auto name = messenger->get_myname();
+  return snprintf(val, size, "%s%ld", name.type_str(), name.num());
+}
+
 #define CEPH_XATTR_NAME(_type, _name) "ceph." #_type "." #_name
 #define CEPH_XATTR_NAME2(_type, _name, _name2) "ceph." #_type "." #_name "." #_name2
 
-#define XATTR_NAME_CEPH(_type, _name) \
-{ \
-  name: CEPH_XATTR_NAME(_type, _name), \
-  getxattr_cb: &Client::_vxattrcb_ ## _type ## _ ## _name, \
-  readonly: true, \
-  exists_cb: NULL, \
-  flags: 0, \
-}
-#define XATTR_NAME_CEPH2(_type, _name, _flags) \
+#define XATTR_NAME_CEPH(_type, _name, _flags) \
 { \
   name: CEPH_XATTR_NAME(_type, _name), \
   getxattr_cb: &Client::_vxattrcb_ ## _type ## _ ## _name, \
@@ -12047,14 +12078,14 @@ const Client::VXattr Client::_dir_vxattrs[] = {
   XATTR_LAYOUT_FIELD(dir, layout, object_size),
   XATTR_LAYOUT_FIELD(dir, layout, pool),
   XATTR_LAYOUT_FIELD(dir, layout, pool_namespace),
-  XATTR_NAME_CEPH(dir, entries),
-  XATTR_NAME_CEPH(dir, files),
-  XATTR_NAME_CEPH(dir, subdirs),
-  XATTR_NAME_CEPH2(dir, rentries, VXATTR_RSTAT),
-  XATTR_NAME_CEPH2(dir, rfiles, VXATTR_RSTAT),
-  XATTR_NAME_CEPH2(dir, rsubdirs, VXATTR_RSTAT),
-  XATTR_NAME_CEPH2(dir, rbytes, VXATTR_RSTAT),
-  XATTR_NAME_CEPH2(dir, rctime, VXATTR_RSTAT),
+  XATTR_NAME_CEPH(dir, entries, VXATTR_DIRSTAT),
+  XATTR_NAME_CEPH(dir, files, VXATTR_DIRSTAT),
+  XATTR_NAME_CEPH(dir, subdirs, VXATTR_DIRSTAT),
+  XATTR_NAME_CEPH(dir, rentries, VXATTR_RSTAT),
+  XATTR_NAME_CEPH(dir, rfiles, VXATTR_RSTAT),
+  XATTR_NAME_CEPH(dir, rsubdirs, VXATTR_RSTAT),
+  XATTR_NAME_CEPH(dir, rbytes, VXATTR_RSTAT),
+  XATTR_NAME_CEPH(dir, rctime, VXATTR_RSTAT),
   {
     name: "ceph.quota",
     getxattr_cb: &Client::_vxattrcb_quota,
@@ -12104,6 +12135,24 @@ const Client::VXattr Client::_file_vxattrs[] = {
   { name: "" }     /* Required table terminator */
 };
 
+const Client::VXattr Client::_common_vxattrs[] = {
+  {
+    name: "ceph.cluster_fsid",
+    getxattr_cb: &Client::_vxattrcb_cluster_fsid,
+    readonly: true,
+    exists_cb: nullptr,
+    flags: 0,
+  },
+  {
+    name: "ceph.client_id",
+    getxattr_cb: &Client::_vxattrcb_client_id,
+    readonly: true,
+    exists_cb: nullptr,
+    flags: 0,
+  },
+  { name: "" }     /* Required table terminator */
+};
+
 const Client::VXattr *Client::_get_vxattrs(Inode *in)
 {
   if (in->is_dir())
@@ -12124,7 +12173,16 @@ const Client::VXattr *Client::_match_vxattr(Inode *in, const char *name)
         vxattr++;
       }
     }
+
+    // for common vxattrs
+    vxattr = _common_vxattrs;
+    while (!vxattr->name.empty()) {
+      if (vxattr->name == name)
+        return vxattr;
+      vxattr++;
+    }
   }
+
   return NULL;
 }
 
diff --git a/ceph/src/client/Client.h b/ceph/src/client/Client.h
index c8ea7aa5f..762a5ca86 100644
--- a/ceph/src/client/Client.h
+++ b/ceph/src/client/Client.h
@@ -778,7 +778,7 @@ protected:
   void _sync_write_commit(Inode *in);
 
   void dump_mds_requests(Formatter *f);
-  void dump_mds_sessions(Formatter *f);
+  void dump_mds_sessions(Formatter *f, bool cap_dump=false);
 
   int make_request(MetaRequest *req, const UserPerm& perms,
                    InodeRef *ptarget = 0, bool *pcreated = 0,
@@ -1007,9 +1007,11 @@ private:
 
   /* Flags for VXattr */
   static const unsigned VXATTR_RSTAT = 0x1;
+  static const unsigned VXATTR_DIRSTAT = 0x2;
 
   static const VXattr _dir_vxattrs[];
   static const VXattr _file_vxattrs[];
+  static const VXattr _common_vxattrs[];
 
@@ -1159,6 +1161,9 @@ private:
   bool _vxattrcb_snap_btime_exists(Inode *in);
   size_t _vxattrcb_snap_btime(Inode *in, char *val, size_t size);
 
+  size_t _vxattrcb_cluster_fsid(Inode *in, char *val, size_t size);
+  size_t _vxattrcb_client_id(Inode *in, char *val, size_t size);
+
   static const VXattr *_get_vxattrs(Inode *in);
   static const VXattr *_match_vxattr(Inode *in, const char *name);
 
diff --git a/ceph/src/client/MetaSession.cc b/ceph/src/client/MetaSession.cc
index 177be1939..b5160a843 100644
--- a/ceph/src/client/MetaSession.cc
+++ b/ceph/src/client/MetaSession.cc
@@ -5,6 +5,7 @@
 #include "messages/MClientCapRelease.h"
 
 #include "MetaSession.h"
+#include "Inode.h"
 
 #include "common/Formatter.h"
 
@@ -21,7 +22,7 @@ const char *MetaSession::get_state_name() const
   }
 }
 
-void MetaSession::dump(Formatter *f) const
+void MetaSession::dump(Formatter *f, bool cap_dump) const
 {
   f->dump_int("mds", mds_num);
   f->dump_object("addrs", addrs);
@@ -31,6 +32,13 @@ void MetaSession::dump(Formatter *f) const
   f->dump_stream("last_cap_renew_request") << last_cap_renew_request;
   f->dump_unsigned("cap_renew_seq", cap_renew_seq);
   f->dump_int("num_caps", caps.size());
+  if (cap_dump) {
+    f->open_array_section("caps");
+    for (const auto& cap : caps) {
+      f->dump_object("cap", *cap);
+    }
+    f->close_section();
+  }
   f->dump_string("state", get_state_name());
 }
 
diff --git a/ceph/src/client/MetaSession.h b/ceph/src/client/MetaSession.h
index c0901305a..c215b2689 100644
--- a/ceph/src/client/MetaSession.h
+++ b/ceph/src/client/MetaSession.h
@@ -66,7 +66,7 @@ struct MetaSession {
 
   const char *get_state_name() const;
 
-  void dump(Formatter *f) const;
+  void dump(Formatter *f, bool cap_dump=false) const;
 
   void enqueue_cap_release(inodeno_t ino, uint64_t cap_id, ceph_seq_t iseq,
                            ceph_seq_t mseq, epoch_t osd_barrier);
diff --git a/ceph/src/client/fuse_ll.cc b/ceph/src/client/fuse_ll.cc
index 82e979ffe..1e6bca4fb 100644
--- a/ceph/src/client/fuse_ll.cc
+++ b/ceph/src/client/fuse_ll.cc
@@ -135,7 +135,8 @@ static int getgroups(fuse_req_t req, gid_t **sgids)
 
 static void get_fuse_groups(UserPerm& perms, fuse_req_t req)
 {
-  if (g_conf().get_val<bool>("fuse_set_user_groups")) {
+  CephFuse::Handle *cfuse = (CephFuse::Handle *)fuse_req_userdata(req);
+  if (cfuse->client->cct->_conf.get_val<bool>("fuse_set_user_groups")) {
     gid_t *gids = NULL;
     int count = getgroups(req, &gids);
 
diff --git a/ceph/src/cls/rgw/cls_rgw.cc b/ceph/src/cls/rgw/cls_rgw.cc
index cba9a2d73..84e536af0 100644
--- a/ceph/src/cls/rgw/cls_rgw.cc
+++ b/ceph/src/cls/rgw/cls_rgw.cc
@@ -623,6 +623,7 @@ static int check_index(cls_method_context_t hctx,
 
   calc_header->tag_timeout = existing_header->tag_timeout;
   calc_header->ver = existing_header->ver;
+  calc_header->syncstopped = existing_header->syncstopped;
 
   map keys;
   string start_obj;
@@ -968,9 +969,7 @@ int rgw_bucket_complete_op(cls_method_context_t hctx, bufferlist *in, bufferlist
   entry.index_ver = header.ver;
   /* resetting entry flags, entry might have been previously a delete
    * marker */
-  entry.flags = (entry.key.instance.empty() ?
-                 0 :
-                 rgw_bucket_dir_entry::FLAG_VER);
+  entry.flags &= rgw_bucket_dir_entry::FLAG_VER;
 
   if (op.tag.size()) {
     map::iterator pinter = entry.pending_map.find(op.tag);
diff --git a/ceph/src/cls/user/cls_user.cc b/ceph/src/cls/user/cls_user.cc
index 64018fa1b..f6f7ab032 100644
--- a/ceph/src/cls/user/cls_user.cc
+++ b/ceph/src/cls/user/cls_user.cc
@@ -414,6 +414,9 @@ static int cls_user_reset_stats(cls_method_context_t hctx,
       }
       add_header_stats(&header.stats, e);
     }
+    if (!keys.empty()) {
+      from_index = keys.rbegin()->first;
+    }
   } while (truncated);
 
   bufferlist bl;
diff --git a/ceph/src/common/buffer.cc b/ceph/src/common/buffer.cc
index 0446ec88c..b98427799 100644
--- a/ceph/src/common/buffer.cc
+++ b/ceph/src/common/buffer.cc
@@ -1165,6 +1165,8 @@ static ceph::spinlock debug_lock;
     std::unique_ptr nb)
   {
     unsigned pos = 0;
+    int mempool = _buffers.front().get_mempool();
+    nb->reassign_to_mempool(mempool);
     for (auto& node : _buffers) {
       nb->copy_in(pos, node.length(), node.c_str(), false);
       pos += node.length();
diff --git a/ceph/src/common/config.cc b/ceph/src/common/config.cc
index dd4958d40..53007f925 100644
--- a/ceph/src/common/config.cc
+++ b/ceph/src/common/config.cc
@@ -1107,17 +1107,19 @@ void md_config_t::early_expand_meta(
 bool md_config_t::finalize_reexpand_meta(ConfigValues& values,
                                          const ConfigTracker& tracker)
 {
-  for (auto& [name, value] : may_reexpand_meta) {
-    set_val(values, tracker, name, value);
-  }
-
-  if (!may_reexpand_meta.empty()) {
-    // meta expands could have modified anything.  Copy it all out again.
-    update_legacy_vals(values);
-    return true;
-  } else {
-    return false;
+  std::vector<std::string> reexpands;
+  reexpands.swap(may_reexpand_meta);
+  for (auto& name : reexpands) {
+    // always refresh the options if they are in the may_reexpand_meta
+    // map, because the options may have already been expanded with old
+    // meta.
+    const auto &opt_iter = schema.find(name);
+    ceph_assert(opt_iter != schema.end());
+    const Option &opt = opt_iter->second;
+    _refresh(values, opt);
   }
+
+  return !may_reexpand_meta.empty();
 }
 
 Option::value_t md_config_t::_expand_meta(
@@ -1201,7 +1203,7 @@ Option::value_t md_config_t::_expand_meta(
   } else if (var == "pid") {
     out += stringify(getpid());
     if (o) {
-      may_reexpand_meta[o->name] = *str;
+      may_reexpand_meta.push_back(o->name);
     }
   } else if (var == "cctid") {
     out += stringify((unsigned long long)this);
diff --git a/ceph/src/common/config.h b/ceph/src/common/config.h
index e07993701..7bbf8bb74 100644
--- a/ceph/src/common/config.h
+++ b/ceph/src/common/config.h
@@ -100,7 +100,7 @@ public:
   std::map ignored_mon_values;
 
   /// original raw values saved that may need to re-expand at certain time
-  mutable std::map<std::string,std::string> may_reexpand_meta;
+  mutable std::vector<std::string> may_reexpand_meta;
 
   /// encoded, cached copy of of values + ignored_mon_values
   ceph::bufferlist values_bl;
diff --git a/ceph/src/common/config_proxy.h b/ceph/src/common/config_proxy.h
index 9c0850fd0..7ca5a54af 100644
--- a/ceph/src/common/config_proxy.h
+++ b/ceph/src/common/config_proxy.h
@@ -199,7 +199,6 @@ public:
     rev_obs_map_t rev_obs;
     if (config.finalize_reexpand_meta(values, obs_mgr)) {
       _gather_changes(values.changed, &rev_obs, nullptr);
-      values.changed.clear();
     }
 
     call_observers(locker, rev_obs);
@@ -256,7 +255,6 @@ public:
      if (!values.cluster.empty()) {
        // meta expands could have modified anything.  Copy it all out again.
        _gather_changes(values.changed, &rev_obs, oss);
-       values.changed.clear();
      }
 
      call_observers(locker, rev_obs);
@@ -268,6 +266,7 @@ public:
       [this, rev_obs](md_config_obs_t *obs, const std::string &key) {
         map_observer_changes(obs, key, rev_obs);
       }, oss);
+    changes.clear();
   }
 
   int set_val(const std::string_view key, const std::string& s, std::stringstream* err_ss=nullptr) {
@@ -290,7 +289,6 @@
     rev_obs_map_t rev_obs;
     _gather_changes(values.changed, &rev_obs, nullptr);
-    values.changed.clear();
 
     call_observers(locker, rev_obs);
 
     return ret;
@@ -301,7 +299,6 @@
     rev_obs_map_t rev_obs;
     _gather_changes(values.changed, &rev_obs, oss);
-    values.changed.clear();
 
     call_observers(locker, rev_obs);
 
     return ret;
diff --git a/ceph/src/common/legacy_config_opts.h b/ceph/src/common/legacy_config_opts.h
index b8bf69595..68364bbb9 100644
--- a/ceph/src/common/legacy_config_opts.h
+++ b/ceph/src/common/legacy_config_opts.h
@@ -898,6 +898,8 @@ OPTION(bdev_nvme_unbind_from_kernel, OPT_BOOL)
 OPTION(bdev_nvme_retry_count, OPT_INT) // -1 means by default which is 4
 OPTION(bdev_enable_discard, OPT_BOOL)
 OPTION(bdev_async_discard, OPT_BOOL)
+OPTION(bdev_flock_retry_interval, OPT_FLOAT)
+OPTION(bdev_flock_retry, OPT_INT)
 
 OPTION(objectstore_blackhole, OPT_BOOL)
 
@@ -1063,6 +1065,7 @@ OPTION(bluestore_debug_enforce_settings, OPT_STR)
 OPTION(bluestore_volume_selection_policy, OPT_STR)
 OPTION(bluestore_volume_selection_reserved_factor, OPT_DOUBLE)
 OPTION(bluestore_volume_selection_reserved, OPT_INT)
+OPTION(bluestore_kv_sync_util_logging_s, OPT_DOUBLE)
 
 OPTION(kstore_max_ops, OPT_U64)
 OPTION(kstore_max_bytes, OPT_U64)
@@ -1443,6 +1446,7 @@ OPTION(rgw_curl_low_speed_limit, OPT_INT) // low speed limit for certain curl ca
 OPTION(rgw_curl_low_speed_time, OPT_INT) // low speed time for certain curl calls
 OPTION(rgw_copy_obj_progress, OPT_BOOL) // should dump progress during long copy operations?
 OPTION(rgw_copy_obj_progress_every_bytes, OPT_INT) // min bytes between copy progress output
+OPTION(rgw_sync_obj_etag_verify, OPT_BOOL) // verify if the copied object from remote is identical to source
 OPTION(rgw_obj_tombstone_cache_size, OPT_INT) // how many objects in tombstone cache, which is used in multi-zone sync to keep
                                               // track of removed objects' mtime
 
diff --git a/ceph/src/common/options.cc b/ceph/src/common/options.cc
index 66d4ee9d0..693ed4c5f 100644
--- a/ceph/src/common/options.cc
+++ b/ceph/src/common/options.cc
@@ -3539,11 +3539,11 @@ std::vector