From b5b8bbf502232d622a7da9967b7d060f200f8a5a Mon Sep 17 00:00:00 2001 From: =?utf8?q?Fabian=20Gr=C3=BCnbichler?= Date: Wed, 30 Aug 2017 08:42:40 +0200 Subject: [PATCH] update sources to v12.2.0 --- ceph/CMakeLists.txt | 4 +- ceph/alpine/APKBUILD | 6 +- ceph/ceph.spec | 8 +- ceph/ceph.spec.in | 2 + ceph/cmake/modules/FindLZ4.cmake | 21 +- ceph/debian/ceph-osd.install | 3 + ceph/debian/changelog | 6 + ceph/doc/ceph-volume/index.rst | 30 + ceph/doc/ceph-volume/intro.rst | 19 + ceph/doc/ceph-volume/lvm/activate.rst | 74 +++ ceph/doc/ceph-volume/lvm/index.rst | 24 + ceph/doc/ceph-volume/lvm/prepare.rst | 145 +++++ ceph/doc/ceph-volume/lvm/scan.rst | 9 + ceph/doc/ceph-volume/lvm/systemd.rst | 46 ++ ceph/doc/conf.py | 2 + ceph/doc/dev/ceph-volume/index.rst | 13 + ceph/doc/dev/ceph-volume/lvm.rst | 127 ++++ ceph/doc/dev/ceph-volume/plugins.rst | 65 ++ ceph/doc/dev/ceph-volume/systemd.rst | 37 ++ ceph/doc/dev/index-old.rst | 1 + ceph/doc/glossary.rst | 77 ++- ceph/doc/index.rst | 1 + ceph/doc/install/manual-deployment.rst | 128 ++-- ceph/doc/man/8/CMakeLists.txt | 2 + ceph/doc/man/8/ceph-volume-systemd.rst | 56 ++ ceph/doc/man/8/ceph-volume.rst | 122 ++++ ceph/doc/man/8/ceph.rst | 14 +- ceph/doc/mgr/administrator.rst | 3 +- .../rados/command/list-inconsistent-obj.json | 14 +- ceph/doc/rados/man/index.rst | 2 + ceph/doc/rbd/rbd-mirroring.rst | 7 + ceph/qa/machine_types/schedule_subset.sh | 12 +- ceph/qa/run-standalone.sh | 114 +++- ceph/qa/standalone/README | 11 +- ceph/qa/standalone/ceph-helpers.sh | 97 ++- .../erasure-code/test-erasure-code.sh | 14 +- .../erasure-code/test-erasure-eio.sh | 22 +- ceph/qa/standalone/misc/test-ceph-helpers.sh | 2 +- ceph/qa/standalone/mon/misc.sh | 4 +- ceph/qa/standalone/mon/mon-bind.sh | 2 +- .../mon/osd-erasure-code-profile.sh | 2 +- ceph/qa/standalone/mon/test_pool_quota.sh | 2 +- ceph/qa/standalone/osd/osd-dup.sh | 4 +- .../qa/standalone/scrub/osd-recovery-scrub.sh | 129 ++++ ceph/qa/standalone/scrub/osd-scrub-repair.sh | 604 ++++++++++++------ ceph/qa/standalone/scrub/osd-scrub-snaps.sh | 12 +- ceph/qa/standalone/special/test-failure.sh | 48 ++ .../powercycle/osd/whitelist_health.yaml | 1 + .../1-jewel-install/jewel.yaml | 2 + .../jewel-x/parallel/0-cluster/start.yaml | 4 +- .../parallel/1-jewel-install/jewel.yaml | 1 + .../parallel/2-workload/rados_loadgenbig.yaml | 11 - .../stress-split/1-jewel-install/jewel.yaml | 2 + ceph/qa/tasks/ceph_deploy.py | 4 +- ceph/qa/tasks/rgw.py | 6 +- ceph/qa/tasks/rgw_multisite.py | 4 +- ceph/qa/tasks/util/rados.py | 18 +- ceph/qa/workunits/mon/crush_ops.sh | 14 + ceph/qa/workunits/rbd/rbd_mirror.sh | 18 + ceph/qa/workunits/rbd/rbd_mirror_helpers.sh | 10 + ceph/src/.git_version | 4 +- .../ceph_volume/devices/lvm/api.py | 118 ++-- .../ceph_volume/devices/lvm/common.py | 6 +- .../ceph_volume/devices/lvm/create.py | 18 +- .../ceph_volume/devices/lvm/prepare.py | 109 ++-- .../ceph-volume/ceph_volume/systemd/main.py | 1 - .../ceph_volume/tests/devices/lvm/test_api.py | 43 +- .../tests/devices/lvm/test_prepare.py | 15 + .../functional/centos7/create/group_vars/all | 4 +- .../functional/xenial/create/group_vars/all | 4 +- .../tests/util/test_arg_validators.py | 24 + .../ceph_volume/util/arg_validators.py | 29 + ceph/src/ceph_mgr.cc | 3 + ceph/src/client/Client.cc | 10 +- ceph/src/client/Client.h | 1 + ceph/src/client/fuse_ll.cc | 11 +- ceph/src/cls/log/cls_log.cc | 4 +- ceph/src/common/AsyncReserver.h | 10 + ceph/src/common/LogClient.cc | 19 +- ceph/src/common/LogClient.h | 2 +- ceph/src/common/ipaddr.cc | 10 +- 
ceph/src/common/options.cc | 2 +- ceph/src/common/pick_address.cc | 31 +- ceph/src/common/pick_address.h | 6 + ceph/src/common/scrub_types.cc | 13 +- ceph/src/common/scrub_types.h | 6 +- ceph/src/compressor/Compressor.cc | 4 + ceph/src/compressor/Compressor.h | 2 + ceph/src/crush/CrushCompiler.cc | 13 +- ceph/src/crush/CrushWrapper.cc | 96 ++- ceph/src/crush/CrushWrapper.h | 4 + ceph/src/include/ipaddr.h | 2 +- ceph/src/include/rados/rados_types.hpp | 9 +- ceph/src/mds/MDSDaemon.cc | 40 +- ceph/src/messages/MStatfs.h | 4 +- ceph/src/mon/FSCommands.cc | 13 +- ceph/src/mon/MonCommands.h | 9 + ceph/src/mon/Monitor.cc | 9 +- ceph/src/mon/OSDMonitor.cc | 102 ++- ceph/src/mon/PGMap.cc | 9 +- ceph/src/mon/mon_types.h | 14 +- ceph/src/os/bluestore/BlueStore.cc | 86 ++- ceph/src/osd/OSD.cc | 27 +- ceph/src/osd/OSDMapMapping.h | 2 +- ceph/src/osd/PG.cc | 18 +- ceph/src/osd/PGBackend.cc | 81 ++- ceph/src/osd/PrimaryLogPG.cc | 4 + ceph/src/pybind/mgr/dashboard/rbd_iscsi.py | 19 +- ceph/src/rgw/rgw_auth_s3.cc | 18 +- ceph/src/rgw/rgw_op.cc | 8 +- ceph/src/rgw/rgw_quota.cc | 3 + ceph/src/rgw/rgw_rest.cc | 2 +- ceph/src/test/CMakeLists.txt | 5 +- ceph/src/test/cli-integration/rbd/unmap.t | 1 + ceph/src/test/test_ipaddr.cc | 34 +- ceph/src/tools/ceph_objectstore_tool.cc | 12 +- ceph/src/tools/rados/rados.cc | 13 +- .../image_replayer/CreateImageRequest.cc | 2 + 118 files changed, 2659 insertions(+), 751 deletions(-) create mode 100644 ceph/doc/ceph-volume/index.rst create mode 100644 ceph/doc/ceph-volume/intro.rst create mode 100644 ceph/doc/ceph-volume/lvm/activate.rst create mode 100644 ceph/doc/ceph-volume/lvm/index.rst create mode 100644 ceph/doc/ceph-volume/lvm/prepare.rst create mode 100644 ceph/doc/ceph-volume/lvm/scan.rst create mode 100644 ceph/doc/ceph-volume/lvm/systemd.rst create mode 100644 ceph/doc/dev/ceph-volume/index.rst create mode 100644 ceph/doc/dev/ceph-volume/lvm.rst create mode 100644 ceph/doc/dev/ceph-volume/plugins.rst create mode 100644 ceph/doc/dev/ceph-volume/systemd.rst create mode 100644 ceph/doc/man/8/ceph-volume-systemd.rst create mode 100644 ceph/doc/man/8/ceph-volume.rst create mode 100755 ceph/qa/standalone/scrub/osd-recovery-scrub.sh create mode 100755 ceph/qa/standalone/special/test-failure.sh delete mode 100644 ceph/qa/suites/upgrade/jewel-x/parallel/2-workload/rados_loadgenbig.yaml create mode 100644 ceph/src/ceph-volume/ceph_volume/tests/util/test_arg_validators.py create mode 100644 ceph/src/ceph-volume/ceph_volume/util/arg_validators.py diff --git a/ceph/CMakeLists.txt b/ceph/CMakeLists.txt index bc7dd87ee..17468f255 100644 --- a/ceph/CMakeLists.txt +++ b/ceph/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 2.8.11) project(ceph) -set(VERSION 12.1.4) +set(VERSION 12.2.0) if(POLICY CMP0046) # Tweak policies (this one disables "missing" dependency warning) @@ -274,7 +274,7 @@ find_package(snappy REQUIRED) option(WITH_LZ4 "LZ4 compression support" OFF) if(WITH_LZ4) - find_package(LZ4 REQUIRED) + find_package(LZ4 1.7 REQUIRED) set(HAVE_LZ4 ${LZ4_FOUND}) endif(WITH_LZ4) diff --git a/ceph/alpine/APKBUILD b/ceph/alpine/APKBUILD index 48999f82c..ac9156f6d 100644 --- a/ceph/alpine/APKBUILD +++ b/ceph/alpine/APKBUILD @@ -1,7 +1,7 @@ # Contributor: John Coyle # Maintainer: John Coyle pkgname=ceph -pkgver=12.1.4 +pkgver=12.2.0 pkgrel=0 pkgdesc="Ceph is a distributed object store and file system" pkgusers="ceph" @@ -63,7 +63,7 @@ makedepends=" xmlstarlet yasm " -source="ceph-12.1.4.tar.bz2" +source="ceph-12.2.0.tar.bz2" subpackages=" $pkgname-base $pkgname-common 
@@ -116,7 +116,7 @@ _sysconfdir=/etc _udevrulesdir=/etc/udev/rules.d _python_sitelib=/usr/lib/python2.7/site-packages -builddir=$srcdir/ceph-12.1.4 +builddir=$srcdir/ceph-12.2.0 build() { export CEPH_BUILD_VIRTUALENV=$builddir diff --git a/ceph/ceph.spec b/ceph/ceph.spec index cd0076237..458191e58 100644 --- a/ceph/ceph.spec +++ b/ceph/ceph.spec @@ -61,7 +61,7 @@ # main package definition ################################################################################# Name: ceph -Version: 12.1.4 +Version: 12.2.0 Release: 0%{?dist} %if 0%{?fedora} || 0%{?rhel} Epoch: 2 @@ -76,7 +76,7 @@ License: LGPL-2.1 and CC-BY-SA-1.0 and GPL-2.0 and BSL-1.0 and BSD-3-Clause and Group: System/Filesystems %endif URL: http://ceph.com/ -Source0: http://ceph.com/download/ceph-12.1.4.tar.bz2 +Source0: http://ceph.com/download/ceph-12.2.0.tar.bz2 %if 0%{?suse_version} %if 0%{?is_opensuse} ExclusiveArch: x86_64 aarch64 ppc64 ppc64le @@ -772,7 +772,7 @@ python-rbd, python-rgw or python-cephfs instead. # common ################################################################################# %prep -%autosetup -p1 -n ceph-12.1.4 +%autosetup -p1 -n ceph-12.2.0 %build %if 0%{with cephfs_java} @@ -975,6 +975,8 @@ rm -rf %{buildroot} %{_mandir}/man8/ceph-detect-init.8* %{_mandir}/man8/ceph-create-keys.8* %{_mandir}/man8/ceph-disk.8* +%{_mandir}/man8/ceph-volume.8* +%{_mandir}/man8/ceph-volume-systemd.8* %{_mandir}/man8/ceph-run.8* %{_mandir}/man8/crushtool.8* %{_mandir}/man8/osdmaptool.8* diff --git a/ceph/ceph.spec.in b/ceph/ceph.spec.in index 66babf6c2..4749bc717 100644 --- a/ceph/ceph.spec.in +++ b/ceph/ceph.spec.in @@ -975,6 +975,8 @@ rm -rf %{buildroot} %{_mandir}/man8/ceph-detect-init.8* %{_mandir}/man8/ceph-create-keys.8* %{_mandir}/man8/ceph-disk.8* +%{_mandir}/man8/ceph-volume.8* +%{_mandir}/man8/ceph-volume-systemd.8* %{_mandir}/man8/ceph-run.8* %{_mandir}/man8/crushtool.8* %{_mandir}/man8/osdmaptool.8* diff --git a/ceph/cmake/modules/FindLZ4.cmake b/ceph/cmake/modules/FindLZ4.cmake index 562586532..27d4bc585 100644 --- a/ceph/cmake/modules/FindLZ4.cmake +++ b/ceph/cmake/modules/FindLZ4.cmake @@ -5,11 +5,30 @@ # LZ4_FOUND # LZ4_INCLUDE_DIR # LZ4_LIBRARY +# LZ4_VERSION_STRING +# LZ4_VERSION_MAJOR +# LZ4_VERSION_MINOR +# LZ4_VERSION_RELEASE find_path(LZ4_INCLUDE_DIR NAMES lz4.h) + +if(LZ4_INCLUDE_DIR AND EXISTS "${LZ4_INCLUDE_DIR}/lz4.h") + foreach(ver "MAJOR" "MINOR" "RELEASE") + file(STRINGS "${LZ4_INCLUDE_DIR}/lz4.h" LZ4_VER_${ver}_LINE + REGEX "^#define[ \t]+LZ4_VERSION_${ver}[ \t]+[0-9]+[ \t]+.*$") + string(REGEX REPLACE "^#define[ \t]+LZ4_VERSION_${ver}[ \t]+([0-9]+)[ \t]+.*$" + "\\1" LZ4_VERSION_${ver} "${LZ4_VER_${ver}_LINE}") + unset(${LZ4_VER_${ver}_LINE}) + endforeach() + set(LZ4_VERSION_STRING + "${LZ4_VERSION_MAJOR}.${LZ4_VERSION_MINOR}.${LZ4_VERSION_RELEASE}") +endif() + find_library(LZ4_LIBRARY NAMES lz4) include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(LZ4 DEFAULT_MSG LZ4_LIBRARY LZ4_INCLUDE_DIR) +find_package_handle_standard_args(LZ4 + REQUIRED_VARS LZ4_LIBRARY LZ4_INCLUDE_DIR + VERSION_VAR LZ4_VERSION_STRING) mark_as_advanced(LZ4_INCLUDE_DIR LZ4_LIBRARY) diff --git a/ceph/debian/ceph-osd.install b/ceph/debian/ceph-osd.install index bda15ad17..262082cfd 100644 --- a/ceph/debian/ceph-osd.install +++ b/ceph/debian/ceph-osd.install @@ -11,8 +11,11 @@ usr/lib/libosd_tp.so* usr/lib/python*/dist-packages/ceph_disk* usr/sbin/ceph-disk usr/sbin/ceph-volume +usr/sbin/ceph-volume-systemd usr/lib/python*/dist-packages/ceph_volume/* usr/lib/python*/dist-packages/ceph_volume-* 
usr/share/man/man8/ceph-clsinfo.8 usr/share/man/man8/ceph-disk.8 +usr/share/man/man8/ceph-volume.8 +usr/share/man/man8/ceph-volume-systemd.8 usr/share/man/man8/ceph-osd.8 diff --git a/ceph/debian/changelog b/ceph/debian/changelog index f6ac9bf92..3fef34d2b 100644 --- a/ceph/debian/changelog +++ b/ceph/debian/changelog @@ -1,3 +1,9 @@ +ceph (12.2.0-1) stable; urgency=medium + + * New upstream release + + -- Ceph Release Team Mon, 28 Aug 2017 16:30:16 +0000 + ceph (12.1.4-1) stable; urgency=medium * New upstream release diff --git a/ceph/doc/ceph-volume/index.rst b/ceph/doc/ceph-volume/index.rst new file mode 100644 index 000000000..d34e80294 --- /dev/null +++ b/ceph/doc/ceph-volume/index.rst @@ -0,0 +1,30 @@ +.. _ceph-volume: + +ceph-volume +=========== +Deploy OSDs with different device technologies like lvm or physical disks using +pluggable tools (:doc:`lvm/index` itself is treated like a plugin). It tries to +follow the workflow of ``ceph-disk`` for deploying OSDs, with a predictable, +and robust way of preparing, activating, and starting OSDs. + +:ref:`Overview ` | +:ref:`Plugin Guide ` | + + +**Command Line Subcommands** +Although currently there is support for ``lvm``, the plan is to support other +technologies, including plain disks. + +* :ref:`ceph-volume-lvm` + +.. toctree:: + :hidden: + :maxdepth: 3 + :caption: Contents: + + intro + lvm/index + lvm/activate + lvm/prepare + lvm/scan + lvm/systemd diff --git a/ceph/doc/ceph-volume/intro.rst b/ceph/doc/ceph-volume/intro.rst new file mode 100644 index 000000000..386914888 --- /dev/null +++ b/ceph/doc/ceph-volume/intro.rst @@ -0,0 +1,19 @@ +.. _ceph-volume-overview: + +Overview +-------- +The ``ceph-volume`` tool aims to be a single purpose command line tool to deploy +logical volumes as OSDs, trying to maintain a similar API to ``ceph-disk`` when +preparing, activating, and creating OSDs. + +It deviates from ``ceph-disk`` by not interacting or relying on the udev rules +that come installed for Ceph. These rules allow automatic detection of +previously setup devices that are in turn fed into ``ceph-disk`` to activate +them. + + +``ceph-volume lvm`` +------------------- +By making use of :term:`LVM tags`, the :ref:`ceph-volume-lvm` sub-command is +able to store and later re-discover and query devices associated with OSDs so +that they can later activated. diff --git a/ceph/doc/ceph-volume/lvm/activate.rst b/ceph/doc/ceph-volume/lvm/activate.rst new file mode 100644 index 000000000..b9f30d69f --- /dev/null +++ b/ceph/doc/ceph-volume/lvm/activate.rst @@ -0,0 +1,74 @@ +.. _ceph-volume-lvm-activate: + +``activate`` +============ +Once :ref:`ceph-volume-lvm-prepare` is completed, and all the various steps +that entails are done, the volume is ready to get "activated". + +This activation process enables a systemd unit that persists the OSD ID and its +UUID (also called ``fsid`` in Ceph CLI tools), so that at boot time it can +understand what OSD is enabled and needs to be mounted. + +.. note:: The execution of this call is fully idempotent, and there is no + side-effects when running multiple times + +New OSDs +-------- +To activate newly prepared OSDs both the :term:`OSD id` and :term:`OSD uuid` +need to be supplied. For example:: + + ceph-volume activate --filestore 0 0263644D-0BF1-4D6D-BC34-28BD98AE3BC8 + +.. note:: The UUID is stored in the ``osd_fsid`` file in the OSD path, which is + generated when :ref:`ceph-volume-lvm-prepare` is used. 
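As a rough illustration of why both pieces are required, the following sketch checks a supplied
OSD uuid against the ``osd_fsid`` file before any activation step proceeds. It is not part of
``ceph-volume`` itself; the function name is illustrative and the path layout simply follows the
``/var/lib/ceph/osd/<cluster>-<id>`` convention described in these documents.

.. code-block:: python

    # Minimal sketch: cross-check an OSD id/uuid pair against the osd_fsid
    # file mentioned in the note above before activating anything. The path
    # follows the documented /var/lib/ceph/osd/<cluster>-<id> convention.
    import os

    def uuid_matches(osd_id, osd_uuid, cluster="ceph"):
        fsid_file = "/var/lib/ceph/osd/{}-{}/osd_fsid".format(cluster, osd_id)
        if not os.path.exists(fsid_file):
            return False  # OSD data dir not prepared (or not mounted yet)
        with open(fsid_file) as f:
            return f.read().strip().lower() == osd_uuid.lower()

    # Example: refuse to activate when the uuid does not match the prepared OSD
    # print(uuid_matches(0, "0263644D-0BF1-4D6D-BC34-28BD98AE3BC8"))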
+ +requiring uuids +^^^^^^^^^^^^^^^ +The :term:`OSD uuid` is being required as an extra step to ensure that the +right OSD is being activated. It is entirely possible that a previous OSD with +the same id exists and would end up activating the incorrect one. + + +Discovery +--------- +With either existing OSDs or new ones being activated, a *discovery* process is +performed using :term:`LVM tags` to enable the systemd units. + +The systemd unit will capture the :term:`OSD id` and :term:`OSD uuid` and +persist it. Internally, the activation will enable it like:: + + systemctl enable ceph-volume@$id-$uuid-lvm + +For example:: + + systemctl enable ceph-volume@0-8715BEB4-15C5-49DE-BA6F-401086EC7B41-lvm + +Would start the discovery process for the OSD with an id of ``0`` and a UUID of +``8715BEB4-15C5-49DE-BA6F-401086EC7B41``. + +.. note:: for more details on the systemd workflow see :ref:`ceph-volume-systemd` + +The systemd unit will look for the matching OSD device, and by looking at its +:term:`LVM tags` will proceed to: + +# mount the device in the corresponding location (by convention this is + ``/var/lib/ceph/osd/-/``) + +# ensure that all required devices are ready for that OSD + +# start the ``ceph-osd@0`` systemd unit + +Existing OSDs +------------- +For exsiting OSDs that have been deployed with different tooling, the only way +to port them over to the new mechanism is to prepare them again (losing data). +See :ref:`ceph-volume-lvm-existing-osds` for details on how to proceed. + +Summary +------- +To recap the ``activate`` process: + +#. require both :term:`OSD id` and :term:`OSD uuid` +#. enable the system unit with matching id and uuid +#. the systemd unit will ensure all devices are ready and mounted (if needed) +#. the matching ``ceph-osd`` systemd unit will get started diff --git a/ceph/doc/ceph-volume/lvm/index.rst b/ceph/doc/ceph-volume/lvm/index.rst new file mode 100644 index 000000000..5c1ef0117 --- /dev/null +++ b/ceph/doc/ceph-volume/lvm/index.rst @@ -0,0 +1,24 @@ +.. _ceph-volume-lvm: + +``lvm`` +======= +Implements the functionality needed to deploy OSDs from the ``lvm`` subcommand: +``ceph-volume lvm`` + +**Command Line Subcommands** + +* :ref:`ceph-volume-lvm-prepare` + +* :ref:`ceph-volume-lvm-activate` + +.. not yet implemented +.. * :ref:`ceph-volume-lvm-scan` + +**Internal functionality** + +There are other aspects of the ``lvm`` subcommand that are internal and not +exposed to the user, these sections explain how these pieces work together, +clarifying the workflows of the tool. + +:ref:`Systemd Units ` | +:ref:`lvm ` diff --git a/ceph/doc/ceph-volume/lvm/prepare.rst b/ceph/doc/ceph-volume/lvm/prepare.rst new file mode 100644 index 000000000..add0f185d --- /dev/null +++ b/ceph/doc/ceph-volume/lvm/prepare.rst @@ -0,0 +1,145 @@ +.. _ceph-volume-lvm-prepare: + +``prepare`` +=========== +This subcommand allows a :term:`filestore` setup (:term:`bluestore` support is +planned) and currently consumes only logical volumes for both the data and +journal. It will not create or modify the logical volumes except for adding +extra metadata. + +.. note:: This is part of a two step process to deploy an OSD. If looking for + a single-call way, please see :ref:`ceph-volume-lvm-create` + +To help identify volumes, the process of preparing a volume (or volumes) to +work with Ceph, the tool will assign a few pieces of metadata information using +:term:`LVM tags`. 
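For readers unfamiliar with LVM tags, the sketch below shows how such metadata could be attached
to a logical volume from Python by shelling out to ``lvchange --addtag``. This only illustrates the
mechanism, it is not the code ``ceph-volume`` runs; the tag names follow the ``ceph.<key>=<value>``
convention documented later, and the volume paths in the example are hypothetical.

.. code-block:: python

    # Illustrative only: attach ceph.* tags to a logical volume with the stock
    # LVM command line. ceph-volume applies equivalent tags during "prepare".
    import subprocess

    def add_ceph_tags(lv_path, **tags):
        for key, value in tags.items():
            # e.g. lvchange --addtag ceph.osd_id=0 /dev/ceph-vg/data-0
            subprocess.check_call(
                ["lvchange", "--addtag",
                 "ceph.{}={}".format(key, value), lv_path])

    # Example (hypothetical volume group and volume names):
    # add_ceph_tags("/dev/ceph-vg/data-0", osd_id=0,
    #               osd_fsid="88ab9018-f84b-4d62-90b4-ce7c076728ff")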
+ +:term:`LVM tags` makes volumes easy to discover later, and help identify them as +part of a Ceph system, and what role they have (journal, filestore, bluestore, +etc...) + +Although initially :term:`filestore` is supported (and supported by default) +the back end can be specified with: + + +* :ref:`--filestore ` +* ``--bluestore`` + +.. when available, this will need to be updated to: +.. * :ref:`--bluestore ` + +.. _ceph-volume-lvm-prepare_filestore: + +``filestore`` +------------- +This is the default OSD backend and allows preparation of logical volumes for +a :term:`filestore` OSD. + +The process is *very* strict, it requires two logical volumes that are ready to +be used. No special preparation is needed for these volumes other than +following the minimum size requirements for data and journal. + +The API call looks like:: + + ceph-volume prepare --filestore --data data --journal journal + +The journal *must* be a logical volume, just like the data volume, and that +argument is always required even if both live under the same group. + +A generated uuid is used to ask the cluster for a new OSD. These two pieces are +crucial for identifying an OSD and will later be used throughout the +:ref:`ceph-volume-lvm-activate` process. + +The OSD data directory is created using the following convention:: + + /var/lib/ceph/osd/- + +At this point the data volume is mounted at this location, and the journal +volume is linked:: + + ln -s /path/to/journal /var/lib/ceph/osd/-/journal + +The monmap is fetched using the bootstrap key from the OSD:: + + /usr/bin/ceph --cluster ceph --name client.bootstrap-osd + --keyring /var/lib/ceph/bootstrap-osd/ceph.keyring + mon getmap -o /var/lib/ceph/osd/-/activate.monmap + +``ceph-osd`` will be called to populate the OSD directory, that is already +mounted, re-using all the pieces of information from the initial steps:: + + ceph-osd --cluster ceph --mkfs --mkkey -i \ + --monmap /var/lib/ceph/osd/-/activate.monmap --osd-data \ + /var/lib/ceph/osd/- --osd-journal /var/lib/ceph/osd/-/journal \ + --osd-uuid --keyring /var/lib/ceph/osd/-/keyring \ + --setuser ceph --setgroup ceph + +.. _ceph-volume-lvm-existing-osds: + +Existing OSDs +------------- +For existing clusters that want to use this new system and have OSDs that are +already running there are a few things to take into account: + +.. warning:: this process will forcefully format the data device, destroying + existing data, if any. + +* OSD paths should follow this convention:: + + /var/lib/ceph/osd/- + +* Preferably, no other mechanisms to mount the volume should exist, and should + be removed (like fstab mount points) +* There is currently no support for encrypted volumes + +The one time process for an existing OSD, with an ID of 0 and +using a ``"ceph"`` cluster name would look like:: + + ceph-volume lvm prepare --filestore --osd-id 0 --osd-fsid E3D291C1-E7BF-4984-9794-B60D9FA139CB + +The command line tool will not contact the monitor to generate an OSD ID and +will format the LVM device in addition to storing the metadata on it so that it +can later be startednot contact the monitor to generate an OSD ID and will +format the LVM device in addition to storing the metadata on it so that it can +later be started (for detailed metadata description see :ref:`ceph-volume-lvm-tags`). + + +.. _ceph-volume-lvm-prepare_bluestore: + +``bluestore`` +------------- +This subcommand is planned but not currently implemented. 
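To make the directory and symlink conventions from the ``filestore`` section above concrete, here
is a small sketch that lays out an OSD data directory and links the journal the way the steps above
describe. It is an assumption-laden illustration rather than the actual ``ceph-volume``
implementation; mounting the data volume and running ``ceph-osd --mkfs`` are left out, and the
device path in the example is hypothetical.

.. code-block:: python

    # Sketch of the layout steps described above: create the OSD data
    # directory following /var/lib/ceph/osd/<cluster>-<id> and symlink the
    # journal volume into it.
    import os

    def layout_osd_dir(osd_id, journal_device, cluster="ceph"):
        osd_dir = "/var/lib/ceph/osd/{}-{}".format(cluster, osd_id)
        os.makedirs(osd_dir)  # the data volume would be mounted here
        os.symlink(journal_device, os.path.join(osd_dir, "journal"))
        return osd_dir

    # Example (hypothetical journal device):
    # layout_osd_dir(0, "/dev/ceph-vg/journal-0")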
+ + +Storing metadata +---------------- +The following tags will get applied as part of the prepartion process +regardless of the type of volume (journal or data) and also regardless of the +OSD backend: + +* ``cluster_fsid`` +* ``data_device`` +* ``journal_device`` +* ``encrypted`` +* ``osd_fsid`` +* ``osd_id`` +* ``block`` +* ``db`` +* ``wal`` +* ``lockbox_device`` + +.. note:: For the complete lvm tag conventions see :ref:`ceph-volume-lvm-tag-api` + + +Summary +------- +To recap the ``prepare`` process: + +#. Accept only logical volumes for data and journal (both required) +#. Generate a UUID for the OSD +#. Ask the monitor get an OSD ID reusing the generated UUID +#. OSD data directory is created and data volume mounted +#. Journal is symlinked from data volume to journal location +#. monmap is fetched for activation +#. devices is mounted and data directory is populated by ``ceph-osd`` +#. data and journal volumes are assigned all the Ceph metadata using lvm tags diff --git a/ceph/doc/ceph-volume/lvm/scan.rst b/ceph/doc/ceph-volume/lvm/scan.rst new file mode 100644 index 000000000..96d2719ac --- /dev/null +++ b/ceph/doc/ceph-volume/lvm/scan.rst @@ -0,0 +1,9 @@ +scan +==== +This sub-command will allow to discover Ceph volumes previously setup by the +tool by looking into the system's logical volumes and their tags. + +As part of the the :ref:`ceph-volume-lvm-prepare` process, the logical volumes are assigned +a few tags with important pieces of information. + +.. note:: This sub-command is not yet implemented diff --git a/ceph/doc/ceph-volume/lvm/systemd.rst b/ceph/doc/ceph-volume/lvm/systemd.rst new file mode 100644 index 000000000..7162e0433 --- /dev/null +++ b/ceph/doc/ceph-volume/lvm/systemd.rst @@ -0,0 +1,46 @@ +.. _ceph-volume-systemd: + +systemd +======= +As part of the :ref:`ceph-volume-lvm-activate` process, a few systemd units will get enabled +that will use the OSD id and uuid as part of their name. These units will be +run when the system boots, and will proceed to activate their corresponding +volumes. + +The API for activation requires both the :term:`OSD id` and :term:`OSD uuid`, +which get persisted by systemd. Internally, the activation process enables the +systemd unit using the following convention:: + + ceph-volume@- + +Where ``type`` is the sub-command used to parse the extra metadata, and ``extra +metadata`` is any additional information needed by the sub-command to be able +to activate the OSD. For example an OSD with an ID of 0, for the ``lvm`` +sub-command would look like:: + + systemctl enable ceph-volume@lvm-0-0A3E1ED2-DA8A-4F0E-AA95-61DEC71768D6 + + +Process +------- +The systemd unit is a :term:`systemd oneshot` service, meant to start at boot after the +local filesystem is ready to be used. + +Upon startup, it will identify the logical volume using :term:`LVM tags`, +finding a matching ID and later ensuring it is the right one with +the :term:`OSD uuid`. 
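As an illustration of that discovery step, the matching volume could be located by listing logical
volumes together with their tags, as in the sketch below. This is only a sketch: the real tool uses
its own LVM wrapper, and the ``lvs`` fields shown here are standard LVM report fields, not anything
specific to Ceph.

.. code-block:: python

    # Sketch: find the logical volume whose ceph.osd_id and ceph.osd_fsid
    # tags match the requested OSD, by parsing plain `lvs` output.
    import subprocess

    def find_osd_lv(osd_id, osd_fsid):
        out = subprocess.check_output(
            ["lvs", "--noheadings", "--separator", ";",
             "-o", "lv_path,lv_tags"])
        for line in out.decode("utf-8").splitlines():
            lv_path, _, tags = line.strip().partition(";")
            tag_set = set(tags.split(","))
            if ("ceph.osd_id={}".format(osd_id) in tag_set and
                    "ceph.osd_fsid={}".format(osd_fsid) in tag_set):
                return lv_path
        return None

    # Example:
    # find_osd_lv(0, "8715BEB4-15C5-49DE-BA6F-401086EC7B41")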
+ +After identifying the correct volume it will then proceed to mount it by using +the OSD destination conventions, that is:: + + /var/lib/ceph/osd/- + +For our example OSD with an id of ``0``, that means the identified device will +be mounted at:: + + + /var/lib/ceph/osd/ceph-0 + +Once that process is complete, a call will be made to start the OSD:: + + systemctl start ceph-osd@0 diff --git a/ceph/doc/conf.py b/ceph/doc/conf.py index c4d8cb073..49b6ecde2 100644 --- a/ceph/doc/conf.py +++ b/ceph/doc/conf.py @@ -18,6 +18,8 @@ if tags.has('man'): 'install/*', 'mon/*', 'rados/*', + 'mgr/*', + 'ceph-volume/*', 'radosgw/*', 'rbd/*', 'start/*'] diff --git a/ceph/doc/dev/ceph-volume/index.rst b/ceph/doc/dev/ceph-volume/index.rst new file mode 100644 index 000000000..b6f9dc045 --- /dev/null +++ b/ceph/doc/dev/ceph-volume/index.rst @@ -0,0 +1,13 @@ +=================================== +ceph-volume developer documentation +=================================== + +.. rubric:: Contents + +.. toctree:: + :maxdepth: 1 + + + plugins + lvm + systemd diff --git a/ceph/doc/dev/ceph-volume/lvm.rst b/ceph/doc/dev/ceph-volume/lvm.rst new file mode 100644 index 000000000..f89424a22 --- /dev/null +++ b/ceph/doc/dev/ceph-volume/lvm.rst @@ -0,0 +1,127 @@ + +.. _ceph-volume-lvm-api: + +LVM +=== +The backend of ``ceph-volume lvm`` is LVM, it relies heavily on the usage of +tags, which is a way for LVM to allow extending its volume metadata. These +values can later be queried against devices and it is how they get discovered +later. + +.. warning:: These APIs are not meant to be public, but are documented so that + it is clear what the tool is doing behind the scenes. Do not alter + any of these values. + + +.. _ceph-volume-lvm-tag-api: + +Tag API +------- +The process of identifying logical volumes as part of Ceph relies on applying +tags on all volumes. It follows a naming convention for the namespace that +looks like:: + + ceph.= + +All tags are prefixed by the ``ceph`` keyword do claim ownership of that +namespace and make it easily identifiable. This is how the OSD ID would be used +in the context of lvm tags:: + + ceph.osd_id=0 + + +.. _ceph-volume-lvm-tags: + +Metadata +-------- +The following describes all the metadata from Ceph OSDs that is stored on an +LVM volume: + + +``type`` +-------- +Describes if the device is a an OSD or Journal, with the ability to expand to +other types when supported (for example a lockbox) + +Example:: + + ceph.type=osd + + +``cluster_fsid`` +---------------- +Example:: + + ceph.cluster_fsid=7146B649-AE00-4157-9F5D-1DBFF1D52C26 + +``data_device`` +--------------- +Example:: + + ceph.data_device=/dev/ceph/data-0 + +``journal_device`` +------------------ +Example:: + + ceph.journal_device=/dev/ceph/journal-0 + +``encrypted`` +------------- +Example for enabled encryption with ``luks``:: + + ceph.encrypted=luks + +For plain dmcrypt:: + + ceph.encrypted=dmcrypt + +For disabled encryption:: + + ceph.encrypted=0 + +``osd_fsid`` +------------ +Example:: + + ceph.osd_fsid=88ab9018-f84b-4d62-90b4-ce7c076728ff + +``osd_id`` +---------- +Example:: + + ceph.osd_id=1 + +``block`` +--------- +Just used on :term:`bluestore` backends. + +Example:: + + ceph.block=/dev/mapper/vg-block-0 + +``db`` +------ +Just used on :term:`bluestore` backends. + +Example:: + + ceph.db=/dev/mapper/vg-db-0 + +``wal`` +------- +Just used on :term:`bluestore` backends. 
+ +Example:: + + ceph.wal=/dev/mapper/vg-wal-0 + + +``lockbox_device`` +------------------ +Only used when encryption is enabled, to store keys in an unencrypted +volume. + +Example:: + + ceph.lockbox_device=/dev/mapper/vg-lockbox-0 diff --git a/ceph/doc/dev/ceph-volume/plugins.rst b/ceph/doc/dev/ceph-volume/plugins.rst new file mode 100644 index 000000000..95bc761e2 --- /dev/null +++ b/ceph/doc/dev/ceph-volume/plugins.rst @@ -0,0 +1,65 @@ +.. _ceph-volume-plugins: + +Plugins +======= +``ceph-volume`` started initially to provide support for using ``lvm`` as +the underlying system for an OSD. It is included as part of the tool but it is +treated like a plugin. + +This modularity, allows for other device or device-like technologies to be able +to consume and re-use the utilities and workflows provided. + +Adding Plugins +-------------- +As a Python tool, plugins ``setuptools`` entry points. For a new plugin to be +available, it should have an entry similar to this in its ``setup.py`` file: + +.. code-block:: python + + setup( + ... + entry_points = dict( + ceph_volume_handlers = [ + 'my_command = my_package.my_module:MyClass', + ], + ), + +The ``MyClass`` should be a class that accepts ``sys.argv`` as its argument, +``ceph-volume`` will pass that in at instantiation and call them ``main`` +method. + +This is how a plugin for ``ZFS`` could look like for example: + +.. code-block:: python + + class ZFS(object): + + help_menu = 'Deploy OSDs with ZFS' + _help = """ + Use ZFS as the underlying technology for OSDs + + --verbose Increase the verbosity level + """ + + def __init__(self, argv): + self.argv = argv + + def main(self): + parser = argparse.ArgumentParser() + args = parser.parse_args(self.argv) + ... + +And its entry point (via ``setuptools``) in ``setup.py`` would looke like: + +.. code-block:: python + + entry_points = { + 'ceph_volume_handlers': [ + 'zfs = ceph_volume_zfs.zfs:ZFS', + ], + }, + +After installation, the ``zfs`` subcommand would be listed and could be used +as:: + + ceph-volume zfs diff --git a/ceph/doc/dev/ceph-volume/systemd.rst b/ceph/doc/dev/ceph-volume/systemd.rst new file mode 100644 index 000000000..8553430ee --- /dev/null +++ b/ceph/doc/dev/ceph-volume/systemd.rst @@ -0,0 +1,37 @@ +.. _ceph-volume-systemd-api: + +systemd +======= +The workflow to *"activate"* an OSD is by relying on systemd unit files and its +ability to persist information as a suffix to the instance name. + +``ceph-volume`` exposes the following convention for unit files:: + + ceph-volume@- + +For example, this is how enabling an OSD could look like for the +:ref:`ceph-volume-lvm` sub command:: + + systemctl enable ceph-volume@lvm-0-8715BEB4-15C5-49DE-BA6F-401086EC7B41 + + +These 3 pieces of persisted information are needed by the sub-command so that +it understands what OSD it needs to activate. + +Since ``lvm`` is not the only subcommand that will be supported, this +is how it will allow other device types to be defined. + +At some point for example, for plain disks, it could be:: + + systemctl enable ceph-volume@disk-0-8715BEB4-15C5-49DE-BA6F-401086EC7B41 + +At startup, the systemd unit will execute a helper script that will parse the +suffix and will end up calling ``ceph-volume`` back. Using the previous +example for lvm, that call will look like:: + + ceph-volume lvm activate 0 8715BEB4-15C5-49DE-BA6F-401086EC7B41 + + +.. warning:: These workflows are not meant to be public, but are documented so that + it is clear what the tool is doing behind the scenes. 
Do not alter + any of these values. diff --git a/ceph/doc/dev/index-old.rst b/ceph/doc/dev/index-old.rst index 9e8f7c163..8192516b6 100644 --- a/ceph/doc/dev/index-old.rst +++ b/ceph/doc/dev/index-old.rst @@ -39,3 +39,4 @@ in the body of the message. osd_internals/index* mds_internals/index* radosgw/index* + ceph-volume/index* diff --git a/ceph/doc/glossary.rst b/ceph/doc/glossary.rst index 9c2d4e81f..82546408a 100644 --- a/ceph/doc/glossary.rst +++ b/ceph/doc/glossary.rst @@ -4,7 +4,7 @@ Ceph is growing rapidly. As firms deploy Ceph, the technical terms such as "RADOS", "RBD," "RGW" and so forth require corresponding marketing terms -that explain what each component does. The terms in this glossary are +that explain what each component does. The terms in this glossary are intended to complement the existing technical terminology. Sometimes more than one term applies to a definition. Generally, the first @@ -12,21 +12,21 @@ term reflects a term consistent with Ceph's marketing, and secondary terms reflect either technical terms or legacy ways of referring to Ceph systems. -.. glossary:: +.. glossary:: Ceph Project - The aggregate term for the people, software, mission and infrastructure + The aggregate term for the people, software, mission and infrastructure of Ceph. - + cephx The Ceph authentication protocol. Cephx operates like Kerberos, but it has no single point of failure. Ceph Ceph Platform - All Ceph software, which includes any piece of code hosted at + All Ceph software, which includes any piece of code hosted at `http://github.com/ceph`_. - + Ceph System Ceph Stack A collection of two or more components of Ceph. @@ -35,7 +35,7 @@ reflect either technical terms or legacy ways of referring to Ceph systems. Node Host Any single machine or server in a Ceph System. - + Ceph Storage Cluster Ceph Object Store RADOS @@ -45,7 +45,7 @@ reflect either technical terms or legacy ways of referring to Ceph systems. Ceph Cluster Map cluster map - The set of maps comprising the monitor map, OSD map, PG map, MDS map and + The set of maps comprising the monitor map, OSD map, PG map, MDS map and CRUSH map. See `Cluster Map`_ for details. Ceph Object Storage @@ -56,13 +56,13 @@ reflect either technical terms or legacy ways of referring to Ceph systems. RADOS Gateway RGW The S3/Swift gateway component of Ceph. - + Ceph Block Device RBD The block storage component of Ceph. - + Ceph Block Storage - The block storage "product," service or capabilities when used in + The block storage "product," service or capabilities when used in conjunction with ``librbd``, a hypervisor such as QEMU or Xen, and a hypervisor abstraction layer such as ``libvirt``. @@ -73,7 +73,7 @@ reflect either technical terms or legacy ways of referring to Ceph systems. Cloud Platforms Cloud Stacks - Third party cloud provisioning platforms such as OpenStack, CloudStack, + Third party cloud provisioning platforms such as OpenStack, CloudStack, OpenNebula, ProxMox, etc. Object Storage Device @@ -82,7 +82,7 @@ reflect either technical terms or legacy ways of referring to Ceph systems. Sometimes, Ceph users use the term "OSD" to refer to :term:`Ceph OSD Daemon`, though the proper term is "Ceph OSD". - + Ceph OSD Daemon Ceph OSD Daemons Ceph OSD @@ -90,7 +90,29 @@ reflect either technical terms or legacy ways of referring to Ceph systems. disk (:term:`OSD`). Sometimes, Ceph users use the term "OSD" to refer to "Ceph OSD Daemon", though the proper term is "Ceph OSD". - + + OSD id + The integer that defines an OSD. 
It is generated by the monitors as part + of the creation of a new OSD. + + OSD fsid + This is a unique identifier used to further improve the uniqueness of an + OSD and it is found in the OSD path in a file called ``osd_fsid``. This + ``fsid`` term is used interchangeably with ``uuid`` + + OSD uuid + Just like the OSD fsid, this is the OSD unique identifer and is used + interchangeably with ``fsid`` + + bluestore + OSD BlueStore is a new back end for OSD daemons (kraken and newer + versions). Unlike :term:`filestore` it stores objects directly on the + Ceph block devices without any file system interface. + + filestore + A back end for OSD daemons, where a Journal is needed and files are + written to the filesystem. + Ceph Monitor MON The Ceph monitor software. @@ -106,22 +128,22 @@ reflect either technical terms or legacy ways of referring to Ceph systems. Ceph Clients Ceph Client - The collection of Ceph components which can access a Ceph Storage - Cluster. These include the Ceph Object Gateway, the Ceph Block Device, - the Ceph Filesystem, and their corresponding libraries, kernel modules, + The collection of Ceph components which can access a Ceph Storage + Cluster. These include the Ceph Object Gateway, the Ceph Block Device, + the Ceph Filesystem, and their corresponding libraries, kernel modules, and FUSEs. Ceph Kernel Modules - The collection of kernel modules which can be used to interact with the + The collection of kernel modules which can be used to interact with the Ceph System (e.g,. ``ceph.ko``, ``rbd.ko``). Ceph Client Libraries - The collection of libraries that can be used to interact with components + The collection of libraries that can be used to interact with components of the Ceph System. Ceph Release Any distinct numbered version of Ceph. - + Ceph Point Release Any ad-hoc release that includes only bug or security fixes. @@ -130,11 +152,11 @@ reflect either technical terms or legacy ways of referring to Ceph systems. testing, but may contain new features. Ceph Release Candidate - A major version of Ceph that has undergone initial quality assurance + A major version of Ceph that has undergone initial quality assurance testing and is ready for beta testers. Ceph Stable Release - A major version of Ceph where all features from the preceding interim + A major version of Ceph where all features from the preceding interim releases have been put through quality assurance testing successfully. Ceph Test Framework @@ -144,7 +166,7 @@ reflect either technical terms or legacy ways of referring to Ceph systems. CRUSH Controlled Replication Under Scalable Hashing. It is the algorithm Ceph uses to compute object storage locations. - + ruleset A set of CRUSH data placement rules that applies to a particular pool(s). @@ -152,5 +174,14 @@ reflect either technical terms or legacy ways of referring to Ceph systems. Pools Pools are logical partitions for storing objects. + systemd oneshot + A systemd ``type`` where a command is defined in ``ExecStart`` which will + exit upon completion (it is not intended to daemonize) + + LVM tags + Extensible metadata for LVM volumes and groups. It is used to store + Ceph-specific information about devices and its relationship with + OSDs. + .. _http://github.com/ceph: http://github.com/ceph .. _Cluster Map: ../architecture#cluster-map diff --git a/ceph/doc/index.rst b/ceph/doc/index.rst index 253e2a4f5..ad2bd7193 100644 --- a/ceph/doc/index.rst +++ b/ceph/doc/index.rst @@ -102,6 +102,7 @@ about Ceph, see our `Architecture`_ section. 
api/index architecture Development + ceph-volume/index release-notes releases Glossary diff --git a/ceph/doc/install/manual-deployment.rst b/ceph/doc/install/manual-deployment.rst index 2e8bb8672..0d789a62c 100644 --- a/ceph/doc/install/manual-deployment.rst +++ b/ceph/doc/install/manual-deployment.rst @@ -12,12 +12,12 @@ default, so it's useful to know about them when setting up your cluster for production. Following the same configuration as `Installation (Quick)`_, we will set up a -cluster with ``node1`` as the monitor node, and ``node2`` and ``node3`` for +cluster with ``node1`` as the monitor node, and ``node2`` and ``node3`` for OSD nodes. -.. ditaa:: +.. ditaa:: /------------------\ /----------------\ | Admin Node | | node1 | | +-------->+ | @@ -43,51 +43,51 @@ Monitor Bootstrapping Bootstrapping a monitor (a Ceph Storage Cluster, in theory) requires a number of things: -- **Unique Identifier:** The ``fsid`` is a unique identifier for the cluster, - and stands for File System ID from the days when the Ceph Storage Cluster was - principally for the Ceph Filesystem. Ceph now supports native interfaces, - block devices, and object storage gateway interfaces too, so ``fsid`` is a +- **Unique Identifier:** The ``fsid`` is a unique identifier for the cluster, + and stands for File System ID from the days when the Ceph Storage Cluster was + principally for the Ceph Filesystem. Ceph now supports native interfaces, + block devices, and object storage gateway interfaces too, so ``fsid`` is a bit of a misnomer. - **Cluster Name:** Ceph clusters have a cluster name, which is a simple string without spaces. The default cluster name is ``ceph``, but you may specify - a different cluster name. Overriding the default cluster name is - especially useful when you are working with multiple clusters and you need to - clearly understand which cluster your are working with. - - For example, when you run multiple clusters in a `federated architecture`_, + a different cluster name. Overriding the default cluster name is + especially useful when you are working with multiple clusters and you need to + clearly understand which cluster your are working with. + + For example, when you run multiple clusters in a `federated architecture`_, the cluster name (e.g., ``us-west``, ``us-east``) identifies the cluster for - the current CLI session. **Note:** To identify the cluster name on the - command line interface, specify the Ceph configuration file with the + the current CLI session. **Note:** To identify the cluster name on the + command line interface, specify the Ceph configuration file with the cluster name (e.g., ``ceph.conf``, ``us-west.conf``, ``us-east.conf``, etc.). Also see CLI usage (``ceph --cluster {cluster-name}``). - -- **Monitor Name:** Each monitor instance within a cluster has a unique name. + +- **Monitor Name:** Each monitor instance within a cluster has a unique name. In common practice, the Ceph Monitor name is the host name (we recommend one - Ceph Monitor per host, and no commingling of Ceph OSD Daemons with + Ceph Monitor per host, and no commingling of Ceph OSD Daemons with Ceph Monitors). You may retrieve the short hostname with ``hostname -s``. -- **Monitor Map:** Bootstrapping the initial monitor(s) requires you to - generate a monitor map. The monitor map requires the ``fsid``, the cluster +- **Monitor Map:** Bootstrapping the initial monitor(s) requires you to + generate a monitor map. 
The monitor map requires the ``fsid``, the cluster name (or uses the default), and at least one host name and its IP address. -- **Monitor Keyring**: Monitors communicate with each other via a - secret key. You must generate a keyring with a monitor secret and provide +- **Monitor Keyring**: Monitors communicate with each other via a + secret key. You must generate a keyring with a monitor secret and provide it when bootstrapping the initial monitor(s). - + - **Administrator Keyring**: To use the ``ceph`` CLI tools, you must have a ``client.admin`` user. So you must generate the admin user and keyring, and you must also add the ``client.admin`` user to the monitor keyring. -The foregoing requirements do not imply the creation of a Ceph Configuration -file. However, as a best practice, we recommend creating a Ceph configuration +The foregoing requirements do not imply the creation of a Ceph Configuration +file. However, as a best practice, we recommend creating a Ceph configuration file and populating it with the ``fsid``, the ``mon initial members`` and the ``mon host`` settings. You can get and set all of the monitor settings at runtime as well. However, -a Ceph Configuration file may contain only those settings that override the +a Ceph Configuration file may contain only those settings that override the default values. When you add settings to a Ceph configuration file, these -settings override the default settings. Maintaining those settings in a +settings override the default settings. Maintaining those settings in a Ceph configuration file makes it easier to maintain your cluster. The procedure is as follows: @@ -97,52 +97,52 @@ The procedure is as follows: ssh {hostname} - For example:: + For example:: ssh node1 -#. Ensure you have a directory for the Ceph configuration file. By default, - Ceph uses ``/etc/ceph``. When you install ``ceph``, the installer will +#. Ensure you have a directory for the Ceph configuration file. By default, + Ceph uses ``/etc/ceph``. When you install ``ceph``, the installer will create the ``/etc/ceph`` directory automatically. :: - ls /etc/ceph + ls /etc/ceph **Note:** Deployment tools may remove this directory when purging a cluster (e.g., ``ceph-deploy purgedata {node-name}``, ``ceph-deploy purge {node-name}``). -#. Create a Ceph configuration file. By default, Ceph uses +#. Create a Ceph configuration file. By default, Ceph uses ``ceph.conf``, where ``ceph`` reflects the cluster name. :: sudo vim /etc/ceph/ceph.conf -#. Generate a unique ID (i.e., ``fsid``) for your cluster. :: +#. Generate a unique ID (i.e., ``fsid``) for your cluster. :: uuidgen - -#. Add the unique ID to your Ceph configuration file. :: + +#. Add the unique ID to your Ceph configuration file. :: fsid = {UUID} - For example:: + For example:: fsid = a7f64266-0894-4f1e-a635-d0aeaca0e993 -#. Add the initial monitor(s) to your Ceph configuration file. :: +#. Add the initial monitor(s) to your Ceph configuration file. :: mon initial members = {hostname}[,{hostname}] - For example:: + For example:: mon initial members = node1 -#. Add the IP address(es) of the initial monitor(s) to your Ceph configuration - file and save the file. :: +#. Add the IP address(es) of the initial monitor(s) to your Ceph configuration + file and save the file. :: mon host = {ip-address}[,{ip-address}] @@ -160,18 +160,18 @@ The procedure is as follows: #. Generate an administrator keyring, generate a ``client.admin`` user and add - the user to the keyring. :: + the user to the keyring. 
:: sudo ceph-authtool --create-keyring /etc/ceph/ceph.client.admin.keyring --gen-key -n client.admin --set-uid=0 --cap mon 'allow *' --cap osd 'allow *' --cap mds 'allow *' --cap mgr 'allow *' -#. Add the ``client.admin`` key to the ``ceph.mon.keyring``. :: +#. Add the ``client.admin`` key to the ``ceph.mon.keyring``. :: ceph-authtool /tmp/ceph.mon.keyring --import-keyring /etc/ceph/ceph.client.admin.keyring -#. Generate a monitor map using the hostname(s), host IP address(es) and the FSID. - Save it as ``/tmp/monmap``:: +#. Generate a monitor map using the hostname(s), host IP address(es) and the FSID. + Save it as ``/tmp/monmap``:: monmaptool --create --add {hostname} {ip-address} --fsid {uuid} /tmp/monmap @@ -199,7 +199,7 @@ The procedure is as follows: sudo -u ceph ceph-mon --mkfs -i node1 --monmap /tmp/monmap --keyring /tmp/ceph.mon.keyring -#. Consider settings for a Ceph configuration file. Common settings include +#. Consider settings for a Ceph configuration file. Common settings include the following:: [global] @@ -215,7 +215,7 @@ The procedure is as follows: osd pool default size = {n} # Write an object n times. osd pool default min size = {n} # Allow writing n copy in a degraded state. osd pool default pg num = {n} - osd pool default pgp num = {n} + osd pool default pgp num = {n} osd crush chooseleaf type = {n} In the foregoing example, the ``[global]`` section of the configuration might @@ -233,7 +233,7 @@ The procedure is as follows: osd pool default size = 2 osd pool default min size = 1 osd pool default pg num = 333 - osd pool default pgp num = 333 + osd pool default pgp num = 333 osd crush chooseleaf type = 1 #. Touch the ``done`` file. @@ -271,13 +271,13 @@ The procedure is as follows: 0 data,1 metadata,2 rbd, -#. Verify that the monitor is running. :: +#. Verify that the monitor is running. :: ceph -s You should see output that the monitor you started is up and running, and you should see a health error indicating that placement groups are stuck - inactive. It should look something like this:: + inactive. It should look something like this:: cluster a7f64266-0894-4f1e-a635-d0aeaca0e993 health HEALTH_ERR 192 pgs stuck inactive; 192 pgs stuck unclean; no osds @@ -295,7 +295,7 @@ Manager daemon configuration On each node where you run a ceph-mon daemon, you should also set up a ceph-mgr daemon. -See :doc:`../mgr/administrator` +See :ref:`mgr-administrator-guide` Adding OSDs =========== @@ -304,7 +304,7 @@ Once you have your initial monitor(s) running, you should add OSDs. Your cluster cannot reach an ``active + clean`` state until you have enough OSDs to handle the number of copies of an object (e.g., ``osd pool default size = 2`` requires at least two OSDs). After bootstrapping your monitor, your cluster has a default -CRUSH map; however, the CRUSH map doesn't have any Ceph OSD Daemons mapped to +CRUSH map; however, the CRUSH map doesn't have any Ceph OSD Daemons mapped to a Ceph Node. @@ -314,7 +314,7 @@ Short Form Ceph provides the ``ceph-disk`` utility, which can prepare a disk, partition or directory for use with Ceph. The ``ceph-disk`` utility creates the OSD ID by incrementing the index. Additionally, ``ceph-disk`` will add the new OSD to the -CRUSH map under the host for you. Execute ``ceph-disk -h`` for CLI details. +CRUSH map under the host for you. Execute ``ceph-disk -h`` for CLI details. The ``ceph-disk`` utility automates the steps of the `Long Form`_ below. 
To create the first two OSDs with the short form procedure, execute the following on ``node2`` and ``node3``: @@ -335,7 +335,7 @@ on ``node2`` and ``node3``: sudo ceph-disk activate {data-path} [--activate-key {path}] - For example:: + For example:: sudo ceph-disk activate /dev/hdd1 @@ -372,7 +372,7 @@ OSDs with the long form procedure, execute the following steps for each OSD. ``client.bootstrap-osd`` key is present on the machine. You may alternatively execute this command as ``client.admin`` on a different host where that key is present.:: - + ID=$(echo "{\"cephx_secret\": \"$OSD_SECRET\"}" | \ ceph osd new $UUID -i - \ -n client.bootstrap-osd -k /var/lib/ceph/bootstrap-osd/ceph.keyring) @@ -381,7 +381,7 @@ OSDs with the long form procedure, execute the following steps for each OSD. mkdir /var/lib/ceph/osd/ceph-$ID -#. If the OSD is for a drive other than the OS drive, prepare it +#. If the OSD is for a drive other than the OS drive, prepare it for use with Ceph, and mount it to the directory you just created. :: mkfs.xfs /dev/{DEV} @@ -400,15 +400,15 @@ OSDs with the long form procedure, execute the following steps for each OSD. chown -R ceph:ceph /var/lib/ceph/osd/ceph-$ID -#. After you add an OSD to Ceph, the OSD is in your configuration. However, - it is not yet running. You must start +#. After you add an OSD to Ceph, the OSD is in your configuration. However, + it is not yet running. You must start your new OSD before it can begin receiving data. For modern systemd distributions:: systemctl enable ceph-osd@$ID systemctl start ceph-osd@$ID - + For example:: systemctl enable ceph-osd@12 @@ -427,11 +427,11 @@ In the below instructions, ``{id}`` is an arbitrary name, such as the hostname o #. Create a keyring.:: ceph-authtool --create-keyring /var/lib/ceph/mds/{cluster-name}-{id}/keyring --gen-key -n mds.{id} - + #. Import the keyring and set caps.:: ceph auth add mds.{id} osd "allow rwx" mds "allow" mon "allow profile mds" -i /var/lib/ceph/mds/{cluster}-{id}/keyring - + #. Add to ceph.conf.:: [mds.{id}] @@ -458,24 +458,24 @@ Summary ======= Once you have your monitor and two OSDs up and running, you can watch the -placement groups peer by executing the following:: +placement groups peer by executing the following:: ceph -w -To view the tree, execute the following:: +To view the tree, execute the following:: ceph osd tree - -You should see output that looks something like this:: + +You should see output that looks something like this:: # id weight type name up/down reweight -1 2 root default -2 2 host node1 0 1 osd.0 up 1 -3 1 host node2 - 1 1 osd.1 up 1 + 1 1 osd.1 up 1 -To add (or remove) additional monitors, see `Add/Remove Monitors`_. +To add (or remove) additional monitors, see `Add/Remove Monitors`_. To add (or remove) additional Ceph OSD Daemons, see `Add/Remove OSDs`_. 
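The long-form bootstrap above is easy to script. The sketch below wraps just the ID-allocation step
(``uuidgen``/``ceph-authtool``/``ceph osd new``) with Python's ``subprocess``; it assumes Python 3,
the documented bootstrap keyring path, and is meant purely as an illustration of the flow rather
than a supported tool.

.. code-block:: python

    # Sketch of the "ceph osd new" step from the long form above: generate a
    # uuid and cephx secret, then ask the monitors for a fresh OSD id.
    # Requires Python 3 for check_output(..., input=...).
    import json
    import subprocess
    import uuid

    def allocate_osd(keyring="/var/lib/ceph/bootstrap-osd/ceph.keyring"):
        osd_uuid = str(uuid.uuid4())
        secret = subprocess.check_output(
            ["ceph-authtool", "--gen-print-key"]).decode("utf-8").strip()
        payload = json.dumps({"cephx_secret": secret})
        osd_id = subprocess.check_output(
            ["ceph", "osd", "new", osd_uuid, "-i", "-",
             "-n", "client.bootstrap-osd", "-k", keyring],
            input=payload.encode("utf-8")).decode("utf-8").strip()
        return int(osd_id), osd_uuid, secret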
diff --git a/ceph/doc/man/8/CMakeLists.txt b/ceph/doc/man/8/CMakeLists.txt index 8a2204c71..fd6bbae58 100644 --- a/ceph/doc/man/8/CMakeLists.txt +++ b/ceph/doc/man/8/CMakeLists.txt @@ -23,6 +23,8 @@ set(osd_srcs ceph-clsinfo.rst ceph-detect-init.rst ceph-disk.rst + ceph-volume.rst + ceph-volume-systemd.rst ceph-osd.rst osdmaptool.rst) diff --git a/ceph/doc/man/8/ceph-volume-systemd.rst b/ceph/doc/man/8/ceph-volume-systemd.rst new file mode 100644 index 000000000..1a7b481be --- /dev/null +++ b/ceph/doc/man/8/ceph-volume-systemd.rst @@ -0,0 +1,56 @@ +:orphan: + +======================================================= + ceph-volume-systemd -- systemd ceph-volume helper tool +======================================================= + +.. program:: ceph-volume-systemd + +Synopsis +======== + +| **ceph-volume-systemd** *systemd instance name* + + +Description +=========== +:program:`ceph-volume-systemd` is a systemd helper tool that receives input +from (dynamically created) systemd units so that activation of OSDs can +proceed. + +It translates the input into a system call to ceph-volume for activation +purposes only. + + +Examples +======== +Its input is the ``systemd instance name`` (represented by ``%i`` in a systemd +unit), and it should be in the following format:: + + - + +In the case of ``lvm`` a call could look like:: + + /usr/bin/ceph-volume-systemd lvm-0-8715BEB4-15C5-49DE-BA6F-401086EC7B41 + +Which in turn will call ``ceph-volume`` in the following way:: + + ceph-volume lvm trigger 0-8715BEB4-15C5-49DE-BA6F-401086EC7B41 + +Any other subcommand will need to have implemented a ``trigger`` command that +can consume the extra metadata in this format. + + +Availability +============ + +:program:`ceph-volume-systemd` is part of Ceph, a massively scalable, +open-source, distributed storage system. Please refer to the documentation at +http://docs.ceph.com/ for more information. + + +See also +======== + +:doc:`ceph-osd `\(8), +:doc:`ceph-disk `\(8), diff --git a/ceph/doc/man/8/ceph-volume.rst b/ceph/doc/man/8/ceph-volume.rst new file mode 100644 index 000000000..431e82cf1 --- /dev/null +++ b/ceph/doc/man/8/ceph-volume.rst @@ -0,0 +1,122 @@ +:orphan: + +======================================== + ceph-volume -- Ceph OSD deployment tool +======================================== + +.. program:: ceph-volume + +Synopsis +======== + +| **ceph-volume** [-h] [--cluster CLUSTER] [--log-level LOG_LEVEL] +| [--log-path LOG_PATH] + +| **ceph-volume** **lvm** [ *trigger* | *create* | *activate* | *prepare* ] + +Description +=========== + +:program:`ceph-volume` is a single purpose command line tool to deploy logical +volumes as OSDs, trying to maintain a similar API to ``ceph-disk`` when +preparing, activating, and creating OSDs. + +It deviates from ``ceph-disk`` by not interacting or relying on the udev rules +that come installed for Ceph. These rules allow automatic detection of +previously setup devices that are in turn fed into ``ceph-disk`` to activate +them. + + +Commands +======== + +lvm +--- + +By making use of LVM tags, the ``lvm`` sub-command is able to store and later +re-discover and query devices associated with OSDs so that they can later +activated. + +Subcommands: + +**activate** +Enables a systemd unit that persists the OSD ID and its UUID (also called +``fsid`` in Ceph CLI tools), so that at boot time it can understand what OSD is +enabled and needs to be mounted. 
+ +Usage:: + + ceph-volume lvm activate --filestore + +Optional Arguments: + +* [-h, --help] show the help message and exit +* [--bluestore] filestore objectstore (not yet implemented) +* [--filestore] filestore objectstore (current default) + + +**prepare** +Prepares a logical volume to be used as an OSD and journal using a ``filestore`` setup +(``bluestore`` support is planned). It will not create or modify the logical volumes +except for adding extra metadata. + +Usage:: + + ceph-volume lvm prepare --filestore --data --journal + +Optional arguments: + +* [-h, --help] show the help message and exit +* [--journal JOURNAL] A logical group name, path to a logical volume, or path to a device +* [--journal-size GB] Size (in GB) A logical group name or a path to a logical volume +* [--bluestore] Use the bluestore objectstore (not currently supported) +* [--filestore] Use the filestore objectstore (currently the only supported object store) +* [--osd-id OSD_ID] Reuse an existing OSD id +* [--osd-fsid OSD_FSID] Reuse an existing OSD fsid + +Required arguments: + +* --data A logical group name or a path to a logical volume + +**create** +Wraps the two-step process to provision a new osd (calling ``prepare`` first +and then ``activate``) into a single one. The reason to prefer ``prepare`` and +then ``activate`` is to gradually introduce new OSDs into a cluster, and +avoiding large amounts of data being rebalanced. + +The single-call process unifies exactly what ``prepare`` and ``activate`` do, +with the convenience of doing it all at once. Flags and general usage are +equivalent to those of the ``prepare`` subcommand. + +**trigger** +This subcommand is not meant to be used directly, and it is used by systemd so +that it proxies input to ``ceph-volume lvm activate`` by parsing the +input from systemd, detecting the UUID and ID associated with an OSD. + +Usage:: + + ceph-volume lvm trigger + +The systemd "data" is expected to be in the format of:: + + - + +The lvs associated with the OSD need to have been prepared previously, +so that all needed tags and metadata exist. + +Positional arguments: + +* Data from a systemd unit containing ID and UUID of the OSD. + +Availability +============ + +:program:`ceph-volume` is part of Ceph, a massively scalable, open-source, distributed storage system. Please refer to +the documentation at http://docs.ceph.com/ for more information. + + +See also +======== + +:doc:`ceph-osd `\(8), +:doc:`ceph-disk `\(8), diff --git a/ceph/doc/man/8/ceph.rst b/ceph/doc/man/8/ceph.rst index a95b4a7e2..2ae380670 100644 --- a/ceph/doc/man/8/ceph.rst +++ b/ceph/doc/man/8/ceph.rst @@ -560,17 +560,19 @@ Usage:: ceph osd create {} {} -Subcommand ``new`` reuses a previously destroyed OSD *id*. The new OSD will -have the specified *uuid*, and the command expects a JSON file containing -the base64 cephx key for auth entity *client.osd.*, as well as optional -base64 cepx key for dm-crypt lockbox access and a dm-crypt key. Specifying -a dm-crypt requires specifying the accompanying lockbox cephx key. +Subcommand ``new`` can be used to create a new OSD or to recreate a previously +destroyed OSD with a specific *id*. The new OSD will have the specified *uuid*, +and the command expects a JSON file containing the base64 cephx key for auth +entity *client.osd.*, as well as optional base64 cepx key for dm-crypt +lockbox access and a dm-crypt key. Specifying a dm-crypt requires specifying +the accompanying lockbox cephx key. 
Usage:: ceph osd new {} {} -i {} -The secrets JSON file is expected to maintain a form of the following format:: +The secrets JSON file is optional but if provided, is expected to maintain +a form of the following format:: { "cephx_secret": "AQBWtwhZdBO5ExAAIDyjK2Bh16ZXylmzgYYEjg==" diff --git a/ceph/doc/mgr/administrator.rst b/ceph/doc/mgr/administrator.rst index 453cd3fbe..1e8d189da 100644 --- a/ceph/doc/mgr/administrator.rst +++ b/ceph/doc/mgr/administrator.rst @@ -1,3 +1,4 @@ +.. _mgr-administrator-guide: ceph-mgr administrator's guide ============================== @@ -39,7 +40,7 @@ High availability ----------------- In general, you should set up a ceph-mgr on each of the hosts -running a ceph-mon daemon to achieve the same level of availability. +running a ceph-mon daemon to achieve the same level of availability. By default, whichever ceph-mgr instance comes up first will be made active by the monitors, and the others will be standbys. There is diff --git a/ceph/doc/rados/command/list-inconsistent-obj.json b/ceph/doc/rados/command/list-inconsistent-obj.json index b9ee1793f..76ca43e32 100644 --- a/ceph/doc/rados/command/list-inconsistent-obj.json +++ b/ceph/doc/rados/command/list-inconsistent-obj.json @@ -66,7 +66,10 @@ "ec_hash_error", "ec_size_error", "oi_attr_missing", - "oi_attr_corrupted" + "oi_attr_corrupted", + "obj_size_oi_mismatch", + "ss_attr_missing", + "ss_attr_corrupted" ] }, "minItems": 0, @@ -104,6 +107,9 @@ "osd": { "type": "integer" }, + "primary": { + "type": "boolean" + }, "size": { "type": "integer" }, @@ -129,7 +135,10 @@ "ec_hash_error", "ec_size_error", "oi_attr_missing", - "oi_attr_corrupted" + "oi_attr_corrupted", + "obj_size_oi_mismatch", + "ss_attr_missing", + "ss_attr_corrupted" ] }, "minItems": 0, @@ -164,6 +173,7 @@ }, "required": [ "osd", + "primary", "errors" ] } diff --git a/ceph/doc/rados/man/index.rst b/ceph/doc/rados/man/index.rst index 3c789f388..0d0380945 100644 --- a/ceph/doc/rados/man/index.rst +++ b/ceph/doc/rados/man/index.rst @@ -6,6 +6,8 @@ :maxdepth: 1 ../../man/8/ceph-disk.rst + ../../man/8/ceph-volume.rst + ../../man/8/ceph-volume-systemd.rst ../../man/8/ceph.rst ../../man/8/ceph-deploy.rst ../../man/8/ceph-rest-api.rst diff --git a/ceph/doc/rbd/rbd-mirroring.rst b/ceph/doc/rbd/rbd-mirroring.rst index e4db92832..989f1fc32 100644 --- a/ceph/doc/rbd/rbd-mirroring.rst +++ b/ceph/doc/rbd/rbd-mirroring.rst @@ -36,6 +36,13 @@ Ceph clusters. configuration file of the same name (e.g. /etc/ceph/remote.conf). See the `ceph-conf`_ documentation for how to configure multiple clusters. +.. note:: Images in a given pool will be mirrored to a pool with the same name + on the remote cluster. Images using a separate data-pool will use a data-pool + with the same name on the remote cluster. E.g., if an image being mirrored is + in the ``rbd`` pool on the local cluster and using a data-pool called + ``rbd-ec``, pools called ``rbd`` and ``rbd-ec`` must exist on the remote + cluster and will be used for mirroring the image. 
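As a sketch of what the note above implies (pool names are taken from that example; the PG count of 64 and the choice of an erasure-coded data pool are assumptions for illustration only), the remote cluster would need both pools to exist before mirroring such an image::

    ceph --cluster remote osd pool create rbd 64
    ceph --cluster remote osd pool create rbd-ec 64 64 erasure
    ceph --cluster remote osd pool set rbd-ec allow_ec_overwrites true

If the data pool is replicated rather than erasure coded, only the matching pool name is required.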
+ Enable Mirroring ---------------- diff --git a/ceph/qa/machine_types/schedule_subset.sh b/ceph/qa/machine_types/schedule_subset.sh index 30a378d20..c26231aa7 100755 --- a/ceph/qa/machine_types/schedule_subset.sh +++ b/ceph/qa/machine_types/schedule_subset.sh @@ -25,8 +25,8 @@ echo "Scheduling " $2 " branch" if [ $2 = "master" ] ; then - # run master branch with --newest option looking for good sha1 7 builds back - teuthology-suite -v -c $2 -m $3 -k distro -s $4 --subset $(echo "(($(date +%U) % 4) * 7) + $1" | bc)/28 --newest 7 -e $5 $6 + # run master branch with --newest option looking for good sha1 7 builds back with /999 jobs + teuthology-suite -v -c $2 -m $3 -k distro -s $4 --subset $(echo "(($(date +%U) % 4) * 7) + $1" | bc)/999 --newest 7 -e $5 $6 elif [ $2 = "hammer" ] ; then # run hammer branch with less jobs teuthology-suite -v -c $2 -m $3 -k distro -s $4 --subset $(echo "(($(date +%U) % 4) * 7) + $1" | bc)/56 -e $5 $6 @@ -34,11 +34,11 @@ elif [ $2 = "jewel" ] ; then # run jewel branch with /40 jobs teuthology-suite -v -c $2 -m $3 -k distro -s $4 --subset $(echo "(($(date +%U) % 4) * 7) + $1" | bc)/40 -e $5 $6 elif [ $2 = "kraken" ] ; then - # run kraken branch with /40 jobs - teuthology-suite -v -c $2 -m $3 -k distro -s $4 --subset $(echo "(($(date +%U) % 4) * 7) + $1" | bc)/40 -e $5 $6 + # run kraken branch with /999 jobs + teuthology-suite -v -c $2 -m $3 -k distro -s $4 --subset $(echo "(($(date +%U) % 4) * 7) + $1" | bc)/999 -e $5 $6 elif [ $2 = "luminous" ] ; then - # run luminous branch with /40 jobs - teuthology-suite -v -c $2 -m $3 -k distro -s $4 --subset $(echo "(($(date +%U) % 4) * 7) + $1" | bc)/40 -e $5 $6 + # run luminous branch with /999 jobs + teuthology-suite -v -c $2 -m $3 -k distro -s $4 --subset $(echo "(($(date +%U) % 4) * 7) + $1" | bc)/999 -e $5 $6 else # run NON master branches without --newest teuthology-suite -v -c $2 -m $3 -k distro -s $4 --subset $(echo "(($(date +%U) % 4) * 7) + $1" | bc)/28 -e $5 $6 diff --git a/ceph/qa/run-standalone.sh b/ceph/qa/run-standalone.sh index 185ed41c2..3be6121f6 100755 --- a/ceph/qa/run-standalone.sh +++ b/ceph/qa/run-standalone.sh @@ -1,25 +1,123 @@ -#!/bin/sh -ex +#!/usr/bin/env bash +set -e -if [ ! -e Makefile ]; then +if [ ! -e Makefile -o ! -d bin ]; then echo 'run this from the build dir' exit 1 fi +if [ ! -d /tmp/ceph-disk-virtualenv -o ! -d /tmp/ceph-detect-init-virtualenv ]; then + echo '/tmp/*-virtualenv directories not built. Please run "make check" first.' + exit 1 +fi + if [ `uname` = FreeBSD ]; then # otherwise module prettytable will not be found export PYTHONPATH=/usr/local/lib/python2.7/site-packages exec_mode=+111 + KERNCORE="kern.corefile" + COREPATTERN="core.%N.%P" else + export PYTHONPATH=/usr/lib/python2.7/dist-packages exec_mode=/111 + KERNCORE="kernel.core_pattern" + COREPATTERN="core.%e.%p.%t" +fi + +function finish() { + if [ -n "$precore" ]; then + sudo sysctl -w ${KERNCORE}=${precore} + fi + exit 0 +} + +trap finish TERM HUP INT + +PATH=$(pwd)/bin:$PATH + +# TODO: Use getops +dryrun=false +if [[ "$1" = "--dry-run" ]]; then + dryrun=true + shift +fi + +all=false +if [ "$1" = "" ]; then + all=true fi -for f in `find ../qa/standalone -perm $exec_mode -type f` +select=("$@") + +location="../qa/standalone" + +count=0 +errors=0 +userargs="" +precore="$(sysctl -n $KERNCORE)" +# If corepattern already set, avoid having to use sudo +if [ "$precore" = "$COREPATTERN" ]; then + precore="" +else + sudo sysctl -w ${KERNCORE}=${COREPATTERN} +fi +ulimit -c unlimited +for f in $(cd $location ; find . 
-perm $exec_mode -type f) do - echo '--- $f ---' - PATH=$PATH:bin \ - CEPH_ROOT=.. \ - CEPH_LIB=lib \ - $f || exit 1 + f=$(echo $f | sed 's/\.\///') + # This is tested with misc/test-ceph-helpers.sh + if [[ "$f" = "ceph-helpers.sh" ]]; then + continue + fi + if [[ "$all" = "false" ]]; then + found=false + for c in "${!select[@]}" + do + # Get command and any arguments of subset of tests ro tun + allargs="${select[$c]}" + arg1=$(echo "$allargs" | cut --delimiter " " --field 1) + # Get user args for this selection for use below + userargs="$(echo $allargs | cut -s --delimiter " " --field 2-)" + if [[ "$arg1" = $(basename $f) ]]; then + found=true + break + fi + if [[ "$arg1" = "$f" ]]; then + found=true + break + fi + done + if [[ "$found" = "false" ]]; then + continue + fi + fi + # Don't run test-failure.sh unless explicitly specified + if [ "$all" = "true" -a "$f" = "special/test-failure.sh" ]; then + continue + fi + + cmd="$location/$f $userargs" + count=$(expr $count + 1) + echo "--- $cmd ---" + if [[ "$dryrun" != "true" ]]; then + if ! PATH=$PATH:bin \ + CEPH_ROOT=.. \ + CEPH_LIB=lib \ + LOCALRUN=yes \ + $cmd ; then + echo "$f .............. FAILED" + errors=$(expr $errors + 1) + fi + fi done +if [ -n "$precore" ]; then + sudo sysctl -w ${KERNCORE}=${precore} +fi + +if [ "$errors" != "0" ]; then + echo "$errors TESTS FAILED, $count TOTAL TESTS" + exit 1 +fi +echo "ALL $count TESTS PASSED" exit 0 diff --git a/ceph/qa/standalone/README b/ceph/qa/standalone/README index b36229596..3082442cb 100644 --- a/ceph/qa/standalone/README +++ b/ceph/qa/standalone/README @@ -12,7 +12,12 @@ You can run them in a git checkout + build directory as well: * The qa/run-standalone.sh will run all of them in sequence. This is slow since there is no parallelism. - * You can run an individual script by passing these environment args. For - example, if you are in the build/ directory, + * You can run individual script(s) by specifying the basename or path below + qa/standalone as arguments to qa/run-standalone.sh. -PATH=$PATH:bin CEPH_ROOT=.. CEPH_LIB=lib ../qa/standalone/mon/misc.sh +../qa/run-standalone.sh misc.sh osd/osd-dup.sh + + * Add support for specifying arguments to selected tests by simply adding + list of tests to each argument. + +../qa/run-standalone.sh "test-ceph-helpers.sh test_get_last_scrub_stamp" diff --git a/ceph/qa/standalone/ceph-helpers.sh b/ceph/qa/standalone/ceph-helpers.sh index 219e30194..2581930b6 100755 --- a/ceph/qa/standalone/ceph-helpers.sh +++ b/ceph/qa/standalone/ceph-helpers.sh @@ -33,6 +33,7 @@ fi if [ `uname` = FreeBSD ]; then SED=gsed DIFFCOLOPTS="" + KERNCORE="kern.corefile" else SED=sed termwidth=$(stty -a | head -1 | sed -e 's/.*columns \([0-9]*\).*/\1/') @@ -40,6 +41,7 @@ else termwidth="-W ${termwidth}" fi DIFFCOLOPTS="-y $termwidth" + KERNCORE="kernel.core_pattern" fi EXTRA_OPTS="" @@ -152,13 +154,43 @@ function test_setup() { # function teardown() { local dir=$1 + local dumplogs=$2 kill_daemons $dir KILL if [ `uname` != FreeBSD ] \ && [ $(stat -f -c '%T' .) == "btrfs" ]; then __teardown_btrfs $dir fi + local cores="no" + local pattern="$(sysctl -n $KERNCORE)" + # See if we have apport core handling + if [ "${pattern:0:1}" = "|" ]; then + # TODO: Where can we get the dumps? 
+ # Not sure where the dumps really are so this will look in the CWD + pattern="" + fi + # Local we start with core and teuthology ends with core + if ls $(dirname $pattern) | grep -q '^core\|core$' ; then + cores="yes" + if [ -n "$LOCALRUN" ]; then + mkdir /tmp/cores.$$ 2> /dev/null || true + for i in $(ls $(dirname $(sysctl -n $KERNCORE)) | grep '^core\|core$'); do + mv $i /tmp/cores.$$ + done + fi + fi + if [ "$cores" = "yes" -o "$dumplogs" = "1" ]; then + display_logs $dir + fi rm -fr $dir rm -rf $(get_asok_dir) + if [ "$cores" = "yes" ]; then + echo "ERROR: Failure due to cores found" + if [ -n "$LOCALRUN" ]; then + echo "Find saved core files in /tmp/cores.$$" + fi + return 1 + fi + return 0 } function __teardown_btrfs() { @@ -406,6 +438,7 @@ function run_mon() { --id $id \ --mon-osd-full-ratio=.99 \ --mon-data-avail-crit=1 \ + --mon-data-avail-warn=5 \ --paxos-propose-interval=0.1 \ --osd-crush-chooseleaf-type=0 \ $EXTRA_OPTS \ @@ -472,10 +505,15 @@ function test_run_mon() { function create_rbd_pool() { ceph osd pool delete rbd rbd --yes-i-really-really-mean-it || return 1 - ceph osd pool create rbd $PG_NUM || return 1 + create_pool rbd $PG_NUM || return 1 rbd pool init rbd } +function create_pool() { + ceph osd pool create "$@" + sleep 1 +} + ####################################################################### function run_mgr() { @@ -1266,7 +1304,7 @@ function test_get_last_scrub_stamp() { run_osd $dir 0 || return 1 create_rbd_pool || return 1 wait_for_clean || return 1 - stamp=$(get_last_scrub_stamp 2.0) + stamp=$(get_last_scrub_stamp 1.0) test -n "$stamp" || return 1 teardown $dir || return 1 } @@ -1466,9 +1504,9 @@ function test_repair() { run_osd $dir 0 || return 1 create_rbd_pool || return 1 wait_for_clean || return 1 - repair 2.0 || return 1 + repair 1.0 || return 1 kill_daemons $dir KILL osd || return 1 - ! TIMEOUT=1 repair 2.0 || return 1 + ! TIMEOUT=1 repair 1.0 || return 1 teardown $dir || return 1 } ####################################################################### @@ -1506,9 +1544,9 @@ function test_pg_scrub() { run_osd $dir 0 || return 1 create_rbd_pool || return 1 wait_for_clean || return 1 - pg_scrub 2.0 || return 1 + pg_scrub 1.0 || return 1 kill_daemons $dir KILL osd || return 1 - ! TIMEOUT=1 pg_scrub 2.0 || return 1 + ! 
TIMEOUT=1 pg_scrub 1.0 || return 1 teardown $dir || return 1 } @@ -1581,7 +1619,7 @@ function wait_for_scrub() { local sname=${3:-last_scrub_stamp} for ((i=0; i < $TIMEOUT; i++)); do - if test "$last_scrub" != "$(get_last_scrub_stamp $pgid $sname)" ; then + if test "$(get_last_scrub_stamp $pgid $sname)" '>' "$last_scrub" ; then return 0 fi sleep 1 @@ -1598,7 +1636,7 @@ function test_wait_for_scrub() { run_osd $dir 0 || return 1 create_rbd_pool || return 1 wait_for_clean || return 1 - local pgid=2.0 + local pgid=1.0 ceph pg repair $pgid local last_scrub=$(get_last_scrub_stamp $pgid) wait_for_scrub $pgid "$last_scrub" || return 1 @@ -1796,6 +1834,7 @@ function test_flush_pg_stats() bytes_used=`ceph df detail --format=json | jq "$jq_filter.bytes_used"` test $raw_bytes_used > 0 || return 1 test $raw_bytes_used == $bytes_used || return 1 + teardown $dir } ####################################################################### @@ -1840,10 +1879,9 @@ function main() { if run $dir "$@" ; then code=0 else - display_logs $dir code=1 fi - teardown $dir || return 1 + teardown $dir $code || return 1 return $code } @@ -1858,7 +1896,7 @@ function run_tests() { export CEPH_MON="127.0.0.1:7109" # git grep '\<7109\>' : there must be only one export CEPH_ARGS - CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+=" --fsid=$(uuidgen) --auth-supported=none " CEPH_ARGS+="--mon-host=$CEPH_MON " export CEPH_CONF=/dev/null @@ -1866,13 +1904,17 @@ function run_tests() { local dir=td/ceph-helpers for func in $funcs ; do - $func $dir || return 1 + if ! $func $dir; then + teardown $dir 1 + return 1 + fi done } if test "$1" = TESTS ; then shift run_tests "$@" + exit $? fi # NOTE: @@ -1915,6 +1957,37 @@ function jq_success() { return 1 } +function inject_eio() { + local pooltype=$1 + shift + local which=$1 + shift + local poolname=$1 + shift + local objname=$1 + shift + local dir=$1 + shift + local shard_id=$1 + shift + + local -a initial_osds=($(get_osds $poolname $objname)) + local osd_id=${initial_osds[$shard_id]} + if [ "$pooltype" != "ec" ]; then + shard_id="" + fi + set_config osd $osd_id filestore_debug_inject_read_err true || return 1 + local loop=0 + while ( CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.$osd_id) \ + inject${which}err $poolname $objname $shard_id | grep -q Invalid ); do + loop=$(expr $loop + 1) + if [ $loop = "10" ]; then + return 1 + fi + sleep 1 + done +} + # Local Variables: # compile-command: "cd ../../src ; make -j4 && ../qa/standalone/ceph-helpers.sh TESTS # test_get_config" # End: diff --git a/ceph/qa/standalone/erasure-code/test-erasure-code.sh b/ceph/qa/standalone/erasure-code/test-erasure-code.sh index 95126ead0..6dd5833ad 100755 --- a/ceph/qa/standalone/erasure-code/test-erasure-code.sh +++ b/ceph/qa/standalone/erasure-code/test-erasure-code.sh @@ -57,7 +57,7 @@ function create_erasure_coded_pool() { ceph osd erasure-code-profile set myprofile \ crush-failure-domain=osd || return 1 - ceph osd pool create $poolname 12 12 erasure myprofile \ + create_pool $poolname 12 12 erasure myprofile \ || return 1 wait_for_clean || return 1 } @@ -164,7 +164,7 @@ function TEST_rados_put_get_lrc_advanced() { mapping=DD_ \ crush-steps='[ [ "chooseleaf", "osd", 0 ] ]' \ layers='[ [ "DDc", "" ] ]' || return 1 - ceph osd pool create $poolname 12 12 erasure $profile \ + create_pool $poolname 12 12 erasure $profile \ || return 1 rados_put_get $dir $poolname || return 1 @@ -182,7 +182,7 @@ function TEST_rados_put_get_lrc_kml() { plugin=lrc \ k=4 m=2 l=3 \ crush-failure-domain=osd || 
return 1 - ceph osd pool create $poolname 12 12 erasure $profile \ + create_pool $poolname 12 12 erasure $profile \ || return 1 rados_put_get $dir $poolname || return 1 @@ -202,7 +202,7 @@ function TEST_rados_put_get_isa() { ceph osd erasure-code-profile set profile-isa \ plugin=isa \ crush-failure-domain=osd || return 1 - ceph osd pool create $poolname 1 1 erasure profile-isa \ + create_pool $poolname 1 1 erasure profile-isa \ || return 1 rados_put_get $dir $poolname || return 1 @@ -222,7 +222,7 @@ function TEST_rados_put_get_jerasure() { plugin=jerasure \ k=4 m=2 \ crush-failure-domain=osd || return 1 - ceph osd pool create $poolname 12 12 erasure $profile \ + create_pool $poolname 12 12 erasure $profile \ || return 1 rados_put_get $dir $poolname || return 1 @@ -242,7 +242,7 @@ function TEST_rados_put_get_shec() { plugin=shec \ k=2 m=1 c=1 \ crush-failure-domain=osd || return 1 - ceph osd pool create $poolname 12 12 erasure $profile \ + create_pool $poolname 12 12 erasure $profile \ || return 1 rados_put_get $dir $poolname || return 1 @@ -318,7 +318,7 @@ function TEST_chunk_mapping() { mapping='_DD' \ crush-steps='[ [ "choose", "osd", 0 ] ]' || return 1 ceph osd erasure-code-profile get remap-profile - ceph osd pool create remap-pool 12 12 erasure remap-profile \ + create_pool remap-pool 12 12 erasure remap-profile \ || return 1 # diff --git a/ceph/qa/standalone/erasure-code/test-erasure-eio.sh b/ceph/qa/standalone/erasure-code/test-erasure-eio.sh index 678088e94..b7880169e 100755 --- a/ceph/qa/standalone/erasure-code/test-erasure-eio.sh +++ b/ceph/qa/standalone/erasure-code/test-erasure-eio.sh @@ -60,7 +60,7 @@ function create_erasure_coded_pool() { plugin=jerasure \ k=2 m=1 \ crush-failure-domain=osd || return 1 - ceph osd pool create $poolname 1 1 erasure myprofile \ + create_pool $poolname 1 1 erasure myprofile \ || return 1 wait_for_clean || return 1 } @@ -142,22 +142,6 @@ function rados_put_get() { rm $dir/ORIGINAL } -function inject_eio() { - local objname=$1 - shift - local dir=$1 - shift - local shard_id=$1 - shift - - local poolname=pool-jerasure - local -a initial_osds=($(get_osds $poolname $objname)) - local osd_id=${initial_osds[$shard_id]} - set_config osd $osd_id filestore_debug_inject_read_err true || return 1 - CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.$osd_id) \ - injectdataerr $poolname $objname $shard_id || return 1 -} - function rados_get_data_eio() { local dir=$1 shift @@ -170,11 +154,11 @@ function rados_get_data_eio() { # local poolname=pool-jerasure local objname=obj-eio-$$-$shard_id - inject_eio $objname $dir $shard_id || return 1 + inject_eio ec data $poolname $objname $dir $shard_id || return 1 rados_put_get $dir $poolname $objname $recovery || return 1 shard_id=$(expr $shard_id + 1) - inject_eio $objname $dir $shard_id || return 1 + inject_eio ec data $poolname $objname $dir $shard_id || return 1 # Now 2 out of 3 shards get EIO, so should fail rados_get $dir $poolname $objname fail || return 1 } diff --git a/ceph/qa/standalone/misc/test-ceph-helpers.sh b/ceph/qa/standalone/misc/test-ceph-helpers.sh index 3a6788eca..932fcf3fa 100755 --- a/ceph/qa/standalone/misc/test-ceph-helpers.sh +++ b/ceph/qa/standalone/misc/test-ceph-helpers.sh @@ -18,4 +18,4 @@ # GNU Library Public License for more details. 
# -$CEPH_ROOT/qa/standalone/ceph-helpers.sh TESTS +$CEPH_ROOT/qa/standalone/ceph-helpers.sh TESTS "$@" diff --git a/ceph/qa/standalone/mon/misc.sh b/ceph/qa/standalone/mon/misc.sh index 8abf6c5b1..e025e0708 100755 --- a/ceph/qa/standalone/mon/misc.sh +++ b/ceph/qa/standalone/mon/misc.sh @@ -40,7 +40,7 @@ function TEST_osd_pool_get_set() { setup $dir || return 1 run_mon $dir a || return 1 create_rbd_pool || return 1 - ceph osd pool create $TEST_POOL 8 + create_pool $TEST_POOL 8 local flag for flag in nodelete nopgchange nosizechange write_fadvise_dontneed noscrub nodeep-scrub; do @@ -82,7 +82,7 @@ function TEST_osd_pool_get_set() { ! ceph osd pool set $TEST_POOL min_size 0 || return 1 local ecpool=erasepool - ceph osd pool create $ecpool 12 12 erasure default || return 1 + create_pool $ecpool 12 12 erasure default || return 1 #erasue pool size=k+m, min_size=k local size=$(ceph osd pool get $ecpool size|awk '{print $2}') local min_size=$(ceph osd pool get $ecpool min_size|awk '{print $2}') diff --git a/ceph/qa/standalone/mon/mon-bind.sh b/ceph/qa/standalone/mon/mon-bind.sh index f87157c78..a4d774d55 100755 --- a/ceph/qa/standalone/mon/mon-bind.sh +++ b/ceph/qa/standalone/mon/mon-bind.sh @@ -136,7 +136,7 @@ function TEST_put_get() { run_osd $dir 1 || return 1 run_osd $dir 2 || return 1 - ceph osd pool create hello 8 || return 1 + create_pool hello 8 || return 1 echo "hello world" > $dir/hello rados --pool hello put foo $dir/hello || return 1 diff --git a/ceph/qa/standalone/mon/osd-erasure-code-profile.sh b/ceph/qa/standalone/mon/osd-erasure-code-profile.sh index 6bfc2587f..548061299 100755 --- a/ceph/qa/standalone/mon/osd-erasure-code-profile.sh +++ b/ceph/qa/standalone/mon/osd-erasure-code-profile.sh @@ -98,7 +98,7 @@ function TEST_rm() { grep "WRONG does not exist" || return 1 ceph osd erasure-code-profile set $profile || return 1 - ceph osd pool create poolname 12 12 erasure $profile || return 1 + create_pool poolname 12 12 erasure $profile || return 1 ! ceph osd erasure-code-profile rm $profile > $dir/out 2>&1 || return 1 grep "poolname.*using.*$profile" $dir/out || return 1 ceph osd pool delete poolname poolname --yes-i-really-really-mean-it || return 1 diff --git a/ceph/qa/standalone/mon/test_pool_quota.sh b/ceph/qa/standalone/mon/test_pool_quota.sh index dcf89fd26..85941a895 100755 --- a/ceph/qa/standalone/mon/test_pool_quota.sh +++ b/ceph/qa/standalone/mon/test_pool_quota.sh @@ -34,7 +34,7 @@ function TEST_pool_quota() { run_osd $dir 2 || return 1 local poolname=testquoa - ceph osd pool create $poolname 20 + create_pool $poolname 20 local objects=`ceph df detail | grep -w $poolname|awk '{print $3}'` local bytes=`ceph df detail | grep -w $poolname|awk '{print $4}'` diff --git a/ceph/qa/standalone/osd/osd-dup.sh b/ceph/qa/standalone/osd/osd-dup.sh index a1bd1af5d..bcb0fdcc3 100755 --- a/ceph/qa/standalone/osd/osd-dup.sh +++ b/ceph/qa/standalone/osd/osd-dup.sh @@ -2,6 +2,8 @@ source $CEPH_ROOT/qa/standalone/ceph-helpers.sh +[ `uname` = FreeBSD ] && exit 0 + function run() { local dir=$1 shift @@ -38,7 +40,7 @@ function TEST_filestore_to_bluestore() { sleep 5 - ceph osd pool create foo 16 + create_pool foo 16 # write some objects rados bench -p foo 10 write -b 4096 --no-cleanup || return 1 diff --git a/ceph/qa/standalone/scrub/osd-recovery-scrub.sh b/ceph/qa/standalone/scrub/osd-recovery-scrub.sh new file mode 100755 index 000000000..ef9a3318a --- /dev/null +++ b/ceph/qa/standalone/scrub/osd-recovery-scrub.sh @@ -0,0 +1,129 @@ +#! 
/bin/bash +# +# Copyright (C) 2017 Red Hat +# +# Author: David Zafman +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7124" # git grep '\<7124\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + $func $dir || return 1 + done +} + +function TEST_recovery_scrub() { + local dir=$1 + local poolname=test + + TESTDATA="testdata.$$" + OSDS=8 + PGS=32 + OBJECTS=4 + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=1 || return 1 + run_mgr $dir x || return 1 + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + # Create a pool with $PGS pgs + create_pool $poolname $PGS $PGS + wait_for_clean || return 1 + poolid=$(ceph osd dump | grep "^pool.*[']test[']" | awk '{ print $2 }') + + dd if=/dev/urandom of=$TESTDATA bs=1M count=50 + for i in $(seq 1 $OBJECTS) + do + rados -p $poolname put obj${i} $TESTDATA + done + rm -f $TESTDATA + + ceph osd pool set $poolname size 4 + + pids="" + for pg in $(seq 0 $(expr $PGS - 1)) + do + run_in_background pids pg_scrub $poolid.$(echo "{ obase=16; $pg }" | bc | tr '[:upper:]' '[:lower:]') + done + ceph pg dump pgs + wait_background pids + return_code=$? + if [ $return_code -ne 0 ]; then return $return_code; fi + + ERRORS=0 + pidfile=$(find $dir 2>/dev/null | grep $name_prefix'[^/]*\.pid') + pid=$(cat $pidfile) + if ! 
kill -0 $pid + then + echo "OSD crash occurred" + tail -100 $dir/osd.0.log + ERRORS=$(expr $ERRORS + 1) + fi + + kill_daemons $dir || return 1 + + declare -a err_strings + err_strings[0]="not scheduling scrubs due to active recovery" + # Test with these two strings after disabled check in OSD::sched_scrub() + #err_strings[0]="handle_scrub_reserve_request: failed to reserve remotely" + #err_strings[1]="sched_scrub: failed to reserve locally" + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + grep "failed to reserve\|not scheduling scrubs" $dir/osd.${osd}.log + done + for err_string in "${err_strings[@]}" + do + found=false + for osd in $(seq 0 $(expr $OSDS - 1)) + do + if grep "$err_string" $dir/osd.${osd}.log > /dev/null; + then + found=true + fi + done + if [ "$found" = "false" ]; then + echo "Missing log message '$err_string'" + ERRORS=$(expr $ERRORS + 1) + fi + done + + teardown $dir || return 1 + + if [ $ERRORS != "0" ]; + then + echo "TEST FAILED WITH $ERRORS ERRORS" + return 1 + fi + + echo "TEST PASSED" + return 0 +} + +main osd-recovery-scrub "$@" + +# Local Variables: +# compile-command: "cd build ; make -j4 && \ +# ../qa/run-standalone.sh osd-recovery-scrub.sh" diff --git a/ceph/qa/standalone/scrub/osd-scrub-repair.sh b/ceph/qa/standalone/scrub/osd-scrub-repair.sh index 28db9476b..2aaaebd6e 100755 --- a/ceph/qa/standalone/scrub/osd-scrub-repair.sh +++ b/ceph/qa/standalone/scrub/osd-scrub-repair.sh @@ -30,7 +30,11 @@ fi getjson="no" # Ignore the epoch and filter out the attr '_' value because it has date information and won't match -jqfilter='.inconsistents | (.[].shards[].attrs[]? | select(.name == "_") | .value) |= "----Stripped-by-test----"' +if [ "$(jq --version 2>&1 | awk '{ print $3}')" = "1.3" ]; then # Not sure all versions that apply here + jqfilter='.inconsistents | (.[].shards[].attrs[] | select(.name == "_") | .value) |= "----Stripped-by-test----"' +else + jqfilter='.inconsistents | (.[].shards[].attrs[]? 
| select(.name == "_") | .value) |= "----Stripped-by-test----"' +fi sortkeys='import json; import sys ; JSON=sys.stdin.read() ; ud = json.loads(JSON) ; print json.dumps(ud, sort_keys=True, indent=2)' # Remove items are not consistent across runs, the pg interval and client @@ -183,7 +187,7 @@ function create_ec_pool() { ceph osd erasure-code-profile set myprofile crush-failure-domain=osd $3 $4 $5 $6 $7 || return 1 - ceph osd pool create "$poolname" 1 1 erasure myprofile || return 1 + create_pool "$poolname" 1 1 erasure myprofile || return 1 if [ "$allow_overwrites" = "true" ]; then ceph osd pool set "$poolname" allow_ec_overwrites true || return 1 @@ -370,11 +374,11 @@ function unfound_erasure_coded() { # # it may take a bit to appear due to mon/mgr asynchrony for f in `seq 1 60`; do - ceph -s | grep "1/1 unfound" && break + ceph -s | grep "1/1 objects unfound" && break sleep 1 done ceph -s|grep "4 osds: 4 up, 4 in" || return 1 - ceph -s|grep "1/1 unfound" || return 1 + ceph -s|grep "1/1 objects unfound" || return 1 teardown $dir || return 1 } @@ -477,7 +481,7 @@ function TEST_list_missing_erasure_coded_overwrites() { function TEST_corrupt_scrub_replicated() { local dir=$1 local poolname=csr_pool - local total_objs=15 + local total_objs=16 setup $dir || return 1 run_mon $dir a --osd_pool_default_size=2 || return 1 @@ -487,8 +491,8 @@ function TEST_corrupt_scrub_replicated() { create_rbd_pool || return 1 wait_for_clean || return 1 - ceph osd pool create foo 1 || return 1 - ceph osd pool create $poolname 1 1 || return 1 + create_pool foo 1 || return 1 + create_pool $poolname 1 1 || return 1 wait_for_clean || return 1 for i in $(seq 1 $total_objs) ; do @@ -593,22 +597,22 @@ function TEST_corrupt_scrub_replicated() { 15) objectstore_tool $dir $osd $objname rm-attr _ || return 1 + ;; + + 16) + objectstore_tool $dir 0 $objname rm-attr snapset || return 1 + echo -n bad-val > $dir/bad-val + objectstore_tool $dir 1 $objname set-attr snapset $dir/bad-val || return 1 esac done local pg=$(get_pg $poolname ROBJ0) - set_config osd 0 filestore_debug_inject_read_err true || return 1 - set_config osd 1 filestore_debug_inject_read_err true || return 1 - CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.1) \ - injectdataerr $poolname ROBJ11 || return 1 - CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) \ - injectmdataerr $poolname ROBJ12 || return 1 - CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) \ - injectmdataerr $poolname ROBJ13 || return 1 - CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.1) \ - injectdataerr $poolname ROBJ13 || return 1 + inject_eio rep data $poolname ROBJ11 $dir 0 || return 1 # shard 0 of [1, 0], osd.1 + inject_eio rep mdata $poolname ROBJ12 $dir 1 || return 1 # shard 1 of [1, 0], osd.0 + inject_eio rep mdata $poolname ROBJ13 $dir 1 || return 1 # shard 1 of [1, 0], osd.0 + inject_eio rep data $poolname ROBJ13 $dir 0 || return 1 # shard 0 of [1, 0], osd.1 pg_scrub $pg @@ -630,19 +634,23 @@ function TEST_corrupt_scrub_replicated() { { "size": 7, "errors": [], - "osd": 0 + "osd": 0, + "primary": false }, { "size": 9, "errors": [ - "size_mismatch_oi" + "size_mismatch_oi", + "obj_size_oi_mismatch" ], - "osd": 1 + "osd": 1, + "primary": true } ], "selected_object_info": "3:ce3f1d6a:::ROBJ1:head(47'54 osd.0.0:53 dirty|omap|data_digest|omap_digest s 7 uv 3 dd 2ddbf8f5 od f5fba2c6 alloc_hint [0 0 0])", "union_shard_errors": [ - "size_mismatch_oi" + "size_mismatch_oi", + "obj_size_oi_mismatch" ], "errors": [ "size_mismatch" @@ -661,12 +669,14 @@ function 
TEST_corrupt_scrub_replicated() { "errors": [ "stat_error" ], - "osd": 0 + "osd": 0, + "primary": false }, { "size": 7, "errors": [], - "osd": 1 + "osd": 1, + "primary": true } ], "selected_object_info": "3:bc819597:::ROBJ12:head(47'52 osd.0.0:51 dirty|omap|data_digest|omap_digest s 7 uv 36 dd 2ddbf8f5 od 67f306a alloc_hint [0 0 0])", @@ -688,12 +698,14 @@ function TEST_corrupt_scrub_replicated() { "errors": [ "stat_error" ], - "osd": 0 + "osd": 0, + "primary": false }, { "size": 7, "errors": [], - "osd": 1 + "osd": 1, + "primary": true } ], "selected_object_info": "3:d60617f9:::ROBJ13:head(47'55 osd.0.0:54 dirty|omap|data_digest|omap_digest s 7 uv 39 dd 2ddbf8f5 od 6441854d alloc_hint [0 0 0])", @@ -713,17 +725,38 @@ function TEST_corrupt_scrub_replicated() { "shards": [ { "size": 7, + "attrs": [ + { + "Base64": false, + "value": "", + "name": "_" + }, + { + "Base64": true, + "value": "AwIdAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAA=", + "name": "snapset" + } + ], "errors": [ "oi_attr_corrupted" ], - "osd": 0 + "osd": 0, + "primary": false }, { "size": 7, + "attrs": [ + { + "Base64": true, + "value": "AwIdAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAA=", + "name": "snapset" + } + ], "errors": [ "oi_attr_missing" ], - "osd": 1 + "osd": 1, + "primary": true } ], "union_shard_errors": [ @@ -756,7 +789,8 @@ function TEST_corrupt_scrub_replicated() { ], "size": 7, "errors": [], - "osd": 0 + "osd": 0, + "primary": false }, { "attrs": [ @@ -770,16 +804,15 @@ function TEST_corrupt_scrub_replicated() { "errors": [ "oi_attr_missing" ], - "osd": 1 + "osd": 1, + "primary": true } ], "selected_object_info": "3:30259878:::ROBJ15:head(47'46 osd.0.0:45 dirty|omap|data_digest|omap_digest s 7 uv 45 dd 2ddbf8f5 od 2d2a4d6e alloc_hint [0 0 0])", "union_shard_errors": [ "oi_attr_missing" ], - "errors": [ - "attr_name_mismatch" - ], + "errors": [], "object": { "version": 45, "snap": "head", @@ -788,18 +821,71 @@ function TEST_corrupt_scrub_replicated() { "name": "ROBJ15" } }, + { + "errors": [], + "object": { + "locator": "", + "name": "ROBJ16", + "nspace": "", + "snap": "head", + "version": 0 + }, + "shards": [ + { + "attrs": [ + { + "Base64": true, + "name": "_", + "value": "" + } + ], + "errors": [ + "ss_attr_missing" + ], + "osd": 0, + "primary": false, + "size": 7 + }, + { + "attrs": [ + { + "Base64": true, + "name": "_", + "value": "" + }, + { + "Base64": false, + "name": "snapset", + "value": "bad-val" + } + ], + "errors": [ + "ss_attr_corrupted" + ], + "osd": 1, + "primary": true, + "size": 7 + } + ], + "union_shard_errors": [ + "ss_attr_missing", + "ss_attr_corrupted" + ] + }, { "shards": [ { "size": 7, "errors": [], - "osd": 0 + "osd": 0, + "primary": false }, { "errors": [ "missing" ], - "osd": 1 + "osd": 1, + "primary": true } ], "selected_object_info": "3:f2a5b2a4:::ROBJ3:head(47'57 osd.0.0:56 dirty|omap|data_digest|omap_digest s 7 uv 9 dd 2ddbf8f5 od b35dfd alloc_hint [0 0 0])", @@ -842,7 +928,8 @@ function TEST_corrupt_scrub_replicated() { ], "size": 7, "errors": [], - "osd": 0 + "osd": 0, + "primary": false }, { "attrs": [ @@ -869,17 +956,18 @@ function TEST_corrupt_scrub_replicated() { ], "size": 7, "errors": [], - "osd": 1 + "osd": 1, + "primary": true } ], - "selected_object_info": "3:86586531:::ROBJ8:head(82'62 client.4351.0:1 dirty|omap|data_digest|omap_digest s 7 uv 62 dd 2ddbf8f5 od d6be81dc alloc_hint [0 0 0])", + "selected_object_info": "3:86586531:::ROBJ8:head(82'62 client.4351.0:1 dirty|omap|data_digest|omap_digest s 7 uv 66 dd 2ddbf8f5 od d6be81dc alloc_hint [0 0 0])", 
"union_shard_errors": [], "errors": [ "attr_value_mismatch", "attr_name_mismatch" ], "object": { - "version": 62, + "version": 66, "snap": "head", "locator": "", "nspace": "", @@ -889,50 +977,31 @@ function TEST_corrupt_scrub_replicated() { { "shards": [ { - "attrs": [ - { - "Base64": true, - "value": "", - "name": "_" - }, - { - "Base64": true, - "value": "AwIdAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAA=", - "name": "snapset" - } - ], - "object_info": "3:ffdb2004:::ROBJ9:head(102'63 client.4433.0:1 dirty|omap|data_digest|omap_digest s 1 uv 63 dd 2b63260d od 2eecc539 alloc_hint [0 0 0])", + "object_info": "3:ffdb2004:::ROBJ9:head(102'63 client.4433.0:1 dirty|omap|data_digest|omap_digest s 1 uv 67 dd 2b63260d od 2eecc539 alloc_hint [0 0 0])", "size": 1, "errors": [], - "osd": 0 + "osd": 0, + "primary": false }, { - "attrs": [ - { - "Base64": true, - "value": "", - "name": "_" - }, - { - "Base64": true, - "value": "AwIdAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAA=", - "name": "snapset" - } - ], "object_info": "3:ffdb2004:::ROBJ9:head(47'60 osd.0.0:59 dirty|omap|data_digest|omap_digest s 7 uv 27 dd 2ddbf8f5 od 2eecc539 alloc_hint [0 0 0])", "size": 1, - "errors": [], - "osd": 1 + "errors": [ + "obj_size_oi_mismatch" + ], + "osd": 1, + "primary": true } ], - "selected_object_info": "3:ffdb2004:::ROBJ9:head(102'63 client.4433.0:1 dirty|omap|data_digest|omap_digest s 1 uv 63 dd 2b63260d od 2eecc539 alloc_hint [0 0 0])", - "union_shard_errors": [], + "selected_object_info": "3:ffdb2004:::ROBJ9:head(102'63 client.4433.0:1 dirty|omap|data_digest|omap_digest s 1 uv 67 dd 2b63260d od 2eecc539 alloc_hint [0 0 0])", + "union_shard_errors": [ + "obj_size_oi_mismatch" + ], "errors": [ - "object_info_inconsistency", - "attr_value_mismatch" + "object_info_inconsistency" ], "object": { - "version": 63, + "version": 67, "snap": "head", "locator": "", "nspace": "", @@ -971,16 +1040,10 @@ EOF objectstore_tool $dir 1 $objname set-attr _ $dir/oi rm $dir/oi - set_config osd 0 filestore_debug_inject_read_err true || return 1 - set_config osd 1 filestore_debug_inject_read_err true || return 1 - CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.1) \ - injectdataerr $poolname ROBJ11 || return 1 - CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) \ - injectmdataerr $poolname ROBJ12 || return 1 - CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) \ - injectmdataerr $poolname ROBJ13 || return 1 - CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.1) \ - injectdataerr $poolname ROBJ13 || return 1 + inject_eio rep data $poolname ROBJ11 $dir 0 || return 1 # shard 0 of [1, 0], osd.1 + inject_eio rep mdata $poolname ROBJ12 $dir 1 || return 1 # shard 1 of [1, 0], osd.0 + inject_eio rep mdata $poolname ROBJ13 $dir 1 || return 1 # shard 1 of [1, 0], osd.0 + inject_eio rep data $poolname ROBJ13 $dir 0 || return 1 # shard 0 of [1, 0], osd.1 pg_deep_scrub $pg rados list-inconsistent-pg $poolname > $dir/json || return 1 @@ -1003,7 +1066,8 @@ EOF "omap_digest": "0xf5fba2c6", "size": 7, "errors": [], - "osd": 0 + "osd": 0, + "primary": false }, { "data_digest": "0x2d4a11c2", @@ -1011,15 +1075,18 @@ EOF "size": 9, "errors": [ "data_digest_mismatch_oi", - "size_mismatch_oi" + "size_mismatch_oi", + "obj_size_oi_mismatch" ], - "osd": 1 + "osd": 1, + "primary": true } ], "selected_object_info": "3:ce3f1d6a:::ROBJ1:head(47'54 osd.0.0:53 dirty|omap|data_digest|omap_digest s 7 uv 3 dd 2ddbf8f5 od f5fba2c6 alloc_hint [0 0 0])", "union_shard_errors": [ "data_digest_mismatch_oi", - "size_mismatch_oi" + "size_mismatch_oi", + 
"obj_size_oi_mismatch" ], "errors": [ "data_digest_mismatch", @@ -1042,7 +1109,8 @@ EOF "errors": [ "omap_digest_mismatch_oi" ], - "osd": 0 + "osd": 0, + "primary": false }, { "data_digest": "0x2ddbf8f5", @@ -1051,7 +1119,8 @@ EOF "errors": [ "omap_digest_mismatch_oi" ], - "osd": 1 + "osd": 1, + "primary": true } ], "selected_object_info": "3:b1f19cbd:::ROBJ10:head(47'51 osd.0.0:50 dirty|omap|data_digest|omap_digest s 7 uv 30 dd 2ddbf8f5 od c2025a24 alloc_hint [0 0 0])", @@ -1074,14 +1143,16 @@ EOF "omap_digest": "0xa03cef03", "size": 7, "errors": [], - "osd": 0 + "osd": 0, + "primary": false }, { "size": 7, "errors": [ "read_error" ], - "osd": 1 + "osd": 1, + "primary": true } ], "selected_object_info": "3:87abbf36:::ROBJ11:head(47'48 osd.0.0:47 dirty|omap|data_digest|omap_digest s 7 uv 33 dd 2ddbf8f5 od a03cef03 alloc_hint [0 0 0])", @@ -1103,14 +1174,16 @@ EOF "errors": [ "stat_error" ], - "osd": 0 + "osd": 0, + "primary": false }, { "data_digest": "0x2ddbf8f5", "omap_digest": "0x067f306a", "size": 7, "errors": [], - "osd": 1 + "osd": 1, + "primary": true } ], "selected_object_info": "3:bc819597:::ROBJ12:head(47'52 osd.0.0:51 dirty|omap|data_digest|omap_digest s 7 uv 36 dd 2ddbf8f5 od 67f306a alloc_hint [0 0 0])", @@ -1132,14 +1205,16 @@ EOF "errors": [ "stat_error" ], - "osd": 0 + "osd": 0, + "primary": false }, { "size": 7, "errors": [ "read_error" ], - "osd": 1 + "osd": 1, + "primary": true } ], "union_shard_errors": [ @@ -1158,22 +1233,43 @@ EOF { "shards": [ { + "attrs": [ + { + "Base64": false, + "value": "", + "name": "_" + }, + { + "Base64": true, + "value": "AwIdAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAA=", + "name": "snapset" + } + ], "data_digest": "0x2ddbf8f5", "omap_digest": "0x4f14f849", "size": 7, "errors": [ "oi_attr_corrupted" ], - "osd": 0 + "osd": 0, + "primary": false }, { + "attrs": [ + { + "Base64": true, + "value": "AwIdAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAA=", + "name": "snapset" + } + ], "data_digest": "0x2ddbf8f5", "omap_digest": "0x4f14f849", "size": 7, "errors": [ "oi_attr_missing" ], - "osd": 1 + "osd": 1, + "primary": true } ], "union_shard_errors": [ @@ -1208,7 +1304,8 @@ EOF "omap_digest": "0x2d2a4d6e", "size": 7, "errors": [], - "osd": 0 + "osd": 0, + "primary": false }, { "attrs": [ @@ -1224,16 +1321,15 @@ EOF "errors": [ "oi_attr_missing" ], - "osd": 1 + "osd": 1, + "primary": true } ], "selected_object_info": "3:30259878:::ROBJ15:head(47'46 osd.0.0:45 dirty|omap|data_digest|omap_digest s 7 uv 45 dd 2ddbf8f5 od 2d2a4d6e alloc_hint [0 0 0])", "union_shard_errors": [ "oi_attr_missing" ], - "errors": [ - "attr_name_mismatch" - ], + "errors": [], "object": { "version": 45, "snap": "head", @@ -1242,6 +1338,61 @@ EOF "name": "ROBJ15" } }, + { + "errors": [], + "object": { + "locator": "", + "name": "ROBJ16", + "nspace": "", + "snap": "head", + "version": 0 + }, + "shards": [ + { + "attrs": [ + { + "Base64": true, + "name": "_", + "value": "" + } + ], + "data_digest": "0x2ddbf8f5", + "errors": [ + "ss_attr_missing" + ], + "omap_digest": "0x8b699207", + "osd": 0, + "primary": false, + "size": 7 + }, + { + "attrs": [ + { + "Base64": true, + "name": "_", + "value": "" + }, + { + "Base64": false, + "name": "snapset", + "value": "bad-val" + } + ], + "data_digest": "0x2ddbf8f5", + "errors": [ + "ss_attr_corrupted" + ], + "omap_digest": "0x8b699207", + "osd": 1, + "primary": true, + "size": 7 + } + ], + "union_shard_errors": [ + "ss_attr_missing", + "ss_attr_corrupted" + ] + }, { "shards": [ { @@ -1251,14 +1402,16 @@ EOF "errors": [ "data_digest_mismatch_oi" 
], - "osd": 0 + "osd": 0, + "primary": false }, { "data_digest": "0x2ddbf8f5", "omap_digest": "0xf8e11918", "size": 7, "errors": [], - "osd": 1 + "osd": 1, + "primary": true } ], "selected_object_info": "3:e97ce31e:::ROBJ2:head(47'56 osd.0.0:55 dirty|omap|data_digest|omap_digest s 7 uv 6 dd 2ddbf8f5 od f8e11918 alloc_hint [0 0 0])", @@ -1283,13 +1436,15 @@ EOF "omap_digest": "0x00b35dfd", "size": 7, "errors": [], - "osd": 0 + "osd": 0, + "primary": false }, { "errors": [ "missing" ], - "osd": 1 + "osd": 1, + "primary": true } ], "selected_object_info": "3:f2a5b2a4:::ROBJ3:head(47'57 osd.0.0:56 dirty|omap|data_digest|omap_digest s 7 uv 9 dd 2ddbf8f5 od b35dfd alloc_hint [0 0 0])", @@ -1314,14 +1469,16 @@ EOF "errors": [ "omap_digest_mismatch_oi" ], - "osd": 0 + "osd": 0, + "primary": false }, { "data_digest": "0x2ddbf8f5", "omap_digest": "0xe2d46ea4", "size": 7, "errors": [], - "osd": 1 + "osd": 1, + "primary": true } ], "selected_object_info": "3:f4981d31:::ROBJ4:head(47'58 osd.0.0:57 dirty|omap|data_digest|omap_digest s 7 uv 12 dd 2ddbf8f5 od e2d46ea4 alloc_hint [0 0 0])", @@ -1346,7 +1503,8 @@ EOF "omap_digest": "0x1a862a41", "size": 7, "errors": [], - "osd": 0 + "osd": 0, + "primary": false }, { "data_digest": "0x2ddbf8f5", @@ -1355,7 +1513,8 @@ EOF "errors": [ "omap_digest_mismatch_oi" ], - "osd": 1 + "osd": 1, + "primary": true } ], "selected_object_info": "3:f4bfd4d1:::ROBJ5:head(47'59 osd.0.0:58 dirty|omap|data_digest|omap_digest s 7 uv 15 dd 2ddbf8f5 od 1a862a41 alloc_hint [0 0 0])", @@ -1382,14 +1541,16 @@ EOF "errors": [ "omap_digest_mismatch_oi" ], - "osd": 0 + "osd": 0, + "primary": false }, { "data_digest": "0x2ddbf8f5", "omap_digest": "0x179c919f", "size": 7, "errors": [], - "osd": 1 + "osd": 1, + "primary": true } ], "selected_object_info": "3:a53c12e8:::ROBJ6:head(47'50 osd.0.0:49 dirty|omap|data_digest|omap_digest s 7 uv 18 dd 2ddbf8f5 od 179c919f alloc_hint [0 0 0])", @@ -1414,7 +1575,8 @@ EOF "omap_digest": "0xefced57a", "size": 7, "errors": [], - "osd": 0 + "osd": 0, + "primary": false }, { "data_digest": "0x2ddbf8f5", @@ -1423,7 +1585,8 @@ EOF "errors": [ "omap_digest_mismatch_oi" ], - "osd": 1 + "osd": 1, + "primary": true } ], "selected_object_info": "3:8b55fa4b:::ROBJ7:head(47'49 osd.0.0:48 dirty|omap|data_digest|omap_digest s 7 uv 21 dd 2ddbf8f5 od efced57a alloc_hint [0 0 0])", @@ -1470,7 +1633,8 @@ EOF "omap_digest": "0xd6be81dc", "size": 7, "errors": [], - "osd": 0 + "osd": 0, + "primary": false }, { "attrs": [ @@ -1499,17 +1663,18 @@ EOF "omap_digest": "0xd6be81dc", "size": 7, "errors": [], - "osd": 1 + "osd": 1, + "primary": true } ], - "selected_object_info": "3:86586531:::ROBJ8:head(82'62 client.4351.0:1 dirty|omap|data_digest|omap_digest s 7 uv 62 dd 2ddbf8f5 od d6be81dc alloc_hint [0 0 0])", + "selected_object_info": "3:86586531:::ROBJ8:head(82'62 client.4351.0:1 dirty|omap|data_digest|omap_digest s 7 uv 66 dd 2ddbf8f5 od d6be81dc alloc_hint [0 0 0])", "union_shard_errors": [], "errors": [ "attr_value_mismatch", "attr_name_mismatch" ], "object": { - "version": 62, + "version": 66, "snap": "head", "locator": "", "nspace": "", @@ -1519,54 +1684,35 @@ EOF { "shards": [ { - "attrs": [ - { - "Base64": true, - "value": "", - "name": "_" - }, - { - "Base64": true, - "value": "AwIdAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAA=", - "name": "snapset" - } - ], "object_info": "3:ffdb2004:::ROBJ9:head(47'60 osd.0.0:59 dirty|omap|data_digest|omap_digest s 7 uv 27 dd 2ddbf8f5 od 2eecc539 alloc_hint [0 0 0])", "data_digest": "0x1f26fb26", "omap_digest": "0x2eecc539", 
"size": 3, - "errors": [], - "osd": 0 + "errors": [ + "obj_size_oi_mismatch" + ], + "osd": 0, + "primary": false }, { - "attrs": [ - { - "Base64": true, - "value": "", - "name": "_" - }, - { - "Base64": true, - "value": "AwIdAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAA=", - "name": "snapset" - } - ], - "object_info": "3:ffdb2004:::ROBJ9:head(122'64 client.4532.0:1 dirty|omap|data_digest|omap_digest s 3 uv 64 dd 1f26fb26 od 2eecc539 alloc_hint [0 0 0])", + "object_info": "3:ffdb2004:::ROBJ9:head(122'64 client.4532.0:1 dirty|omap|data_digest|omap_digest s 3 uv 68 dd 1f26fb26 od 2eecc539 alloc_hint [0 0 0])", "data_digest": "0x1f26fb26", "omap_digest": "0x2eecc539", "size": 3, "errors": [], - "osd": 1 + "osd": 1, + "primary": true } ], - "selected_object_info": "3:ffdb2004:::ROBJ9:head(122'64 client.4532.0:1 dirty|omap|data_digest|omap_digest s 3 uv 64 dd 1f26fb26 od 2eecc539 alloc_hint [0 0 0])", - "union_shard_errors": [], + "selected_object_info": "3:ffdb2004:::ROBJ9:head(122'64 client.4532.0:1 dirty|omap|data_digest|omap_digest s 3 uv 68 dd 1f26fb26 od 2eecc539 alloc_hint [0 0 0])", + "union_shard_errors": [ + "obj_size_oi_mismatch" + ], "errors": [ - "object_info_inconsistency", - "attr_value_mismatch" + "object_info_inconsistency" ], "object": { - "version": 64, + "version": 68, "snap": "head", "locator": "", "nspace": "", @@ -1615,7 +1761,7 @@ function corrupt_scrub_erasure() { fi done create_rbd_pool || return 1 - ceph osd pool create foo 1 + create_pool foo 1 create_ec_pool $poolname $allow_overwrites k=2 m=1 stripe_unit=2K --force || return 1 wait_for_clean || return 1 @@ -1690,26 +1836,31 @@ function corrupt_scrub_erasure() { "size": 2048, "errors": [], "shard": 2, - "osd": 0 + "osd": 0, + "primary": false }, { "size": 9, "shard": 0, "errors": [ - "size_mismatch_oi" + "size_mismatch_oi", + "obj_size_oi_mismatch" ], - "osd": 1 + "osd": 1, + "primary": true }, { "size": 2048, "shard": 1, "errors": [], - "osd": 2 + "osd": 2, + "primary": false } ], "selected_object_info": "3:9175b684:::EOBJ1:head(21'1 client.4179.0:1 dirty|data_digest|omap_digest s 7 uv 1 dd 2ddbf8f5 od ffffffff alloc_hint [0 0 0])", "union_shard_errors": [ - "size_mismatch_oi" + "size_mismatch_oi", + "obj_size_oi_mismatch" ], "errors": [ "size_mismatch" @@ -1728,20 +1879,23 @@ function corrupt_scrub_erasure() { "size": 2048, "errors": [], "shard": 2, - "osd": 0 + "osd": 0, + "primary": false }, { "shard": 0, "errors": [ "missing" ], - "osd": 1 + "osd": 1, + "primary": true }, { "size": 2048, "shard": 1, "errors": [], - "osd": 2 + "osd": 2, + "primary": false } ], "selected_object_info": "3:b197b25d:::EOBJ3:head(37'3 client.4251.0:1 dirty|data_digest|omap_digest s 7 uv 3 dd 2ddbf8f5 od ffffffff alloc_hint [0 0 0])", @@ -1790,10 +1944,12 @@ function corrupt_scrub_erasure() { "size": 2048, "errors": [], "shard": 2, - "osd": 0 + "osd": 0, + "primary": false }, { "osd": 1, + "primary": true, "shard": 0, "errors": [], "size": 2048, @@ -1827,6 +1983,7 @@ function corrupt_scrub_erasure() { }, { "osd": 2, + "primary": false, "shard": 1, "errors": [], "size": 2048, @@ -1879,26 +2036,31 @@ function corrupt_scrub_erasure() { "size": 2048, "errors": [], "shard": 2, - "osd": 0 + "osd": 0, + "primary": false }, { "size": 4096, "shard": 0, "errors": [ - "size_mismatch_oi" + "size_mismatch_oi", + "obj_size_oi_mismatch" ], - "osd": 1 + "osd": 1, + "primary": true }, { "size": 2048, "shard": 1, "errors": [], - "osd": 2 + "osd": 2, + "primary": false } ], "selected_object_info": "3:8549dfb5:::EOBJ5:head(65'7 client.4441.0:1 
dirty|data_digest|omap_digest s 7 uv 7 dd 2ddbf8f5 od ffffffff alloc_hint [0 0 0])", "union_shard_errors": [ - "size_mismatch_oi" + "size_mismatch_oi", + "obj_size_oi_mismatch" ], "errors": [ "size_mismatch" @@ -1953,16 +2115,19 @@ EOF "size": 2048, "errors": [], "shard": 2, - "osd": 0 + "osd": 0, + "primary": false }, { "size": 9, "shard": 0, "errors": [ "read_error", - "size_mismatch_oi" + "size_mismatch_oi", + "obj_size_oi_mismatch" ], - "osd": 1 + "osd": 1, + "primary": true }, { "data_digest": "0x00000000", @@ -1970,13 +2135,15 @@ EOF "size": 2048, "shard": 1, "errors": [], - "osd": 2 + "osd": 2, + "primary": false } ], "selected_object_info": "3:9175b684:::EOBJ1:head(27'1 client.4155.0:1 dirty|data_digest|omap_digest s 7 uv 1 dd 2ddbf8f5 od ffffffff alloc_hint [0 0 0])", "union_shard_errors": [ "read_error", - "size_mismatch_oi" + "size_mismatch_oi", + "obj_size_oi_mismatch" ], "errors": [ "size_mismatch" @@ -1997,14 +2164,16 @@ EOF "size": 2048, "errors": [], "shard": 2, - "osd": 0 + "osd": 0, + "primary": false }, { "shard": 0, "errors": [ "missing" ], - "osd": 1 + "osd": 1, + "primary": true }, { "data_digest": "0x00000000", @@ -2012,7 +2181,8 @@ EOF "size": 2048, "shard": 1, "errors": [], - "osd": 2 + "osd": 2, + "primary": false } ], "selected_object_info": "3:b197b25d:::EOBJ3:head(41'3 client.4199.0:1 dirty|data_digest|omap_digest s 7 uv 3 dd 2ddbf8f5 od ffffffff alloc_hint [0 0 0])", @@ -2063,7 +2233,8 @@ EOF "size": 2048, "errors": [], "shard": 2, - "osd": 0 + "osd": 0, + "primary": false }, { "attrs": [ @@ -2098,7 +2269,8 @@ EOF "size": 2048, "errors": [], "shard": 0, - "osd": 1 + "osd": 1, + "primary": true }, { "attrs": [ @@ -2133,7 +2305,8 @@ EOF "size": 2048, "errors": [], "shard": 1, - "osd": 2 + "osd": 2, + "primary": false } ], "selected_object_info": "3:5e723e06:::EOBJ4:head(48'6 client.4223.0:1 dirty|data_digest|omap_digest s 7 uv 6 dd 2ddbf8f5 od ffffffff alloc_hint [0 0 0])", @@ -2158,17 +2331,20 @@ EOF "size": 2048, "errors": [], "shard": 2, - "osd": 0 + "osd": 0, + "primary": false }, { "data_digest": "0x00000000", "omap_digest": "0xffffffff", "size": 4096, "errors": [ - "size_mismatch_oi" + "size_mismatch_oi", + "obj_size_oi_mismatch" ], "shard": 0, - "osd": 1 + "osd": 1, + "primary": true }, { "data_digest": "0x00000000", @@ -2176,12 +2352,14 @@ EOF "size": 2048, "errors": [], "shard": 1, - "osd": 2 + "osd": 2, + "primary": false } ], "selected_object_info": "3:8549dfb5:::EOBJ5:head(65'7 client.4288.0:1 dirty|data_digest|omap_digest s 7 uv 7 dd 2ddbf8f5 od ffffffff alloc_hint [0 0 0])", "union_shard_errors": [ - "size_mismatch_oi" + "size_mismatch_oi", + "obj_size_oi_mismatch" ], "errors": [ "size_mismatch" @@ -2212,16 +2390,19 @@ EOF "size": 2048, "errors": [], "shard": 2, - "osd": 0 + "osd": 0, + "primary": false }, { "size": 9, "shard": 0, "errors": [ "read_error", - "size_mismatch_oi" + "size_mismatch_oi", + "obj_size_oi_mismatch" ], - "osd": 1 + "osd": 1, + "primary": true }, { "data_digest": "0x04cfa72f", @@ -2229,13 +2410,15 @@ EOF "size": 2048, "shard": 1, "errors": [], - "osd": 2 + "osd": 2, + "primary": false } ], "selected_object_info": "3:9175b684:::EOBJ1:head(21'1 client.4179.0:1 dirty|data_digest|omap_digest s 7 uv 1 dd 2ddbf8f5 od ffffffff alloc_hint [0 0 0])", "union_shard_errors": [ "read_error", - "size_mismatch_oi" + "size_mismatch_oi", + "obj_size_oi_mismatch" ], "errors": [ "size_mismatch" @@ -2256,7 +2439,8 @@ EOF "ec_hash_error" ], "shard": 2, - "osd": 0 + "osd": 0, + "primary": false }, { "data_digest": "0x04cfa72f", @@ -2264,7 +2448,8 
@@ EOF "size": 2048, "errors": [], "shard": 0, - "osd": 1 + "osd": 1, + "primary": true }, { "data_digest": "0x04cfa72f", @@ -2272,7 +2457,8 @@ EOF "size": 2048, "errors": [], "shard": 1, - "osd": 2 + "osd": 2, + "primary": false } ], "selected_object_info": "3:9babd184:::EOBJ2:head(29'2 client.4217.0:1 dirty|data_digest|omap_digest s 7 uv 2 dd 2ddbf8f5 od ffffffff alloc_hint [0 0 0])", @@ -2296,10 +2482,12 @@ EOF "size": 2048, "errors": [], "shard": 2, - "osd": 0 + "osd": 0, + "primary": false }, { "osd": 1, + "primary": true, "shard": 0, "errors": [ "missing" @@ -2311,7 +2499,8 @@ EOF "size": 2048, "shard": 1, "errors": [], - "osd": 2 + "osd": 2, + "primary": false } ], "selected_object_info": "3:b197b25d:::EOBJ3:head(37'3 client.4251.0:1 dirty|data_digest|omap_digest s 7 uv 3 dd 2ddbf8f5 od ffffffff alloc_hint [0 0 0])", @@ -2362,10 +2551,12 @@ EOF "size": 2048, "errors": [], "shard": 2, - "osd": 0 + "osd": 0, + "primary": false }, { "osd": 1, + "primary": true, "shard": 0, "errors": [], "size": 2048, @@ -2401,6 +2592,7 @@ EOF }, { "osd": 2, + "primary": false, "shard": 1, "errors": [], "size": 2048, @@ -2457,16 +2649,19 @@ EOF "size": 2048, "errors": [], "shard": 2, - "osd": 0 + "osd": 0, + "primary": false }, { "size": 4096, "shard": 0, "errors": [ "size_mismatch_oi", - "ec_size_error" + "ec_size_error", + "obj_size_oi_mismatch" ], - "osd": 1 + "osd": 1, + "primary": true }, { "data_digest": "0x04cfa72f", @@ -2474,13 +2669,15 @@ EOF "size": 2048, "shard": 1, "errors": [], - "osd": 2 + "osd": 2, + "primary": false } ], "selected_object_info": "3:8549dfb5:::EOBJ5:head(65'7 client.4441.0:1 dirty|data_digest|omap_digest s 7 uv 7 dd 2ddbf8f5 od ffffffff alloc_hint [0 0 0])", "union_shard_errors": [ "size_mismatch_oi", - "ec_size_error" + "ec_size_error", + "obj_size_oi_mismatch" ], "errors": [ "size_mismatch" @@ -2543,13 +2740,14 @@ function TEST_periodic_scrub_replicated() { setup $dir || return 1 run_mon $dir a --osd_pool_default_size=2 || return 1 run_mgr $dir x || return 1 - local ceph_osd_args="--osd-scrub-interval-randomize-ratio=0 --osd-deep-scrub-randomize-ratio=0" + local ceph_osd_args="--osd-scrub-interval-randomize-ratio=0 --osd-deep-scrub-randomize-ratio=0 " + ceph_osd_args+="--osd_scrub_backoff_ratio=0" run_osd $dir 0 $ceph_osd_args || return 1 run_osd $dir 1 $ceph_osd_args || return 1 create_rbd_pool || return 1 wait_for_clean || return 1 - ceph osd pool create $poolname 1 1 || return 1 + create_pool $poolname 1 1 || return 1 wait_for_clean || return 1 local osd=0 @@ -2573,6 +2771,7 @@ function TEST_periodic_scrub_replicated() { # Make sure bad object found rados list-inconsistent-obj $pg | jq '.' | grep -q $objname || return 1 + flush_pg_stats local last_scrub=$(get_last_scrub_stamp $pg) # Fake a schedule scrub CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${primary}) \ @@ -2589,10 +2788,10 @@ function TEST_periodic_scrub_replicated() { # Can't upgrade with this set ceph osd set nodeep-scrub # Let map change propagate to OSDs - sleep 2 + flush pg_stats + sleep 5 # Fake a schedule scrub - local last_scrub=$(get_last_scrub_stamp $pg) CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${primary}) \ trigger_scrub $pg || return 1 # Wait for schedule regular scrub @@ -2609,12 +2808,9 @@ function TEST_periodic_scrub_replicated() { # Bad object still known rados list-inconsistent-obj $pg | jq '.' 
| grep -q $objname || return 1 + flush_pg_stats # Request a regular scrub and it will be done - local scrub_backoff_ratio=$(get_config osd ${primary} osd_scrub_backoff_ratio) - set_config osd ${primary} osd_scrub_backoff_ratio 0 pg_scrub $pg - sleep 1 - set_config osd ${primary} osd_scrub_backoff_ratio $scrub_backoff_ratio grep -q "Regular scrub request, deep-scrub details will be lost" $dir/osd.${primary}.log || return 1 # deep-scrub error is no longer present diff --git a/ceph/qa/standalone/scrub/osd-scrub-snaps.sh b/ceph/qa/standalone/scrub/osd-scrub-snaps.sh index 020363683..4c03bdb9e 100755 --- a/ceph/qa/standalone/scrub/osd-scrub-snaps.sh +++ b/ceph/qa/standalone/scrub/osd-scrub-snaps.sh @@ -46,7 +46,8 @@ function TEST_scrub_snaps() { wait_for_clean || return 1 # Create a pool with a single pg - ceph osd pool create $poolname 1 1 + create_pool $poolname 1 1 + wait_for_clean || return 1 poolid=$(ceph osd dump | grep "^pool.*[']test[']" | awk '{ print $2 }') dd if=/dev/urandom of=$TESTDATA bs=1032 count=1 @@ -449,15 +450,14 @@ EOF err_strings[19]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj9:1 is missing in clone_size" err_strings[20]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj11:1 is an unexpected clone" err_strings[21]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj14:1 size 1032 != clone_size 1033" - err_strings[22]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub 23 errors" + err_strings[22]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub 22 errors" err_strings[23]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj15:head can't decode 'snapset' attr buffer" - err_strings[24]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj12:1 has no oi or legacy_snaps; cannot convert 1=[[]1[]]:[[]1[]].stray_clone_snaps=[{]1=[[]1[]][}]" - for i in `seq 0 ${#err_strings[@]}` + for err_string in "${err_strings[@]}" do - if ! grep "${err_strings[$i]}" $dir/osd.0.log > /dev/null; + if ! 
grep "$err_string" $dir/osd.0.log > /dev/null; then - echo "Missing log message '${err_strings[$i]}'" + echo "Missing log message '$err_string'" ERRORS=$(expr $ERRORS + 1) fi done diff --git a/ceph/qa/standalone/special/test-failure.sh b/ceph/qa/standalone/special/test-failure.sh new file mode 100755 index 000000000..cede887d2 --- /dev/null +++ b/ceph/qa/standalone/special/test-failure.sh @@ -0,0 +1,48 @@ +#!/usr/bin/env bash +set -ex + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7202" # git grep '\<7202\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_failure_log() { + local dir=$1 + + cat > $dir/test_failure.log << EOF +This is a fake log file +* +* +* +* +* +This ends the fake log file +EOF + + # Test fails + return 1 +} + +function TEST_failure_core_only() { + local dir=$1 + + run_mon $dir a || return 1 + kill_daemons $dir SEGV mon 5 + return 0 +} + +main test_failure "$@" diff --git a/ceph/qa/suites/powercycle/osd/whitelist_health.yaml b/ceph/qa/suites/powercycle/osd/whitelist_health.yaml index 740908301..0235037b5 100644 --- a/ceph/qa/suites/powercycle/osd/whitelist_health.yaml +++ b/ceph/qa/suites/powercycle/osd/whitelist_health.yaml @@ -2,3 +2,4 @@ overrides: ceph: log-whitelist: - \(MDS_TRIM\) + - Behind on trimming diff --git a/ceph/qa/suites/rados/upgrade/jewel-x-singleton/1-jewel-install/jewel.yaml b/ceph/qa/suites/rados/upgrade/jewel-x-singleton/1-jewel-install/jewel.yaml index c138b9be4..31ca3e502 100644 --- a/ceph/qa/suites/rados/upgrade/jewel-x-singleton/1-jewel-install/jewel.yaml +++ b/ceph/qa/suites/rados/upgrade/jewel-x-singleton/1-jewel-install/jewel.yaml @@ -8,4 +8,6 @@ tasks: - ceph: skip_mgr_daemons: true add_osds_to_crush: true + log-whitelist: + - required past_interval bounds are empty - print: "**** done ceph" diff --git a/ceph/qa/suites/upgrade/jewel-x/parallel/0-cluster/start.yaml b/ceph/qa/suites/upgrade/jewel-x/parallel/0-cluster/start.yaml index 20e81be24..314562632 100644 --- a/ceph/qa/suites/upgrade/jewel-x/parallel/0-cluster/start.yaml +++ b/ceph/qa/suites/upgrade/jewel-x/parallel/0-cluster/start.yaml @@ -24,6 +24,8 @@ overrides: - scrub mismatch - ScrubResult - wrongly marked - - (MDS_FAILED) + - \(MDS_FAILED\) + - \(OBJECT_ + - is unresponsive conf: fs: xfs diff --git a/ceph/qa/suites/upgrade/jewel-x/parallel/1-jewel-install/jewel.yaml b/ceph/qa/suites/upgrade/jewel-x/parallel/1-jewel-install/jewel.yaml index a7ff9f485..a367ef37c 100644 --- a/ceph/qa/suites/upgrade/jewel-x/parallel/1-jewel-install/jewel.yaml +++ b/ceph/qa/suites/upgrade/jewel-x/parallel/1-jewel-install/jewel.yaml @@ -23,6 +23,7 @@ tasks: - \(PG_ - Monitor daemon marked osd - Behind on trimming + - is unresponsive conf: global: mon warn on pool no app: false diff --git a/ceph/qa/suites/upgrade/jewel-x/parallel/2-workload/rados_loadgenbig.yaml b/ceph/qa/suites/upgrade/jewel-x/parallel/2-workload/rados_loadgenbig.yaml deleted file mode 100644 index a91ed2cfa..000000000 --- a/ceph/qa/suites/upgrade/jewel-x/parallel/2-workload/rados_loadgenbig.yaml +++ /dev/null @@ -1,11 +0,0 @@ -meta: -- desc: | - generate read/write load with rados objects ranging from 1MB to 25MB -workload: - full_sequential: - - workunit: - branch: 
jewel - clients: - client.0: - - rados/load-gen-big.sh - - print: "**** done rados/load-gen-big.sh 2-workload" diff --git a/ceph/qa/suites/upgrade/jewel-x/stress-split/1-jewel-install/jewel.yaml b/ceph/qa/suites/upgrade/jewel-x/stress-split/1-jewel-install/jewel.yaml index c138b9be4..31ca3e502 100644 --- a/ceph/qa/suites/upgrade/jewel-x/stress-split/1-jewel-install/jewel.yaml +++ b/ceph/qa/suites/upgrade/jewel-x/stress-split/1-jewel-install/jewel.yaml @@ -8,4 +8,6 @@ tasks: - ceph: skip_mgr_daemons: true add_osds_to_crush: true + log-whitelist: + - required past_interval bounds are empty - print: "**** done ceph" diff --git a/ceph/qa/tasks/ceph_deploy.py b/ceph/qa/tasks/ceph_deploy.py index eb2875252..b22c32113 100644 --- a/ceph/qa/tasks/ceph_deploy.py +++ b/ceph/qa/tasks/ceph_deploy.py @@ -297,7 +297,6 @@ def build_ceph_cluster(ctx, config): # are taking way more than a minute/monitor to form quorum, so lets # try the next block which will wait up to 15 minutes to gatherkeys. execute_ceph_deploy(mon_create_nodes) - execute_ceph_deploy(mgr_create) # create-keys is explicit now # http://tracker.ceph.com/issues/16036 @@ -307,6 +306,9 @@ def build_ceph_cluster(ctx, config): '--id', remote.shortname]) estatus_gather = execute_ceph_deploy(gather_keys) + + execute_ceph_deploy(mgr_create) + if mds_nodes: estatus_mds = execute_ceph_deploy(deploy_mds) if estatus_mds != 0: diff --git a/ceph/qa/tasks/rgw.py b/ceph/qa/tasks/rgw.py index d6a818170..b6050c87a 100644 --- a/ceph/qa/tasks/rgw.py +++ b/ceph/qa/tasks/rgw.py @@ -142,12 +142,12 @@ def create_pools(ctx, clients): if ctx.rgw.ec_data_pool: create_ec_pool(remote, data_pool, client, 64, - ctx.rgw.erasure_code_profile, cluster_name) + ctx.rgw.erasure_code_profile, cluster_name, 'rgw') else: - create_replicated_pool(remote, data_pool, 64, cluster_name) + create_replicated_pool(remote, data_pool, 64, cluster_name, 'rgw') if ctx.rgw.cache_pools: create_cache_pool(remote, data_pool, data_pool + '.cache', 64, - 64*1024*1024, cluster_name) + 64*1024*1024, cluster_name, 'rgw') log.debug('Pools created') yield diff --git a/ceph/qa/tasks/rgw_multisite.py b/ceph/qa/tasks/rgw_multisite.py index cd120c441..74c1f3f9d 100644 --- a/ceph/qa/tasks/rgw_multisite.py +++ b/ceph/qa/tasks/rgw_multisite.py @@ -409,9 +409,9 @@ def create_zone_pools(ctx, zone): pool_name = pool_config['val']['data_pool'] if ctx.rgw.ec_data_pool: create_ec_pool(gateway.remote, pool_name, zone.name, 64, - ctx.rgw.erasure_code_profile, cluster.name) + ctx.rgw.erasure_code_profile, cluster.name, 'rgw') else: - create_replicated_pool(gateway.remote, pool_name, 64, cluster.name) + create_replicated_pool(gateway.remote, pool_name, 64, cluster.name, 'rgw') def configure_zone_compression(zone, compression): """ Set compression type in the zone's default-placement """ diff --git a/ceph/qa/tasks/util/rados.py b/ceph/qa/tasks/util/rados.py index 88ee45aa1..09388ab38 100644 --- a/ceph/qa/tasks/util/rados.py +++ b/ceph/qa/tasks/util/rados.py @@ -24,20 +24,28 @@ def rados(ctx, remote, cmd, wait=True, check_status=False): else: return proc -def create_ec_pool(remote, name, profile_name, pgnum, profile={}, cluster_name="ceph"): +def create_ec_pool(remote, name, profile_name, pgnum, profile={}, cluster_name="ceph", application=None): remote.run(args=['sudo', 'ceph'] + cmd_erasure_code_profile(profile_name, profile) + ['--cluster', cluster_name]) remote.run(args=[ 'sudo', 'ceph', 'osd', 'pool', 'create', name, str(pgnum), str(pgnum), 'erasure', profile_name, '--cluster', cluster_name ]) + if application: + 
remote.run(args=[ + 'sudo', 'ceph', 'osd', 'pool', 'application', 'enable', name, application, '--cluster', cluster_name + ]) -def create_replicated_pool(remote, name, pgnum, cluster_name="ceph"): +def create_replicated_pool(remote, name, pgnum, cluster_name="ceph", application=None): remote.run(args=[ 'sudo', 'ceph', 'osd', 'pool', 'create', name, str(pgnum), str(pgnum), '--cluster', cluster_name ]) + if application: + remote.run(args=[ + 'sudo', 'ceph', 'osd', 'pool', 'application', 'enable', name, application, '--cluster', cluster_name + ]) -def create_cache_pool(remote, base_name, cache_name, pgnum, size, cluster_name="ceph"): +def create_cache_pool(remote, base_name, cache_name, pgnum, size, cluster_name="ceph", application=None): remote.run(args=[ 'sudo', 'ceph', 'osd', 'pool', 'create', cache_name, str(pgnum), '--cluster', cluster_name ]) @@ -45,6 +53,10 @@ def create_cache_pool(remote, base_name, cache_name, pgnum, size, cluster_name=" 'sudo', 'ceph', 'osd', 'tier', 'add-cache', base_name, cache_name, str(size), '--cluster', cluster_name ]) + if application: + remote.run(args=[ + 'sudo', 'ceph', 'osd', 'pool', 'application', 'enable', name, application, '--cluster', cluster_name + ]) def cmd_erasure_code_profile(profile_name, profile): """ diff --git a/ceph/qa/workunits/mon/crush_ops.sh b/ceph/qa/workunits/mon/crush_ops.sh index 1ef6e5cc1..44bc70f4e 100755 --- a/ceph/qa/workunits/mon/crush_ops.sh +++ b/ceph/qa/workunits/mon/crush_ops.sh @@ -26,6 +26,10 @@ ceph osd crush set-device-class ssd osd.0 ceph osd crush set-device-class hdd osd.1 ceph osd crush rule create-replicated foo-ssd default host ssd ceph osd crush rule create-replicated foo-hdd default host hdd +ceph osd crush rule ls-by-class ssd | grep 'foo-ssd' +ceph osd crush rule ls-by-class ssd | expect_false grep 'foo-hdd' +ceph osd crush rule ls-by-class hdd | grep 'foo-hdd' +ceph osd crush rule ls-by-class hdd | expect_false grep 'foo-ssd' ceph osd erasure-code-profile set ec-foo-ssd crush-device-class=ssd m=2 k=2 ceph osd pool create ec-foo 2 erasure ec-foo-ssd @@ -33,6 +37,16 @@ ceph osd pool rm ec-foo ec-foo --yes-i-really-really-mean-it ceph osd crush rule ls | grep foo +ceph osd crush rule rename foo foo-asdf +ceph osd crush rule rename bar bar-asdf +ceph osd crush rule ls | grep 'foo-asdf' +ceph osd crush rule ls | grep 'bar-asdf' +ceph osd crush rule rm foo 2>&1 | grep 'does not exist' +ceph osd crush rule rm bar 2>&1 | grep 'does not exist' +ceph osd crush rule rename foo-asdf foo +ceph osd crush rule rename bar-asdf bar +ceph osd crush rule ls | expect_false grep 'foo-asdf' +ceph osd crush rule ls | expect_false grep 'bar-asdf' ceph osd crush rule rm foo ceph osd crush rule rm foo # idempotent ceph osd crush rule rm bar diff --git a/ceph/qa/workunits/rbd/rbd_mirror.sh b/ceph/qa/workunits/rbd/rbd_mirror.sh index 021cbaf03..04a03a66e 100755 --- a/ceph/qa/workunits/rbd/rbd_mirror.sh +++ b/ceph/qa/workunits/rbd/rbd_mirror.sh @@ -216,6 +216,24 @@ compare_images ${POOL} ${clone_image} expect_failure "is non-primary" clone_image ${CLUSTER1} ${PARENT_POOL} \ ${parent_image} ${parent_snap} ${POOL} ${clone_image}1 +testlog "TEST: data pool" +dp_image=test_data_pool +create_image ${CLUSTER2} ${POOL} ${dp_image} 128 --data-pool ${PARENT_POOL} +data_pool=$(get_image_data_pool ${CLUSTER2} ${POOL} ${dp_image}) +test "${data_pool}" = "${PARENT_POOL}" +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${dp_image} +data_pool=$(get_image_data_pool ${CLUSTER1} ${POOL} ${dp_image}) +test "${data_pool}" = "${PARENT_POOL}" 
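The qa/tasks/util/rados.py hunk above threads an optional application argument through create_ec_pool, create_replicated_pool and create_cache_pool so that freshly created pools are tagged via `ceph osd pool application enable` (expected since Luminous; untagged pools raise the POOL_APP_NOT_ENABLED warning). A minimal local sketch of the same pattern, assuming only that the ceph CLI is on PATH; the helper below is illustrative and not part of the patch:

    import subprocess

    def create_replicated_pool(name, pgnum, cluster_name='ceph', application=None):
        """Create a replicated pool and optionally tag the application using it.

        Mirrors the teuthology helper above, but runs the commands directly
        instead of through a remote.run() proxy.
        """
        subprocess.check_call([
            'ceph', 'osd', 'pool', 'create', name, str(pgnum), str(pgnum),
            '--cluster', cluster_name,
        ])
        if application:
            # Luminous expects every pool to declare its application
            # ('rgw', 'rbd', 'cephfs', ...); skipping this triggers the
            # POOL_APP_NOT_ENABLED health warning.
            subprocess.check_call([
                'ceph', 'osd', 'pool', 'application', 'enable', name, application,
                '--cluster', cluster_name,
            ])

For example, create_replicated_pool('default.rgw.buckets.data', 64, application='rgw') creates and tags an RGW data pool in one step.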
+create_snapshot ${CLUSTER2} ${POOL} ${dp_image} 'snap1' +write_image ${CLUSTER2} ${POOL} ${dp_image} 100 +create_snapshot ${CLUSTER2} ${POOL} ${dp_image} 'snap2' +write_image ${CLUSTER2} ${POOL} ${dp_image} 100 +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${dp_image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${dp_image} 'up+replaying' 'master_position' +compare_images ${POOL} ${dp_image}@snap1 +compare_images ${POOL} ${dp_image}@snap2 +compare_images ${POOL} ${dp_image} + testlog "TEST: disable mirroring / delete non-primary image" image2=test2 image3=test3 diff --git a/ceph/qa/workunits/rbd/rbd_mirror_helpers.sh b/ceph/qa/workunits/rbd/rbd_mirror_helpers.sh index f825bec85..23216711e 100755 --- a/ceph/qa/workunits/rbd/rbd_mirror_helpers.sh +++ b/ceph/qa/workunits/rbd/rbd_mirror_helpers.sh @@ -867,6 +867,16 @@ request_resync_image() rbd --cluster=${cluster} -p ${pool} mirror image resync ${image} } +get_image_data_pool() +{ + local cluster=$1 + local pool=$2 + local image=$3 + + rbd --cluster ${cluster} -p ${pool} info ${image} | + awk '$1 == "data_pool:" {print $2}' +} + # # Main # diff --git a/ceph/src/.git_version b/ceph/src/.git_version index a0db25312..c345fdc66 100644 --- a/ceph/src/.git_version +++ b/ceph/src/.git_version @@ -1,2 +1,2 @@ -a5f84b37668fc8e03165aaf5cbb380c78e4deba4 -v12.1.4 +32ce2a3ae5239ee33d6150705cdb24d43bab910c +v12.2.0 diff --git a/ceph/src/ceph-volume/ceph_volume/devices/lvm/api.py b/ceph/src/ceph-volume/ceph_volume/devices/lvm/api.py index 8ff2faf4d..944a4343d 100644 --- a/ceph/src/ceph-volume/ceph_volume/devices/lvm/api.py +++ b/ceph/src/ceph-volume/ceph_volume/devices/lvm/api.py @@ -3,11 +3,45 @@ API for CRUD lvm tag operations. Follows the Ceph LVM tag naming convention that prefixes tags with ``ceph.`` and uses ``=`` for assignment, and provides set of utilities for interacting with LVM. """ -import json from ceph_volume import process from ceph_volume.exceptions import MultipleLVsError, MultipleVGsError +def _output_parser(output, fields): + """ + Newer versions of LVM allow ``--reportformat=json``, but older versions, + like the one included in Xenial do not. 
LVM has the ability to filter and + format its output so we assume the output will be in a format this parser + can handle (using ',' as a delimiter) + + :param fields: A string, possibly using ',' to group many items, as it + would be used on the CLI + :param output: The CLI output from the LVM call + """ + field_items = fields.split(',') + report = [] + for line in output: + # clear the leading/trailing whitespace + line = line.strip() + + # remove the extra '"' in each field + line = line.replace('"', '') + + # prevent moving forward with empty contents + if not line: + continue + + # spliting on ';' because that is what the lvm call uses as + # '--separator' + output_items = [i.strip() for i in line.split(';')] + # map the output to the fiels + report.append( + dict(zip(field_items, output_items)) + ) + + return report + + def parse_tags(lv_tags): """ Return a dictionary mapping of all the tags associated with @@ -37,49 +71,22 @@ def parse_tags(lv_tags): def get_api_vgs(): """ - Return the list of group volumes available in the system using flags to include common - metadata associated with them + Return the list of group volumes available in the system using flags to + include common metadata associated with them - Command and sample JSON output, should look like:: + Command and sample delimeted output, should look like:: - $ sudo vgs --reportformat=json - { - "report": [ - { - "vg": [ - { - "vg_name":"VolGroup00", - "pv_count":"1", - "lv_count":"2", - "snap_count":"0", - "vg_attr":"wz--n-", - "vg_size":"38.97g", - "vg_free":"0 "}, - { - "vg_name":"osd_vg", - "pv_count":"3", - "lv_count":"1", - "snap_count":"0", - "vg_attr":"wz--n-", - "vg_size":"32.21g", - "vg_free":"9.21g" - } - ] - } - ] - } + $ sudo vgs --noheadings --separator=';' \ + -o vg_name,pv_count,lv_count,snap_count,vg_attr,vg_size,vg_free + ubuntubox-vg;1;2;0;wz--n-;299.52g;12.00m + osd_vg;3;1;0;wz--n-;29.21g;9.21g """ + fields = 'vg_name,pv_count,lv_count,snap_count,vg_attr,vg_size,vg_free' stdout, stderr, returncode = process.call( - [ - 'sudo', 'vgs', '--reportformat=json' - ] + ['sudo', 'vgs', '--noheadings', '--separator=";"', '-o', fields] ) - report = json.loads(''.join(stdout)) - for report_item in report.get('report', []): - # is it possible to get more than one item in "report" ? - return report_item['vg'] - return [] + return _output_parser(stdout, fields) def get_api_lvs(): @@ -87,37 +94,18 @@ def get_api_lvs(): Return the list of logical volumes available in the system using flags to include common metadata associated with them - Command and sample JSON output, should look like:: + Command and delimeted output, should look like:: - $ sudo lvs -o lv_tags,lv_path,lv_name,vg_name --reportformat=json - { - "report": [ - { - "lv": [ - { - "lv_tags":"", - "lv_path":"/dev/VolGroup00/LogVol00", - "lv_name":"LogVol00", - "vg_name":"VolGroup00"}, - { - "lv_tags":"ceph.osd_fsid=aaa-fff-0000,ceph.osd_fsid=aaa-fff-bbbb,ceph.osd_id=0", - "lv_path":"/dev/osd_vg/OriginLV", - "lv_name":"OriginLV", - "vg_name":"osd_vg" - } - ] - } - ] - } + $ sudo lvs --noheadings --separator=';' -o lv_tags,lv_path,lv_name,vg_name + ;/dev/ubuntubox-vg/root;root;ubuntubox-vg + ;/dev/ubuntubox-vg/swap_1;swap_1;ubuntubox-vg """ + fields = 'lv_tags,lv_path,lv_name,vg_name' stdout, stderr, returncode = process.call( - ['sudo', 'lvs', '-o', 'lv_tags,lv_path,lv_name,vg_name', '--reportformat=json']) - report = json.loads(''.join(stdout)) - for report_item in report.get('report', []): - # is it possible to get more than one item in "report" ? 
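The api.py changes in this hunk drop `--reportformat=json` (not available in the LVM release shipped with Xenial) in favour of `--noheadings --separator=';'` plus a hand-rolled parser. A self-contained sketch of that parsing approach, assuming the same ';' separator and comma-separated field list used by the patch; the sample lines are illustrative only:

    def parse_lvm_report(lines, fields):
        """Turn `vgs/lvs --noheadings --separator=';' -o <fields>` output into dicts.

        `fields` is the same comma-separated string handed to `-o`, so the keys
        of each dict line up with the requested columns.
        """
        keys = fields.split(',')
        report = []
        for line in lines:
            # strip padding whitespace and any stray double quotes
            line = line.strip().replace('"', '')
            if not line:
                # skip blank lines left over in the command output
                continue
            values = [item.strip() for item in line.split(';')]
            report.append(dict(zip(keys, values)))
        return report

    if __name__ == '__main__':
        fields = 'vg_name,pv_count,lv_count,snap_count,vg_attr,vg_size,vg_free'
        sample = ['  ubuntubox-vg;1;2;0;wz--n-;299.52g;12.00m',
                  '  osd_vg;3;1;0;wz--n-;29.21g;9.21g']
        for vg in parse_lvm_report(sample, fields):
            print(vg['vg_name'], vg['vg_free'])

Run directly, the demo prints the name and free space of each sample volume group, which is the same shape of result that get_api_vgs() and get_api_lvs() now return.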
- return report_item['lv'] - return [] + ['sudo', 'lvs', '--noheadings', '--separator=";"', '-o', fields] + ) + return _output_parser(stdout, fields) def get_lv(lv_name=None, vg_name=None, lv_path=None, lv_tags=None): diff --git a/ceph/src/ceph-volume/ceph_volume/devices/lvm/common.py b/ceph/src/ceph-volume/ceph_volume/devices/lvm/common.py index 599bbe6e0..b4e4ee3ad 100644 --- a/ceph/src/ceph-volume/ceph_volume/devices/lvm/common.py +++ b/ceph/src/ceph-volume/ceph_volume/devices/lvm/common.py @@ -1,3 +1,4 @@ +from ceph_volume.util import arg_validators import argparse @@ -14,12 +15,13 @@ def common_parser(prog, description): required_args = parser.add_argument_group('required arguments') parser.add_argument( '--journal', - help='A logical group name, path to a logical volume, or path to a device', + help='A logical volume (vg_name/lv_name), or path to a device', ) required_args.add_argument( '--data', required=True, - help='A logical group name or a path to a logical volume', + type=arg_validators.LVPath(), + help='A logical volume (vg_name/lv_name) for OSD data', ) parser.add_argument( '--journal-size', diff --git a/ceph/src/ceph-volume/ceph_volume/devices/lvm/create.py b/ceph/src/ceph-volume/ceph_volume/devices/lvm/create.py index 1d3f6a3b6..8c747f342 100644 --- a/ceph/src/ceph-volume/ceph_volume/devices/lvm/create.py +++ b/ceph/src/ceph-volume/ceph_volume/devices/lvm/create.py @@ -28,28 +28,18 @@ class Create(object): all the metadata to the logical volumes using LVM tags, and starting the OSD daemon. - Most basic Usage looks like (journal will be collocated from the same volume group): - - ceph-volume lvm create --data {volume group name} - - Example calls for supported scenarios: - Dedicated volume group for Journal(s) - ------------------------------------- + Filestore + --------- Existing logical volume (lv) or device: - ceph-volume lvm create --data {logical volume} --journal /path/to/{lv}|{device} + ceph-volume lvm create --filestore --data {vg name/lv name} --journal /path/to/device Or: - ceph-volume lvm create --data {data volume group} --journal {journal volume group} - - Collocated (same group) for data and journal - -------------------------------------------- - - ceph-volume lvm create --data {volume group} + ceph-volume lvm create --filestore --data {vg name/lv name} --journal {vg name/lv name} """) parser = create_parser( diff --git a/ceph/src/ceph-volume/ceph_volume/devices/lvm/prepare.py b/ceph/src/ceph-volume/ceph_volume/devices/lvm/prepare.py index bd84aab18..a9630ce48 100644 --- a/ceph/src/ceph-volume/ceph_volume/devices/lvm/prepare.py +++ b/ceph/src/ceph-volume/ceph_volume/devices/lvm/prepare.py @@ -9,20 +9,6 @@ from . 
import api from .common import prepare_parser -def canonical_device_path(device): - """ - Ensure that a device is canonical (full path) and that it exists so that - it can be used throughout the prepare/activate process - """ - # FIXME: this is obviously super naive - inferred = os.path.join('/dev', device) - if os.path.exists(os.path.abspath(device)): - return device - elif os.path.exists(inferred): - return inferred - raise RuntimeError('Selected device does not exist: %s' % device) - - def prepare_filestore(device, journal, secrets, id_=None, fsid=None): """ :param device: The name of the volume group or lvm to work with @@ -65,6 +51,19 @@ class Prepare(object): def __init__(self, argv): self.argv = argv + def get_journal_lv(self, argument): + """ + Perform some parsing of the value of ``--journal`` so that the process + can determine correctly if it got a device path or an lv + :param argument: The value of ``--journal``, that will need to be split + to retrieve the actual lv + """ + try: + vg_name, lv_name = argument.split('/') + except (ValueError, AttributeError): + return None + return api.get_lv(lv_name=lv_name, vg_name=vg_name) + @decorators.needs_root def prepare(self, args): # FIXME we don't allow re-using a keyring, we always generate one for the @@ -78,66 +77,40 @@ class Prepare(object): #osd_id = args.osd_id or prepare_utils.create_id(fsid) # allow re-using an id, in case a prepare failed osd_id = args.osd_id or prepare_utils.create_id(fsid, json.dumps(secrets)) - journal_name = "journal_%s" % fsid - osd_name = "osd_%s" % fsid - + vg_name, lv_name = args.data.split('/') if args.filestore: - data_vg = api.get_vg(vg_name=args.data) - data_lv = api.get_lv(lv_name=args.data) - journal_vg = api.get_vg(vg_name=args.journal) - journal_lv = api.get_lv(lv_name=args.journal) - journal_device = None - # it is possible to pass a device as a journal that is not - # an actual logical volume (or group) - if not args.journal: - if data_lv: - raise RuntimeError('--journal is required when not using a vg for OSD data') - # collocated: carve out the journal from the data vg - if data_vg: - journal_lv = api.create_lv( - name=journal_name, - group=data_vg.name, - size=args.journal_size, - osd_fsid=fsid, - osd_id=osd_id, - type='journal', - cluster_fsid=cluster_fsid - ) + data_lv = api.get_lv(lv_name=lv_name, vg_name=vg_name) - # if a volume group was defined for the journal create that first - if journal_vg: - journal_lv = api.create_lv( - name=journal_name, - group=args.journal, - size=args.journal_size, - osd_fsid=fsid, - osd_id=osd_id, - type='journal', - cluster_fsid=cluster_fsid - ) - if journal_lv: - journal_device = journal_lv.lv_path - # The journal is probably a device, not in LVM - elif args.journal: - journal_device = canonical_device_path(args.journal) - # At this point we must have a journal_lv or a journal device - # now create the osd from the group if that was found - if data_vg: - # XXX make sure that a there aren't more OSDs than physical - # devices from this volume group - data_lv = api.create_lv( - name=osd_name, - group=args.data, - osd_fsid=fsid, - osd_id=osd_id, - type='data', - journal_device=journal_device, - cluster_fsid=cluster_fsid - ) # we must have either an existing data_lv or a newly created, so lets make # sure that the tags are correct if not data_lv: raise RuntimeError('no data logical volume found with: %s' % args.data) + + if not args.journal: + raise RuntimeError('--journal is required when using --filestore') + journal_device = None + journal_lv = 
self.get_journal_lv(args.journal) + + # check if we have an actual path to a device, which is allowed + if not journal_lv: + if os.path.exists(args.journal): + journal_device = args.journal + else: + raise RuntimeError( + '--journal specified an invalid or non-existent device: %s' % args.journal + ) + # Otherwise the journal_device is the path to the lv + else: + journal_device = journal_lv.lv_path + journal_lv.set_tags({ + 'ceph.type': 'journal', + 'ceph.osd_fsid': fsid, + 'ceph.osd_id': osd_id, + 'ceph.cluster_fsid': cluster_fsid, + 'ceph.journal_device': journal_device, + 'ceph.data_device': data_lv.lv_path, + }) + data_lv.set_tags({ 'ceph.type': 'data', 'ceph.osd_fsid': fsid, diff --git a/ceph/src/ceph-volume/ceph_volume/systemd/main.py b/ceph/src/ceph-volume/ceph_volume/systemd/main.py index 69c0f38a2..bf24f0a01 100644 --- a/ceph/src/ceph-volume/ceph_volume/systemd/main.py +++ b/ceph/src/ceph-volume/ceph_volume/systemd/main.py @@ -57,7 +57,6 @@ def main(args=None): Expected input is similar to:: - ['/path/to/ceph-volume-systemd', '--'] ['/path/to/ceph-volume-systemd', '-'] For example:: diff --git a/ceph/src/ceph-volume/ceph_volume/tests/devices/lvm/test_api.py b/ceph/src/ceph-volume/ceph_volume/tests/devices/lvm/test_api.py index c849fac99..089afa1a8 100644 --- a/ceph/src/ceph-volume/ceph_volume/tests/devices/lvm/test_api.py +++ b/ceph/src/ceph-volume/ceph_volume/tests/devices/lvm/test_api.py @@ -24,50 +24,53 @@ class TestParseTags(object): class TestGetAPIVgs(object): def test_report_is_emtpy(self, monkeypatch): - monkeypatch.setattr(api.process, 'call', lambda x: ('{}', '', 0)) + monkeypatch.setattr(api.process, 'call', lambda x: ('\n\n', '', 0)) assert api.get_api_vgs() == [] def test_report_has_stuff(self, monkeypatch): - report = '{"report":[{"vg":[{"vg_name":"VolGroup00"}]}]}' + report = [' VolGroup00'] monkeypatch.setattr(api.process, 'call', lambda x: (report, '', 0)) assert api.get_api_vgs() == [{'vg_name': 'VolGroup00'}] - def test_report_has_multiple_items(self, monkeypatch): - report = '{"report":[{"vg":[{"vg_name":"VolGroup00"},{"vg_name":"ceph_vg"}]}]}' + def test_report_has_stuff_with_empty_attrs(self, monkeypatch): + report = [' VolGroup00 ;;;;;;9g'] monkeypatch.setattr(api.process, 'call', lambda x: (report, '', 0)) - assert api.get_api_vgs() == [{'vg_name': 'VolGroup00'}, {'vg_name': 'ceph_vg'}] + result = api.get_api_vgs()[0] + assert len(result.keys()) == 7 + assert result['vg_name'] == 'VolGroup00' + assert result['vg_free'] == '9g' - def test_does_not_get_poluted_with_non_vg_items(self, monkeypatch): - report = '{"report":[{"vg":[{"vg_name":"VolGroup00"}],"lv":[{"lv":"1"}]}]}' + def test_report_has_multiple_items(self, monkeypatch): + report = [' VolGroup00;;;;;;;', ' ceph_vg;;;;;;;'] monkeypatch.setattr(api.process, 'call', lambda x: (report, '', 0)) - assert api.get_api_vgs() == [{'vg_name': 'VolGroup00'}] + result = api.get_api_vgs() + assert result[0]['vg_name'] == 'VolGroup00' + assert result[1]['vg_name'] == 'ceph_vg' class TestGetAPILvs(object): def test_report_is_emtpy(self, monkeypatch): - monkeypatch.setattr(api.process, 'call', lambda x: ('{}', '', 0)) + monkeypatch.setattr(api.process, 'call', lambda x: ('', '', 0)) assert api.get_api_lvs() == [] def test_report_has_stuff(self, monkeypatch): - report = '{"report":[{"lv":[{"lv_name":"VolGroup00"}]}]}' + report = [' ;/path;VolGroup00;root'] monkeypatch.setattr(api.process, 'call', lambda x: (report, '', 0)) - assert api.get_api_lvs() == [{'lv_name': 'VolGroup00'}] + result = api.get_api_lvs() + assert 
result[0]['lv_name'] == 'VolGroup00' def test_report_has_multiple_items(self, monkeypatch): - report = '{"report":[{"lv":[{"lv_name":"VolName"},{"lv_name":"ceph_lv"}]}]}' - monkeypatch.setattr(api.process, 'call', lambda x: (report, '', 0)) - assert api.get_api_lvs() == [{'lv_name': 'VolName'}, {'lv_name': 'ceph_lv'}] - - def test_does_not_get_poluted_with_non_lv_items(self, monkeypatch): - report = '{"report":[{"lv":[{"lv_name":"VolName"}],"vg":[{"vg":"1"}]}]}' + report = [' ;/path;VolName;root', ';/dev/path;ceph_lv;ceph_vg'] monkeypatch.setattr(api.process, 'call', lambda x: (report, '', 0)) - assert api.get_api_lvs() == [{'lv_name': 'VolName'}] + result = api.get_api_lvs() + assert result[0]['lv_name'] == 'VolName' + assert result[1]['lv_name'] == 'ceph_lv' @pytest.fixture def volumes(monkeypatch): - monkeypatch.setattr(process, 'call', lambda x: ('{}', '', 0)) + monkeypatch.setattr(process, 'call', lambda x: ('', '', 0)) volumes = api.Volumes() volumes._purge() return volumes @@ -75,7 +78,7 @@ def volumes(monkeypatch): @pytest.fixture def volume_groups(monkeypatch): - monkeypatch.setattr(process, 'call', lambda x: ('{}', '', 0)) + monkeypatch.setattr(process, 'call', lambda x: ('', '', 0)) vgs = api.VolumeGroups() vgs._purge() return vgs diff --git a/ceph/src/ceph-volume/ceph_volume/tests/devices/lvm/test_prepare.py b/ceph/src/ceph-volume/ceph_volume/tests/devices/lvm/test_prepare.py index b8402a767..fabae296a 100644 --- a/ceph/src/ceph-volume/ceph_volume/tests/devices/lvm/test_prepare.py +++ b/ceph/src/ceph-volume/ceph_volume/tests/devices/lvm/test_prepare.py @@ -37,6 +37,21 @@ class TestPrepare(object): assert 'A logical group name or a path' in stdout +class TestGetJournalLV(object): + + @pytest.mark.parametrize('arg', ['', '///', None, '/dev/sda1']) + def test_no_journal_on_invalid_path(self, monkeypatch, arg): + monkeypatch.setattr(lvm.prepare.api, 'get_lv', lambda **kw: False) + prepare = lvm.prepare.Prepare([]) + assert prepare.get_journal_lv(arg) is None + + def test_no_journal_lv_found(self, monkeypatch): + # patch it with 0 so we know we are getting to get_lv + monkeypatch.setattr(lvm.prepare.api, 'get_lv', lambda **kw: 0) + prepare = lvm.prepare.Prepare([]) + assert prepare.get_journal_lv('vg/lv') == 0 + + class TestActivate(object): def test_main_spits_help_with_no_arguments(self, capsys): diff --git a/ceph/src/ceph-volume/ceph_volume/tests/functional/centos7/create/group_vars/all b/ceph/src/ceph-volume/ceph_volume/tests/functional/centos7/create/group_vars/all index 971c63d40..cd16377b1 100644 --- a/ceph/src/ceph-volume/ceph_volume/tests/functional/centos7/create/group_vars/all +++ b/ceph/src/ceph-volume/ceph_volume/tests/functional/centos7/create/group_vars/all @@ -11,7 +11,9 @@ osd_scenario: lvm copy_admin_key: true # test-volume is created by tests/functional/lvm_setup.yml from /dev/sda lvm_volumes: - test_volume: /dev/sdc + - data: test_volume + journal: /dev/sdc + data_vg: test_group os_tuning_params: - { name: kernel.pid_max, value: 4194303 } - { name: fs.file-max, value: 26234859 } diff --git a/ceph/src/ceph-volume/ceph_volume/tests/functional/xenial/create/group_vars/all b/ceph/src/ceph-volume/ceph_volume/tests/functional/xenial/create/group_vars/all index 971c63d40..cd16377b1 100644 --- a/ceph/src/ceph-volume/ceph_volume/tests/functional/xenial/create/group_vars/all +++ b/ceph/src/ceph-volume/ceph_volume/tests/functional/xenial/create/group_vars/all @@ -11,7 +11,9 @@ osd_scenario: lvm copy_admin_key: true # test-volume is created by tests/functional/lvm_setup.yml 
from /dev/sda lvm_volumes: - test_volume: /dev/sdc + - data: test_volume + journal: /dev/sdc + data_vg: test_group os_tuning_params: - { name: kernel.pid_max, value: 4194303 } - { name: fs.file-max, value: 26234859 } diff --git a/ceph/src/ceph-volume/ceph_volume/tests/util/test_arg_validators.py b/ceph/src/ceph-volume/ceph_volume/tests/util/test_arg_validators.py new file mode 100644 index 000000000..917469128 --- /dev/null +++ b/ceph/src/ceph-volume/ceph_volume/tests/util/test_arg_validators.py @@ -0,0 +1,24 @@ +import pytest +import argparse +from ceph_volume.util import arg_validators + + +invalid_lv_paths = [ + '', 'lv_name', '///', '/lv_name', 'lv_name/', + '/dev/lv_group/lv_name' +] + + +class TestLVPath(object): + + def setup(self): + self.validator = arg_validators.LVPath() + + @pytest.mark.parametrize('path', invalid_lv_paths) + def test_no_slash_is_an_error(self, path): + with pytest.raises(argparse.ArgumentError): + self.validator(path) + + def test_is_valid(self): + path = 'vg/lv' + assert self.validator(path) == path diff --git a/ceph/src/ceph-volume/ceph_volume/util/arg_validators.py b/ceph/src/ceph-volume/ceph_volume/util/arg_validators.py new file mode 100644 index 000000000..feb470716 --- /dev/null +++ b/ceph/src/ceph-volume/ceph_volume/util/arg_validators.py @@ -0,0 +1,29 @@ +import argparse + + +class LVPath(object): + """ + A simple validator to ensure that a logical volume is specified like:: + + / + + Because for LVM it is better to be specific on what group does an lv + belongs to. + """ + + def __call__(self, string): + error = None + try: + vg, lv = string.split('/') + except ValueError: + error = "Logical volume must be specified as 'volume_group/logical_volume' but got: %s" % string + raise argparse.ArgumentError(None, error) + + if not vg: + error = "Didn't specify a volume group like 'volume_group/logical_volume', got: %s" % string + if not lv: + error = "Didn't specify a logical volume like 'volume_group/logical_volume', got: %s" % string + + if error: + raise argparse.ArgumentError(None, error) + return string diff --git a/ceph/src/ceph_mgr.cc b/ceph/src/ceph_mgr.cc index 6b72cf2f9..91043f6e8 100644 --- a/ceph/src/ceph_mgr.cc +++ b/ceph/src/ceph_mgr.cc @@ -20,6 +20,7 @@ #include "common/config.h" #include "common/ceph_argparse.h" #include "common/errno.h" +#include "common/pick_address.h" #include "global/global_init.h" #include "mgr/MgrStandby.h" @@ -52,6 +53,8 @@ int main(int argc, const char **argv) usage(); } + pick_addresses(g_ceph_context, CEPH_PICK_ADDRESS_PUBLIC); + global_init_daemonize(g_ceph_context); global_init_chdir(g_ceph_context); common_init_finish(g_ceph_context); diff --git a/ceph/src/client/Client.cc b/ceph/src/client/Client.cc index e461ab4a5..cc0a93c36 100644 --- a/ceph/src/client/Client.cc +++ b/ceph/src/client/Client.cc @@ -9378,11 +9378,11 @@ int Client::chdir(const char *relpath, std::string &new_cwd, cwd.swap(in); ldout(cct, 3) << "chdir(" << relpath << ") cwd now " << cwd->ino << dendl; - getcwd(new_cwd, perms); + _getcwd(new_cwd, perms); return 0; } -void Client::getcwd(string& dir, const UserPerm& perms) +void Client::_getcwd(string& dir, const UserPerm& perms) { filepath path; ldout(cct, 10) << "getcwd " << *cwd << dendl; @@ -9422,6 +9422,12 @@ void Client::getcwd(string& dir, const UserPerm& perms) dir += path.get_path(); } +void Client::getcwd(string& dir, const UserPerm& perms) +{ + Mutex::Locker l(client_lock); + _getcwd(dir, perms); +} + int Client::statfs(const char *path, struct statvfs *stbuf, const UserPerm& perms) { 
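The ceph-volume pieces above (arg_validators.LVPath and Prepare.get_journal_lv) rest on the same convention: --data must name a logical volume as 'vg/lv', while --journal may be either 'vg/lv' or a path to an existing block device. A condensed sketch of that convention, independent of the ceph_volume package; the lookup_lv callback stands in for api.get_lv() and is an assumption of this example:

    import argparse
    import os

    def lv_path(value):
        """argparse type mirroring arg_validators.LVPath: require 'vg/lv'."""
        try:
            vg, lv = value.split('/')
        except ValueError:
            raise argparse.ArgumentTypeError(
                "expected 'volume_group/logical_volume', got: %r" % value)
        if not vg or not lv:
            raise argparse.ArgumentTypeError(
                "both volume group and logical volume are required, got: %r" % value)
        return value

    def resolve_journal(value, lookup_lv):
        """Resolve --journal the way Prepare.get_journal_lv()/prepare() do.

        lookup_lv(vg_name, lv_name) stands in for api.get_lv(); it should
        return an object with an lv_path attribute, or None when no LV matches.
        """
        if not value:
            # filestore always needs a journal, as the prepare() hunk enforces
            raise RuntimeError('--journal is required when using --filestore')
        try:
            vg, lv = value.split('/')
            found = lookup_lv(vg, lv)
        except (ValueError, AttributeError):
            found = None
        if found:
            return found.lv_path
        if os.path.exists(value):
            # a plain device path is also acceptable for the journal
            return value
        raise RuntimeError('--journal is not an LV or an existing device: %s' % value)

With these helpers, '--data vg/lv --journal /dev/sdc1' passes validation while a bare '--data mylv' is rejected at argument-parsing time, which matches the behaviour the new tests in test_arg_validators.py and test_prepare.py assert.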
diff --git a/ceph/src/client/Client.h b/ceph/src/client/Client.h index beefa1eba..8a1852e7a 100644 --- a/ceph/src/client/Client.h +++ b/ceph/src/client/Client.h @@ -956,6 +956,7 @@ public: // crap int chdir(const char *s, std::string &new_cwd, const UserPerm& perms); + void _getcwd(std::string& cwd, const UserPerm& perms); void getcwd(std::string& cwd, const UserPerm& perms); // namespace ops diff --git a/ceph/src/client/fuse_ll.cc b/ceph/src/client/fuse_ll.cc index 1fdd3289a..d10f7b814 100644 --- a/ceph/src/client/fuse_ll.cc +++ b/ceph/src/client/fuse_ll.cc @@ -112,14 +112,15 @@ static int getgroups(fuse_req_t req, gid_t **sgids) return 0; } - *sgids = (gid_t*)malloc(c*sizeof(**sgids)); - if (!*sgids) { + gid_t *gids = new (std::nothrow) gid_t[c]; + if (!gids) { return -ENOMEM; } - c = fuse_req_getgroups(req, c, *sgids); + c = fuse_req_getgroups(req, c, gids); if (c < 0) { - free(*sgids); - return c; + delete gids; + } else { + *sgids = gids; } return c; #endif diff --git a/ceph/src/cls/log/cls_log.cc b/ceph/src/cls/log/cls_log.cc index 411cbc919..c7ed1f5bf 100644 --- a/ceph/src/cls/log/cls_log.cc +++ b/ceph/src/cls/log/cls_log.cc @@ -200,9 +200,7 @@ static int cls_log_list(cls_method_context_t hctx, bufferlist *in, bufferlist *o } } - if (ret.truncated) { - ret.marker = marker; - } + ret.marker = marker; ::encode(ret, *out); diff --git a/ceph/src/common/AsyncReserver.h b/ceph/src/common/AsyncReserver.h index ae22b535d..28512ac80 100644 --- a/ceph/src/common/AsyncReserver.h +++ b/ceph/src/common/AsyncReserver.h @@ -144,6 +144,16 @@ public: } do_queues(); } + + /** + * Has reservations + * + * Return true if there are reservations in progress + */ + bool has_reservation() { + Mutex::Locker l(lock); + return !in_progress.empty(); + } static const unsigned MAX_PRIORITY = (unsigned)-1; }; diff --git a/ceph/src/common/LogClient.cc b/ceph/src/common/LogClient.cc index 07c53e80d..aeb2f5bfc 100644 --- a/ceph/src/common/LogClient.cc +++ b/ceph/src/common/LogClient.cc @@ -224,11 +224,17 @@ void LogChannel::do_log(clog_type prio, const std::string& s) // seq and who should be set for syslog/graylog/log_to_mon e.who = parent->get_myinst(); e.name = parent->get_myname(); - e.seq = parent->get_next_seq(); e.prio = prio; e.msg = s; e.channel = get_log_channel(); + // log to monitor? + if (log_to_monitors) { + e.seq = parent->queue(e); + } else { + e.seq = parent->get_next_seq(); + } + // log to syslog? if (do_log_to_syslog()) { ldout(cct,0) << __func__ << " log to syslog" << dendl; @@ -240,11 +246,6 @@ void LogChannel::do_log(clog_type prio, const std::string& s) ldout(cct,0) << __func__ << " log to graylog" << dendl; graylog->log_log_entry(&e); } - - // log to monitor? - if (log_to_monitors) { - parent->queue(e); - } } Message *LogClient::get_mon_log_message(bool flush) @@ -268,8 +269,8 @@ bool LogClient::are_pending() Message *LogClient::_get_mon_log_message() { assert(log_lock.is_locked()); - if (log_queue.empty()) - return NULL; + if (log_queue.empty()) + return NULL; // only send entries that haven't been sent yet during this mon // session! 
monclient needs to call reset_session() on mon session @@ -324,6 +325,7 @@ void LogClient::_send_to_mon() version_t LogClient::queue(LogEntry &entry) { Mutex::Locker l(log_lock); + entry.seq = ++last_log; log_queue.push_back(entry); if (is_mon) { @@ -335,6 +337,7 @@ version_t LogClient::queue(LogEntry &entry) uint64_t LogClient::get_next_seq() { + Mutex::Locker l(log_lock); return ++last_log; } diff --git a/ceph/src/common/LogClient.h b/ceph/src/common/LogClient.h index 56bb91ba9..be70e4512 100644 --- a/ceph/src/common/LogClient.h +++ b/ceph/src/common/LogClient.h @@ -245,7 +245,7 @@ private: bool is_mon; Mutex log_lock; version_t last_log_sent; - std::atomic last_log; + version_t last_log; std::deque log_queue; std::map channels; diff --git a/ceph/src/common/ipaddr.cc b/ceph/src/common/ipaddr.cc index 41add2de2..27b9cdff2 100644 --- a/ceph/src/common/ipaddr.cc +++ b/ceph/src/common/ipaddr.cc @@ -27,7 +27,7 @@ static void netmask_ipv4(const struct in_addr *addr, } -const struct sockaddr *find_ipv4_in_subnet(const struct ifaddrs *addrs, +const struct ifaddrs *find_ipv4_in_subnet(const struct ifaddrs *addrs, const struct sockaddr_in *net, unsigned int prefix_len) { struct in_addr want, temp; @@ -49,7 +49,7 @@ const struct sockaddr *find_ipv4_in_subnet(const struct ifaddrs *addrs, netmask_ipv4(cur, prefix_len, &temp); if (temp.s_addr == want.s_addr) { - return addrs->ifa_addr; + return addrs; } } @@ -71,7 +71,7 @@ static void netmask_ipv6(const struct in6_addr *addr, } -const struct sockaddr *find_ipv6_in_subnet(const struct ifaddrs *addrs, +const struct ifaddrs *find_ipv6_in_subnet(const struct ifaddrs *addrs, const struct sockaddr_in6 *net, unsigned int prefix_len) { struct in6_addr want, temp; @@ -93,14 +93,14 @@ const struct sockaddr *find_ipv6_in_subnet(const struct ifaddrs *addrs, netmask_ipv6(cur, prefix_len, &temp); if (IN6_ARE_ADDR_EQUAL(&temp, &want)) - return addrs->ifa_addr; + return addrs; } return NULL; } -const struct sockaddr *find_ip_in_subnet(const struct ifaddrs *addrs, +const struct ifaddrs *find_ip_in_subnet(const struct ifaddrs *addrs, const struct sockaddr *net, unsigned int prefix_len) { switch (net->sa_family) { diff --git a/ceph/src/common/options.cc b/ceph/src/common/options.cc index 2515d6aaa..6166ef3f8 100644 --- a/ceph/src/common/options.cc +++ b/ceph/src/common/options.cc @@ -2510,7 +2510,7 @@ std::vector