cmake_minimum_required(VERSION 2.8.11)
project(ceph)
-set(VERSION 12.1.4)
+set(VERSION 12.2.0)
if(POLICY CMP0046)
# Tweak policies (this one disables "missing" dependency warning)
option(WITH_LZ4 "LZ4 compression support" OFF)
if(WITH_LZ4)
- find_package(LZ4 REQUIRED)
+ find_package(LZ4 1.7 REQUIRED)
set(HAVE_LZ4 ${LZ4_FOUND})
endif(WITH_LZ4)
# Contributor: John Coyle <dx9err@gmail.com>
# Maintainer: John Coyle <dx9err@gmail.com>
pkgname=ceph
-pkgver=12.1.4
+pkgver=12.2.0
pkgrel=0
pkgdesc="Ceph is a distributed object store and file system"
pkgusers="ceph"
xmlstarlet
yasm
"
-source="ceph-12.1.4.tar.bz2"
+source="ceph-12.2.0.tar.bz2"
subpackages="
$pkgname-base
$pkgname-common
_udevrulesdir=/etc/udev/rules.d
_python_sitelib=/usr/lib/python2.7/site-packages
-builddir=$srcdir/ceph-12.1.4
+builddir=$srcdir/ceph-12.2.0
build() {
export CEPH_BUILD_VIRTUALENV=$builddir
# main package definition
#################################################################################
Name: ceph
-Version: 12.1.4
+Version: 12.2.0
Release: 0%{?dist}
%if 0%{?fedora} || 0%{?rhel}
Epoch: 2
Group: System/Filesystems
%endif
URL: http://ceph.com/
-Source0: http://ceph.com/download/ceph-12.1.4.tar.bz2
+Source0: http://ceph.com/download/ceph-12.2.0.tar.bz2
%if 0%{?suse_version}
%if 0%{?is_opensuse}
ExclusiveArch: x86_64 aarch64 ppc64 ppc64le
# common
#################################################################################
%prep
-%autosetup -p1 -n ceph-12.1.4
+%autosetup -p1 -n ceph-12.2.0
%build
%if 0%{with cephfs_java}
%{_mandir}/man8/ceph-detect-init.8*
%{_mandir}/man8/ceph-create-keys.8*
%{_mandir}/man8/ceph-disk.8*
+%{_mandir}/man8/ceph-volume.8*
+%{_mandir}/man8/ceph-volume-systemd.8*
%{_mandir}/man8/ceph-run.8*
%{_mandir}/man8/crushtool.8*
%{_mandir}/man8/osdmaptool.8*
%{_mandir}/man8/ceph-detect-init.8*
%{_mandir}/man8/ceph-create-keys.8*
%{_mandir}/man8/ceph-disk.8*
+%{_mandir}/man8/ceph-volume.8*
+%{_mandir}/man8/ceph-volume-systemd.8*
%{_mandir}/man8/ceph-run.8*
%{_mandir}/man8/crushtool.8*
%{_mandir}/man8/osdmaptool.8*
# LZ4_FOUND
# LZ4_INCLUDE_DIR
# LZ4_LIBRARY
+# LZ4_VERSION_STRING
+# LZ4_VERSION_MAJOR
+# LZ4_VERSION_MINOR
+# LZ4_VERSION_RELEASE
find_path(LZ4_INCLUDE_DIR NAMES lz4.h)
+
+if(LZ4_INCLUDE_DIR AND EXISTS "${LZ4_INCLUDE_DIR}/lz4.h")
+ foreach(ver "MAJOR" "MINOR" "RELEASE")
+ file(STRINGS "${LZ4_INCLUDE_DIR}/lz4.h" LZ4_VER_${ver}_LINE
+ REGEX "^#define[ \t]+LZ4_VERSION_${ver}[ \t]+[0-9]+[ \t]+.*$")
+ string(REGEX REPLACE "^#define[ \t]+LZ4_VERSION_${ver}[ \t]+([0-9]+)[ \t]+.*$"
+ "\\1" LZ4_VERSION_${ver} "${LZ4_VER_${ver}_LINE}")
+      unset(LZ4_VER_${ver}_LINE)
+ endforeach()
+ set(LZ4_VERSION_STRING
+ "${LZ4_VERSION_MAJOR}.${LZ4_VERSION_MINOR}.${LZ4_VERSION_RELEASE}")
+endif()
+
find_library(LZ4_LIBRARY NAMES lz4)
include(FindPackageHandleStandardArgs)
-find_package_handle_standard_args(LZ4 DEFAULT_MSG LZ4_LIBRARY LZ4_INCLUDE_DIR)
+find_package_handle_standard_args(LZ4
+ REQUIRED_VARS LZ4_LIBRARY LZ4_INCLUDE_DIR
+ VERSION_VAR LZ4_VERSION_STRING)
mark_as_advanced(LZ4_INCLUDE_DIR LZ4_LIBRARY)
usr/lib/python*/dist-packages/ceph_disk*
usr/sbin/ceph-disk
usr/sbin/ceph-volume
+usr/sbin/ceph-volume-systemd
usr/lib/python*/dist-packages/ceph_volume/*
usr/lib/python*/dist-packages/ceph_volume-*
usr/share/man/man8/ceph-clsinfo.8
usr/share/man/man8/ceph-disk.8
+usr/share/man/man8/ceph-volume.8
+usr/share/man/man8/ceph-volume-systemd.8
usr/share/man/man8/ceph-osd.8
+ceph (12.2.0-1) stable; urgency=medium
+
+ * New upstream release
+
+ -- Ceph Release Team <ceph-maintainers@ceph.com> Mon, 28 Aug 2017 16:30:16 +0000
+
ceph (12.1.4-1) stable; urgency=medium
* New upstream release
--- /dev/null
+.. _ceph-volume:
+
+ceph-volume
+===========
+Deploy OSDs with different device technologies like lvm or physical disks using
+pluggable tools (:doc:`lvm/index` itself is treated like a plugin). It tries to
+follow the workflow of ``ceph-disk`` for deploying OSDs, with a predictable
+and robust way of preparing, activating, and starting OSDs.
+
+:ref:`Overview <ceph-volume-overview>` |
+:ref:`Plugin Guide <ceph-volume-plugins>`
+
+
+**Command Line Subcommands**
+
+Although there is currently only support for ``lvm``, the plan is to support
+other technologies, including plain disks.
+
+* :ref:`ceph-volume-lvm`
+
+.. toctree::
+ :hidden:
+ :maxdepth: 3
+ :caption: Contents:
+
+ intro
+ lvm/index
+ lvm/activate
+ lvm/prepare
+ lvm/scan
+ lvm/systemd
--- /dev/null
+.. _ceph-volume-overview:
+
+Overview
+--------
+The ``ceph-volume`` tool aims to be a single purpose command line tool to deploy
+logical volumes as OSDs, trying to maintain a similar API to ``ceph-disk`` when
+preparing, activating, and creating OSDs.
+
+It deviates from ``ceph-disk`` by not interacting or relying on the udev rules
+that come installed for Ceph. These rules allow automatic detection of
+previously setup devices that are in turn fed into ``ceph-disk`` to activate
+them.
+
+
+``ceph-volume lvm``
+-------------------
+By making use of :term:`LVM tags`, the :ref:`ceph-volume-lvm` sub-command is
+able to store and later re-discover and query devices associated with OSDs so
+that they can later be activated.
--- /dev/null
+.. _ceph-volume-lvm-activate:
+
+``activate``
+============
+Once :ref:`ceph-volume-lvm-prepare` is completed, and all the various steps
+that entails are done, the volume is ready to get "activated".
+
+This activation process enables a systemd unit that persists the OSD ID and its
+UUID (also called ``fsid`` in Ceph CLI tools), so that at boot time it can
+understand what OSD is enabled and needs to be mounted.
+
+.. note:: The execution of this call is fully idempotent, and there are no
+          side-effects when running it multiple times.
+
+New OSDs
+--------
+To activate newly prepared OSDs both the :term:`OSD id` and :term:`OSD uuid`
+need to be supplied. For example::
+
+    ceph-volume lvm activate --filestore 0 0263644D-0BF1-4D6D-BC34-28BD98AE3BC8
+
+.. note:: The UUID is stored in the ``osd_fsid`` file in the OSD path, which is
+ generated when :ref:`ceph-volume-lvm-prepare` is used.
+
+Requiring uuids
+^^^^^^^^^^^^^^^
+The :term:`OSD uuid` is required as an extra step to ensure that the
+right OSD is being activated. It is entirely possible that a previous OSD with
+the same id exists, in which case the wrong one would end up being activated.
+
+
+Discovery
+---------
+With either existing OSDs or new ones being activated, a *discovery* process is
+performed using :term:`LVM tags` to enable the systemd units.
+
+The systemd unit will capture the :term:`OSD id` and :term:`OSD uuid` and
+persist it. Internally, the activation will enable it like::
+
+ systemctl enable ceph-volume@$id-$uuid-lvm
+
+For example::
+
+ systemctl enable ceph-volume@0-8715BEB4-15C5-49DE-BA6F-401086EC7B41-lvm
+
+This would start the discovery process for the OSD with an id of ``0`` and a UUID of
+``8715BEB4-15C5-49DE-BA6F-401086EC7B41``.
+
+.. note:: For more details on the systemd workflow see :ref:`ceph-volume-systemd`.
+
+The systemd unit will look for the matching OSD device, and by looking at its
+:term:`LVM tags` will proceed to (a sketch of the equivalent manual commands
+follows this list):
+
+#. mount the device in the corresponding location (by convention this is
+   ``/var/lib/ceph/osd/<cluster name>-<osd id>/``)
+
+#. ensure that all required devices are ready for that OSD
+
+#. start the ``ceph-osd@0`` systemd unit
+
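+For illustration only, the manual equivalent of these steps for an OSD with an
+id of ``0`` in a cluster named ``ceph`` (assuming a hypothetical data volume at
+``/dev/ceph-vg/data-0``) would look roughly like::
+
+    # mount the data volume at the conventional OSD location
+    mount /dev/ceph-vg/data-0 /var/lib/ceph/osd/ceph-0
+    # once all required devices are ready, start the OSD
+    systemctl start ceph-osd@0
+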
+Existing OSDs
+-------------
+For existing OSDs that have been deployed with different tooling, the only way
+to port them over to the new mechanism is to prepare them again (losing data).
+See :ref:`ceph-volume-lvm-existing-osds` for details on how to proceed.
+
+Summary
+-------
+To recap the ``activate`` process:
+
+#. require both :term:`OSD id` and :term:`OSD uuid`
+#. enable the systemd unit with matching id and uuid
+#. the systemd unit will ensure all devices are ready and mounted (if needed)
+#. the matching ``ceph-osd`` systemd unit will get started
--- /dev/null
+.. _ceph-volume-lvm:
+
+``lvm``
+=======
+Implements the functionality needed to deploy OSDs from the ``lvm`` subcommand:
+``ceph-volume lvm``
+
+**Command Line Subcommands**
+
+* :ref:`ceph-volume-lvm-prepare`
+
+* :ref:`ceph-volume-lvm-activate`
+
+.. not yet implemented
+.. * :ref:`ceph-volume-lvm-scan`
+
+**Internal functionality**
+
+There are other aspects of the ``lvm`` subcommand that are internal and not
+exposed to the user; these sections explain how those pieces work together,
+clarifying the workflows of the tool.
+
+:ref:`Systemd Units <ceph-volume-systemd>` |
+:ref:`lvm <ceph-volume-lvm-api>`
--- /dev/null
+.. _ceph-volume-lvm-prepare:
+
+``prepare``
+===========
+This subcommand allows a :term:`filestore` setup (:term:`bluestore` support is
+planned) and currently consumes only logical volumes for both the data and
+journal. It will not create or modify the logical volumes except for adding
+extra metadata.
+
+.. note:: This is part of a two-step process to deploy an OSD. If looking for
+          a single-call way, please see :ref:`ceph-volume-lvm-create`.
+
+To help identify volumes, as part of the process of preparing a volume (or
+volumes) to work with Ceph, the tool will assign a few pieces of metadata using
+:term:`LVM tags`.
+
+:term:`LVM tags` make volumes easy to discover later, and help identify them as
+part of a Ceph system and what role they have (journal, filestore, bluestore,
+etc.).
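+
+As a sketch of what this looks like at the LVM level (the volume path and the
+tag value here are hypothetical), a tag is applied with the standard
+``lvchange`` command::
+
+    lvchange --addtag ceph.osd_id=0 /dev/ceph-vg/data-0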
+
+Although initially :term:`filestore` is supported (and supported by default),
+the back end can be specified with:
+
+
+* :ref:`--filestore <ceph-volume-lvm-prepare_filestore>`
+* ``--bluestore``
+
+.. when available, this will need to be updated to:
+.. * :ref:`--bluestore <ceph-volume-prepare_bluestore>`
+
+.. _ceph-volume-lvm-prepare_filestore:
+
+``filestore``
+-------------
+This is the default OSD backend and allows preparation of logical volumes for
+a :term:`filestore` OSD.
+
+The process is *very* strict: it requires two logical volumes that are ready to
+be used. No special preparation is needed for these volumes other than
+following the minimum size requirements for data and journal.
+
+The API call looks like::
+
+    ceph-volume lvm prepare --filestore --data data --journal journal
+
+The journal *must* be a logical volume, just like the data volume, and that
+argument is always required even if both live under the same volume group.
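+
+For example, two such volumes could be created beforehand with plain LVM
+commands (the volume group name ``ceph-vg`` and the sizes are only
+placeholders)::
+
+    lvcreate -n data -L 100G ceph-vg
+    lvcreate -n journal -L 10G ceph-vg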
+
+A generated UUID is used to ask the cluster for a new OSD ID. These two pieces
+(the UUID and the OSD ID) are crucial for identifying an OSD and will later be
+used throughout the :ref:`ceph-volume-lvm-activate` process.
+
+The OSD data directory is created using the following convention::
+
+ /var/lib/ceph/osd/<cluster name>-<osd id>
+
+At this point the data volume is mounted at this location, and the journal
+volume is linked::
+
+ ln -s /path/to/journal /var/lib/ceph/osd/<cluster_name>-<osd-id>/journal
+
+The monmap is fetched using the bootstrap key from the OSD::
+
+ /usr/bin/ceph --cluster ceph --name client.bootstrap-osd
+ --keyring /var/lib/ceph/bootstrap-osd/ceph.keyring
+ mon getmap -o /var/lib/ceph/osd/<cluster name>-<osd id>/activate.monmap
+
+``ceph-osd`` will be called to populate the OSD directory (which is already
+mounted), re-using all the pieces of information from the initial steps::
+
+ ceph-osd --cluster ceph --mkfs --mkkey -i <osd id> \
+ --monmap /var/lib/ceph/osd/<cluster name>-<osd id>/activate.monmap --osd-data \
+ /var/lib/ceph/osd/<cluster name>-<osd id> --osd-journal /var/lib/ceph/osd/<cluster name>-<osd id>/journal \
+ --osd-uuid <osd uuid> --keyring /var/lib/ceph/osd/<cluster name>-<osd id>/keyring \
+ --setuser ceph --setgroup ceph
+
+.. _ceph-volume-lvm-existing-osds:
+
+Existing OSDs
+-------------
+For existing clusters that want to use this new system and have OSDs that are
+already running, there are a few things to take into account:
+
+.. warning:: This process will forcefully format the data device, destroying
+ existing data, if any.
+
+* OSD paths should follow this convention::
+
+ /var/lib/ceph/osd/<cluster name>-<osd id>
+
+* Preferably, no other mechanisms to mount the volume should exist, and any
+  that do exist (like fstab mount points) should be removed
+* There is currently no support for encrypted volumes
+
+The one-time process for an existing OSD, with an ID of 0 and
+using a ``ceph`` cluster name, would look like::
+
+ ceph-volume lvm prepare --filestore --osd-id 0 --osd-fsid E3D291C1-E7BF-4984-9794-B60D9FA139CB
+
+The command line tool will not contact the monitor to generate an OSD ID and
+will format the LVM device in addition to storing the metadata on it so that it
+can later be started (for a detailed metadata description see
+:ref:`ceph-volume-lvm-tags`).
+
+
+.. _ceph-volume-lvm-prepare_bluestore:
+
+``bluestore``
+-------------
+This subcommand is planned but not currently implemented.
+
+
+Storing metadata
+----------------
+The following tags will get applied as part of the preparation process
+regardless of the type of volume (journal or data) and also regardless of the
+OSD backend:
+
+* ``cluster_fsid``
+* ``data_device``
+* ``journal_device``
+* ``encrypted``
+* ``osd_fsid``
+* ``osd_id``
+* ``block``
+* ``db``
+* ``wal``
+* ``lockbox_device``
+
+.. note:: For the complete lvm tag conventions see :ref:`ceph-volume-lvm-tag-api`
+
+
+Summary
+-------
+To recap the ``prepare`` process:
+
+#. Accept only logical volumes for data and journal (both required)
+#. Generate a UUID for the OSD
+#. Ask the monitor for an OSD ID, reusing the generated UUID
+#. OSD data directory is created and data volume mounted
+#. Journal is symlinked from data volume to journal location
+#. monmap is fetched for activation
+#. devices are mounted and the data directory is populated by ``ceph-osd``
+#. data and journal volumes are assigned all the Ceph metadata using lvm tags
--- /dev/null
+scan
+====
+This sub-command will allow discovering Ceph volumes previously set up by the
+tool by looking into the system's logical volumes and their tags.
+
+As part of the :ref:`ceph-volume-lvm-prepare` process, the logical volumes are assigned
+a few tags with important pieces of information.
+
+.. note:: This sub-command is not yet implemented
--- /dev/null
+.. _ceph-volume-systemd:
+
+systemd
+=======
+As part of the :ref:`ceph-volume-lvm-activate` process, a few systemd units will get enabled
+that will use the OSD id and uuid as part of their name. These units will be
+run when the system boots, and will proceed to activate their corresponding
+volumes.
+
+The API for activation requires both the :term:`OSD id` and :term:`OSD uuid`,
+which get persisted by systemd. Internally, the activation process enables the
+systemd unit using the following convention::
+
+ ceph-volume@<type>-<extra metadata>
+
+Where ``type`` is the sub-command used to parse the extra metadata, and ``extra
+metadata`` is any additional information needed by the sub-command to be able
+to activate the OSD. For example, an OSD with an ID of 0, for the ``lvm``
+sub-command, would look like::
+
+ systemctl enable ceph-volume@lvm-0-0A3E1ED2-DA8A-4F0E-AA95-61DEC71768D6
+
+
+Process
+-------
+The systemd unit is a :term:`systemd oneshot` service, meant to start at boot after the
+local filesystem is ready to be used.
+
+Upon startup, it will identify the logical volume using :term:`LVM tags`,
+finding a matching ID and later ensuring it is the right one with
+the :term:`OSD uuid`.
+
+After identifying the correct volume it will then proceed to mount it by using
+the OSD destination conventions, that is::
+
+ /var/lib/ceph/osd/<cluster name>-<osd id>
+
+For our example OSD with an id of ``0``, that means the identified device will
+be mounted at::
+
+ /var/lib/ceph/osd/ceph-0
+
+Once that process is complete, a call will be made to start the OSD::
+
+ systemctl start ceph-osd@0
'install/*',
'mon/*',
'rados/*',
+ 'mgr/*',
+ 'ceph-volume/*',
'radosgw/*',
'rbd/*',
'start/*']
--- /dev/null
+===================================
+ceph-volume developer documentation
+===================================
+
+.. rubric:: Contents
+
+.. toctree::
+ :maxdepth: 1
+
+
+ plugins
+ lvm
+ systemd
--- /dev/null
+
+.. _ceph-volume-lvm-api:
+
+LVM
+===
+The backend of ``ceph-volume lvm`` is LVM itself; the tool relies heavily on
+the usage of tags, which are a way for LVM to allow extending its volume
+metadata. These values can later be queried against devices, and that is how
+they get discovered later.
+
+.. warning:: These APIs are not meant to be public, but are documented so that
+ it is clear what the tool is doing behind the scenes. Do not alter
+ any of these values.
+
+
+.. _ceph-volume-lvm-tag-api:
+
+Tag API
+-------
+The process of identifying logical volumes as part of Ceph relies on applying
+tags on all volumes. It follows a naming convention for the namespace that
+looks like::
+
+ ceph.<tag name>=<tag value>
+
+All tags are prefixed by the ``ceph`` keyword to claim ownership of that
+namespace and make it easily identifiable. This is how the OSD ID would be used
+in the context of lvm tags::
+
+ ceph.osd_id=0
+
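+Because these tags live in the LVM metadata itself, they can be inspected with
+standard LVM tooling. A quick, purely illustrative way to see which logical
+volumes carry a Ceph OSD id tag::
+
+    sudo lvs -o lv_name,vg_name,lv_tags | grep ceph.osd_id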
+
+.. _ceph-volume-lvm-tags:
+
+Metadata
+--------
+The following describes all the metadata from Ceph OSDs that is stored on an
+LVM volume:
+
+
+``type``
+--------
+Describes if the device is an OSD or a journal, with the ability to expand to
+other types when supported (for example a lockbox).
+
+Example::
+
+ ceph.type=osd
+
+
+``cluster_fsid``
+----------------
+Example::
+
+ ceph.cluster_fsid=7146B649-AE00-4157-9F5D-1DBFF1D52C26
+
+``data_device``
+---------------
+Example::
+
+ ceph.data_device=/dev/ceph/data-0
+
+``journal_device``
+------------------
+Example::
+
+ ceph.journal_device=/dev/ceph/journal-0
+
+``encrypted``
+-------------
+Example for enabled encryption with ``luks``::
+
+ ceph.encrypted=luks
+
+For plain dmcrypt::
+
+ ceph.encrypted=dmcrypt
+
+For disabled encryption::
+
+ ceph.encrypted=0
+
+``osd_fsid``
+------------
+Example::
+
+ ceph.osd_fsid=88ab9018-f84b-4d62-90b4-ce7c076728ff
+
+``osd_id``
+----------
+Example::
+
+ ceph.osd_id=1
+
+``block``
+---------
+Only used on :term:`bluestore` backends.
+
+Example::
+
+ ceph.block=/dev/mapper/vg-block-0
+
+``db``
+------
+Only used on :term:`bluestore` backends.
+
+Example::
+
+ ceph.db=/dev/mapper/vg-db-0
+
+``wal``
+-------
+Only used on :term:`bluestore` backends.
+
+Example::
+
+ ceph.wal=/dev/mapper/vg-wal-0
+
+
+``lockbox_device``
+------------------
+Only used when encryption is enabled, to store keys in an unencrypted
+volume.
+
+Example::
+
+ ceph.lockbox_device=/dev/mapper/vg-lockbox-0
--- /dev/null
+.. _ceph-volume-plugins:
+
+Plugins
+=======
+``ceph-volume`` initially started out providing support for using ``lvm`` as
+the underlying system for an OSD. That support is included as part of the tool,
+but it is treated like a plugin.
+
+This modularity allows other device or device-like technologies to consume
+and re-use the utilities and workflows provided.
+
+Adding Plugins
+--------------
+As a Python tool, plugins are registered via ``setuptools`` entry points. For
+a new plugin to be available, it should have an entry similar to this in its
+``setup.py`` file:
+
+.. code-block:: python
+
+ setup(
+ ...
+ entry_points = dict(
+ ceph_volume_handlers = [
+ 'my_command = my_package.my_module:MyClass',
+ ],
+        ),
+    )
+
+``MyClass`` should be a class that accepts ``sys.argv`` as its argument;
+``ceph-volume`` will pass that in at instantiation and call its ``main``
+method.
+
+This is how a plugin for ``ZFS`` could look, for example:
+
+.. code-block:: python
+
+    import argparse
+
+
+    class ZFS(object):
+
+ help_menu = 'Deploy OSDs with ZFS'
+ _help = """
+ Use ZFS as the underlying technology for OSDs
+
+ --verbose Increase the verbosity level
+ """
+
+ def __init__(self, argv):
+ self.argv = argv
+
+ def main(self):
+ parser = argparse.ArgumentParser()
+ args = parser.parse_args(self.argv)
+ ...
+
+And its entry point (via ``setuptools``) in ``setup.py`` would look like:
+
+.. code-block:: python
+
+ entry_points = {
+ 'ceph_volume_handlers': [
+ 'zfs = ceph_volume_zfs.zfs:ZFS',
+ ],
+ },
+
+After installation, the ``zfs`` subcommand would be listed and could be used
+as::
+
+ ceph-volume zfs
--- /dev/null
+.. _ceph-volume-systemd-api:
+
+systemd
+=======
+The workflow to *"activate"* an OSD relies on systemd unit files and their
+ability to persist information as a suffix to the instance name.
+
+``ceph-volume`` exposes the following convention for unit files::
+
+ ceph-volume@<sub command>-<extra metadata>
+
+For example, this is how enabling an OSD could look for the
+:ref:`ceph-volume-lvm` sub-command::
+
+ systemctl enable ceph-volume@lvm-0-8715BEB4-15C5-49DE-BA6F-401086EC7B41
+
+
+These three pieces of persisted information (the sub-command, the OSD id, and
+the OSD uuid) are needed by the sub-command so that it understands what OSD it
+needs to activate.
+
+Since ``lvm`` is not the only subcommand that will be supported, this
+convention is how other device types will be allowed to be defined.
+
+At some point for example, for plain disks, it could be::
+
+ systemctl enable ceph-volume@disk-0-8715BEB4-15C5-49DE-BA6F-401086EC7B41
+
+At startup, the systemd unit will execute a helper script that will parse the
+suffix and will end up calling ``ceph-volume`` back. Using the previous
+example for lvm, that call will look like::
+
+ ceph-volume lvm activate 0 8715BEB4-15C5-49DE-BA6F-401086EC7B41
+
+
+.. warning:: These workflows are not meant to be public, but are documented so that
+ it is clear what the tool is doing behind the scenes. Do not alter
+ any of these values.
osd_internals/index*
mds_internals/index*
radosgw/index*
+ ceph-volume/index*
Ceph is growing rapidly. As firms deploy Ceph, the technical terms such as
"RADOS", "RBD," "RGW" and so forth require corresponding marketing terms
-that explain what each component does. The terms in this glossary are
+that explain what each component does. The terms in this glossary are
intended to complement the existing technical terminology.
Sometimes more than one term applies to a definition. Generally, the first
reflect either technical terms or legacy ways of referring to Ceph systems.
-.. glossary::
+.. glossary::
Ceph Project
- The aggregate term for the people, software, mission and infrastructure
+ The aggregate term for the people, software, mission and infrastructure
of Ceph.
-
+
cephx
The Ceph authentication protocol. Cephx operates like Kerberos, but it
has no single point of failure.
Ceph
Ceph Platform
- All Ceph software, which includes any piece of code hosted at
+ All Ceph software, which includes any piece of code hosted at
`http://github.com/ceph`_.
-
+
Ceph System
Ceph Stack
A collection of two or more components of Ceph.
Node
Host
Any single machine or server in a Ceph System.
-
+
Ceph Storage Cluster
Ceph Object Store
RADOS
Ceph Cluster Map
cluster map
- The set of maps comprising the monitor map, OSD map, PG map, MDS map and
+ The set of maps comprising the monitor map, OSD map, PG map, MDS map and
CRUSH map. See `Cluster Map`_ for details.
Ceph Object Storage
RADOS Gateway
RGW
The S3/Swift gateway component of Ceph.
-
+
Ceph Block Device
RBD
The block storage component of Ceph.
-
+
Ceph Block Storage
- The block storage "product," service or capabilities when used in
+ The block storage "product," service or capabilities when used in
conjunction with ``librbd``, a hypervisor such as QEMU or Xen, and a
hypervisor abstraction layer such as ``libvirt``.
Cloud Platforms
Cloud Stacks
- Third party cloud provisioning platforms such as OpenStack, CloudStack,
+ Third party cloud provisioning platforms such as OpenStack, CloudStack,
OpenNebula, ProxMox, etc.
Object Storage Device
Sometimes, Ceph users use the
term "OSD" to refer to :term:`Ceph OSD Daemon`, though the
proper term is "Ceph OSD".
-
+
Ceph OSD Daemon
Ceph OSD Daemons
Ceph OSD
disk (:term:`OSD`). Sometimes, Ceph users use the
term "OSD" to refer to "Ceph OSD Daemon", though the
proper term is "Ceph OSD".
-
+
+ OSD id
+ The integer that defines an OSD. It is generated by the monitors as part
+ of the creation of a new OSD.
+
+ OSD fsid
+    This is a unique identifier used to further improve the uniqueness of an
+    OSD, and it is found in the OSD path in a file called ``osd_fsid``. The
+    term ``fsid`` is used interchangeably with ``uuid``.
+
+ OSD uuid
+    Just like the OSD fsid, this is the OSD unique identifier and is used
+    interchangeably with ``fsid``.
+
+ bluestore
+ OSD BlueStore is a new back end for OSD daemons (kraken and newer
+    versions). Unlike :term:`filestore`, it stores objects directly on the
+ Ceph block devices without any file system interface.
+
+ filestore
+    A back end for OSD daemons, where a journal is needed and files are
+ written to the filesystem.
+
Ceph Monitor
MON
The Ceph monitor software.
Ceph Clients
Ceph Client
- The collection of Ceph components which can access a Ceph Storage
- Cluster. These include the Ceph Object Gateway, the Ceph Block Device,
- the Ceph Filesystem, and their corresponding libraries, kernel modules,
+ The collection of Ceph components which can access a Ceph Storage
+ Cluster. These include the Ceph Object Gateway, the Ceph Block Device,
+ the Ceph Filesystem, and their corresponding libraries, kernel modules,
and FUSEs.
Ceph Kernel Modules
- The collection of kernel modules which can be used to interact with the
+ The collection of kernel modules which can be used to interact with the
Ceph System (e.g,. ``ceph.ko``, ``rbd.ko``).
Ceph Client Libraries
- The collection of libraries that can be used to interact with components
+ The collection of libraries that can be used to interact with components
of the Ceph System.
Ceph Release
Any distinct numbered version of Ceph.
-
+
Ceph Point Release
Any ad-hoc release that includes only bug or security fixes.
testing, but may contain new features.
Ceph Release Candidate
- A major version of Ceph that has undergone initial quality assurance
+ A major version of Ceph that has undergone initial quality assurance
testing and is ready for beta testers.
Ceph Stable Release
- A major version of Ceph where all features from the preceding interim
+ A major version of Ceph where all features from the preceding interim
releases have been put through quality assurance testing successfully.
Ceph Test Framework
CRUSH
Controlled Replication Under Scalable Hashing. It is the algorithm
Ceph uses to compute object storage locations.
-
+
ruleset
A set of CRUSH data placement rules that applies to a particular pool(s).
Pools
Pools are logical partitions for storing objects.
+ systemd oneshot
+    A systemd ``type`` where a command is defined in ``ExecStart`` which will
+    exit upon completion (it is not intended to daemonize).
+
+ LVM tags
+ Extensible metadata for LVM volumes and groups. It is used to store
+    Ceph-specific information about devices and their relationship with
+ OSDs.
+
.. _http://github.com/ceph: http://github.com/ceph
.. _Cluster Map: ../architecture#cluster-map
api/index
architecture
Development <dev/index>
+ ceph-volume/index
release-notes
releases
Glossary <glossary>
production.
Following the same configuration as `Installation (Quick)`_, we will set up a
-cluster with ``node1`` as the monitor node, and ``node2`` and ``node3`` for
+cluster with ``node1`` as the monitor node, and ``node2`` and ``node3`` for
OSD nodes.
-.. ditaa::
+.. ditaa::
/------------------\ /----------------\
| Admin Node | | node1 |
| +-------->+ |
Bootstrapping a monitor (a Ceph Storage Cluster, in theory) requires
a number of things:
-- **Unique Identifier:** The ``fsid`` is a unique identifier for the cluster,
- and stands for File System ID from the days when the Ceph Storage Cluster was
- principally for the Ceph Filesystem. Ceph now supports native interfaces,
- block devices, and object storage gateway interfaces too, so ``fsid`` is a
+- **Unique Identifier:** The ``fsid`` is a unique identifier for the cluster,
+ and stands for File System ID from the days when the Ceph Storage Cluster was
+ principally for the Ceph Filesystem. Ceph now supports native interfaces,
+ block devices, and object storage gateway interfaces too, so ``fsid`` is a
bit of a misnomer.
- **Cluster Name:** Ceph clusters have a cluster name, which is a simple string
without spaces. The default cluster name is ``ceph``, but you may specify
- a different cluster name. Overriding the default cluster name is
- especially useful when you are working with multiple clusters and you need to
- clearly understand which cluster your are working with.
-
- For example, when you run multiple clusters in a `federated architecture`_,
+ a different cluster name. Overriding the default cluster name is
+ especially useful when you are working with multiple clusters and you need to
+  clearly understand which cluster you are working with.
+
+ For example, when you run multiple clusters in a `federated architecture`_,
the cluster name (e.g., ``us-west``, ``us-east``) identifies the cluster for
- the current CLI session. **Note:** To identify the cluster name on the
- command line interface, specify the Ceph configuration file with the
+ the current CLI session. **Note:** To identify the cluster name on the
+ command line interface, specify the Ceph configuration file with the
cluster name (e.g., ``ceph.conf``, ``us-west.conf``, ``us-east.conf``, etc.).
Also see CLI usage (``ceph --cluster {cluster-name}``).
-
-- **Monitor Name:** Each monitor instance within a cluster has a unique name.
+
+- **Monitor Name:** Each monitor instance within a cluster has a unique name.
In common practice, the Ceph Monitor name is the host name (we recommend one
- Ceph Monitor per host, and no commingling of Ceph OSD Daemons with
+ Ceph Monitor per host, and no commingling of Ceph OSD Daemons with
Ceph Monitors). You may retrieve the short hostname with ``hostname -s``.
-- **Monitor Map:** Bootstrapping the initial monitor(s) requires you to
- generate a monitor map. The monitor map requires the ``fsid``, the cluster
+- **Monitor Map:** Bootstrapping the initial monitor(s) requires you to
+ generate a monitor map. The monitor map requires the ``fsid``, the cluster
name (or uses the default), and at least one host name and its IP address.
-- **Monitor Keyring**: Monitors communicate with each other via a
- secret key. You must generate a keyring with a monitor secret and provide
+- **Monitor Keyring**: Monitors communicate with each other via a
+ secret key. You must generate a keyring with a monitor secret and provide
it when bootstrapping the initial monitor(s).
-
+
- **Administrator Keyring**: To use the ``ceph`` CLI tools, you must have
a ``client.admin`` user. So you must generate the admin user and keyring,
and you must also add the ``client.admin`` user to the monitor keyring.
-The foregoing requirements do not imply the creation of a Ceph Configuration
-file. However, as a best practice, we recommend creating a Ceph configuration
+The foregoing requirements do not imply the creation of a Ceph Configuration
+file. However, as a best practice, we recommend creating a Ceph configuration
file and populating it with the ``fsid``, the ``mon initial members`` and the
``mon host`` settings.
You can get and set all of the monitor settings at runtime as well. However,
-a Ceph Configuration file may contain only those settings that override the
+a Ceph Configuration file may contain only those settings that override the
default values. When you add settings to a Ceph configuration file, these
-settings override the default settings. Maintaining those settings in a
+settings override the default settings. Maintaining those settings in a
Ceph configuration file makes it easier to maintain your cluster.
The procedure is as follows:
ssh {hostname}
- For example::
+ For example::
ssh node1
-#. Ensure you have a directory for the Ceph configuration file. By default,
- Ceph uses ``/etc/ceph``. When you install ``ceph``, the installer will
+#. Ensure you have a directory for the Ceph configuration file. By default,
+ Ceph uses ``/etc/ceph``. When you install ``ceph``, the installer will
create the ``/etc/ceph`` directory automatically. ::
- ls /etc/ceph
+ ls /etc/ceph
**Note:** Deployment tools may remove this directory when purging a
cluster (e.g., ``ceph-deploy purgedata {node-name}``, ``ceph-deploy purge
{node-name}``).
-#. Create a Ceph configuration file. By default, Ceph uses
+#. Create a Ceph configuration file. By default, Ceph uses
``ceph.conf``, where ``ceph`` reflects the cluster name. ::
sudo vim /etc/ceph/ceph.conf
-#. Generate a unique ID (i.e., ``fsid``) for your cluster. ::
+#. Generate a unique ID (i.e., ``fsid``) for your cluster. ::
uuidgen
-
-#. Add the unique ID to your Ceph configuration file. ::
+
+#. Add the unique ID to your Ceph configuration file. ::
fsid = {UUID}
- For example::
+ For example::
fsid = a7f64266-0894-4f1e-a635-d0aeaca0e993
-#. Add the initial monitor(s) to your Ceph configuration file. ::
+#. Add the initial monitor(s) to your Ceph configuration file. ::
mon initial members = {hostname}[,{hostname}]
- For example::
+ For example::
mon initial members = node1
-#. Add the IP address(es) of the initial monitor(s) to your Ceph configuration
- file and save the file. ::
+#. Add the IP address(es) of the initial monitor(s) to your Ceph configuration
+ file and save the file. ::
mon host = {ip-address}[,{ip-address}]
#. Generate an administrator keyring, generate a ``client.admin`` user and add
- the user to the keyring. ::
+ the user to the keyring. ::
sudo ceph-authtool --create-keyring /etc/ceph/ceph.client.admin.keyring --gen-key -n client.admin --set-uid=0 --cap mon 'allow *' --cap osd 'allow *' --cap mds 'allow *' --cap mgr 'allow *'
-#. Add the ``client.admin`` key to the ``ceph.mon.keyring``. ::
+#. Add the ``client.admin`` key to the ``ceph.mon.keyring``. ::
ceph-authtool /tmp/ceph.mon.keyring --import-keyring /etc/ceph/ceph.client.admin.keyring
-#. Generate a monitor map using the hostname(s), host IP address(es) and the FSID.
- Save it as ``/tmp/monmap``::
+#. Generate a monitor map using the hostname(s), host IP address(es) and the FSID.
+ Save it as ``/tmp/monmap``::
monmaptool --create --add {hostname} {ip-address} --fsid {uuid} /tmp/monmap
sudo -u ceph ceph-mon --mkfs -i node1 --monmap /tmp/monmap --keyring /tmp/ceph.mon.keyring
-#. Consider settings for a Ceph configuration file. Common settings include
+#. Consider settings for a Ceph configuration file. Common settings include
the following::
[global]
osd pool default size = {n} # Write an object n times.
osd pool default min size = {n} # Allow writing n copy in a degraded state.
osd pool default pg num = {n}
- osd pool default pgp num = {n}
+ osd pool default pgp num = {n}
osd crush chooseleaf type = {n}
In the foregoing example, the ``[global]`` section of the configuration might
osd pool default size = 2
osd pool default min size = 1
osd pool default pg num = 333
- osd pool default pgp num = 333
+ osd pool default pgp num = 333
osd crush chooseleaf type = 1
#. Touch the ``done`` file.
0 data,1 metadata,2 rbd,
-#. Verify that the monitor is running. ::
+#. Verify that the monitor is running. ::
ceph -s
You should see output that the monitor you started is up and running, and
you should see a health error indicating that placement groups are stuck
- inactive. It should look something like this::
+ inactive. It should look something like this::
cluster a7f64266-0894-4f1e-a635-d0aeaca0e993
health HEALTH_ERR 192 pgs stuck inactive; 192 pgs stuck unclean; no osds
On each node where you run a ceph-mon daemon, you should also set up a ceph-mgr daemon.
-See :doc:`../mgr/administrator`
+See :ref:`mgr-administrator-guide`
Adding OSDs
===========
cannot reach an ``active + clean`` state until you have enough OSDs to handle the
number of copies of an object (e.g., ``osd pool default size = 2`` requires at
least two OSDs). After bootstrapping your monitor, your cluster has a default
-CRUSH map; however, the CRUSH map doesn't have any Ceph OSD Daemons mapped to
+CRUSH map; however, the CRUSH map doesn't have any Ceph OSD Daemons mapped to
a Ceph Node.
Ceph provides the ``ceph-disk`` utility, which can prepare a disk, partition or
directory for use with Ceph. The ``ceph-disk`` utility creates the OSD ID by
incrementing the index. Additionally, ``ceph-disk`` will add the new OSD to the
-CRUSH map under the host for you. Execute ``ceph-disk -h`` for CLI details.
+CRUSH map under the host for you. Execute ``ceph-disk -h`` for CLI details.
The ``ceph-disk`` utility automates the steps of the `Long Form`_ below. To
create the first two OSDs with the short form procedure, execute the following
on ``node2`` and ``node3``:
sudo ceph-disk activate {data-path} [--activate-key {path}]
- For example::
+ For example::
sudo ceph-disk activate /dev/hdd1
``client.bootstrap-osd`` key is present on the machine. You may
alternatively execute this command as ``client.admin`` on a
different host where that key is present.::
-
+
ID=$(echo "{\"cephx_secret\": \"$OSD_SECRET\"}" | \
ceph osd new $UUID -i - \
-n client.bootstrap-osd -k /var/lib/ceph/bootstrap-osd/ceph.keyring)
mkdir /var/lib/ceph/osd/ceph-$ID
-#. If the OSD is for a drive other than the OS drive, prepare it
+#. If the OSD is for a drive other than the OS drive, prepare it
for use with Ceph, and mount it to the directory you just created. ::
mkfs.xfs /dev/{DEV}
chown -R ceph:ceph /var/lib/ceph/osd/ceph-$ID
-#. After you add an OSD to Ceph, the OSD is in your configuration. However,
- it is not yet running. You must start
+#. After you add an OSD to Ceph, the OSD is in your configuration. However,
+ it is not yet running. You must start
your new OSD before it can begin receiving data.
For modern systemd distributions::
systemctl enable ceph-osd@$ID
systemctl start ceph-osd@$ID
-
+
For example::
systemctl enable ceph-osd@12
#. Create a keyring.::
ceph-authtool --create-keyring /var/lib/ceph/mds/{cluster-name}-{id}/keyring --gen-key -n mds.{id}
-
+
#. Import the keyring and set caps.::
ceph auth add mds.{id} osd "allow rwx" mds "allow" mon "allow profile mds" -i /var/lib/ceph/mds/{cluster}-{id}/keyring
-
+
#. Add to ceph.conf.::
[mds.{id}]
=======
Once you have your monitor and two OSDs up and running, you can watch the
-placement groups peer by executing the following::
+placement groups peer by executing the following::
ceph -w
-To view the tree, execute the following::
+To view the tree, execute the following::
ceph osd tree
-
-You should see output that looks something like this::
+
+You should see output that looks something like this::
# id weight type name up/down reweight
-1 2 root default
-2 2 host node1
0 1 osd.0 up 1
-3 1 host node2
- 1 1 osd.1 up 1
+ 1 1 osd.1 up 1
-To add (or remove) additional monitors, see `Add/Remove Monitors`_.
+To add (or remove) additional monitors, see `Add/Remove Monitors`_.
To add (or remove) additional Ceph OSD Daemons, see `Add/Remove OSDs`_.
ceph-clsinfo.rst
ceph-detect-init.rst
ceph-disk.rst
+ ceph-volume.rst
+ ceph-volume-systemd.rst
ceph-osd.rst
osdmaptool.rst)
--- /dev/null
+:orphan:
+
+=======================================================
+ ceph-volume-systemd -- systemd ceph-volume helper tool
+=======================================================
+
+.. program:: ceph-volume-systemd
+
+Synopsis
+========
+
+| **ceph-volume-systemd** *systemd instance name*
+
+
+Description
+===========
+:program:`ceph-volume-systemd` is a systemd helper tool that receives input
+from (dynamically created) systemd units so that activation of OSDs can
+proceed.
+
+It translates the input into a call to ``ceph-volume`` for activation
+purposes only.
+
+
+Examples
+========
+Its input is the ``systemd instance name`` (represented by ``%i`` in a systemd
+unit), and it should be in the following format::
+
+ <ceph-volume subcommand>-<extra metadata>
+
+In the case of ``lvm`` a call could look like::
+
+ /usr/bin/ceph-volume-systemd lvm-0-8715BEB4-15C5-49DE-BA6F-401086EC7B41
+
+Which in turn will call ``ceph-volume`` in the following way::
+
+ ceph-volume lvm trigger 0-8715BEB4-15C5-49DE-BA6F-401086EC7B41
+
+Any other subcommand will need to have implemented a ``trigger`` command that
+can consume the extra metadata in this format.
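+
+As a rough illustration (not the actual implementation), the split can be
+thought of as taking everything before the first dash as the sub-command and
+everything after it as the metadata handed to ``trigger``::
+
+    instance="lvm-0-8715BEB4-15C5-49DE-BA6F-401086EC7B41"
+    subcommand="${instance%%-*}"   # -> lvm
+    metadata="${instance#*-}"      # -> 0-8715BEB4-15C5-49DE-BA6F-401086EC7B41
+    ceph-volume "$subcommand" trigger "$metadata"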
+
+
+Availability
+============
+
+:program:`ceph-volume-systemd` is part of Ceph, a massively scalable,
+open-source, distributed storage system. Please refer to the documentation at
+http://docs.ceph.com/ for more information.
+
+
+See also
+========
+
+:doc:`ceph-osd <ceph-osd>`\(8),
+:doc:`ceph-volume <ceph-volume>`\(8),
--- /dev/null
+:orphan:
+
+========================================
+ ceph-volume -- Ceph OSD deployment tool
+========================================
+
+.. program:: ceph-volume
+
+Synopsis
+========
+
+| **ceph-volume** [-h] [--cluster CLUSTER] [--log-level LOG_LEVEL]
+| [--log-path LOG_PATH]
+
+| **ceph-volume** **lvm** [ *trigger* | *create* | *activate* | *prepare* ]
+
+Description
+===========
+
+:program:`ceph-volume` is a single purpose command line tool to deploy logical
+volumes as OSDs, trying to maintain a similar API to ``ceph-disk`` when
+preparing, activating, and creating OSDs.
+
+It deviates from ``ceph-disk`` by not interacting or relying on the udev rules
+that come installed for Ceph. These rules allow automatic detection of
+previously setup devices that are in turn fed into ``ceph-disk`` to activate
+them.
+
+
+Commands
+========
+
+lvm
+---
+
+By making use of LVM tags, the ``lvm`` sub-command is able to store and later
+re-discover and query devices associated with OSDs so that they can later be
+activated.
+
+Subcommands:
+
+**activate**
+Enables a systemd unit that persists the OSD ID and its UUID (also called
+``fsid`` in Ceph CLI tools), so that at boot time it can understand what OSD is
+enabled and needs to be mounted.
+
+Usage::
+
+ ceph-volume lvm activate --filestore <osd id> <osd fsid>
+
+Optional Arguments:
+
+* [-h, --help] show the help message and exit
+* [--bluestore] bluestore objectstore (not yet implemented)
+* [--filestore] filestore objectstore (current default)
+
+
+**prepare**
+Prepares a logical volume to be used as an OSD and journal using a ``filestore`` setup
+(``bluestore`` support is planned). It will not create or modify the logical volumes
+except for adding extra metadata.
+
+Usage::
+
+ ceph-volume lvm prepare --filestore --data <data lv> --journal <journal device>
+
+Optional arguments:
+
+* [-h, --help] show the help message and exit
+* [--journal JOURNAL] A logical group name, path to a logical volume, or path to a device
+* [--journal-size GB] Size (in GB) for the journal
+* [--bluestore] Use the bluestore objectstore (not currently supported)
+* [--filestore] Use the filestore objectstore (currently the only supported object store)
+* [--osd-id OSD_ID] Reuse an existing OSD id
+* [--osd-fsid OSD_FSID] Reuse an existing OSD fsid
+
+Required arguments:
+
+* --data A logical group name or a path to a logical volume
+
+**create**
+Wraps the two-step process to provision a new osd (calling ``prepare`` first
+and then ``activate``) into a single one. The reason to prefer ``prepare`` and
+then ``activate`` is to gradually introduce new OSDs into a cluster, and
+avoid large amounts of data being rebalanced.
+
+The single-call process unifies exactly what ``prepare`` and ``activate`` do,
+with the convenience of doing it all at once. Flags and general usage are
+equivalent to those of the ``prepare`` subcommand.
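+
+Since the flags and usage mirror ``prepare``, a typical invocation (a sketch
+based on that equivalence) would look like::
+
+    ceph-volume lvm create --filestore --data <data lv> --journal <journal device>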
+
+**trigger**
+This subcommand is not meant to be used directly: it is used by systemd to
+proxy input to ``ceph-volume lvm activate`` by parsing the
+input from systemd, detecting the UUID and ID associated with an OSD.
+
+Usage::
+
+ ceph-volume lvm trigger <SYSTEMD-DATA>
+
+The systemd "data" is expected to be in the format of::
+
+ <OSD ID>-<OSD UUID>
+
+The lvs associated with the OSD need to have been prepared previously,
+so that all needed tags and metadata exist.
+
+Positional arguments:
+
+* <SYSTEMD_DATA> Data from a systemd unit containing ID and UUID of the OSD.
+
+Availability
+============
+
+:program:`ceph-volume` is part of Ceph, a massively scalable, open-source, distributed storage system. Please refer to
+the documentation at http://docs.ceph.com/ for more information.
+
+
+See also
+========
+
+:doc:`ceph-osd <ceph-osd>`\(8),
+:doc:`ceph-disk <ceph-disk>`\(8),
ceph osd create {<uuid>} {<id>}
-Subcommand ``new`` reuses a previously destroyed OSD *id*. The new OSD will
-have the specified *uuid*, and the command expects a JSON file containing
-the base64 cephx key for auth entity *client.osd.<id>*, as well as optional
-base64 cepx key for dm-crypt lockbox access and a dm-crypt key. Specifying
-a dm-crypt requires specifying the accompanying lockbox cephx key.
+Subcommand ``new`` can be used to create a new OSD or to recreate a previously
+destroyed OSD with a specific *id*. The new OSD will have the specified *uuid*,
+and the command expects a JSON file containing the base64 cephx key for auth
+entity *client.osd.<id>*, as well as an optional base64 cephx key for dm-crypt
+lockbox access and a dm-crypt key. Specifying a dm-crypt key requires specifying
+the accompanying lockbox cephx key.
Usage::
ceph osd new {<id>} {<uuid>} -i {<secrets.json>}
-The secrets JSON file is expected to maintain a form of the following format::
+The secrets JSON file is optional but, if provided, is expected to maintain
+a form of the following format::
{
"cephx_secret": "AQBWtwhZdBO5ExAAIDyjK2Bh16ZXylmzgYYEjg=="
+.. _mgr-administrator-guide:
ceph-mgr administrator's guide
==============================
-----------------
In general, you should set up a ceph-mgr on each of the hosts
-running a ceph-mon daemon to achieve the same level of availability.
+running a ceph-mon daemon to achieve the same level of availability.
By default, whichever ceph-mgr instance comes up first will be made
active by the monitors, and the others will be standbys. There is
"ec_hash_error",
"ec_size_error",
"oi_attr_missing",
- "oi_attr_corrupted"
+ "oi_attr_corrupted",
+ "obj_size_oi_mismatch",
+ "ss_attr_missing",
+ "ss_attr_corrupted"
]
},
"minItems": 0,
"osd": {
"type": "integer"
},
+ "primary": {
+ "type": "boolean"
+ },
"size": {
"type": "integer"
},
"ec_hash_error",
"ec_size_error",
"oi_attr_missing",
- "oi_attr_corrupted"
+ "oi_attr_corrupted",
+ "obj_size_oi_mismatch",
+ "ss_attr_missing",
+ "ss_attr_corrupted"
]
},
"minItems": 0,
},
"required": [
"osd",
+ "primary",
"errors"
]
}
:maxdepth: 1
../../man/8/ceph-disk.rst
+ ../../man/8/ceph-volume.rst
+ ../../man/8/ceph-volume-systemd.rst
../../man/8/ceph.rst
../../man/8/ceph-deploy.rst
../../man/8/ceph-rest-api.rst
configuration file of the same name (e.g. /etc/ceph/remote.conf). See the
`ceph-conf`_ documentation for how to configure multiple clusters.
+.. note:: Images in a given pool will be mirrored to a pool with the same name
+ on the remote cluster. Images using a separate data-pool will use a data-pool
+ with the same name on the remote cluster. E.g., if an image being mirrored is
+ in the ``rbd`` pool on the local cluster and using a data-pool called
+ ``rbd-ec``, pools called ``rbd`` and ``rbd-ec`` must exist on the remote
+ cluster and will be used for mirroring the image.
+
Enable Mirroring
----------------
echo "Scheduling " $2 " branch"
if [ $2 = "master" ] ; then
- # run master branch with --newest option looking for good sha1 7 builds back
- teuthology-suite -v -c $2 -m $3 -k distro -s $4 --subset $(echo "(($(date +%U) % 4) * 7) + $1" | bc)/28 --newest 7 -e $5 $6
+ # run master branch with --newest option looking for good sha1 7 builds back with /999 jobs
+ teuthology-suite -v -c $2 -m $3 -k distro -s $4 --subset $(echo "(($(date +%U) % 4) * 7) + $1" | bc)/999 --newest 7 -e $5 $6
elif [ $2 = "hammer" ] ; then
# run hammer branch with less jobs
teuthology-suite -v -c $2 -m $3 -k distro -s $4 --subset $(echo "(($(date +%U) % 4) * 7) + $1" | bc)/56 -e $5 $6
# run jewel branch with /40 jobs
teuthology-suite -v -c $2 -m $3 -k distro -s $4 --subset $(echo "(($(date +%U) % 4) * 7) + $1" | bc)/40 -e $5 $6
elif [ $2 = "kraken" ] ; then
- # run kraken branch with /40 jobs
- teuthology-suite -v -c $2 -m $3 -k distro -s $4 --subset $(echo "(($(date +%U) % 4) * 7) + $1" | bc)/40 -e $5 $6
+ # run kraken branch with /999 jobs
+ teuthology-suite -v -c $2 -m $3 -k distro -s $4 --subset $(echo "(($(date +%U) % 4) * 7) + $1" | bc)/999 -e $5 $6
elif [ $2 = "luminous" ] ; then
- # run luminous branch with /40 jobs
- teuthology-suite -v -c $2 -m $3 -k distro -s $4 --subset $(echo "(($(date +%U) % 4) * 7) + $1" | bc)/40 -e $5 $6
+ # run luminous branch with /999 jobs
+ teuthology-suite -v -c $2 -m $3 -k distro -s $4 --subset $(echo "(($(date +%U) % 4) * 7) + $1" | bc)/999 -e $5 $6
else
# run NON master branches without --newest
teuthology-suite -v -c $2 -m $3 -k distro -s $4 --subset $(echo "(($(date +%U) % 4) * 7) + $1" | bc)/28 -e $5 $6
-#!/bin/sh -ex
+#!/usr/bin/env bash
+set -e
-if [ ! -e Makefile ]; then
+if [ ! -e Makefile -o ! -d bin ]; then
echo 'run this from the build dir'
exit 1
fi
+if [ ! -d /tmp/ceph-disk-virtualenv -o ! -d /tmp/ceph-detect-init-virtualenv ]; then
+ echo '/tmp/*-virtualenv directories not built. Please run "make check" first.'
+ exit 1
+fi
+
if [ `uname` = FreeBSD ]; then
# otherwise module prettytable will not be found
export PYTHONPATH=/usr/local/lib/python2.7/site-packages
exec_mode=+111
+ KERNCORE="kern.corefile"
+ COREPATTERN="core.%N.%P"
else
+ export PYTHONPATH=/usr/lib/python2.7/dist-packages
exec_mode=/111
+ KERNCORE="kernel.core_pattern"
+ COREPATTERN="core.%e.%p.%t"
+fi
+
+function finish() {
+ if [ -n "$precore" ]; then
+ sudo sysctl -w ${KERNCORE}=${precore}
+ fi
+ exit 0
+}
+
+trap finish TERM HUP INT
+
+PATH=$(pwd)/bin:$PATH
+
+# TODO: Use getopts
+dryrun=false
+if [[ "$1" = "--dry-run" ]]; then
+ dryrun=true
+ shift
+fi
+
+all=false
+if [ "$1" = "" ]; then
+ all=true
fi
-for f in `find ../qa/standalone -perm $exec_mode -type f`
+select=("$@")
+
+location="../qa/standalone"
+
+count=0
+errors=0
+userargs=""
+precore="$(sysctl -n $KERNCORE)"
+# If corepattern already set, avoid having to use sudo
+if [ "$precore" = "$COREPATTERN" ]; then
+ precore=""
+else
+ sudo sysctl -w ${KERNCORE}=${COREPATTERN}
+fi
+ulimit -c unlimited
+for f in $(cd $location ; find . -perm $exec_mode -type f)
do
- echo '--- $f ---'
- PATH=$PATH:bin \
- CEPH_ROOT=.. \
- CEPH_LIB=lib \
- $f || exit 1
+ f=$(echo $f | sed 's/\.\///')
+ # This is tested with misc/test-ceph-helpers.sh
+ if [[ "$f" = "ceph-helpers.sh" ]]; then
+ continue
+ fi
+ if [[ "$all" = "false" ]]; then
+ found=false
+ for c in "${!select[@]}"
+ do
+            # Get command and any arguments of subset of tests to run
+ allargs="${select[$c]}"
+ arg1=$(echo "$allargs" | cut --delimiter " " --field 1)
+ # Get user args for this selection for use below
+ userargs="$(echo $allargs | cut -s --delimiter " " --field 2-)"
+ if [[ "$arg1" = $(basename $f) ]]; then
+ found=true
+ break
+ fi
+ if [[ "$arg1" = "$f" ]]; then
+ found=true
+ break
+ fi
+ done
+ if [[ "$found" = "false" ]]; then
+ continue
+ fi
+ fi
+ # Don't run test-failure.sh unless explicitly specified
+ if [ "$all" = "true" -a "$f" = "special/test-failure.sh" ]; then
+ continue
+ fi
+
+ cmd="$location/$f $userargs"
+ count=$(expr $count + 1)
+ echo "--- $cmd ---"
+ if [[ "$dryrun" != "true" ]]; then
+ if ! PATH=$PATH:bin \
+ CEPH_ROOT=.. \
+ CEPH_LIB=lib \
+ LOCALRUN=yes \
+ $cmd ; then
+ echo "$f .............. FAILED"
+ errors=$(expr $errors + 1)
+ fi
+ fi
done
+if [ -n "$precore" ]; then
+ sudo sysctl -w ${KERNCORE}=${precore}
+fi
+
+if [ "$errors" != "0" ]; then
+ echo "$errors TESTS FAILED, $count TOTAL TESTS"
+ exit 1
+fi
+echo "ALL $count TESTS PASSED"
exit 0
* The qa/run-standalone.sh will run all of them in sequence. This is slow
since there is no parallelism.
- * You can run an individual script by passing these environment args. For
- example, if you are in the build/ directory,
+ * You can run individual script(s) by specifying the basename or path below
+ qa/standalone as arguments to qa/run-standalone.sh.
-PATH=$PATH:bin CEPH_ROOT=.. CEPH_LIB=lib ../qa/standalone/mon/misc.sh
+../qa/run-standalone.sh misc.sh osd/osd-dup.sh
+
+ * You can pass arguments to selected tests by quoting the test together with
+   its arguments, for example:
+
+../qa/run-standalone.sh "test-ceph-helpers.sh test_get_last_scrub_stamp"
if [ `uname` = FreeBSD ]; then
SED=gsed
DIFFCOLOPTS=""
+ KERNCORE="kern.corefile"
else
SED=sed
termwidth=$(stty -a | head -1 | sed -e 's/.*columns \([0-9]*\).*/\1/')
termwidth="-W ${termwidth}"
fi
DIFFCOLOPTS="-y $termwidth"
+ KERNCORE="kernel.core_pattern"
fi
EXTRA_OPTS=""
#
function teardown() {
local dir=$1
+ local dumplogs=$2
kill_daemons $dir KILL
if [ `uname` != FreeBSD ] \
&& [ $(stat -f -c '%T' .) == "btrfs" ]; then
__teardown_btrfs $dir
fi
+ local cores="no"
+ local pattern="$(sysctl -n $KERNCORE)"
+ # See if we have apport core handling
+ if [ "${pattern:0:1}" = "|" ]; then
+ # TODO: Where can we get the dumps?
+ # Not sure where the dumps really are so this will look in the CWD
+ pattern=""
+ fi
+    # Locally core file names start with "core" and on teuthology they end with "core"
+ if ls $(dirname $pattern) | grep -q '^core\|core$' ; then
+ cores="yes"
+ if [ -n "$LOCALRUN" ]; then
+ mkdir /tmp/cores.$$ 2> /dev/null || true
+ for i in $(ls $(dirname $(sysctl -n $KERNCORE)) | grep '^core\|core$'); do
+ mv $i /tmp/cores.$$
+ done
+ fi
+ fi
+ if [ "$cores" = "yes" -o "$dumplogs" = "1" ]; then
+ display_logs $dir
+ fi
rm -fr $dir
rm -rf $(get_asok_dir)
+ if [ "$cores" = "yes" ]; then
+ echo "ERROR: Failure due to cores found"
+ if [ -n "$LOCALRUN" ]; then
+ echo "Find saved core files in /tmp/cores.$$"
+ fi
+ return 1
+ fi
+ return 0
}
function __teardown_btrfs() {
--id $id \
--mon-osd-full-ratio=.99 \
--mon-data-avail-crit=1 \
+ --mon-data-avail-warn=5 \
--paxos-propose-interval=0.1 \
--osd-crush-chooseleaf-type=0 \
$EXTRA_OPTS \
function create_rbd_pool() {
ceph osd pool delete rbd rbd --yes-i-really-really-mean-it || return 1
- ceph osd pool create rbd $PG_NUM || return 1
+ create_pool rbd $PG_NUM || return 1
rbd pool init rbd
}
+function create_pool() {
+ ceph osd pool create "$@"
+ sleep 1
+}
+
#######################################################################
function run_mgr() {
run_osd $dir 0 || return 1
create_rbd_pool || return 1
wait_for_clean || return 1
- stamp=$(get_last_scrub_stamp 2.0)
+ stamp=$(get_last_scrub_stamp 1.0)
test -n "$stamp" || return 1
teardown $dir || return 1
}
run_osd $dir 0 || return 1
create_rbd_pool || return 1
wait_for_clean || return 1
- repair 2.0 || return 1
+ repair 1.0 || return 1
kill_daemons $dir KILL osd || return 1
- ! TIMEOUT=1 repair 2.0 || return 1
+ ! TIMEOUT=1 repair 1.0 || return 1
teardown $dir || return 1
}
#######################################################################
run_osd $dir 0 || return 1
create_rbd_pool || return 1
wait_for_clean || return 1
- pg_scrub 2.0 || return 1
+ pg_scrub 1.0 || return 1
kill_daemons $dir KILL osd || return 1
- ! TIMEOUT=1 pg_scrub 2.0 || return 1
+ ! TIMEOUT=1 pg_scrub 1.0 || return 1
teardown $dir || return 1
}
local sname=${3:-last_scrub_stamp}
for ((i=0; i < $TIMEOUT; i++)); do
- if test "$last_scrub" != "$(get_last_scrub_stamp $pgid $sname)" ; then
+ if test "$(get_last_scrub_stamp $pgid $sname)" '>' "$last_scrub" ; then
return 0
fi
sleep 1
run_osd $dir 0 || return 1
create_rbd_pool || return 1
wait_for_clean || return 1
- local pgid=2.0
+ local pgid=1.0
ceph pg repair $pgid
local last_scrub=$(get_last_scrub_stamp $pgid)
wait_for_scrub $pgid "$last_scrub" || return 1
bytes_used=`ceph df detail --format=json | jq "$jq_filter.bytes_used"`
test $raw_bytes_used > 0 || return 1
test $raw_bytes_used == $bytes_used || return 1
+ teardown $dir
}
#######################################################################
if run $dir "$@" ; then
code=0
else
- display_logs $dir
code=1
fi
- teardown $dir || return 1
+ teardown $dir $code || return 1
return $code
}
export CEPH_MON="127.0.0.1:7109" # git grep '\<7109\>' : there must be only one
export CEPH_ARGS
- CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+=" --fsid=$(uuidgen) --auth-supported=none "
CEPH_ARGS+="--mon-host=$CEPH_MON "
export CEPH_CONF=/dev/null
local dir=td/ceph-helpers
for func in $funcs ; do
- $func $dir || return 1
+ if ! $func $dir; then
+ teardown $dir 1
+ return 1
+ fi
done
}
if test "$1" = TESTS ; then
shift
run_tests "$@"
+ exit $?
fi
# NOTE:
return 1
}
+function inject_eio() {
+ local pooltype=$1
+ shift
+ local which=$1
+ shift
+ local poolname=$1
+ shift
+ local objname=$1
+ shift
+ local dir=$1
+ shift
+ local shard_id=$1
+ shift
+
+ local -a initial_osds=($(get_osds $poolname $objname))
+ local osd_id=${initial_osds[$shard_id]}
+ if [ "$pooltype" != "ec" ]; then
+ shard_id=""
+ fi
+ set_config osd $osd_id filestore_debug_inject_read_err true || return 1
+ local loop=0
+ while ( CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.$osd_id) \
+ inject${which}err $poolname $objname $shard_id | grep -q Invalid ); do
+ loop=$(expr $loop + 1)
+ if [ $loop = "10" ]; then
+ return 1
+ fi
+ sleep 1
+ done
+}
+
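+# Usage sketch (illustrative; argument values taken from the callers further
+# below), covering both pool types:
+#   inject_eio ec data pool-jerasure obj-eio $dir 0    # EC pool, shard 0
+#   inject_eio rep mdata csr_pool ROBJ12 $dir 1        # replicated pool
+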
# Local Variables:
# compile-command: "cd ../../src ; make -j4 && ../qa/standalone/ceph-helpers.sh TESTS # test_get_config"
# End:
ceph osd erasure-code-profile set myprofile \
crush-failure-domain=osd || return 1
- ceph osd pool create $poolname 12 12 erasure myprofile \
+ create_pool $poolname 12 12 erasure myprofile \
|| return 1
wait_for_clean || return 1
}
mapping=DD_ \
crush-steps='[ [ "chooseleaf", "osd", 0 ] ]' \
layers='[ [ "DDc", "" ] ]' || return 1
- ceph osd pool create $poolname 12 12 erasure $profile \
+ create_pool $poolname 12 12 erasure $profile \
|| return 1
rados_put_get $dir $poolname || return 1
plugin=lrc \
k=4 m=2 l=3 \
crush-failure-domain=osd || return 1
- ceph osd pool create $poolname 12 12 erasure $profile \
+ create_pool $poolname 12 12 erasure $profile \
|| return 1
rados_put_get $dir $poolname || return 1
ceph osd erasure-code-profile set profile-isa \
plugin=isa \
crush-failure-domain=osd || return 1
- ceph osd pool create $poolname 1 1 erasure profile-isa \
+ create_pool $poolname 1 1 erasure profile-isa \
|| return 1
rados_put_get $dir $poolname || return 1
plugin=jerasure \
k=4 m=2 \
crush-failure-domain=osd || return 1
- ceph osd pool create $poolname 12 12 erasure $profile \
+ create_pool $poolname 12 12 erasure $profile \
|| return 1
rados_put_get $dir $poolname || return 1
plugin=shec \
k=2 m=1 c=1 \
crush-failure-domain=osd || return 1
- ceph osd pool create $poolname 12 12 erasure $profile \
+ create_pool $poolname 12 12 erasure $profile \
|| return 1
rados_put_get $dir $poolname || return 1
mapping='_DD' \
crush-steps='[ [ "choose", "osd", 0 ] ]' || return 1
ceph osd erasure-code-profile get remap-profile
- ceph osd pool create remap-pool 12 12 erasure remap-profile \
+ create_pool remap-pool 12 12 erasure remap-profile \
|| return 1
#
plugin=jerasure \
k=2 m=1 \
crush-failure-domain=osd || return 1
- ceph osd pool create $poolname 1 1 erasure myprofile \
+ create_pool $poolname 1 1 erasure myprofile \
|| return 1
wait_for_clean || return 1
}
rm $dir/ORIGINAL
}
-function inject_eio() {
- local objname=$1
- shift
- local dir=$1
- shift
- local shard_id=$1
- shift
-
- local poolname=pool-jerasure
- local -a initial_osds=($(get_osds $poolname $objname))
- local osd_id=${initial_osds[$shard_id]}
- set_config osd $osd_id filestore_debug_inject_read_err true || return 1
- CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.$osd_id) \
- injectdataerr $poolname $objname $shard_id || return 1
-}
-
function rados_get_data_eio() {
local dir=$1
shift
#
local poolname=pool-jerasure
local objname=obj-eio-$$-$shard_id
- inject_eio $objname $dir $shard_id || return 1
+ inject_eio ec data $poolname $objname $dir $shard_id || return 1
rados_put_get $dir $poolname $objname $recovery || return 1
shard_id=$(expr $shard_id + 1)
- inject_eio $objname $dir $shard_id || return 1
+ inject_eio ec data $poolname $objname $dir $shard_id || return 1
# Now 2 out of 3 shards get EIO, so should fail
rados_get $dir $poolname $objname fail || return 1
}
# GNU Library Public License for more details.
#
-$CEPH_ROOT/qa/standalone/ceph-helpers.sh TESTS
+$CEPH_ROOT/qa/standalone/ceph-helpers.sh TESTS "$@"
setup $dir || return 1
run_mon $dir a || return 1
create_rbd_pool || return 1
- ceph osd pool create $TEST_POOL 8
+ create_pool $TEST_POOL 8
local flag
for flag in nodelete nopgchange nosizechange write_fadvise_dontneed noscrub nodeep-scrub; do
! ceph osd pool set $TEST_POOL min_size 0 || return 1
local ecpool=erasepool
- ceph osd pool create $ecpool 12 12 erasure default || return 1
+ create_pool $ecpool 12 12 erasure default || return 1
 # erasure pool size=k+m, min_size=k
local size=$(ceph osd pool get $ecpool size|awk '{print $2}')
local min_size=$(ceph osd pool get $ecpool min_size|awk '{print $2}')
run_osd $dir 1 || return 1
run_osd $dir 2 || return 1
- ceph osd pool create hello 8 || return 1
+ create_pool hello 8 || return 1
echo "hello world" > $dir/hello
rados --pool hello put foo $dir/hello || return 1
grep "WRONG does not exist" || return 1
ceph osd erasure-code-profile set $profile || return 1
- ceph osd pool create poolname 12 12 erasure $profile || return 1
+ create_pool poolname 12 12 erasure $profile || return 1
! ceph osd erasure-code-profile rm $profile > $dir/out 2>&1 || return 1
grep "poolname.*using.*$profile" $dir/out || return 1
ceph osd pool delete poolname poolname --yes-i-really-really-mean-it || return 1
run_osd $dir 2 || return 1
local poolname=testquoa
- ceph osd pool create $poolname 20
+ create_pool $poolname 20
local objects=`ceph df detail | grep -w $poolname|awk '{print $3}'`
local bytes=`ceph df detail | grep -w $poolname|awk '{print $4}'`
source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+[ `uname` = FreeBSD ] && exit 0
+
function run() {
local dir=$1
shift
sleep 5
- ceph osd pool create foo 16
+ create_pool foo 16
# write some objects
rados bench -p foo 10 write -b 4096 --no-cleanup || return 1
--- /dev/null
+#! /bin/bash
+#
+# Copyright (C) 2017 Red Hat <contact@redhat.com>
+#
+# Author: David Zafman <dzafman@redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:7124" # git grep '\<7124\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ $func $dir || return 1
+ done
+}
+
+function TEST_recovery_scrub() {
+ local dir=$1
+ local poolname=test
+
+ TESTDATA="testdata.$$"
+ OSDS=8
+ PGS=32
+ OBJECTS=4
+
+ setup $dir || return 1
+ run_mon $dir a --osd_pool_default_size=1 || return 1
+ run_mgr $dir x || return 1
+ for osd in $(seq 0 $(expr $OSDS - 1))
+ do
+ run_osd $dir $osd || return 1
+ done
+
+ # Create a pool with $PGS pgs
+ create_pool $poolname $PGS $PGS
+ wait_for_clean || return 1
+ poolid=$(ceph osd dump | grep "^pool.*[']test[']" | awk '{ print $2 }')
+
+ dd if=/dev/urandom of=$TESTDATA bs=1M count=50
+ for i in $(seq 1 $OBJECTS)
+ do
+ rados -p $poolname put obj${i} $TESTDATA
+ done
+ rm -f $TESTDATA
+
+ ceph osd pool set $poolname size 4
+
+ pids=""
+ for pg in $(seq 0 $(expr $PGS - 1))
+ do
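+        # pg ids use a hex suffix, so convert the decimal loop index with bc
+        # and lowercase it to match the form the cluster reports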
+ run_in_background pids pg_scrub $poolid.$(echo "{ obase=16; $pg }" | bc | tr '[:upper:]' '[:lower:]')
+ done
+ ceph pg dump pgs
+ wait_background pids
+ return_code=$?
+ if [ $return_code -ne 0 ]; then return $return_code; fi
+
+ ERRORS=0
+ pidfile=$(find $dir 2>/dev/null | grep $name_prefix'[^/]*\.pid')
+ pid=$(cat $pidfile)
+ if ! kill -0 $pid
+ then
+ echo "OSD crash occurred"
+ tail -100 $dir/osd.0.log
+ ERRORS=$(expr $ERRORS + 1)
+ fi
+
+ kill_daemons $dir || return 1
+
+ declare -a err_strings
+ err_strings[0]="not scheduling scrubs due to active recovery"
+ # Test with these two strings after disabled check in OSD::sched_scrub()
+ #err_strings[0]="handle_scrub_reserve_request: failed to reserve remotely"
+ #err_strings[1]="sched_scrub: failed to reserve locally"
+
+ for osd in $(seq 0 $(expr $OSDS - 1))
+ do
+ grep "failed to reserve\|not scheduling scrubs" $dir/osd.${osd}.log
+ done
+ for err_string in "${err_strings[@]}"
+ do
+ found=false
+ for osd in $(seq 0 $(expr $OSDS - 1))
+ do
+ if grep "$err_string" $dir/osd.${osd}.log > /dev/null;
+ then
+ found=true
+ fi
+ done
+ if [ "$found" = "false" ]; then
+ echo "Missing log message '$err_string'"
+ ERRORS=$(expr $ERRORS + 1)
+ fi
+ done
+
+ teardown $dir || return 1
+
+ if [ $ERRORS != "0" ];
+ then
+ echo "TEST FAILED WITH $ERRORS ERRORS"
+ return 1
+ fi
+
+ echo "TEST PASSED"
+ return 0
+}
+
+main osd-recovery-scrub "$@"
+
+# Local Variables:
+# compile-command: "cd build ; make -j4 && \
+# ../qa/run-standalone.sh osd-recovery-scrub.sh"
getjson="no"
# Ignore the epoch and filter out the attr '_' value because it has date information and won't match
-jqfilter='.inconsistents | (.[].shards[].attrs[]? | select(.name == "_") | .value) |= "----Stripped-by-test----"'
+if [ "$(jq --version 2>&1 | awk '{ print $3}')" = "1.3" ]; then # Not sure all versions that apply here
+ jqfilter='.inconsistents | (.[].shards[].attrs[] | select(.name == "_") | .value) |= "----Stripped-by-test----"'
+else
+ jqfilter='.inconsistents | (.[].shards[].attrs[]? | select(.name == "_") | .value) |= "----Stripped-by-test----"'
+fi
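+# (the '?' on .attrs[]? suppresses errors when a shard has no attrs; older jq
+# releases such as 1.3 lack that operator, hence the version check above)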
sortkeys='import json; import sys ; JSON=sys.stdin.read() ; ud = json.loads(JSON) ; print json.dumps(ud, sort_keys=True, indent=2)'
 # Remove items that are not consistent across runs, the pg interval and client
ceph osd erasure-code-profile set myprofile crush-failure-domain=osd $3 $4 $5 $6 $7 || return 1
- ceph osd pool create "$poolname" 1 1 erasure myprofile || return 1
+ create_pool "$poolname" 1 1 erasure myprofile || return 1
if [ "$allow_overwrites" = "true" ]; then
ceph osd pool set "$poolname" allow_ec_overwrites true || return 1
#
# it may take a bit to appear due to mon/mgr asynchrony
for f in `seq 1 60`; do
- ceph -s | grep "1/1 unfound" && break
+ ceph -s | grep "1/1 objects unfound" && break
sleep 1
done
ceph -s|grep "4 osds: 4 up, 4 in" || return 1
- ceph -s|grep "1/1 unfound" || return 1
+ ceph -s|grep "1/1 objects unfound" || return 1
teardown $dir || return 1
}
function TEST_corrupt_scrub_replicated() {
local dir=$1
local poolname=csr_pool
- local total_objs=15
+ local total_objs=16
setup $dir || return 1
run_mon $dir a --osd_pool_default_size=2 || return 1
create_rbd_pool || return 1
wait_for_clean || return 1
- ceph osd pool create foo 1 || return 1
- ceph osd pool create $poolname 1 1 || return 1
+ create_pool foo 1 || return 1
+ create_pool $poolname 1 1 || return 1
wait_for_clean || return 1
for i in $(seq 1 $total_objs) ; do
15)
objectstore_tool $dir $osd $objname rm-attr _ || return 1
+ ;;
+
+ 16)
+ objectstore_tool $dir 0 $objname rm-attr snapset || return 1
+ echo -n bad-val > $dir/bad-val
+ objectstore_tool $dir 1 $objname set-attr snapset $dir/bad-val || return 1
esac
done
local pg=$(get_pg $poolname ROBJ0)
- set_config osd 0 filestore_debug_inject_read_err true || return 1
- set_config osd 1 filestore_debug_inject_read_err true || return 1
- CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.1) \
- injectdataerr $poolname ROBJ11 || return 1
- CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) \
- injectmdataerr $poolname ROBJ12 || return 1
- CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) \
- injectmdataerr $poolname ROBJ13 || return 1
- CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.1) \
- injectdataerr $poolname ROBJ13 || return 1
+ inject_eio rep data $poolname ROBJ11 $dir 0 || return 1 # shard 0 of [1, 0], osd.1
+ inject_eio rep mdata $poolname ROBJ12 $dir 1 || return 1 # shard 1 of [1, 0], osd.0
+ inject_eio rep mdata $poolname ROBJ13 $dir 1 || return 1 # shard 1 of [1, 0], osd.0
+ inject_eio rep data $poolname ROBJ13 $dir 0 || return 1 # shard 0 of [1, 0], osd.1
pg_scrub $pg
{
"size": 7,
"errors": [],
- "osd": 0
+ "osd": 0,
+ "primary": false
},
{
"size": 9,
"errors": [
- "size_mismatch_oi"
+ "size_mismatch_oi",
+ "obj_size_oi_mismatch"
],
- "osd": 1
+ "osd": 1,
+ "primary": true
}
],
"selected_object_info": "3:ce3f1d6a:::ROBJ1:head(47'54 osd.0.0:53 dirty|omap|data_digest|omap_digest s 7 uv 3 dd 2ddbf8f5 od f5fba2c6 alloc_hint [0 0 0])",
"union_shard_errors": [
- "size_mismatch_oi"
+ "size_mismatch_oi",
+ "obj_size_oi_mismatch"
],
"errors": [
"size_mismatch"
"errors": [
"stat_error"
],
- "osd": 0
+ "osd": 0,
+ "primary": false
},
{
"size": 7,
"errors": [],
- "osd": 1
+ "osd": 1,
+ "primary": true
}
],
"selected_object_info": "3:bc819597:::ROBJ12:head(47'52 osd.0.0:51 dirty|omap|data_digest|omap_digest s 7 uv 36 dd 2ddbf8f5 od 67f306a alloc_hint [0 0 0])",
"errors": [
"stat_error"
],
- "osd": 0
+ "osd": 0,
+ "primary": false
},
{
"size": 7,
"errors": [],
- "osd": 1
+ "osd": 1,
+ "primary": true
}
],
"selected_object_info": "3:d60617f9:::ROBJ13:head(47'55 osd.0.0:54 dirty|omap|data_digest|omap_digest s 7 uv 39 dd 2ddbf8f5 od 6441854d alloc_hint [0 0 0])",
"shards": [
{
"size": 7,
+ "attrs": [
+ {
+ "Base64": false,
+ "value": "",
+ "name": "_"
+ },
+ {
+ "Base64": true,
+ "value": "AwIdAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAA=",
+ "name": "snapset"
+ }
+ ],
"errors": [
"oi_attr_corrupted"
],
- "osd": 0
+ "osd": 0,
+ "primary": false
},
{
"size": 7,
+ "attrs": [
+ {
+ "Base64": true,
+ "value": "AwIdAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAA=",
+ "name": "snapset"
+ }
+ ],
"errors": [
"oi_attr_missing"
],
- "osd": 1
+ "osd": 1,
+ "primary": true
}
],
"union_shard_errors": [
],
"size": 7,
"errors": [],
- "osd": 0
+ "osd": 0,
+ "primary": false
},
{
"attrs": [
"errors": [
"oi_attr_missing"
],
- "osd": 1
+ "osd": 1,
+ "primary": true
}
],
"selected_object_info": "3:30259878:::ROBJ15:head(47'46 osd.0.0:45 dirty|omap|data_digest|omap_digest s 7 uv 45 dd 2ddbf8f5 od 2d2a4d6e alloc_hint [0 0 0])",
"union_shard_errors": [
"oi_attr_missing"
],
- "errors": [
- "attr_name_mismatch"
- ],
+ "errors": [],
"object": {
"version": 45,
"snap": "head",
"name": "ROBJ15"
}
},
+ {
+ "errors": [],
+ "object": {
+ "locator": "",
+ "name": "ROBJ16",
+ "nspace": "",
+ "snap": "head",
+ "version": 0
+ },
+ "shards": [
+ {
+ "attrs": [
+ {
+ "Base64": true,
+ "name": "_",
+ "value": ""
+ }
+ ],
+ "errors": [
+ "ss_attr_missing"
+ ],
+ "osd": 0,
+ "primary": false,
+ "size": 7
+ },
+ {
+ "attrs": [
+ {
+ "Base64": true,
+ "name": "_",
+ "value": ""
+ },
+ {
+ "Base64": false,
+ "name": "snapset",
+ "value": "bad-val"
+ }
+ ],
+ "errors": [
+ "ss_attr_corrupted"
+ ],
+ "osd": 1,
+ "primary": true,
+ "size": 7
+ }
+ ],
+ "union_shard_errors": [
+ "ss_attr_missing",
+ "ss_attr_corrupted"
+ ]
+ },
{
"shards": [
{
"size": 7,
"errors": [],
- "osd": 0
+ "osd": 0,
+ "primary": false
},
{
"errors": [
"missing"
],
- "osd": 1
+ "osd": 1,
+ "primary": true
}
],
"selected_object_info": "3:f2a5b2a4:::ROBJ3:head(47'57 osd.0.0:56 dirty|omap|data_digest|omap_digest s 7 uv 9 dd 2ddbf8f5 od b35dfd alloc_hint [0 0 0])",
],
"size": 7,
"errors": [],
- "osd": 0
+ "osd": 0,
+ "primary": false
},
{
"attrs": [
],
"size": 7,
"errors": [],
- "osd": 1
+ "osd": 1,
+ "primary": true
}
],
- "selected_object_info": "3:86586531:::ROBJ8:head(82'62 client.4351.0:1 dirty|omap|data_digest|omap_digest s 7 uv 62 dd 2ddbf8f5 od d6be81dc alloc_hint [0 0 0])",
+ "selected_object_info": "3:86586531:::ROBJ8:head(82'62 client.4351.0:1 dirty|omap|data_digest|omap_digest s 7 uv 66 dd 2ddbf8f5 od d6be81dc alloc_hint [0 0 0])",
"union_shard_errors": [],
"errors": [
"attr_value_mismatch",
"attr_name_mismatch"
],
"object": {
- "version": 62,
+ "version": 66,
"snap": "head",
"locator": "",
"nspace": "",
{
"shards": [
{
- "attrs": [
- {
- "Base64": true,
- "value": "",
- "name": "_"
- },
- {
- "Base64": true,
- "value": "AwIdAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAA=",
- "name": "snapset"
- }
- ],
- "object_info": "3:ffdb2004:::ROBJ9:head(102'63 client.4433.0:1 dirty|omap|data_digest|omap_digest s 1 uv 63 dd 2b63260d od 2eecc539 alloc_hint [0 0 0])",
+ "object_info": "3:ffdb2004:::ROBJ9:head(102'63 client.4433.0:1 dirty|omap|data_digest|omap_digest s 1 uv 67 dd 2b63260d od 2eecc539 alloc_hint [0 0 0])",
"size": 1,
"errors": [],
- "osd": 0
+ "osd": 0,
+ "primary": false
},
{
- "attrs": [
- {
- "Base64": true,
- "value": "",
- "name": "_"
- },
- {
- "Base64": true,
- "value": "AwIdAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAA=",
- "name": "snapset"
- }
- ],
"object_info": "3:ffdb2004:::ROBJ9:head(47'60 osd.0.0:59 dirty|omap|data_digest|omap_digest s 7 uv 27 dd 2ddbf8f5 od 2eecc539 alloc_hint [0 0 0])",
"size": 1,
- "errors": [],
- "osd": 1
+ "errors": [
+ "obj_size_oi_mismatch"
+ ],
+ "osd": 1,
+ "primary": true
}
],
- "selected_object_info": "3:ffdb2004:::ROBJ9:head(102'63 client.4433.0:1 dirty|omap|data_digest|omap_digest s 1 uv 63 dd 2b63260d od 2eecc539 alloc_hint [0 0 0])",
- "union_shard_errors": [],
+ "selected_object_info": "3:ffdb2004:::ROBJ9:head(102'63 client.4433.0:1 dirty|omap|data_digest|omap_digest s 1 uv 67 dd 2b63260d od 2eecc539 alloc_hint [0 0 0])",
+ "union_shard_errors": [
+ "obj_size_oi_mismatch"
+ ],
"errors": [
- "object_info_inconsistency",
- "attr_value_mismatch"
+ "object_info_inconsistency"
],
"object": {
- "version": 63,
+ "version": 67,
"snap": "head",
"locator": "",
"nspace": "",
objectstore_tool $dir 1 $objname set-attr _ $dir/oi
rm $dir/oi
- set_config osd 0 filestore_debug_inject_read_err true || return 1
- set_config osd 1 filestore_debug_inject_read_err true || return 1
- CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.1) \
- injectdataerr $poolname ROBJ11 || return 1
- CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) \
- injectmdataerr $poolname ROBJ12 || return 1
- CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) \
- injectmdataerr $poolname ROBJ13 || return 1
- CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.1) \
- injectdataerr $poolname ROBJ13 || return 1
+ inject_eio rep data $poolname ROBJ11 $dir 0 || return 1 # shard 0 of [1, 0], osd.1
+ inject_eio rep mdata $poolname ROBJ12 $dir 1 || return 1 # shard 1 of [1, 0], osd.0
+ inject_eio rep mdata $poolname ROBJ13 $dir 1 || return 1 # shard 1 of [1, 0], osd.0
+ inject_eio rep data $poolname ROBJ13 $dir 0 || return 1 # shard 0 of [1, 0], osd.1
pg_deep_scrub $pg
rados list-inconsistent-pg $poolname > $dir/json || return 1
"omap_digest": "0xf5fba2c6",
"size": 7,
"errors": [],
- "osd": 0
+ "osd": 0,
+ "primary": false
},
{
"data_digest": "0x2d4a11c2",
"size": 9,
"errors": [
"data_digest_mismatch_oi",
- "size_mismatch_oi"
+ "size_mismatch_oi",
+ "obj_size_oi_mismatch"
],
- "osd": 1
+ "osd": 1,
+ "primary": true
}
],
"selected_object_info": "3:ce3f1d6a:::ROBJ1:head(47'54 osd.0.0:53 dirty|omap|data_digest|omap_digest s 7 uv 3 dd 2ddbf8f5 od f5fba2c6 alloc_hint [0 0 0])",
"union_shard_errors": [
"data_digest_mismatch_oi",
- "size_mismatch_oi"
+ "size_mismatch_oi",
+ "obj_size_oi_mismatch"
],
"errors": [
"data_digest_mismatch",
"errors": [
"omap_digest_mismatch_oi"
],
- "osd": 0
+ "osd": 0,
+ "primary": false
},
{
"data_digest": "0x2ddbf8f5",
"errors": [
"omap_digest_mismatch_oi"
],
- "osd": 1
+ "osd": 1,
+ "primary": true
}
],
"selected_object_info": "3:b1f19cbd:::ROBJ10:head(47'51 osd.0.0:50 dirty|omap|data_digest|omap_digest s 7 uv 30 dd 2ddbf8f5 od c2025a24 alloc_hint [0 0 0])",
"omap_digest": "0xa03cef03",
"size": 7,
"errors": [],
- "osd": 0
+ "osd": 0,
+ "primary": false
},
{
"size": 7,
"errors": [
"read_error"
],
- "osd": 1
+ "osd": 1,
+ "primary": true
}
],
"selected_object_info": "3:87abbf36:::ROBJ11:head(47'48 osd.0.0:47 dirty|omap|data_digest|omap_digest s 7 uv 33 dd 2ddbf8f5 od a03cef03 alloc_hint [0 0 0])",
"errors": [
"stat_error"
],
- "osd": 0
+ "osd": 0,
+ "primary": false
},
{
"data_digest": "0x2ddbf8f5",
"omap_digest": "0x067f306a",
"size": 7,
"errors": [],
- "osd": 1
+ "osd": 1,
+ "primary": true
}
],
"selected_object_info": "3:bc819597:::ROBJ12:head(47'52 osd.0.0:51 dirty|omap|data_digest|omap_digest s 7 uv 36 dd 2ddbf8f5 od 67f306a alloc_hint [0 0 0])",
"errors": [
"stat_error"
],
- "osd": 0
+ "osd": 0,
+ "primary": false
},
{
"size": 7,
"errors": [
"read_error"
],
- "osd": 1
+ "osd": 1,
+ "primary": true
}
],
"union_shard_errors": [
{
"shards": [
{
+ "attrs": [
+ {
+ "Base64": false,
+ "value": "",
+ "name": "_"
+ },
+ {
+ "Base64": true,
+ "value": "AwIdAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAA=",
+ "name": "snapset"
+ }
+ ],
"data_digest": "0x2ddbf8f5",
"omap_digest": "0x4f14f849",
"size": 7,
"errors": [
"oi_attr_corrupted"
],
- "osd": 0
+ "osd": 0,
+ "primary": false
},
{
+ "attrs": [
+ {
+ "Base64": true,
+ "value": "AwIdAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAA=",
+ "name": "snapset"
+ }
+ ],
"data_digest": "0x2ddbf8f5",
"omap_digest": "0x4f14f849",
"size": 7,
"errors": [
"oi_attr_missing"
],
- "osd": 1
+ "osd": 1,
+ "primary": true
}
],
"union_shard_errors": [
"omap_digest": "0x2d2a4d6e",
"size": 7,
"errors": [],
- "osd": 0
+ "osd": 0,
+ "primary": false
},
{
"attrs": [
"errors": [
"oi_attr_missing"
],
- "osd": 1
+ "osd": 1,
+ "primary": true
}
],
"selected_object_info": "3:30259878:::ROBJ15:head(47'46 osd.0.0:45 dirty|omap|data_digest|omap_digest s 7 uv 45 dd 2ddbf8f5 od 2d2a4d6e alloc_hint [0 0 0])",
"union_shard_errors": [
"oi_attr_missing"
],
- "errors": [
- "attr_name_mismatch"
- ],
+ "errors": [],
"object": {
"version": 45,
"snap": "head",
"name": "ROBJ15"
}
},
+ {
+ "errors": [],
+ "object": {
+ "locator": "",
+ "name": "ROBJ16",
+ "nspace": "",
+ "snap": "head",
+ "version": 0
+ },
+ "shards": [
+ {
+ "attrs": [
+ {
+ "Base64": true,
+ "name": "_",
+ "value": ""
+ }
+ ],
+ "data_digest": "0x2ddbf8f5",
+ "errors": [
+ "ss_attr_missing"
+ ],
+ "omap_digest": "0x8b699207",
+ "osd": 0,
+ "primary": false,
+ "size": 7
+ },
+ {
+ "attrs": [
+ {
+ "Base64": true,
+ "name": "_",
+ "value": ""
+ },
+ {
+ "Base64": false,
+ "name": "snapset",
+ "value": "bad-val"
+ }
+ ],
+ "data_digest": "0x2ddbf8f5",
+ "errors": [
+ "ss_attr_corrupted"
+ ],
+ "omap_digest": "0x8b699207",
+ "osd": 1,
+ "primary": true,
+ "size": 7
+ }
+ ],
+ "union_shard_errors": [
+ "ss_attr_missing",
+ "ss_attr_corrupted"
+ ]
+ },
{
"shards": [
{
"errors": [
"data_digest_mismatch_oi"
],
- "osd": 0
+ "osd": 0,
+ "primary": false
},
{
"data_digest": "0x2ddbf8f5",
"omap_digest": "0xf8e11918",
"size": 7,
"errors": [],
- "osd": 1
+ "osd": 1,
+ "primary": true
}
],
"selected_object_info": "3:e97ce31e:::ROBJ2:head(47'56 osd.0.0:55 dirty|omap|data_digest|omap_digest s 7 uv 6 dd 2ddbf8f5 od f8e11918 alloc_hint [0 0 0])",
"omap_digest": "0x00b35dfd",
"size": 7,
"errors": [],
- "osd": 0
+ "osd": 0,
+ "primary": false
},
{
"errors": [
"missing"
],
- "osd": 1
+ "osd": 1,
+ "primary": true
}
],
"selected_object_info": "3:f2a5b2a4:::ROBJ3:head(47'57 osd.0.0:56 dirty|omap|data_digest|omap_digest s 7 uv 9 dd 2ddbf8f5 od b35dfd alloc_hint [0 0 0])",
"errors": [
"omap_digest_mismatch_oi"
],
- "osd": 0
+ "osd": 0,
+ "primary": false
},
{
"data_digest": "0x2ddbf8f5",
"omap_digest": "0xe2d46ea4",
"size": 7,
"errors": [],
- "osd": 1
+ "osd": 1,
+ "primary": true
}
],
"selected_object_info": "3:f4981d31:::ROBJ4:head(47'58 osd.0.0:57 dirty|omap|data_digest|omap_digest s 7 uv 12 dd 2ddbf8f5 od e2d46ea4 alloc_hint [0 0 0])",
"omap_digest": "0x1a862a41",
"size": 7,
"errors": [],
- "osd": 0
+ "osd": 0,
+ "primary": false
},
{
"data_digest": "0x2ddbf8f5",
"errors": [
"omap_digest_mismatch_oi"
],
- "osd": 1
+ "osd": 1,
+ "primary": true
}
],
"selected_object_info": "3:f4bfd4d1:::ROBJ5:head(47'59 osd.0.0:58 dirty|omap|data_digest|omap_digest s 7 uv 15 dd 2ddbf8f5 od 1a862a41 alloc_hint [0 0 0])",
"errors": [
"omap_digest_mismatch_oi"
],
- "osd": 0
+ "osd": 0,
+ "primary": false
},
{
"data_digest": "0x2ddbf8f5",
"omap_digest": "0x179c919f",
"size": 7,
"errors": [],
- "osd": 1
+ "osd": 1,
+ "primary": true
}
],
"selected_object_info": "3:a53c12e8:::ROBJ6:head(47'50 osd.0.0:49 dirty|omap|data_digest|omap_digest s 7 uv 18 dd 2ddbf8f5 od 179c919f alloc_hint [0 0 0])",
"omap_digest": "0xefced57a",
"size": 7,
"errors": [],
- "osd": 0
+ "osd": 0,
+ "primary": false
},
{
"data_digest": "0x2ddbf8f5",
"errors": [
"omap_digest_mismatch_oi"
],
- "osd": 1
+ "osd": 1,
+ "primary": true
}
],
"selected_object_info": "3:8b55fa4b:::ROBJ7:head(47'49 osd.0.0:48 dirty|omap|data_digest|omap_digest s 7 uv 21 dd 2ddbf8f5 od efced57a alloc_hint [0 0 0])",
"omap_digest": "0xd6be81dc",
"size": 7,
"errors": [],
- "osd": 0
+ "osd": 0,
+ "primary": false
},
{
"attrs": [
"omap_digest": "0xd6be81dc",
"size": 7,
"errors": [],
- "osd": 1
+ "osd": 1,
+ "primary": true
}
],
- "selected_object_info": "3:86586531:::ROBJ8:head(82'62 client.4351.0:1 dirty|omap|data_digest|omap_digest s 7 uv 62 dd 2ddbf8f5 od d6be81dc alloc_hint [0 0 0])",
+ "selected_object_info": "3:86586531:::ROBJ8:head(82'62 client.4351.0:1 dirty|omap|data_digest|omap_digest s 7 uv 66 dd 2ddbf8f5 od d6be81dc alloc_hint [0 0 0])",
"union_shard_errors": [],
"errors": [
"attr_value_mismatch",
"attr_name_mismatch"
],
"object": {
- "version": 62,
+ "version": 66,
"snap": "head",
"locator": "",
"nspace": "",
{
"shards": [
{
- "attrs": [
- {
- "Base64": true,
- "value": "",
- "name": "_"
- },
- {
- "Base64": true,
- "value": "AwIdAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAA=",
- "name": "snapset"
- }
- ],
"object_info": "3:ffdb2004:::ROBJ9:head(47'60 osd.0.0:59 dirty|omap|data_digest|omap_digest s 7 uv 27 dd 2ddbf8f5 od 2eecc539 alloc_hint [0 0 0])",
"data_digest": "0x1f26fb26",
"omap_digest": "0x2eecc539",
"size": 3,
- "errors": [],
- "osd": 0
+ "errors": [
+ "obj_size_oi_mismatch"
+ ],
+ "osd": 0,
+ "primary": false
},
{
- "attrs": [
- {
- "Base64": true,
- "value": "",
- "name": "_"
- },
- {
- "Base64": true,
- "value": "AwIdAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAA=",
- "name": "snapset"
- }
- ],
- "object_info": "3:ffdb2004:::ROBJ9:head(122'64 client.4532.0:1 dirty|omap|data_digest|omap_digest s 3 uv 64 dd 1f26fb26 od 2eecc539 alloc_hint [0 0 0])",
+ "object_info": "3:ffdb2004:::ROBJ9:head(122'64 client.4532.0:1 dirty|omap|data_digest|omap_digest s 3 uv 68 dd 1f26fb26 od 2eecc539 alloc_hint [0 0 0])",
"data_digest": "0x1f26fb26",
"omap_digest": "0x2eecc539",
"size": 3,
"errors": [],
- "osd": 1
+ "osd": 1,
+ "primary": true
}
],
- "selected_object_info": "3:ffdb2004:::ROBJ9:head(122'64 client.4532.0:1 dirty|omap|data_digest|omap_digest s 3 uv 64 dd 1f26fb26 od 2eecc539 alloc_hint [0 0 0])",
- "union_shard_errors": [],
+ "selected_object_info": "3:ffdb2004:::ROBJ9:head(122'64 client.4532.0:1 dirty|omap|data_digest|omap_digest s 3 uv 68 dd 1f26fb26 od 2eecc539 alloc_hint [0 0 0])",
+ "union_shard_errors": [
+ "obj_size_oi_mismatch"
+ ],
"errors": [
- "object_info_inconsistency",
- "attr_value_mismatch"
+ "object_info_inconsistency"
],
"object": {
- "version": 64,
+ "version": 68,
"snap": "head",
"locator": "",
"nspace": "",
fi
done
create_rbd_pool || return 1
- ceph osd pool create foo 1
+ create_pool foo 1
create_ec_pool $poolname $allow_overwrites k=2 m=1 stripe_unit=2K --force || return 1
wait_for_clean || return 1
"size": 2048,
"errors": [],
"shard": 2,
- "osd": 0
+ "osd": 0,
+ "primary": false
},
{
"size": 9,
"shard": 0,
"errors": [
- "size_mismatch_oi"
+ "size_mismatch_oi",
+ "obj_size_oi_mismatch"
],
- "osd": 1
+ "osd": 1,
+ "primary": true
},
{
"size": 2048,
"shard": 1,
"errors": [],
- "osd": 2
+ "osd": 2,
+ "primary": false
}
],
"selected_object_info": "3:9175b684:::EOBJ1:head(21'1 client.4179.0:1 dirty|data_digest|omap_digest s 7 uv 1 dd 2ddbf8f5 od ffffffff alloc_hint [0 0 0])",
"union_shard_errors": [
- "size_mismatch_oi"
+ "size_mismatch_oi",
+ "obj_size_oi_mismatch"
],
"errors": [
"size_mismatch"
"size": 2048,
"errors": [],
"shard": 2,
- "osd": 0
+ "osd": 0,
+ "primary": false
},
{
"shard": 0,
"errors": [
"missing"
],
- "osd": 1
+ "osd": 1,
+ "primary": true
},
{
"size": 2048,
"shard": 1,
"errors": [],
- "osd": 2
+ "osd": 2,
+ "primary": false
}
],
"selected_object_info": "3:b197b25d:::EOBJ3:head(37'3 client.4251.0:1 dirty|data_digest|omap_digest s 7 uv 3 dd 2ddbf8f5 od ffffffff alloc_hint [0 0 0])",
"size": 2048,
"errors": [],
"shard": 2,
- "osd": 0
+ "osd": 0,
+ "primary": false
},
{
"osd": 1,
+ "primary": true,
"shard": 0,
"errors": [],
"size": 2048,
},
{
"osd": 2,
+ "primary": false,
"shard": 1,
"errors": [],
"size": 2048,
"size": 2048,
"errors": [],
"shard": 2,
- "osd": 0
+ "osd": 0,
+ "primary": false
},
{
"size": 4096,
"shard": 0,
"errors": [
- "size_mismatch_oi"
+ "size_mismatch_oi",
+ "obj_size_oi_mismatch"
],
- "osd": 1
+ "osd": 1,
+ "primary": true
},
{
"size": 2048,
"shard": 1,
"errors": [],
- "osd": 2
+ "osd": 2,
+ "primary": false
}
],
"selected_object_info": "3:8549dfb5:::EOBJ5:head(65'7 client.4441.0:1 dirty|data_digest|omap_digest s 7 uv 7 dd 2ddbf8f5 od ffffffff alloc_hint [0 0 0])",
"union_shard_errors": [
- "size_mismatch_oi"
+ "size_mismatch_oi",
+ "obj_size_oi_mismatch"
],
"errors": [
"size_mismatch"
"size": 2048,
"errors": [],
"shard": 2,
- "osd": 0
+ "osd": 0,
+ "primary": false
},
{
"size": 9,
"shard": 0,
"errors": [
"read_error",
- "size_mismatch_oi"
+ "size_mismatch_oi",
+ "obj_size_oi_mismatch"
],
- "osd": 1
+ "osd": 1,
+ "primary": true
},
{
"data_digest": "0x00000000",
"size": 2048,
"shard": 1,
"errors": [],
- "osd": 2
+ "osd": 2,
+ "primary": false
}
],
"selected_object_info": "3:9175b684:::EOBJ1:head(27'1 client.4155.0:1 dirty|data_digest|omap_digest s 7 uv 1 dd 2ddbf8f5 od ffffffff alloc_hint [0 0 0])",
"union_shard_errors": [
"read_error",
- "size_mismatch_oi"
+ "size_mismatch_oi",
+ "obj_size_oi_mismatch"
],
"errors": [
"size_mismatch"
"size": 2048,
"errors": [],
"shard": 2,
- "osd": 0
+ "osd": 0,
+ "primary": false
},
{
"shard": 0,
"errors": [
"missing"
],
- "osd": 1
+ "osd": 1,
+ "primary": true
},
{
"data_digest": "0x00000000",
"size": 2048,
"shard": 1,
"errors": [],
- "osd": 2
+ "osd": 2,
+ "primary": false
}
],
"selected_object_info": "3:b197b25d:::EOBJ3:head(41'3 client.4199.0:1 dirty|data_digest|omap_digest s 7 uv 3 dd 2ddbf8f5 od ffffffff alloc_hint [0 0 0])",
"size": 2048,
"errors": [],
"shard": 2,
- "osd": 0
+ "osd": 0,
+ "primary": false
},
{
"attrs": [
"size": 2048,
"errors": [],
"shard": 0,
- "osd": 1
+ "osd": 1,
+ "primary": true
},
{
"attrs": [
"size": 2048,
"errors": [],
"shard": 1,
- "osd": 2
+ "osd": 2,
+ "primary": false
}
],
"selected_object_info": "3:5e723e06:::EOBJ4:head(48'6 client.4223.0:1 dirty|data_digest|omap_digest s 7 uv 6 dd 2ddbf8f5 od ffffffff alloc_hint [0 0 0])",
"size": 2048,
"errors": [],
"shard": 2,
- "osd": 0
+ "osd": 0,
+ "primary": false
},
{
"data_digest": "0x00000000",
"omap_digest": "0xffffffff",
"size": 4096,
"errors": [
- "size_mismatch_oi"
+ "size_mismatch_oi",
+ "obj_size_oi_mismatch"
],
"shard": 0,
- "osd": 1
+ "osd": 1,
+ "primary": true
},
{
"data_digest": "0x00000000",
"size": 2048,
"errors": [],
"shard": 1,
- "osd": 2
+ "osd": 2,
+ "primary": false
}
],
"selected_object_info": "3:8549dfb5:::EOBJ5:head(65'7 client.4288.0:1 dirty|data_digest|omap_digest s 7 uv 7 dd 2ddbf8f5 od ffffffff alloc_hint [0 0 0])",
"union_shard_errors": [
- "size_mismatch_oi"
+ "size_mismatch_oi",
+ "obj_size_oi_mismatch"
],
"errors": [
"size_mismatch"
"size": 2048,
"errors": [],
"shard": 2,
- "osd": 0
+ "osd": 0,
+ "primary": false
},
{
"size": 9,
"shard": 0,
"errors": [
"read_error",
- "size_mismatch_oi"
+ "size_mismatch_oi",
+ "obj_size_oi_mismatch"
],
- "osd": 1
+ "osd": 1,
+ "primary": true
},
{
"data_digest": "0x04cfa72f",
"size": 2048,
"shard": 1,
"errors": [],
- "osd": 2
+ "osd": 2,
+ "primary": false
}
],
"selected_object_info": "3:9175b684:::EOBJ1:head(21'1 client.4179.0:1 dirty|data_digest|omap_digest s 7 uv 1 dd 2ddbf8f5 od ffffffff alloc_hint [0 0 0])",
"union_shard_errors": [
"read_error",
- "size_mismatch_oi"
+ "size_mismatch_oi",
+ "obj_size_oi_mismatch"
],
"errors": [
"size_mismatch"
"ec_hash_error"
],
"shard": 2,
- "osd": 0
+ "osd": 0,
+ "primary": false
},
{
"data_digest": "0x04cfa72f",
"size": 2048,
"errors": [],
"shard": 0,
- "osd": 1
+ "osd": 1,
+ "primary": true
},
{
"data_digest": "0x04cfa72f",
"size": 2048,
"errors": [],
"shard": 1,
- "osd": 2
+ "osd": 2,
+ "primary": false
}
],
"selected_object_info": "3:9babd184:::EOBJ2:head(29'2 client.4217.0:1 dirty|data_digest|omap_digest s 7 uv 2 dd 2ddbf8f5 od ffffffff alloc_hint [0 0 0])",
"size": 2048,
"errors": [],
"shard": 2,
- "osd": 0
+ "osd": 0,
+ "primary": false
},
{
"osd": 1,
+ "primary": true,
"shard": 0,
"errors": [
"missing"
"size": 2048,
"shard": 1,
"errors": [],
- "osd": 2
+ "osd": 2,
+ "primary": false
}
],
"selected_object_info": "3:b197b25d:::EOBJ3:head(37'3 client.4251.0:1 dirty|data_digest|omap_digest s 7 uv 3 dd 2ddbf8f5 od ffffffff alloc_hint [0 0 0])",
"size": 2048,
"errors": [],
"shard": 2,
- "osd": 0
+ "osd": 0,
+ "primary": false
},
{
"osd": 1,
+ "primary": true,
"shard": 0,
"errors": [],
"size": 2048,
},
{
"osd": 2,
+ "primary": false,
"shard": 1,
"errors": [],
"size": 2048,
"size": 2048,
"errors": [],
"shard": 2,
- "osd": 0
+ "osd": 0,
+ "primary": false
},
{
"size": 4096,
"shard": 0,
"errors": [
"size_mismatch_oi",
- "ec_size_error"
+ "ec_size_error",
+ "obj_size_oi_mismatch"
],
- "osd": 1
+ "osd": 1,
+ "primary": true
},
{
"data_digest": "0x04cfa72f",
"size": 2048,
"shard": 1,
"errors": [],
- "osd": 2
+ "osd": 2,
+ "primary": false
}
],
"selected_object_info": "3:8549dfb5:::EOBJ5:head(65'7 client.4441.0:1 dirty|data_digest|omap_digest s 7 uv 7 dd 2ddbf8f5 od ffffffff alloc_hint [0 0 0])",
"union_shard_errors": [
"size_mismatch_oi",
- "ec_size_error"
+ "ec_size_error",
+ "obj_size_oi_mismatch"
],
"errors": [
"size_mismatch"
setup $dir || return 1
run_mon $dir a --osd_pool_default_size=2 || return 1
run_mgr $dir x || return 1
- local ceph_osd_args="--osd-scrub-interval-randomize-ratio=0 --osd-deep-scrub-randomize-ratio=0"
+ local ceph_osd_args="--osd-scrub-interval-randomize-ratio=0 --osd-deep-scrub-randomize-ratio=0 "
+ ceph_osd_args+="--osd_scrub_backoff_ratio=0"
run_osd $dir 0 $ceph_osd_args || return 1
run_osd $dir 1 $ceph_osd_args || return 1
create_rbd_pool || return 1
wait_for_clean || return 1
- ceph osd pool create $poolname 1 1 || return 1
+ create_pool $poolname 1 1 || return 1
wait_for_clean || return 1
local osd=0
# Make sure bad object found
rados list-inconsistent-obj $pg | jq '.' | grep -q $objname || return 1
+ flush_pg_stats
local last_scrub=$(get_last_scrub_stamp $pg)
# Fake a schedule scrub
CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${primary}) \
# Can't upgrade with this set
ceph osd set nodeep-scrub
# Let map change propagate to OSDs
- sleep 2
+    flush_pg_stats
+ sleep 5
# Fake a schedule scrub
- local last_scrub=$(get_last_scrub_stamp $pg)
CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${primary}) \
trigger_scrub $pg || return 1
# Wait for schedule regular scrub
# Bad object still known
rados list-inconsistent-obj $pg | jq '.' | grep -q $objname || return 1
+ flush_pg_stats
# Request a regular scrub and it will be done
- local scrub_backoff_ratio=$(get_config osd ${primary} osd_scrub_backoff_ratio)
- set_config osd ${primary} osd_scrub_backoff_ratio 0
pg_scrub $pg
- sleep 1
- set_config osd ${primary} osd_scrub_backoff_ratio $scrub_backoff_ratio
grep -q "Regular scrub request, deep-scrub details will be lost" $dir/osd.${primary}.log || return 1
# deep-scrub error is no longer present
wait_for_clean || return 1
# Create a pool with a single pg
- ceph osd pool create $poolname 1 1
+ create_pool $poolname 1 1
+ wait_for_clean || return 1
poolid=$(ceph osd dump | grep "^pool.*[']test[']" | awk '{ print $2 }')
dd if=/dev/urandom of=$TESTDATA bs=1032 count=1
err_strings[19]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj9:1 is missing in clone_size"
err_strings[20]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj11:1 is an unexpected clone"
err_strings[21]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj14:1 size 1032 != clone_size 1033"
- err_strings[22]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub 23 errors"
+ err_strings[22]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub 22 errors"
err_strings[23]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj15:head can't decode 'snapset' attr buffer"
- err_strings[24]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj12:1 has no oi or legacy_snaps; cannot convert 1=[[]1[]]:[[]1[]].stray_clone_snaps=[{]1=[[]1[]][}]"
- for i in `seq 0 ${#err_strings[@]}`
+ for err_string in "${err_strings[@]}"
do
- if ! grep "${err_strings[$i]}" $dir/osd.0.log > /dev/null;
+ if ! grep "$err_string" $dir/osd.0.log > /dev/null;
then
- echo "Missing log message '${err_strings[$i]}'"
+ echo "Missing log message '$err_string'"
ERRORS=$(expr $ERRORS + 1)
fi
done
--- /dev/null
+#!/usr/bin/env bash
+set -ex
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:7202" # git grep '\<7202\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+function TEST_failure_log() {
+ local dir=$1
+
+ cat > $dir/test_failure.log << EOF
+This is a fake log file
+*
+*
+*
+*
+*
+This ends the fake log file
+EOF
+
+ # Test fails
+ return 1
+}
+
+function TEST_failure_core_only() {
+ local dir=$1
+
+ run_mon $dir a || return 1
+ kill_daemons $dir SEGV mon 5
+ return 0
+}
+
+main test_failure "$@"
ceph:
log-whitelist:
- \(MDS_TRIM\)
+ - Behind on trimming
- ceph:
skip_mgr_daemons: true
add_osds_to_crush: true
+ log-whitelist:
+ - required past_interval bounds are empty
- print: "**** done ceph"
- scrub mismatch
- ScrubResult
- wrongly marked
- - (MDS_FAILED)
+ - \(MDS_FAILED\)
+ - \(OBJECT_
+ - is unresponsive
conf:
fs: xfs
- \(PG_
- Monitor daemon marked osd
- Behind on trimming
+ - is unresponsive
conf:
global:
mon warn on pool no app: false
+++ /dev/null
-meta:
-- desc: |
- generate read/write load with rados objects ranging from 1MB to 25MB
-workload:
- full_sequential:
- - workunit:
- branch: jewel
- clients:
- client.0:
- - rados/load-gen-big.sh
- - print: "**** done rados/load-gen-big.sh 2-workload"
- ceph:
skip_mgr_daemons: true
add_osds_to_crush: true
+ log-whitelist:
+ - required past_interval bounds are empty
- print: "**** done ceph"
# are taking way more than a minute/monitor to form quorum, so lets
# try the next block which will wait up to 15 minutes to gatherkeys.
execute_ceph_deploy(mon_create_nodes)
- execute_ceph_deploy(mgr_create)
# create-keys is explicit now
# http://tracker.ceph.com/issues/16036
'--id', remote.shortname])
estatus_gather = execute_ceph_deploy(gather_keys)
+
+ execute_ceph_deploy(mgr_create)
+
if mds_nodes:
estatus_mds = execute_ceph_deploy(deploy_mds)
if estatus_mds != 0:
if ctx.rgw.ec_data_pool:
create_ec_pool(remote, data_pool, client, 64,
- ctx.rgw.erasure_code_profile, cluster_name)
+ ctx.rgw.erasure_code_profile, cluster_name, 'rgw')
else:
- create_replicated_pool(remote, data_pool, 64, cluster_name)
+ create_replicated_pool(remote, data_pool, 64, cluster_name, 'rgw')
if ctx.rgw.cache_pools:
create_cache_pool(remote, data_pool, data_pool + '.cache', 64,
- 64*1024*1024, cluster_name)
+ 64*1024*1024, cluster_name, 'rgw')
log.debug('Pools created')
yield
pool_name = pool_config['val']['data_pool']
if ctx.rgw.ec_data_pool:
create_ec_pool(gateway.remote, pool_name, zone.name, 64,
- ctx.rgw.erasure_code_profile, cluster.name)
+ ctx.rgw.erasure_code_profile, cluster.name, 'rgw')
else:
- create_replicated_pool(gateway.remote, pool_name, 64, cluster.name)
+ create_replicated_pool(gateway.remote, pool_name, 64, cluster.name, 'rgw')
def configure_zone_compression(zone, compression):
""" Set compression type in the zone's default-placement """
else:
return proc
-def create_ec_pool(remote, name, profile_name, pgnum, profile={}, cluster_name="ceph"):
+def create_ec_pool(remote, name, profile_name, pgnum, profile={}, cluster_name="ceph", application=None):
remote.run(args=['sudo', 'ceph'] +
cmd_erasure_code_profile(profile_name, profile) + ['--cluster', cluster_name])
remote.run(args=[
'sudo', 'ceph', 'osd', 'pool', 'create', name,
str(pgnum), str(pgnum), 'erasure', profile_name, '--cluster', cluster_name
])
+ if application:
+ remote.run(args=[
+ 'sudo', 'ceph', 'osd', 'pool', 'application', 'enable', name, application, '--cluster', cluster_name
+ ])
-def create_replicated_pool(remote, name, pgnum, cluster_name="ceph"):
+def create_replicated_pool(remote, name, pgnum, cluster_name="ceph", application=None):
remote.run(args=[
'sudo', 'ceph', 'osd', 'pool', 'create', name, str(pgnum), str(pgnum), '--cluster', cluster_name
])
+ if application:
+ remote.run(args=[
+ 'sudo', 'ceph', 'osd', 'pool', 'application', 'enable', name, application, '--cluster', cluster_name
+ ])
-def create_cache_pool(remote, base_name, cache_name, pgnum, size, cluster_name="ceph"):
+def create_cache_pool(remote, base_name, cache_name, pgnum, size, cluster_name="ceph", application=None):
remote.run(args=[
'sudo', 'ceph', 'osd', 'pool', 'create', cache_name, str(pgnum), '--cluster', cluster_name
])
'sudo', 'ceph', 'osd', 'tier', 'add-cache', base_name, cache_name,
str(size), '--cluster', cluster_name
])
+ if application:
+ remote.run(args=[
+            'sudo', 'ceph', 'osd', 'pool', 'application', 'enable', cache_name, application, '--cluster', cluster_name
+ ])
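+# Illustrative call (not from the patch), matching how the rgw task above
+# tags its data pool on creation:
+#   create_replicated_pool(remote, 'default.rgw.buckets.data', 64, 'ceph', 'rgw')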
def cmd_erasure_code_profile(profile_name, profile):
"""
ceph osd crush set-device-class hdd osd.1
ceph osd crush rule create-replicated foo-ssd default host ssd
ceph osd crush rule create-replicated foo-hdd default host hdd
+ceph osd crush rule ls-by-class ssd | grep 'foo-ssd'
+ceph osd crush rule ls-by-class ssd | expect_false grep 'foo-hdd'
+ceph osd crush rule ls-by-class hdd | grep 'foo-hdd'
+ceph osd crush rule ls-by-class hdd | expect_false grep 'foo-ssd'
ceph osd erasure-code-profile set ec-foo-ssd crush-device-class=ssd m=2 k=2
ceph osd pool create ec-foo 2 erasure ec-foo-ssd
ceph osd crush rule ls | grep foo
+ceph osd crush rule rename foo foo-asdf
+ceph osd crush rule rename bar bar-asdf
+ceph osd crush rule ls | grep 'foo-asdf'
+ceph osd crush rule ls | grep 'bar-asdf'
+ceph osd crush rule rm foo 2>&1 | grep 'does not exist'
+ceph osd crush rule rm bar 2>&1 | grep 'does not exist'
+ceph osd crush rule rename foo-asdf foo
+ceph osd crush rule rename bar-asdf bar
+ceph osd crush rule ls | expect_false grep 'foo-asdf'
+ceph osd crush rule ls | expect_false grep 'bar-asdf'
ceph osd crush rule rm foo
ceph osd crush rule rm foo # idempotent
ceph osd crush rule rm bar
expect_failure "is non-primary" clone_image ${CLUSTER1} ${PARENT_POOL} \
${parent_image} ${parent_snap} ${POOL} ${clone_image}1
+testlog "TEST: data pool"
+dp_image=test_data_pool
+create_image ${CLUSTER2} ${POOL} ${dp_image} 128 --data-pool ${PARENT_POOL}
+data_pool=$(get_image_data_pool ${CLUSTER2} ${POOL} ${dp_image})
+test "${data_pool}" = "${PARENT_POOL}"
+wait_for_image_replay_started ${CLUSTER1} ${POOL} ${dp_image}
+data_pool=$(get_image_data_pool ${CLUSTER1} ${POOL} ${dp_image})
+test "${data_pool}" = "${PARENT_POOL}"
+create_snapshot ${CLUSTER2} ${POOL} ${dp_image} 'snap1'
+write_image ${CLUSTER2} ${POOL} ${dp_image} 100
+create_snapshot ${CLUSTER2} ${POOL} ${dp_image} 'snap2'
+write_image ${CLUSTER2} ${POOL} ${dp_image} 100
+wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${dp_image}
+wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${dp_image} 'up+replaying' 'master_position'
+compare_images ${POOL} ${dp_image}@snap1
+compare_images ${POOL} ${dp_image}@snap2
+compare_images ${POOL} ${dp_image}
+
testlog "TEST: disable mirroring / delete non-primary image"
image2=test2
image3=test3
rbd --cluster=${cluster} -p ${pool} mirror image resync ${image}
}
+get_image_data_pool()
+{
+ local cluster=$1
+ local pool=$2
+ local image=$3
+
+ rbd --cluster ${cluster} -p ${pool} info ${image} |
+ awk '$1 == "data_pool:" {print $2}'
+}
+
#
# Main
#
-a5f84b37668fc8e03165aaf5cbb380c78e4deba4
-v12.1.4
+32ce2a3ae5239ee33d6150705cdb24d43bab910c
+v12.2.0
that prefixes tags with ``ceph.`` and uses ``=`` for assignment, and provides
set of utilities for interacting with LVM.
"""
-import json
from ceph_volume import process
from ceph_volume.exceptions import MultipleLVsError, MultipleVGsError
+def _output_parser(output, fields):
+ """
+ Newer versions of LVM allow ``--reportformat=json``, but older versions,
+    like the one included in Xenial, do not. LVM has the ability to filter and
+    format its output, so we assume the output will be in a format this parser
+    can handle (using ';' as the field delimiter)
+
+ :param fields: A string, possibly using ',' to group many items, as it
+ would be used on the CLI
+ :param output: The CLI output from the LVM call
+ """
+ field_items = fields.split(',')
+ report = []
+ for line in output:
+ # clear the leading/trailing whitespace
+ line = line.strip()
+
+ # remove the extra '"' in each field
+ line = line.replace('"', '')
+
+ # prevent moving forward with empty contents
+ if not line:
+ continue
+
+        # splitting on ';' because that is what the lvm call uses as
+ # '--separator'
+ output_items = [i.strip() for i in line.split(';')]
+        # map the output to the fields
+ report.append(
+ dict(zip(field_items, output_items))
+ )
+
+ return report
+
+
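+# Example of what _output_parser produces (illustrative, not part of the module):
+#   _output_parser(['  "osd_vg";"3";"1"'], 'vg_name,pv_count,lv_count')
+#   # -> [{'vg_name': 'osd_vg', 'pv_count': '3', 'lv_count': '1'}]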
def parse_tags(lv_tags):
"""
Return a dictionary mapping of all the tags associated with
def get_api_vgs():
"""
- Return the list of group volumes available in the system using flags to include common
- metadata associated with them
+    Return the list of volume groups available in the system using flags to
+ include common metadata associated with them
- Command and sample JSON output, should look like::
+    Command and sample delimited output, should look like::
- $ sudo vgs --reportformat=json
- {
- "report": [
- {
- "vg": [
- {
- "vg_name":"VolGroup00",
- "pv_count":"1",
- "lv_count":"2",
- "snap_count":"0",
- "vg_attr":"wz--n-",
- "vg_size":"38.97g",
- "vg_free":"0 "},
- {
- "vg_name":"osd_vg",
- "pv_count":"3",
- "lv_count":"1",
- "snap_count":"0",
- "vg_attr":"wz--n-",
- "vg_size":"32.21g",
- "vg_free":"9.21g"
- }
- ]
- }
- ]
- }
+ $ sudo vgs --noheadings --separator=';' \
+ -o vg_name,pv_count,lv_count,snap_count,vg_attr,vg_size,vg_free
+ ubuntubox-vg;1;2;0;wz--n-;299.52g;12.00m
+ osd_vg;3;1;0;wz--n-;29.21g;9.21g
"""
+ fields = 'vg_name,pv_count,lv_count,snap_count,vg_attr,vg_size,vg_free'
stdout, stderr, returncode = process.call(
- [
- 'sudo', 'vgs', '--reportformat=json'
- ]
+ ['sudo', 'vgs', '--noheadings', '--separator=";"', '-o', fields]
)
- report = json.loads(''.join(stdout))
- for report_item in report.get('report', []):
- # is it possible to get more than one item in "report" ?
- return report_item['vg']
- return []
+ return _output_parser(stdout, fields)
def get_api_lvs():
Return the list of logical volumes available in the system using flags to include common
metadata associated with them
- Command and sample JSON output, should look like::
+    Command and delimited output, should look like::
- $ sudo lvs -o lv_tags,lv_path,lv_name,vg_name --reportformat=json
- {
- "report": [
- {
- "lv": [
- {
- "lv_tags":"",
- "lv_path":"/dev/VolGroup00/LogVol00",
- "lv_name":"LogVol00",
- "vg_name":"VolGroup00"},
- {
- "lv_tags":"ceph.osd_fsid=aaa-fff-0000,ceph.osd_fsid=aaa-fff-bbbb,ceph.osd_id=0",
- "lv_path":"/dev/osd_vg/OriginLV",
- "lv_name":"OriginLV",
- "vg_name":"osd_vg"
- }
- ]
- }
- ]
- }
+ $ sudo lvs --noheadings --separator=';' -o lv_tags,lv_path,lv_name,vg_name
+ ;/dev/ubuntubox-vg/root;root;ubuntubox-vg
+ ;/dev/ubuntubox-vg/swap_1;swap_1;ubuntubox-vg
"""
+ fields = 'lv_tags,lv_path,lv_name,vg_name'
stdout, stderr, returncode = process.call(
- ['sudo', 'lvs', '-o', 'lv_tags,lv_path,lv_name,vg_name', '--reportformat=json'])
- report = json.loads(''.join(stdout))
- for report_item in report.get('report', []):
- # is it possible to get more than one item in "report" ?
- return report_item['lv']
- return []
+ ['sudo', 'lvs', '--noheadings', '--separator=";"', '-o', fields]
+ )
+ return _output_parser(stdout, fields)
def get_lv(lv_name=None, vg_name=None, lv_path=None, lv_tags=None):
+from ceph_volume.util import arg_validators
import argparse
required_args = parser.add_argument_group('required arguments')
parser.add_argument(
'--journal',
- help='A logical group name, path to a logical volume, or path to a device',
+ help='A logical volume (vg_name/lv_name), or path to a device',
)
required_args.add_argument(
'--data',
required=True,
- help='A logical group name or a path to a logical volume',
+ type=arg_validators.LVPath(),
+ help='A logical volume (vg_name/lv_name) for OSD data',
)
parser.add_argument(
'--journal-size',
all the metadata to the logical volumes using LVM tags, and starting
the OSD daemon.
- Most basic Usage looks like (journal will be collocated from the same volume group):
-
- ceph-volume lvm create --data {volume group name}
-
-
Example calls for supported scenarios:
- Dedicated volume group for Journal(s)
- -------------------------------------
+ Filestore
+ ---------
Existing logical volume (lv) or device:
- ceph-volume lvm create --data {logical volume} --journal /path/to/{lv}|{device}
+ ceph-volume lvm create --filestore --data {vg name/lv name} --journal /path/to/device
Or:
- ceph-volume lvm create --data {data volume group} --journal {journal volume group}
-
- Collocated (same group) for data and journal
- --------------------------------------------
-
- ceph-volume lvm create --data {volume group}
+ ceph-volume lvm create --filestore --data {vg name/lv name} --journal {vg name/lv name}
""")
parser = create_parser(
from .common import prepare_parser
-def canonical_device_path(device):
- """
- Ensure that a device is canonical (full path) and that it exists so that
- it can be used throughout the prepare/activate process
- """
- # FIXME: this is obviously super naive
- inferred = os.path.join('/dev', device)
- if os.path.exists(os.path.abspath(device)):
- return device
- elif os.path.exists(inferred):
- return inferred
- raise RuntimeError('Selected device does not exist: %s' % device)
-
-
def prepare_filestore(device, journal, secrets, id_=None, fsid=None):
"""
:param device: The name of the volume group or lvm to work with
def __init__(self, argv):
self.argv = argv
+ def get_journal_lv(self, argument):
+ """
+ Perform some parsing of the value of ``--journal`` so that the process
+ can determine correctly if it got a device path or an lv
+ :param argument: The value of ``--journal``, that will need to be split
+ to retrieve the actual lv
+ """
+ try:
+ vg_name, lv_name = argument.split('/')
+ except (ValueError, AttributeError):
+ return None
+ return api.get_lv(lv_name=lv_name, vg_name=vg_name)
+
@decorators.needs_root
def prepare(self, args):
# FIXME we don't allow re-using a keyring, we always generate one for the
#osd_id = args.osd_id or prepare_utils.create_id(fsid)
# allow re-using an id, in case a prepare failed
osd_id = args.osd_id or prepare_utils.create_id(fsid, json.dumps(secrets))
- journal_name = "journal_%s" % fsid
- osd_name = "osd_%s" % fsid
-
+ vg_name, lv_name = args.data.split('/')
if args.filestore:
- data_vg = api.get_vg(vg_name=args.data)
- data_lv = api.get_lv(lv_name=args.data)
- journal_vg = api.get_vg(vg_name=args.journal)
- journal_lv = api.get_lv(lv_name=args.journal)
- journal_device = None
- # it is possible to pass a device as a journal that is not
- # an actual logical volume (or group)
- if not args.journal:
- if data_lv:
- raise RuntimeError('--journal is required when not using a vg for OSD data')
- # collocated: carve out the journal from the data vg
- if data_vg:
- journal_lv = api.create_lv(
- name=journal_name,
- group=data_vg.name,
- size=args.journal_size,
- osd_fsid=fsid,
- osd_id=osd_id,
- type='journal',
- cluster_fsid=cluster_fsid
- )
+ data_lv = api.get_lv(lv_name=lv_name, vg_name=vg_name)
- # if a volume group was defined for the journal create that first
- if journal_vg:
- journal_lv = api.create_lv(
- name=journal_name,
- group=args.journal,
- size=args.journal_size,
- osd_fsid=fsid,
- osd_id=osd_id,
- type='journal',
- cluster_fsid=cluster_fsid
- )
- if journal_lv:
- journal_device = journal_lv.lv_path
- # The journal is probably a device, not in LVM
- elif args.journal:
- journal_device = canonical_device_path(args.journal)
- # At this point we must have a journal_lv or a journal device
- # now create the osd from the group if that was found
- if data_vg:
- # XXX make sure that a there aren't more OSDs than physical
- # devices from this volume group
- data_lv = api.create_lv(
- name=osd_name,
- group=args.data,
- osd_fsid=fsid,
- osd_id=osd_id,
- type='data',
- journal_device=journal_device,
- cluster_fsid=cluster_fsid
- )
# we must have either an existing data_lv or a newly created, so lets make
# sure that the tags are correct
if not data_lv:
raise RuntimeError('no data logical volume found with: %s' % args.data)
+
+ if not args.journal:
+ raise RuntimeError('--journal is required when using --filestore')
+ journal_device = None
+ journal_lv = self.get_journal_lv(args.journal)
+
+ # check if we have an actual path to a device, which is allowed
+ if not journal_lv:
+ if os.path.exists(args.journal):
+ journal_device = args.journal
+ else:
+ raise RuntimeError(
+ '--journal specified an invalid or non-existent device: %s' % args.journal
+ )
+ # Otherwise the journal_device is the path to the lv
+ else:
+ journal_device = journal_lv.lv_path
+ journal_lv.set_tags({
+ 'ceph.type': 'journal',
+ 'ceph.osd_fsid': fsid,
+ 'ceph.osd_id': osd_id,
+ 'ceph.cluster_fsid': cluster_fsid,
+ 'ceph.journal_device': journal_device,
+ 'ceph.data_device': data_lv.lv_path,
+ })
+
data_lv.set_tags({
'ceph.type': 'data',
'ceph.osd_fsid': fsid,
Expected input is similar to::
- ['/path/to/ceph-volume-systemd', '<osd id>-<osd uuid>-<device type>']
['/path/to/ceph-volume-systemd', '<type>-<extra metadata>']
For example::
class TestGetAPIVgs(object):
def test_report_is_emtpy(self, monkeypatch):
- monkeypatch.setattr(api.process, 'call', lambda x: ('{}', '', 0))
+ monkeypatch.setattr(api.process, 'call', lambda x: ('\n\n', '', 0))
assert api.get_api_vgs() == []
def test_report_has_stuff(self, monkeypatch):
- report = '{"report":[{"vg":[{"vg_name":"VolGroup00"}]}]}'
+ report = [' VolGroup00']
monkeypatch.setattr(api.process, 'call', lambda x: (report, '', 0))
assert api.get_api_vgs() == [{'vg_name': 'VolGroup00'}]
- def test_report_has_multiple_items(self, monkeypatch):
- report = '{"report":[{"vg":[{"vg_name":"VolGroup00"},{"vg_name":"ceph_vg"}]}]}'
+ def test_report_has_stuff_with_empty_attrs(self, monkeypatch):
+ report = [' VolGroup00 ;;;;;;9g']
monkeypatch.setattr(api.process, 'call', lambda x: (report, '', 0))
- assert api.get_api_vgs() == [{'vg_name': 'VolGroup00'}, {'vg_name': 'ceph_vg'}]
+ result = api.get_api_vgs()[0]
+ assert len(result.keys()) == 7
+ assert result['vg_name'] == 'VolGroup00'
+ assert result['vg_free'] == '9g'
- def test_does_not_get_poluted_with_non_vg_items(self, monkeypatch):
- report = '{"report":[{"vg":[{"vg_name":"VolGroup00"}],"lv":[{"lv":"1"}]}]}'
+ def test_report_has_multiple_items(self, monkeypatch):
+ report = [' VolGroup00;;;;;;;', ' ceph_vg;;;;;;;']
monkeypatch.setattr(api.process, 'call', lambda x: (report, '', 0))
- assert api.get_api_vgs() == [{'vg_name': 'VolGroup00'}]
+ result = api.get_api_vgs()
+ assert result[0]['vg_name'] == 'VolGroup00'
+ assert result[1]['vg_name'] == 'ceph_vg'
class TestGetAPILvs(object):
def test_report_is_emtpy(self, monkeypatch):
- monkeypatch.setattr(api.process, 'call', lambda x: ('{}', '', 0))
+ monkeypatch.setattr(api.process, 'call', lambda x: ('', '', 0))
assert api.get_api_lvs() == []
def test_report_has_stuff(self, monkeypatch):
- report = '{"report":[{"lv":[{"lv_name":"VolGroup00"}]}]}'
+ report = [' ;/path;VolGroup00;root']
monkeypatch.setattr(api.process, 'call', lambda x: (report, '', 0))
- assert api.get_api_lvs() == [{'lv_name': 'VolGroup00'}]
+ result = api.get_api_lvs()
+ assert result[0]['lv_name'] == 'VolGroup00'
def test_report_has_multiple_items(self, monkeypatch):
- report = '{"report":[{"lv":[{"lv_name":"VolName"},{"lv_name":"ceph_lv"}]}]}'
- monkeypatch.setattr(api.process, 'call', lambda x: (report, '', 0))
- assert api.get_api_lvs() == [{'lv_name': 'VolName'}, {'lv_name': 'ceph_lv'}]
-
- def test_does_not_get_poluted_with_non_lv_items(self, monkeypatch):
- report = '{"report":[{"lv":[{"lv_name":"VolName"}],"vg":[{"vg":"1"}]}]}'
+ report = [' ;/path;VolName;root', ';/dev/path;ceph_lv;ceph_vg']
monkeypatch.setattr(api.process, 'call', lambda x: (report, '', 0))
- assert api.get_api_lvs() == [{'lv_name': 'VolName'}]
+ result = api.get_api_lvs()
+ assert result[0]['lv_name'] == 'VolName'
+ assert result[1]['lv_name'] == 'ceph_lv'
@pytest.fixture
def volumes(monkeypatch):
- monkeypatch.setattr(process, 'call', lambda x: ('{}', '', 0))
+ monkeypatch.setattr(process, 'call', lambda x: ('', '', 0))
volumes = api.Volumes()
volumes._purge()
return volumes
@pytest.fixture
def volume_groups(monkeypatch):
- monkeypatch.setattr(process, 'call', lambda x: ('{}', '', 0))
+ monkeypatch.setattr(process, 'call', lambda x: ('', '', 0))
vgs = api.VolumeGroups()
vgs._purge()
return vgs
assert 'A logical group name or a path' in stdout
+class TestGetJournalLV(object):
+
+ @pytest.mark.parametrize('arg', ['', '///', None, '/dev/sda1'])
+ def test_no_journal_on_invalid_path(self, monkeypatch, arg):
+ monkeypatch.setattr(lvm.prepare.api, 'get_lv', lambda **kw: False)
+ prepare = lvm.prepare.Prepare([])
+ assert prepare.get_journal_lv(arg) is None
+
+ def test_no_journal_lv_found(self, monkeypatch):
+ # patch get_lv to return 0 so we know the code reached it
+ monkeypatch.setattr(lvm.prepare.api, 'get_lv', lambda **kw: 0)
+ prepare = lvm.prepare.Prepare([])
+ assert prepare.get_journal_lv('vg/lv') == 0
+
+
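A heavily hedged sketch of the behaviour those two tests pin down for Prepare.get_journal_lv; the real ceph_volume code and the api.get_lv signature may differ.

# Hypothetical sketch inferred only from the tests above, not the actual
# implementation: anything that is not 'vg/lv' yields None, otherwise the
# result of api.get_lv() is returned unchanged (0 in the second test).
def get_journal_lv(api, argument):
    if not argument or '/' not in argument:
        return None
    try:
        vg_name, lv_name = argument.split('/')
    except ValueError:
        return None
    if not vg_name or not lv_name:
        return None
    return api.get_lv(lv_name=lv_name, vg_name=vg_name)  # signature assumed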
class TestActivate(object):
def test_main_spits_help_with_no_arguments(self, capsys):
copy_admin_key: true
# test-volume is created by tests/functional/lvm_setup.yml from /dev/sda
lvm_volumes:
- test_volume: /dev/sdc
+ - data: test_volume
+ journal: /dev/sdc
+ data_vg: test_group
os_tuning_params:
- { name: kernel.pid_max, value: 4194303 }
- { name: fs.file-max, value: 26234859 }
copy_admin_key: true
# test-volume is created by tests/functional/lvm_setup.yml from /dev/sda
lvm_volumes:
- test_volume: /dev/sdc
+ - data: test_volume
+ journal: /dev/sdc
+ data_vg: test_group
os_tuning_params:
- { name: kernel.pid_max, value: 4194303 }
- { name: fs.file-max, value: 26234859 }
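For illustration, the new lvm_volumes entries above roughly translate into one ceph-volume call per item; the snippet below is a hypothetical rendering, not what ceph-ansible actually templates.

# Hypothetical illustration of one lvm_volumes entry from the playbook above.
lvm_volumes = [
    {'data': 'test_volume', 'journal': '/dev/sdc', 'data_vg': 'test_group'},
]

for item in lvm_volumes:
    # data_vg turns the bare LV name into the vg/lv form expected by LVPath
    data = '%s/%s' % (item['data_vg'], item['data']) if item.get('data_vg') \
        else item['data']
    cmd = ['ceph-volume', 'lvm', 'prepare', '--data', data,
           '--journal', item['journal']]
    print(' '.join(cmd))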
--- /dev/null
+import pytest
+import argparse
+from ceph_volume.util import arg_validators
+
+
+invalid_lv_paths = [
+ '', 'lv_name', '///', '/lv_name', 'lv_name/',
+ '/dev/lv_group/lv_name'
+]
+
+
+class TestLVPath(object):
+
+ def setup(self):
+ self.validator = arg_validators.LVPath()
+
+ @pytest.mark.parametrize('path', invalid_lv_paths)
+ def test_no_slash_is_an_error(self, path):
+ with pytest.raises(argparse.ArgumentError):
+ self.validator(path)
+
+ def test_is_valid(self):
+ path = 'vg/lv'
+ assert self.validator(path) == path
--- /dev/null
+import argparse
+
+
+class LVPath(object):
+ """
+ A simple validator to ensure that a logical volume is specified like::
+
+ <vg name>/<lv name>
+
+ Because for LVM it is better to be specific about which volume group an
+ LV belongs to.
+ """
+
+ def __call__(self, string):
+ error = None
+ try:
+ vg, lv = string.split('/')
+ except ValueError:
+ error = "Logical volume must be specified as 'volume_group/logical_volume' but got: %s" % string
+ raise argparse.ArgumentError(None, error)
+
+ if not vg:
+ error = "Didn't specify a volume group like 'volume_group/logical_volume', got: %s" % string
+ if not lv:
+ error = "Didn't specify a logical volume like 'volume_group/logical_volume', got: %s" % string
+
+ if error:
+ raise argparse.ArgumentError(None, error)
+ return string
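A short usage sketch for the validator above, assuming the module is importable as ceph_volume.util.arg_validators; the option name is illustrative.

import argparse

from ceph_volume.util.arg_validators import LVPath

parser = argparse.ArgumentParser()
# argparse calls the validator with the raw string and stores its return value
parser.add_argument('--data', type=LVPath())

args = parser.parse_args(['--data', 'test_group/test_volume'])
assert args.data == 'test_group/test_volume'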
#include "common/config.h"
#include "common/ceph_argparse.h"
#include "common/errno.h"
+#include "common/pick_address.h"
#include "global/global_init.h"
#include "mgr/MgrStandby.h"
usage();
}
+ pick_addresses(g_ceph_context, CEPH_PICK_ADDRESS_PUBLIC);
+
global_init_daemonize(g_ceph_context);
global_init_chdir(g_ceph_context);
common_init_finish(g_ceph_context);
cwd.swap(in);
ldout(cct, 3) << "chdir(" << relpath << ") cwd now " << cwd->ino << dendl;
- getcwd(new_cwd, perms);
+ _getcwd(new_cwd, perms);
return 0;
}
-void Client::getcwd(string& dir, const UserPerm& perms)
+void Client::_getcwd(string& dir, const UserPerm& perms)
{
filepath path;
ldout(cct, 10) << "getcwd " << *cwd << dendl;
dir += path.get_path();
}
+void Client::getcwd(string& dir, const UserPerm& perms)
+{
+ Mutex::Locker l(client_lock);
+ _getcwd(dir, perms);
+}
+
int Client::statfs(const char *path, struct statvfs *stbuf,
const UserPerm& perms)
{
// crap
int chdir(const char *s, std::string &new_cwd, const UserPerm& perms);
+ void _getcwd(std::string& cwd, const UserPerm& perms);
void getcwd(std::string& cwd, const UserPerm& perms);
// namespace ops
return 0;
}
- *sgids = (gid_t*)malloc(c*sizeof(**sgids));
- if (!*sgids) {
+ gid_t *gids = new (std::nothrow) gid_t[c];
+ if (!gids) {
return -ENOMEM;
}
- c = fuse_req_getgroups(req, c, *sgids);
+ c = fuse_req_getgroups(req, c, gids);
if (c < 0) {
- free(*sgids);
- return c;
+ delete[] gids;
+ } else {
+ *sgids = gids;
}
return c;
#endif
}
}
- if (ret.truncated) {
- ret.marker = marker;
- }
+ ret.marker = marker;
::encode(ret, *out);
}
do_queues();
}
+
+ /**
+ * Has reservations
+ *
+ * Return true if there are reservations in progress
+ */
+ bool has_reservation() {
+ Mutex::Locker l(lock);
+ return !in_progress.empty();
+ }
static const unsigned MAX_PRIORITY = (unsigned)-1;
};
// seq and who should be set for syslog/graylog/log_to_mon
e.who = parent->get_myinst();
e.name = parent->get_myname();
- e.seq = parent->get_next_seq();
e.prio = prio;
e.msg = s;
e.channel = get_log_channel();
+ // log to monitor?
+ if (log_to_monitors) {
+ e.seq = parent->queue(e);
+ } else {
+ e.seq = parent->get_next_seq();
+ }
+
// log to syslog?
if (do_log_to_syslog()) {
ldout(cct,0) << __func__ << " log to syslog" << dendl;
ldout(cct,0) << __func__ << " log to graylog" << dendl;
graylog->log_log_entry(&e);
}
-
- // log to monitor?
- if (log_to_monitors) {
- parent->queue(e);
- }
}
Message *LogClient::get_mon_log_message(bool flush)
Message *LogClient::_get_mon_log_message()
{
assert(log_lock.is_locked());
- if (log_queue.empty())
- return NULL;
+ if (log_queue.empty())
+ return NULL;
// only send entries that haven't been sent yet during this mon
// session! monclient needs to call reset_session() on mon session
version_t LogClient::queue(LogEntry &entry)
{
Mutex::Locker l(log_lock);
+ entry.seq = ++last_log;
log_queue.push_back(entry);
if (is_mon) {
uint64_t LogClient::get_next_seq()
{
+ Mutex::Locker l(log_lock);
return ++last_log;
}
bool is_mon;
Mutex log_lock;
version_t last_log_sent;
- std::atomic<uint64_t> last_log;
+ version_t last_log;
std::deque<LogEntry> log_queue;
std::map<std::string, LogChannelRef> channels;
}
-const struct sockaddr *find_ipv4_in_subnet(const struct ifaddrs *addrs,
+const struct ifaddrs *find_ipv4_in_subnet(const struct ifaddrs *addrs,
const struct sockaddr_in *net,
unsigned int prefix_len) {
struct in_addr want, temp;
netmask_ipv4(cur, prefix_len, &temp);
if (temp.s_addr == want.s_addr) {
- return addrs->ifa_addr;
+ return addrs;
}
}
}
-const struct sockaddr *find_ipv6_in_subnet(const struct ifaddrs *addrs,
+const struct ifaddrs *find_ipv6_in_subnet(const struct ifaddrs *addrs,
const struct sockaddr_in6 *net,
unsigned int prefix_len) {
struct in6_addr want, temp;
netmask_ipv6(cur, prefix_len, &temp);
if (IN6_ARE_ADDR_EQUAL(&temp, &want))
- return addrs->ifa_addr;
+ return addrs;
}
return NULL;
}
-const struct sockaddr *find_ip_in_subnet(const struct ifaddrs *addrs,
+const struct ifaddrs *find_ip_in_subnet(const struct ifaddrs *addrs,
const struct sockaddr *net,
unsigned int prefix_len) {
switch (net->sa_family) {
.set_description(""),
Option("osd_min_pg_log_entries", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
- .set_default(1500)
+ .set_default(3000)
.set_description("minimum number of entries to maintain in the PG log")
.add_service("osd")
.add_see_also("osd_max_pg_log_entries")
exit(1);
}
- const struct sockaddr *found = find_ip_in_subnet(ifa, (struct sockaddr *) &net, prefix_len);
+ const struct ifaddrs *found = find_ip_in_subnet(ifa,
+ (struct sockaddr *) &net, prefix_len);
if (found)
- return found;
+ return found->ifa_addr;
}
return NULL;
freeifaddrs(ifa);
}
+
+std::string pick_iface(CephContext *cct, const struct sockaddr_storage &network)
+{
+ struct ifaddrs *ifa;
+ int r = getifaddrs(&ifa);
+ if (r < 0) {
+ string err = cpp_strerror(errno);
+ lderr(cct) << "unable to fetch interfaces and addresses: " << err << dendl;
+ return {};
+ }
+
+ unsigned int prefix_len = 0;
+ const struct ifaddrs *found = find_ip_in_subnet(ifa,
+ (const struct sockaddr *) &network, prefix_len);
+
+ std::string result;
+ if (found) {
+ result = found->ifa_name;
+ }
+
+ freeifaddrs(ifa);
+
+ return result;
+}
+
+
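Conceptually, pick_iface walks the interface list and returns the name of the first address inside the configured network; a small Python rendering of the same subnet match (not Ceph code) is below.

# Conceptual sketch only: the ipaddress module stands in for
# find_ip_in_subnet(); interface data is made up for the example.
import ipaddress


def pick_iface(interfaces, network):
    net = ipaddress.ip_network(network)
    for name, addr in interfaces:
        if ipaddress.ip_address(addr) in net:
            return name
    return ''


print(pick_iface([('lo', '127.0.0.1'), ('eth0', '10.11.12.42')],
                 '10.11.12.0/24'))  # -> eth0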
bool have_local_addr(CephContext *cct, const list<entity_addr_t>& ls, entity_addr_t *match)
{
struct ifaddrs *ifa;
*/
void pick_addresses(CephContext *cct, int needs);
+/**
+ * Find a network interface whose address matches the address/netmask
+ * in `network`.
+ */
+std::string pick_iface(CephContext *cct, const struct sockaddr_storage &network);
+
/**
* check for a locally configured address
*
void shard_info_wrapper::encode(bufferlist& bl) const
{
- ENCODE_START(2, 1, bl);
+ ENCODE_START(3, 3, bl);
::encode(errors, bl);
+ ::encode(primary, bl);
if (has_shard_missing()) {
return;
}
void shard_info_wrapper::decode(bufferlist::iterator& bp)
{
- DECODE_START(2, bp);
+ DECODE_START(3, bp);
::decode(errors, bp);
+ ::decode(primary, bp);
if (has_shard_missing()) {
return;
}
::decode(omap_digest, bp);
::decode(data_digest_present, bp);
::decode(data_digest, bp);
- if (struct_v > 1)
- ::decode(selected_oi, bp);
+ ::decode(selected_oi, bp);
DECODE_FINISH(bp);
}
inconsistent_obj_wrapper::set_auth_missing(const hobject_t& hoid,
const map<pg_shard_t, ScrubMap*>& maps,
map<pg_shard_t, shard_info_wrapper> &shard_map,
- int &shallow_errors, int &deep_errors)
+ int &shallow_errors, int &deep_errors,
+ const pg_shard_t &primary)
{
for (auto pg_map : maps) {
auto oid_object = pg_map.second->objects.find(hoid);
+ shard_map[pg_map.first].primary = (pg_map.first == primary);
if (oid_object == pg_map.second->objects.end())
shard_map[pg_map.first].set_missing();
else
void set_ss_attr_corrupted() {
errors |= err_t::SS_ATTR_CORRUPTED;
}
+ void set_obj_size_oi_mismatch() {
+ errors |= err_t::OBJ_SIZE_OI_MISMATCH;
+ }
void encode(bufferlist& bl) const;
void decode(bufferlist::iterator& bp);
};
void set_auth_missing(const hobject_t& hoid,
const map<pg_shard_t, ScrubMap*>&,
map<pg_shard_t, shard_info_wrapper>&,
- int &shallow_errors, int &deep_errors);
+ int &shallow_errors, int &deep_errors,
+ const pg_shard_t &primary);
void set_version(uint64_t ver) { version = ver; }
void encode(bufferlist& bl) const;
void decode(bufferlist::iterator& bp);
case COMP_ALG_SNAPPY: return "snappy";
case COMP_ALG_ZLIB: return "zlib";
case COMP_ALG_ZSTD: return "zstd";
+#ifdef HAVE_LZ4
case COMP_ALG_LZ4: return "lz4";
+#endif
default: return "???";
}
}
return COMP_ALG_ZLIB;
if (s == "zstd")
return COMP_ALG_ZSTD;
+#ifdef HAVE_LZ4
if (s == "lz4")
return COMP_ALG_LZ4;
+#endif
if (s == "" || s == "none")
return COMP_ALG_NONE;
COMP_ALG_SNAPPY = 1,
COMP_ALG_ZLIB = 2,
COMP_ALG_ZSTD = 3,
+#ifdef HAVE_LZ4
COMP_ALG_LZ4 = 4,
+#endif
COMP_ALG_LAST //the last value for range checks
};
// compression options
if (verbose) err << "bucket " << name << " id " << maybe_id;
if (sub->children.size() > 2) {
string class_name = string_node(sub->children[3]);
- if (!crush.class_exists(class_name)) {
- err << " unknown device class '" << class_name << "'" << std::endl;
- return -EINVAL;
- }
- int cid = crush.get_class_id(class_name);
+ // note that we do not verify class existence here,
+ // as this bucket might come from an empty shadow tree
+ // which currently has no OSDs but is still referenced by a rule!
+ int cid = crush.get_or_create_class_id(class_name);
if (class_id.count(cid) != 0) {
err << "duplicate device class " << class_name << " for bucket " << name << std::endl;
return -ERANGE;
item_weight[id] = bucketweight;
assert(id != 0);
- int r = crush.add_bucket(id, alg, hash, type, size, &items[0], &weights[0], NULL);
+ int idout;
+ int r = crush.add_bucket(id, alg, hash, type, size,
+ &items[0], &weights[0], &idout);
if (r < 0) {
if (r == -EEXIST)
err << "Duplicate bucket id " << id << std::endl;
return set_item_name(oldid, dstname);
}
+int CrushWrapper::rename_rule(const string& srcname,
+ const string& dstname,
+ ostream *ss)
+{
+ if (!rule_exists(srcname)) {
+ if (ss) {
+ *ss << "source rule name '" << srcname << "' does not exist";
+ }
+ return -ENOENT;
+ }
+ if (rule_exists(dstname)) {
+ if (ss) {
+ *ss << "destination rule name '" << dstname << "' already exists";
+ }
+ return -EEXIST;
+ }
+ int rule_id = get_rule_id(srcname);
+ auto it = rule_name_map.find(rule_id);
+ assert(it != rule_name_map.end());
+ it->second = dstname;
+ if (have_rmaps) {
+ rule_name_rmap.erase(srcname);
+ rule_name_rmap[dstname] = rule_id;
+ }
+ return 0;
+}
+
void CrushWrapper::find_takes(set<int>& roots) const
{
for (unsigned i=0; i<crush->max_rules; i++) {
// swap names
swap_names(src, dst);
- return 0;
+ return rebuild_roots_with_classes();
}
int CrushWrapper::link_bucket(
crush->rules[ruleno] = NULL;
rule_name_map.erase(ruleno);
have_rmaps = false;
- return 0;
+ return rebuild_roots_with_classes();
}
int CrushWrapper::bucket_adjust_item_weight(CephContext *cct, crush_bucket *bucket, int item, int weight)
if (bucket->items[position] == item)
break;
assert(position != bucket->size);
- for (auto w : choose_args) {
- crush_choose_arg_map arg_map = w.second;
+ for (auto &w : choose_args) {
+ crush_choose_arg_map &arg_map = w.second;
crush_choose_arg *arg = &arg_map.args[-1-bucket->id];
for (__u32 j = 0; j < arg->weight_set_size; j++) {
crush_weight_set *weight_set = &arg->weight_set[j];
crush_bucket *b = crush_make_bucket(crush, alg, hash, type, size, items,
weights);
assert(b);
+ assert(idout);
int r = crush_add_bucket(crush, bucketno, b, idout);
+ int pos = -1 - *idout;
for (auto& p : choose_args) {
crush_choose_arg_map& cmap = p.second;
if (cmap.args) {
- if ((int)cmap.size <= *idout) {
+ if ((int)cmap.size <= pos) {
cmap.args = (crush_choose_arg*)realloc(
cmap.args,
- sizeof(crush_choose_arg) * (*idout + 1));
+ sizeof(crush_choose_arg) * (pos + 1));
+ assert(cmap.args);
memset(&cmap.args[cmap.size], 0,
- sizeof(crush_choose_arg) * (*idout + 1 - cmap.size));
- cmap.size = *idout + 1;
+ sizeof(crush_choose_arg) * (pos + 1 - cmap.size));
+ cmap.size = pos + 1;
}
} else {
cmap.args = (crush_choose_arg*)calloc(sizeof(crush_choose_arg),
- *idout + 1);
- cmap.size = *idout + 1;
+ pos + 1);
+ assert(cmap.args);
+ cmap.size = pos + 1;
}
if (size > 0) {
int positions = get_choose_args_positions(cmap);
- crush_choose_arg& carg = cmap.args[*idout];
+ crush_choose_arg& carg = cmap.args[pos];
carg.weight_set = (crush_weight_set*)calloc(sizeof(crush_weight_set),
size);
carg.weight_set_size = positions;
if (r < 0) {
return r;
}
- for (auto w : choose_args) {
- crush_choose_arg_map arg_map = w.second;
+ for (auto &w : choose_args) {
+ crush_choose_arg_map &arg_map = w.second;
crush_choose_arg *arg = &arg_map.args[-1-bucket->id];
for (__u32 j = 0; j < arg->weight_set_size; j++) {
crush_weight_set *weight_set = &arg->weight_set[j];
if (r < 0) {
return r;
}
- for (auto w : choose_args) {
- crush_choose_arg_map arg_map = w.second;
+ for (auto &w : choose_args) {
+ crush_choose_arg_map &arg_map = w.second;
crush_choose_arg *arg = &arg_map.args[-1-bucket->id];
for (__u32 j = 0; j < arg->weight_set_size; j++) {
crush_weight_set *weight_set = &arg->weight_set[j];
unsigned new_size = -1-bno + 1;
cmap.args = (crush_choose_arg*)realloc(cmap.args,
new_size * sizeof(cmap.args[0]));
+ assert(cmap.args);
memset(cmap.args + cmap.size, 0,
(new_size - cmap.size) * sizeof(cmap.args[0]));
+ cmap.size = new_size;
}
auto& o = cmap.args[-1-original_id];
auto& n = cmap.args[-1-bno];
return 0;
}
+int CrushWrapper::get_rules_by_class(const string &class_name, set<int> *rules)
+{
+ assert(rules);
+ rules->clear();
+ if (!class_exists(class_name)) {
+ return -ENOENT;
+ }
+ int class_id = get_class_id(class_name);
+ for (unsigned i = 0; i < crush->max_rules; ++i) {
+ crush_rule *r = crush->rules[i];
+ if (!r)
+ continue;
+ for (unsigned j = 0; j < r->len; ++j) {
+ if (r->steps[j].op == CRUSH_RULE_TAKE) {
+ int step_item = r->steps[j].arg1;
+ int original_item;
+ int c;
+ int res = split_id_class(step_item, &original_item, &c);
+ if (res < 0) {
+ return res;
+ }
+ if (c != -1 && c == class_id) {
+ rules->insert(i);
+ break;
+ }
+ }
+ }
+ }
+ return 0;
+}
+
bool CrushWrapper::_class_is_dead(int class_id)
{
for (auto &p: class_map) {
__u32 choose_args_size;
::decode(choose_args_size, blp);
for (__u32 i = 0; i < choose_args_size; i++) {
- uint64_t choose_args_index;
+ typename decltype(choose_args)::key_type choose_args_index;
::decode(choose_args_index, blp);
crush_choose_arg_map arg_map;
arg_map.size = crush->max_buckets;
ostream *ss);
// rule names
+ int rename_rule(const string& srcname,
+ const string& dstname,
+ ostream *ss);
bool rule_exists(string name) const {
build_rmaps();
return rule_name_rmap.count(name);
int rename_class(const string& srcname, const string& dstname);
int populate_classes(
const std::map<int32_t, map<int32_t, int32_t>>& old_class_bucket);
+ int get_rules_by_class(const string &class_name, set<int> *rules);
bool _class_is_dead(int class_id);
void cleanup_dead_classes();
int rebuild_roots_with_classes();
If there are multiple matches, the first one is returned; this order
is system-dependent and should not be relied on.
*/
-const struct sockaddr *find_ip_in_subnet(const struct ifaddrs *addrs,
+const struct ifaddrs *find_ip_in_subnet(const struct ifaddrs *addrs,
const struct sockaddr *net,
unsigned int prefix_len);
OI_ATTR_MISSING = 1 << 14,
OI_ATTR_CORRUPTED = 1 << 15,
SS_ATTR_MISSING = 1 << 16,
- SS_ATTR_CORRUPTED = 1 << 17
+ SS_ATTR_CORRUPTED = 1 << 17,
+ OBJ_SIZE_OI_MISMATCH = 1 << 18
// When adding more here add to either SHALLOW_ERRORS or DEEP_ERRORS
};
uint64_t errors = 0;
- static constexpr uint64_t SHALLOW_ERRORS = SHARD_MISSING|SHARD_STAT_ERR|SIZE_MISMATCH_OI|OI_ATTR_MISSING|OI_ATTR_CORRUPTED|SS_ATTR_MISSING|SS_ATTR_CORRUPTED;
+ static constexpr uint64_t SHALLOW_ERRORS = SHARD_MISSING|SHARD_STAT_ERR|SIZE_MISMATCH_OI|OI_ATTR_MISSING|OI_ATTR_CORRUPTED|SS_ATTR_MISSING|SS_ATTR_CORRUPTED|OBJ_SIZE_OI_MISMATCH;
static constexpr uint64_t DEEP_ERRORS = SHARD_READ_ERR|DATA_DIGEST_MISMATCH_OI|OMAP_DIGEST_MISMATCH_OI|SHARD_EC_HASH_MISMATCH|SHARD_EC_SIZE_MISMATCH;
bool has_shard_missing() const {
return errors & SHARD_MISSING;
bool has_deep_errors() const {
return errors & DEEP_ERRORS;
}
+ bool has_obj_size_oi_mismatch() const {
+ return errors & OBJ_SIZE_OI_MISMATCH;
+ }
};
struct shard_info_t : err_t {
bool data_digest_present = false;
uint32_t data_digest = 0;
bool selected_oi = false;
+ bool primary = false;
};
struct osd_shard_t {
if (caps_info.allow_all) {
// Flag for auth providers that don't provide cap strings
s->auth_caps.set_allow_all();
- }
-
- bufferlist::iterator p = caps_info.caps.begin();
- string auth_cap_str;
- try {
- ::decode(auth_cap_str, p);
-
- dout(10) << __func__ << ": parsing auth_cap_str='" << auth_cap_str << "'" << dendl;
- std::ostringstream errstr;
- if (!s->auth_caps.parse(g_ceph_context, auth_cap_str, &errstr)) {
- dout(1) << __func__ << ": auth cap parse error: " << errstr.str()
- << " parsing '" << auth_cap_str << "'" << dendl;
- clog->warn() << name << " mds cap '" << auth_cap_str
- << "' does not parse: " << errstr.str();
+ } else {
+ bufferlist::iterator p = caps_info.caps.begin();
+ string auth_cap_str;
+ try {
+ ::decode(auth_cap_str, p);
+
+ dout(10) << __func__ << ": parsing auth_cap_str='" << auth_cap_str << "'" << dendl;
+ std::ostringstream errstr;
+ if (!s->auth_caps.parse(g_ceph_context, auth_cap_str, &errstr)) {
+ dout(1) << __func__ << ": auth cap parse error: " << errstr.str()
+ << " parsing '" << auth_cap_str << "'" << dendl;
+ clog->warn() << name << " mds cap '" << auth_cap_str
+ << "' does not parse: " << errstr.str();
+ is_valid = false;
+ }
+ } catch (buffer::error& e) {
+ // Assume legacy auth, defaults to:
+ // * permit all filesystem ops
+ // * permit no `tell` ops
+ dout(1) << __func__ << ": cannot decode auth caps bl of length " << caps_info.caps.length() << dendl;
+ is_valid = false;
}
- } catch (buffer::error& e) {
- // Assume legacy auth, defaults to:
- // * permit all filesystem ops
- // * permit no `tell` ops
- dout(1) << __func__ << ": cannot decode auth caps bl of length " << caps_info.caps.length() << dendl;
}
}
class MStatfs : public PaxosServiceMessage {
static const int HEAD_VERSION = 2;
- static const int COMPAT_VERSION = 0;
+ static const int COMPAT_VERSION = 1;
public:
uuid_d fsid;
boost::optional<int64_t> data_pool;
- MStatfs() : PaxosServiceMessage(CEPH_MSG_STATFS, 0, HEAD_VERSION) {}
+ MStatfs() : PaxosServiceMessage(CEPH_MSG_STATFS, 0, HEAD_VERSION, COMPAT_VERSION) {}
MStatfs(const uuid_d& f, ceph_tid_t t, boost::optional<int64_t> _data_pool,
version_t v) : PaxosServiceMessage(CEPH_MSG_STATFS, v,
HEAD_VERSION, COMPAT_VERSION),
return 0;
}
- mon->osdmon()->do_application_enable(poolid,
- pg_pool_t::APPLICATION_NAME_CEPHFS);
+ // if we're running as luminous, we have to set the pool application metadata
+ if (mon->osdmon()->osdmap.require_osd_release >= CEPH_RELEASE_LUMINOUS ||
+ mon->osdmon()->pending_inc.new_require_osd_release >= CEPH_RELEASE_LUMINOUS) {
+ if (!mon->osdmon()->is_writeable()) {
+ // not allowed to write yet, so retry when we can
+ mon->osdmon()->wait_for_writeable(op, new PaxosService::C_RetryMessage(mon->mdsmon(), op));
+ return -EAGAIN;
+ }
+ mon->osdmon()->do_application_enable(poolid, pg_pool_t::APPLICATION_NAME_CEPHFS);
+ mon->osdmon()->propose_pending();
+ }
fsmap.modify_filesystem(
fs->fscid,
COMMAND_WITH_FLAG("osd crush rule list", "list crush rules", "osd", "r", "cli,rest",
FLAG(DEPRECATED))
COMMAND("osd crush rule ls", "list crush rules", "osd", "r", "cli,rest")
+COMMAND("osd crush rule ls-by-class " \
+ "name=class,type=CephString,goodchars=[A-Za-z0-9-_.]", \
+ "list all crush rules that reference the same <class>", \
+ "osd", "r", "cli,rest")
COMMAND("osd crush rule dump " \
"name=name,type=CephString,goodchars=[A-Za-z0-9-_.],req=false", \
"dump crush rule <name> (default all)", \
COMMAND("osd crush rule rm " \
"name=name,type=CephString,goodchars=[A-Za-z0-9-_.] ", \
"remove crush rule <name>", "osd", "rw", "cli,rest")
+COMMAND("osd crush rule rename " \
+ "name=srcname,type=CephString,goodchars=[A-Za-z0-9-_.] " \
+ "name=dstname,type=CephString,goodchars=[A-Za-z0-9-_.]", \
+ "rename crush rule <srcname> to <dstname>",
+ "osd", "rw", "cli,rest")
COMMAND("osd crush tree "
"name=shadow,type=CephChoices,strings=--show-shadow,req=false", \
"dump crush buckets and items in a tree view",
mgr_messenger->add_dispatcher_tail(this); // for auth ms_* calls
bootstrap();
+ // add features of myself into feature_map
+ session_map.feature_map.add_mon(con_self->get_features());
return 0;
}
if (f) {
f->dump_stream("fsid") << monmap->get_fsid();
- get_health_status(false, f, nullptr);
+ if (osdmon()->osdmap.require_osd_release >= CEPH_RELEASE_LUMINOUS) {
+ get_health_status(false, f, nullptr);
+ } else {
+ list<string> health_str;
+ get_health(health_str, nullptr, f);
+ }
f->dump_unsigned("election_epoch", get_epoch());
{
f->open_array_section("quorum");
dout(30) << __func__ << " osd." << osd << " next=" << next
<< " " << creating_pgs_by_osd_epoch << dendl;
std::lock_guard<std::mutex> l(creating_pgs_lock);
+ if (creating_pgs_epoch <= creating_pgs.last_scan_epoch) {
+ dout(20) << __func__
+ << " not using stale creating_pgs@" << creating_pgs_epoch << dendl;
+ // the subscribers will be updated when the mapping is completed anyway
+ return next;
+ }
auto creating_pgs_by_epoch = creating_pgs_by_osd_epoch.find(osd);
if (creating_pgs_by_epoch == creating_pgs_by_osd_epoch.end())
return next;
osdmap.crush->list_rules(&ss);
rdata.append(ss.str());
}
+ } else if (prefix == "osd crush rule ls-by-class") {
+ string class_name;
+ cmd_getval(g_ceph_context, cmdmap, "class", class_name);
+ if (class_name.empty()) {
+ ss << "no class specified";
+ r = -EINVAL;
+ goto reply;
+ }
+ set<int> rules;
+ r = osdmap.crush->get_rules_by_class(class_name, &rules);
+ if (r < 0) {
+ ss << "failed to get rules by class '" << class_name << "'";
+ goto reply;
+ }
+ if (f) {
+ f->open_array_section("rules");
+ for (auto &rule: rules) {
+ f->dump_string("name", osdmap.crush->get_rule_name(rule));
+ }
+ f->close_section();
+ f->flush(rdata);
+ } else {
+ ostringstream rs;
+ for (auto &rule: rules) {
+ rs << osdmap.crush->get_rule_name(rule) << "\n";
+ }
+ rdata.append(rs.str());
+ }
} else if (prefix == "osd crush rule dump") {
string name;
cmd_getval(g_ceph_context, cmdmap, "name", name);
} else if (prefix == "osd crush class ls-osd") {
string name;
cmd_getval(g_ceph_context, cmdmap, "class", name);
- boost::scoped_ptr<Formatter> f(Formatter::create(format, "json-pretty", "json-pretty"));
set<int> osds;
osdmap.crush->get_devices_by_class(name, &osds);
- f->open_array_section("osds");
- for (auto& osd : osds)
- f->dump_int("osd", osd);
- f->close_section();
- f->flush(rdata);
+ if (f) {
+ f->open_array_section("osds");
+ for (auto &osd: osds)
+ f->dump_int("osd", osd);
+ f->close_section();
+ f->flush(rdata);
+ } else {
+ bool first = true;
+ for (auto &osd : osds) {
+ if (!first)
+ ds << "\n";
+ first = false;
+ ds << osd;
+ }
+ rdata.append(ds);
+ }
} else if (prefix == "osd erasure-code-profile ls") {
const auto &profiles = osdmap.get_erasure_code_profiles();
if (f)
_get_pending_crush(newcrush);
ostringstream err;
CrushTester tester(newcrush, err);
+ tester.set_min_x(0);
tester.set_max_x(50);
tester.set_rule(crush_rule);
+ auto start = ceph::coarse_mono_clock::now();
r = tester.test_with_fork(g_conf->mon_lease);
+ auto duration = ceph::coarse_mono_clock::now() - start;
if (r < 0) {
dout(10) << " tester.test_with_fork returns " << r
<< ": " << err.str() << dendl;
*ss << "crush test failed with " << r << ": " << err.str();
return r;
}
+ dout(10) << __func__ << " crush somke test duration: "
+ << duration << dendl;
}
unsigned size, min_size;
r = prepare_pool_size(pool_type, erasure_code_profile, &size, &min_size, ss);
{
dout(10) << __func__ << " id " << id << " uuid " << uuid << dendl;
assert(existing_id);
+ if (osdmap.is_destroyed(id)) {
+ ss << "ceph osd create has been deprecated. Please use ceph osd new "
+ "instead.";
+ return -EINVAL;
+ }
if (uuid.is_zero()) {
dout(10) << __func__ << " no uuid; assuming legacy `osd create`" << dendl;
dout(10) << " testing map" << dendl;
stringstream ess;
CrushTester tester(crush, ess);
+ tester.set_min_x(0);
tester.set_max_x(50);
+ auto start = ceph::coarse_mono_clock::now();
int r = tester.test_with_fork(g_conf->mon_lease);
+ auto duration = ceph::coarse_mono_clock::now() - start;
if (r < 0) {
dout(10) << " tester.test_with_fork returns " << r
<< ": " << ess.str() << dendl;
err = r;
goto reply;
}
- dout(10) << " crush test result " << ess.str() << dendl;
+ dout(10) << __func__ << " crush somke test duration: "
+ << duration << ", result: " << ess.str() << dendl;
}
pending_inc.crush = data;
get_last_committed() + 1));
return true;
+ } else if (prefix == "osd crush rule rename") {
+ string srcname;
+ string dstname;
+ cmd_getval(g_ceph_context, cmdmap, "srcname", srcname);
+ cmd_getval(g_ceph_context, cmdmap, "dstname", dstname);
+ if (srcname.empty() || dstname.empty()) {
+ ss << "must specify both source rule name and destination rule name";
+ err = -EINVAL;
+ goto reply;
+ }
+ if (srcname == dstname) {
+ ss << "destination rule name is equal to source rule name";
+ err = 0;
+ goto reply;
+ }
+
+ CrushWrapper newcrush;
+ _get_pending_crush(newcrush);
+ err = newcrush.rename_rule(srcname, dstname, &ss);
+ if (err < 0) {
+ // ss has reason for failure
+ goto reply;
+ }
+ pending_inc.crush.clear();
+ newcrush.encode(pending_inc.crush, mon->get_quorum_con_features());
+ getline(ss, rs);
+ wait_for_finished_proposal(op, new Monitor::C_Command(mon, op, 0, rs,
+ get_last_committed() + 1));
+ return true;
+
} else if (prefix == "osd setmaxosd") {
int64_t newmax;
if (!cmd_getval(g_ceph_context, cmdmap, "newmax", newmax)) {
} else {
ostringstream ss;
ss << delta_sum.stats.sum.num_objects_unfound
- << "/" << delta_sum.stats.sum.num_objects << " unfound (" << b << "%)";
+ << "/" << delta_sum.stats.sum.num_objects << " objects unfound (" << b << "%)";
psl->push_back(ss.str());
}
}
snprintf(b, sizeof(b), "%.3lf", pc);
ostringstream ss;
ss << pg_sum.stats.sum.num_objects_unfound
- << "/" << pg_sum.stats.sum.num_objects << " unfound (" << b << "%)";
+ << "/" << pg_sum.stats.sum.num_objects << " objects unfound (" << b << "%)";
auto& d = checks->add("OBJECT_UNFOUND", HEALTH_WARN, ss.str());
for (auto& p : pg_stat) {
}
if (!error_detail.empty()) {
ostringstream ss;
- ss << warn << " stuck requests are blocked > "
+ ss << error << " stuck requests are blocked > "
<< err_age << " sec";
auto& d = checks->add("REQUEST_STUCK", HEALTH_ERR, ss.str());
d.detail.swap(error_detail);
if (pools && pools->count(pg.first.pool()) == 0)
continue;
for (const auto acting : pg.second.acting) {
+ if (!osdmap.exists(acting)) {
+ continue;
+ }
if (acting >= (int)pgs_by_osd.size())
pgs_by_osd.resize(acting);
if (pgs_by_osd[acting] == 0) {
std::map<uint32_t,std::map<uint64_t,uint64_t>> m;
void add(uint32_t type, uint64_t features) {
+ if (type == CEPH_ENTITY_TYPE_MON) {
+ return;
+ }
m[type][features]++;
}
+ void add_mon(uint64_t features) {
+ m[CEPH_ENTITY_TYPE_MON][features]++;
+ }
+
void rm(uint32_t type, uint64_t features) {
+ if (type == CEPH_ENTITY_TYPE_MON) {
+ return;
+ }
auto p = m.find(type);
assert(p != m.end());
auto q = p->second.find(features);
f->open_object_section(ceph_entity_type_name(p.first));
for (auto& q : p.second) {
f->open_object_section("group");
- f->dump_unsigned("features", q.first);
+ std::stringstream ss;
+ ss << "0x" << std::hex << q.first << std::dec;
+ f->dump_string("features", ss.str());
f->dump_string("release", ceph_release_name(
ceph_release_from_features(q.first)));
f->dump_unsigned("num", q.second);
}
fm->enumerate_reset();
size_t count = used_blocks.count();
+ if (used_blocks.size() == count + 1) {
+ // this is due to http://tracker.ceph.com/issues/21089
+ bufferlist fm_bpb_bl, fm_blocks_bl, fm_bpk_bl;
+ db->get(PREFIX_ALLOC, "bytes_per_block", &fm_bpb_bl);
+ db->get(PREFIX_ALLOC, "blocks", &fm_blocks_bl);
+ db->get(PREFIX_ALLOC, "blocks_per_key", &fm_bpk_bl);
+ uint64_t fm_blocks = 0;
+ uint64_t fm_bsize = 1;
+ uint64_t fm_blocks_per_key = 1;
+ try {
+ auto p = fm_blocks_bl.begin();
+ ::decode(fm_blocks, p);
+ auto q = fm_bpb_bl.begin();
+ ::decode(fm_bsize, q);
+ auto r = fm_bpk_bl.begin();
+ ::decode(fm_blocks_per_key, r);
+ } catch (buffer::error& e) {
+ }
+ uint64_t dev_bsize = bdev->get_block_size();
+ uint64_t bad_size = bdev->get_size() & ~fm_bsize;
+ if (used_blocks.test(bad_size / dev_bsize) == 0) {
+ // this is the last block of the device that we previously
+ // (incorrectly) truncated off of the effective device size. this
+ // prevented BitmapFreelistManager from marking it as used along with
+ // the other "past-eof" blocks in the last key slot. mark it used
+ // now.
+ derr << __func__ << " warning: fixing leaked block 0x" << std::hex
+ << bad_size << "~" << fm_bsize << std::dec << " due to old bug"
+ << dendl;
+ KeyValueDB::Transaction t = db->get_transaction();
+ // fix freelistmanager metadata (the internal 'blocks' count is
+ // rounded up to include the trailing key, past eof)
+ uint64_t new_blocks = bdev->get_size() / fm_bsize;
+ if (new_blocks / fm_blocks_per_key * fm_blocks_per_key != new_blocks) {
+ new_blocks = (new_blocks / fm_blocks_per_key + 1) *
+ fm_blocks_per_key;
+ }
+ if (new_blocks != fm_blocks) {
+ // the fm block count increased
+ derr << __func__ << " freelist block and key count changed, fixing 0x"
+ << std::hex << bdev->get_size() << "~"
+ << ((new_blocks * fm_bsize) - bdev->get_size()) << std::dec
+ << dendl;
+ bufferlist bl;
+ ::encode(new_blocks, bl);
+ t->set(PREFIX_ALLOC, "blocks", bl);
+ fm->allocate(bdev->get_size(),
+ (new_blocks * fm_bsize) - bdev->get_size(),
+ t);
+ } else {
+ // block count is the same, but size changed; fix just the size
+ derr << __func__ << " fixing just the stray block at 0x"
+ << std::hex << bad_size << "~" << fm_bsize << std::dec << dendl;
+ fm->allocate(bad_size, fm_bsize, t);
+ }
+ bufferlist sizebl;
+ ::encode(bdev->get_size(), sizebl);
+ t->set(PREFIX_ALLOC, "size", sizebl);
+ int r = db->submit_transaction_sync(t);
+ assert(r == 0);
+
+ used_blocks.set(bad_size / dev_bsize);
+ ++count;
+ }
+ }
if (used_blocks.size() != count) {
assert(used_blocks.size() > count);
- derr << __func__ << " error: leaked some space;"
- << (used_blocks.size() - count) * min_alloc_size
- << " bytes leaked" << dendl;
++errors;
+ used_blocks.flip();
+ size_t start = used_blocks.find_first();
+ while (start != decltype(used_blocks)::npos) {
+ size_t cur = start;
+ while (true) {
+ size_t next = used_blocks.find_next(cur);
+ if (next != cur + 1) {
+ derr << __func__ << " error: leaked extent 0x" << std::hex
+ << ((uint64_t)start * block_size) << "~"
+ << ((cur + 1 - start) * block_size) << std::dec
+ << dendl;
+ start = next;
+ break;
+ }
+ cur = next;
+ }
+ }
+ used_blocks.flip();
}
}
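The repair above rounds the freelist block count up to a whole number of keys; a worked example of that arithmetic with assumed sizes follows.

# Assumed numbers, mirroring the round-up in the fsck fix above.
dev_size = 10 * 1024 ** 3 + 4096   # device is one 4K block past a key boundary
fm_bsize = 4096                    # freelist block size
fm_blocks_per_key = 128            # blocks tracked per freelist key

new_blocks = dev_size // fm_bsize
if new_blocks % fm_blocks_per_key:
    new_blocks = (new_blocks // fm_blocks_per_key + 1) * fm_blocks_per_key

print(new_blocks)                        # 2621568, a multiple of 128
print(new_blocks * fm_bsize - dev_size)  # 520192 bytes past eof marked used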
#include "common/ceph_time.h"
#include "common/version.h"
#include "common/io_priority.h"
+#include "common/pick_address.h"
#include "os/ObjectStore.h"
#ifdef HAVE_LIBFUSE
if (pool < 0 && isdigit(poolstr[0]))
pool = atoll(poolstr.c_str());
if (pool < 0) {
- ss << "Invalid pool" << poolstr;
+ ss << "Invalid pool '" << poolstr << "''";
return;
}
collect_sys_info(pm, cct);
+ std::string front_iface, back_iface;
+ /*
+ pick_iface(cct,
+ CEPH_PICK_ADDRESS_PUBLIC | CEPH_PICK_ADDRESS_CLUSTER,
+ &front_iface, &back_iface);
+ */
+ (*pm)["front_iface"] = pick_iface(cct,
+ client_messenger->get_myaddr().get_sockaddr_storage());
+ (*pm)["back_iface"] = pick_iface(cct,
+ cluster_messenger->get_myaddr().get_sockaddr_storage());
+
dout(10) << __func__ << " " << *pm << dendl;
}
if (!service.can_inc_scrubs_pending()) {
return;
}
+ if (!cct->_conf->osd_scrub_during_recovery && service.is_recovery_active()) {
+ dout(20) << __func__ << " not scheduling scrubs due to active recovery" << dendl;
+ return;
+ }
+
utime_t now = ceph_clock_now();
bool time_permit = scrub_time_permit(now);
break;
}
- if (!cct->_conf->osd_scrub_during_recovery && service.is_recovery_active()) {
- dout(10) << __func__ << "not scheduling scrub of " << scrub.pgid << " due to active recovery ops" << dendl;
- break;
- }
-
if ((scrub.deadline >= now) && !(time_permit && load_is_low)) {
dout(10) << __func__ << " not scheduling scrub for " << scrub.pgid << " due to "
<< (!time_permit ? "time not permit" : "high load") << dendl;
bool OSDService::is_recovery_active()
{
- Mutex::Locker l(recovery_lock);
- return recovery_ops_active > 0;
+ return local_reserver.has_reservation() || remote_reserver.has_reservation();
}
// =========================================================
mempool::osdmap_mapping::vector<
mempool::osdmap_mapping::vector<pg_t>> acting_rmap; // osd -> pg
//unused: mempool::osdmap_mapping::vector<std::vector<pg_t>> up_rmap; // osd -> pg
- epoch_t epoch;
+ epoch_t epoch = 0;
uint64_t num_pgs = 0;
void _init_mappings(const OSDMap& osdmap);
bool ret = true;
if (!scrubber.reserved) {
assert(scrubber.reserved_peers.empty());
- if (osd->inc_scrubs_pending()) {
- dout(20) << "sched_scrub: reserved locally, reserving replicas" << dendl;
+ if ((cct->_conf->osd_scrub_during_recovery || !osd->is_recovery_active()) &&
+ osd->inc_scrubs_pending()) {
+ dout(20) << __func__ << ": reserved locally, reserving replicas" << dendl;
scrubber.reserved = true;
scrubber.reserved_peers.insert(pg_whoami);
scrub_reserve_replicas();
} else {
- dout(20) << "sched_scrub: failed to reserve locally" << dendl;
+ dout(20) << __func__ << ": failed to reserve locally" << dendl;
ret = false;
}
}
<< dendl;
return;
}
- scrubber.reserved = osd->inc_scrubs_pending();
+ if ((cct->_conf->osd_scrub_during_recovery || !osd->is_recovery_active()) &&
+ osd->inc_scrubs_pending()) {
+ scrubber.reserved = true;
+ } else {
+ dout(20) << __func__ << ": failed to reserve remotely" << dendl;
+ scrubber.reserved = false;
+ }
if (op->get_req()->get_type() == MSG_OSD_SCRUB_RESERVE) {
const MOSDScrubReserve *m =
static_cast<const MOSDScrubReserve*>(op->get_req());
continue;
}
head = hoid.get_head();
+ // Make sure head_exists is correct for is_legacy() check
+ if (hoid.is_head())
+ snapset.head_exists = true;
continue;
}
if (hoid.snap < CEPH_MAXSNAP) {
[=](int r) {
if (r != -EAGAIN) {
get_parent()->send_message_osd_cluster(reply, conn.get());
+ } else {
+ reply->put();
}
}));
gather.activate();
for (map<string,bufferptr>::const_iterator i = auth.attrs.begin();
i != auth.attrs.end();
++i) {
+ // We check system keys separately
+ if (i->first == OI_ATTR || i->first == SS_ATTR)
+ continue;
if (!candidate.attrs.count(i->first)) {
if (error != CLEAN)
errorstream << ", ";
for (map<string,bufferptr>::const_iterator i = candidate.attrs.begin();
i != candidate.attrs.end();
++i) {
+ // We check system keys separately
+ if (i->first == OI_ATTR || i->first == SS_ATTR)
+ continue;
if (!auth.attrs.count(i->first)) {
if (error != CLEAN)
errorstream << ", ";
inconsistent_obj_wrapper &object_error)
{
eversion_t auth_version;
- bufferlist auth_bl;
+ bufferlist first_bl;
// Create list of shards with primary last so it will be auth copy all
// other things being equal.
}
string error_string;
auto& shard_info = shard_map[j->first];
+ if (j->first == get_parent()->whoami_shard())
+ shard_info.primary = true;
if (i->second.read_error) {
shard_info.set_read_error();
error_string += " read_error";
goto out;
}
+ // We won't pick an auth copy if the snapset is missing or won't decode.
+ if (obj.is_head() || obj.is_snapdir()) {
+ k = i->second.attrs.find(SS_ATTR);
+ if (k == i->second.attrs.end()) {
+ shard_info.set_ss_attr_missing();
+ error_string += " ss_attr_missing";
+ } else {
+ ss_bl.push_back(k->second);
+ try {
+ bufferlist::iterator bliter = ss_bl.begin();
+ ::decode(ss, bliter);
+ } catch (...) {
+ // invalid snapset, probably corrupt
+ shard_info.set_ss_attr_corrupted();
+ error_string += " ss_attr_corrupted";
+ }
+ }
+ }
+
k = i->second.attrs.find(OI_ATTR);
if (k == i->second.attrs.end()) {
// no object info on object, probably corrupt
goto out;
}
- if (oi.soid != obj) {
- shard_info.set_oi_attr_corrupted();
- error_string += " oi_attr_corrupted";
- goto out;
+ // This is automatically corrected in PG::_repair_oinfo_oid()
+ assert(oi.soid == obj);
+
+ if (first_bl.length() == 0) {
+ first_bl.append(bl);
+ } else if (!object_error.has_object_info_inconsistency() && !bl.contents_equal(first_bl)) {
+ object_error.set_object_info_inconsistency();
+ error_string += " object_info_inconsistency";
}
- if (auth_version != eversion_t()) {
- if (!object_error.has_object_info_inconsistency() && !(bl == auth_bl)) {
- object_error.set_object_info_inconsistency();
- error_string += " object_info_inconsistency";
- }
+ if (i->second.size != be_get_ondisk_size(oi.size)) {
+ dout(5) << __func__ << " size " << i->second.size << " oi size " << oi.size << dendl;
+ shard_info.set_obj_size_oi_mismatch();
+ error_string += " obj_size_oi_mismatch";
}
- // Don't use this particular shard because it won't be able to repair data
- // XXX: For now we can't pick one shard for repair and another's object info
- if (i->second.read_error || i->second.ec_hash_mismatch || i->second.ec_size_mismatch)
+ // Don't use this particular shard due to previous errors
+ // XXX: For now we can't pick one shard for repair and another's object info or snapset
+ if (shard_info.errors)
goto out;
- // We don't set errors here for snapset, but we won't pick an auth copy if the
- // snapset is missing or won't decode.
- if (obj.is_head() || obj.is_snapdir()) {
- k = i->second.attrs.find(SS_ATTR);
- if (k == i->second.attrs.end()) {
- goto out;
- }
- ss_bl.push_back(k->second);
- try {
- bufferlist::iterator bliter = ss_bl.begin();
- ::decode(ss, bliter);
- } catch (...) {
- // invalid snapset, probably corrupt
- goto out;
- }
- }
-
if (auth_version == eversion_t() || oi.version > auth_version ||
(oi.version == auth_version && dcount(oi) > dcount(*auth_oi))) {
auth = j;
*auth_oi = oi;
auth_version = oi.version;
- auth_bl.clear();
- auth_bl.append(bl);
}
out:
set<pg_shard_t> object_errors;
if (auth == maps.end()) {
object_error.set_version(0);
- object_error.set_auth_missing(*k, maps, shard_map, shallow_errors, deep_errors);
+ object_error.set_auth_missing(*k, maps, shard_map, shallow_errors,
+ deep_errors, get_parent()->whoami_shard());
if (object_error.has_deep_errors())
++deep_errors;
else if (object_error.has_shallow_errors())
} else {
cur_missing.insert(j->first);
shard_map[j->first].set_missing();
+ shard_map[j->first].primary = (j->first == get_parent()->whoami_shard());
// Can't have any other errors if there is no information available
++shallow_errors;
errorstream << pgid << " shard " << j->first << " missing " << *k
<< " snapset.head_exists=false, but head exists";
++scrubber.shallow_errors;
head_error.set_head_mismatch();
+ // Fix head_exists locally so is_legacy() returns correctly
+ snapset->head_exists = true;
}
if (soid.is_snapdir() && snapset->head_exists) {
osd->clog->error() << mode << " " << info.pgid << " " << soid
<< " snapset.head_exists=true, but snapdir exists";
++scrubber.shallow_errors;
head_error.set_head_mismatch();
+ // For symmetry fix this too, but probably doesn't matter
+ snapset->head_exists = false;
}
if (get_osdmap()->require_osd_release >= CEPH_RELEASE_LUMINOUS) {
}
daemons[server['hostname']] = daemon
- image = images.get(service['id'])
+ image_id = service['id'].split(':')[-1]
+ image = images.get(image_id)
if image is None:
image = {
- 'id': service['id'],
+ 'id': image_id,
'pool_name': metadata['pool_name'],
'name': metadata['image_name'],
'optimized_paths': [],
'non_optimized_paths': []
}
- if status.get('lock_owner', 'false') == 'true':
- daemon['optimized_paths'] += 1
- image['optimized_paths'].append(server['hostname'])
- else:
- daemon['non_optimized_paths'] += 1
- image['non_optimized_paths'].append(server['hostname'])
- images[service['id']] = image
+ images[image_id] = image
+ if status.get('lock_owner', 'false') == 'true':
+ daemon['optimized_paths'] += 1
+ image['optimized_paths'].append(server['hostname'])
+ else:
+ daemon['non_optimized_paths'] += 1
+ image['non_optimized_paths'].append(server['hostname'])
return {
'daemons': [daemons[k] for k in sorted(daemons, key=daemons.get)],
std::string get_v4_canonical_qs(const req_info& info, const bool using_qs)
{
- if (info.request_params.empty()) {
+ const std::string *params = &info.request_params;
+ std::string copy_params;
+ if (params->empty()) {
/* Optimize the typical flow. */
return std::string();
}
+ if (params->find_first_of('+') != std::string::npos) {
+ copy_params = *params;
+ boost::replace_all(copy_params, "+", " ");
+ params = &copy_params;
+ }
/* Handle case when query string exists. Step 3 described in: http://docs.
* aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html */
std::map<std::string, std::string> canonical_qs_map;
- for (const auto& s : get_str_vec<5>(info.request_params, "&")) {
+ for (const auto& s : get_str_vec<5>(*params, "&")) {
boost::string_view key, val;
const auto parsed_pair = parse_key_value(s);
if (parsed_pair) {
std::begin(parsing_buf) + consumed);
}
+ size_t stream_pos_was = stream_pos - parsing_buf.size();
+
size_t to_extract = \
- std::min(chunk_meta.get_data_size(stream_pos), buf_max);
+ std::min(chunk_meta.get_data_size(stream_pos_was), buf_max);
+ dout(30) << "AWSv4ComplMulti: stream_pos_was=" << stream_pos_was << ", to_extract=" << to_extract << dendl;
/* It's quite probable we have a couple of real data bytes stored together
* with meta-data in the parsing_buf. We need to extract them and move to
if (to_extract > 0 && parsing_buf.size() > 0) {
const auto data_len = std::min(to_extract, parsing_buf.size());
const auto data_end_iter = std::begin(parsing_buf) + data_len;
+ dout(30) << "AWSv4ComplMulti: to_extract=" << to_extract << ", data_len=" << data_len << dendl;
std::copy(std::begin(parsing_buf), data_end_iter, buf);
parsing_buf.erase(std::begin(parsing_buf), data_end_iter);
* buffering. */
while (to_extract > 0) {
const size_t received = io_base_t::recv_body(buf + buf_pos, to_extract);
+ dout(30) << "AWSv4ComplMulti: to_extract=" << to_extract << ", received=" << received << dendl;
if (received == 0) {
break;
store->set_atomic(s->obj_ctx, obj);
op_ret = get_obj_attrs(store, s, obj, attrs);
+ if (op_ret < 0) {
+ ldout(s->cct, 0) << "ERROR: failed to get obj attrs, obj=" << obj
+ << " ret=" << op_ret << dendl;
+ return;
+ }
+
auto tags = attrs.find(RGW_ATTR_TAGS);
if(tags != attrs.end()){
has_tags = true;
rgw::IAM::s3GetObjectAcl :
rgw::IAM::s3GetObjectVersionAcl);
} else {
- perm = verify_bucket_permission(s, rgw::IAM::s3GetObjectAcl);
+ perm = verify_bucket_permission(s, rgw::IAM::s3GetBucketAcl);
}
if (!perm)
return -EACCES;
ldout(cct, 5) << "ERROR: sync_all_users() returned ret=" << ret << dendl;
}
+ if (stats->going_down())
+ break;
+
lock.Lock();
cond.WaitInterval(lock, utime_t(cct->_conf->rgw_user_quota_sync_interval, 0));
lock.Unlock();
}
int len = 0;
- if (cl) {
+ {
ACCOUNTING_IO(s)->set_account(true);
bufferptr bp(cl);
endif(WITH_RBD)
add_ceph_test(run-cli-tests ${CMAKE_CURRENT_SOURCE_DIR}/run-cli-tests)
add_ceph_test(test_objectstore_memstore.sh ${CMAKE_CURRENT_SOURCE_DIR}/test_objectstore_memstore.sh)
-add_ceph_test(test_pidfile.sh ${CMAKE_CURRENT_SOURCE_DIR}/test_pidfile.sh)
+
+# buggy, see http://tracker.ceph.com/issues/20975
+#add_ceph_test(test_pidfile.sh ${CMAKE_CURRENT_SOURCE_DIR}/test_pidfile.sh)
+
add_ceph_test(test_subman.sh ${CMAKE_CURRENT_SOURCE_DIR}/test_subman.sh)
add_ceph_test(smoke.sh ${CMAKE_CURRENT_SOURCE_DIR}/smoke.sh)
add_ceph_test(unittest_bufferlist.sh ${CMAKE_SOURCE_DIR}/src/unittest_bufferlist.sh)
$ rbd snap create img@snap
$ rbd create --size 1 anotherimg
$ ceph osd pool create custom 8 >/dev/null 2>&1
+ $ rbd pool init custom
$ rbd create --size 1 custom/img
$ rbd snap create custom/img@snap
$ rbd snap create custom/img@anothersnap
struct sockaddr_in a_one;
struct sockaddr_in6 a_two;
struct sockaddr_in net;
- const struct sockaddr *result;
+ const struct ifaddrs *result;
memset(&net, '0', sizeof(net));
struct sockaddr_in a_one;
struct sockaddr_in6 a_two;
struct sockaddr_in net;
- const struct sockaddr *result;
+ const struct ifaddrs *result;
memset(&net, '0', sizeof(net));
ipv4(&net, "10.11.12.42");
result = find_ip_in_subnet(&one, (struct sockaddr*)&net, 24);
- ASSERT_EQ((struct sockaddr*)&a_one, result);
+ ASSERT_EQ((struct sockaddr*)&a_one, result->ifa_addr);
}
TEST(CommonIPAddr, TestV4_Prefix25)
struct sockaddr_in a_one;
struct sockaddr_in a_two;
struct sockaddr_in net;
- const struct sockaddr *result;
+ const struct ifaddrs *result;
memset(&net, '0', sizeof(net));
ipv4(&net, "10.11.12.128");
result = find_ip_in_subnet(&one, (struct sockaddr*)&net, 25);
- ASSERT_EQ((struct sockaddr*)&a_two, result);
+ ASSERT_EQ((struct sockaddr*)&a_two, result->ifa_addr);
}
TEST(CommonIPAddr, TestV4_Prefix16)
struct sockaddr_in a_one;
struct sockaddr_in a_two;
struct sockaddr_in net;
- const struct sockaddr *result;
+ const struct ifaddrs *result;
memset(&net, '0', sizeof(net));
ipv4(&net, "10.2.0.0");
result = find_ip_in_subnet(&one, (struct sockaddr*)&net, 16);
- ASSERT_EQ((struct sockaddr*)&a_two, result);
+ ASSERT_EQ((struct sockaddr*)&a_two, result->ifa_addr);
}
TEST(CommonIPAddr, TestV4_PrefixTooLong)
struct ifaddrs one;
struct sockaddr_in a_one;
struct sockaddr_in net;
- const struct sockaddr *result;
+ const struct ifaddrs *result;
memset(&net, '0', sizeof(net));
struct sockaddr_in6 a_one;
struct sockaddr_in a_two;
struct sockaddr_in net;
- const struct sockaddr *result;
+ const struct ifaddrs *result;
memset(&net, '0', sizeof(net));
ipv4(&net, "255.0.1.2");
result = find_ip_in_subnet(&one, (struct sockaddr*)&net, 0);
- ASSERT_EQ((struct sockaddr*)&a_two, result);
+ ASSERT_EQ((struct sockaddr*)&a_two, result->ifa_addr);
}
TEST(CommonIPAddr, TestV6_Simple)
struct sockaddr_in a_one;
struct sockaddr_in6 a_two;
struct sockaddr_in6 net;
- const struct sockaddr *result;
+ const struct ifaddrs *result;
memset(&net, '0', sizeof(net));
ipv6(&net, "2001:1234:5678:90ab::dead:beef");
result = find_ip_in_subnet(&one, (struct sockaddr*)&net, 64);
- ASSERT_EQ((struct sockaddr*)&a_two, result);
+ ASSERT_EQ((struct sockaddr*)&a_two, result->ifa_addr);
}
TEST(CommonIPAddr, TestV6_Prefix57)
struct sockaddr_in6 a_one;
struct sockaddr_in6 a_two;
struct sockaddr_in6 net;
- const struct sockaddr *result;
+ const struct ifaddrs *result;
memset(&net, '0', sizeof(net));
ipv6(&net, "2001:1234:5678:90ab::dead:beef");
result = find_ip_in_subnet(&one, (struct sockaddr*)&net, 57);
- ASSERT_EQ((struct sockaddr*)&a_two, result);
+ ASSERT_EQ((struct sockaddr*)&a_two, result->ifa_addr);
}
TEST(CommonIPAddr, TestV6_PrefixTooLong)
struct ifaddrs one;
struct sockaddr_in6 a_one;
struct sockaddr_in6 net;
- const struct sockaddr *result;
+ const struct ifaddrs *result;
memset(&net, '0', sizeof(net));
struct sockaddr_in a_one;
struct sockaddr_in6 a_two;
struct sockaddr_in6 net;
- const struct sockaddr *result;
+ const struct ifaddrs *result;
one.ifa_next = &two;
one.ifa_addr = (struct sockaddr*)&a_one;
ipv6(&net, "ff00::1");
result = find_ip_in_subnet(&one, (struct sockaddr*)&net, 0);
- ASSERT_EQ((struct sockaddr*)&a_two, result);
+ ASSERT_EQ((struct sockaddr*)&a_two, result->ifa_addr);
}
TEST(CommonIPAddr, ParseNetwork_Empty)
}
int set_size(ObjectStore *store, coll_t coll, ghobject_t &ghobj, uint64_t setsize, Formatter* formatter,
- ObjectStore::Sequencer &osr)
+ ObjectStore::Sequencer &osr, bool corrupt)
{
if (ghobj.hobj.is_snapdir()) {
cerr << "Can't set the size of a snapdir" << std::endl;
::encode(oi, attr, -1); /* fixme: using full features */
ObjectStore::Transaction t;
t.setattr(coll, ghobj, OI_ATTR, attr);
- t.truncate(coll, ghobj, setsize);
+ // Only modify object info if we want to corrupt it
+ if (!corrupt)
+ t.truncate(coll, ghobj, setsize);
if (is_snap) {
bufferlist snapattr;
snapattr.clear();
}
ret = print_obj_info(fs, coll, ghobj, formatter);
goto out;
- } else if (objcmd == "set-size") {
+ } else if (objcmd == "set-size" || objcmd == "corrupt-size") {
+ // Undocumented testing feature
+ bool corrupt = (objcmd == "corrupt-size");
// Extra arg
if (vm.count("arg1") == 0 || vm.count("arg2")) {
usage(desc);
goto out;
}
uint64_t size = atoll(arg1.c_str());
- ret = set_size(fs, coll, ghobj, size, formatter, *osr);
+ ret = set_size(fs, coll, ghobj, size, formatter, *osr, corrupt);
goto out;
} else if (objcmd == "clear-snapset") {
// UNDOCUMENTED: For testing zap SnapSet
f.dump_string("error", "oi_attr_missing");
if (err.has_oi_attr_corrupted())
f.dump_string("error", "oi_attr_corrupted");
+ if (err.has_obj_size_oi_mismatch())
+ f.dump_string("error", "obj_size_oi_mismatch");
+ if (err.has_ss_attr_missing())
+ f.dump_string("error", "ss_attr_missing");
+ if (err.has_ss_attr_corrupted())
+ f.dump_string("error", "ss_attr_corrupted");
f.close_section();
}
::decode(oi, bliter); // Can't be corrupted
f.dump_stream("object_info") << oi;
}
- if (inc.has_attr_name_mismatch() || inc.has_attr_value_mismatch()) {
+ if (inc.has_attr_name_mismatch() || inc.has_attr_value_mismatch()
+ || inc.union_shards.has_oi_attr_missing()
+ || inc.union_shards.has_oi_attr_corrupted()
+ || inc.union_shards.has_ss_attr_missing()
+ || inc.union_shards.has_ss_attr_corrupted()) {
f.open_array_section("attrs");
for (auto kv : shard.attrs) {
f.open_object_section("attr");
f.open_object_section("shard");
auto& osd_shard = shard_info.first;
f.dump_int("osd", osd_shard.osd);
+ f.dump_bool("primary", shard_info.second.primary);
auto shard = osd_shard.shard;
if (shard != shard_id_t::NO_SHARD)
f.dump_unsigned("shard", shard);
m_remote_image_ctx->stripe_unit);
image_options.set(RBD_IMAGE_OPTION_STRIPE_COUNT,
m_remote_image_ctx->stripe_count);
+ image_options.set(RBD_IMAGE_OPTION_DATA_POOL,
+ m_remote_image_ctx->data_ctx.get_pool_name());
librbd::image::CreateRequest<I> *req = librbd::image::CreateRequest<I>::create(
m_local_io_ctx, m_local_image_name, m_local_image_id,